Update aosp/master clang for rebase to r233350

Change-Id: I12d4823f10bc9e445b8b86e7721b71f98d1df442
diff --git a/test/CodeGen/2005-12-04-DeclarationLineNumbers.c b/test/CodeGen/2005-12-04-DeclarationLineNumbers.c
deleted file mode 100644
index 596d3ee..0000000
--- a/test/CodeGen/2005-12-04-DeclarationLineNumbers.c
+++ /dev/null
@@ -1,23 +0,0 @@
-// RUN: %clang_cc1 %s -emit-llvm -g -o - | grep DW_TAG_compile_unit | count 1
-// PR664: ensure that line #'s are emitted for declarations
-
-
-short test(short br_data_0,
-short br_data_1,
-short br_data_2,
-short br_data_3,
-short br_data_4,
-short br_data_5,
-short br_data_6,
-short br_data_7) {
-
-short sm07 = br_data_0 + br_data_7;
-short sm16 = br_data_1 + br_data_6;
-short sm25 = br_data_2 + br_data_5;
-short sm34 = br_data_3 + br_data_4;
-short s0734 = sm07 + sm34;
-short s1625 = sm16 + sm25;
-
-return s0734 + s1625;
-}
-
diff --git a/test/CodeGen/2007-03-27-VarLengthArray.c b/test/CodeGen/2007-03-27-VarLengthArray.c
index ec11f55..db92dab 100644
--- a/test/CodeGen/2007-03-27-VarLengthArray.c
+++ b/test/CodeGen/2007-03-27-VarLengthArray.c
@@ -1,6 +1,6 @@
 // RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s
 
-// CHECK: getelementptr inbounds i32* %{{vla|[0-9]}}
+// CHECK: getelementptr inbounds i32, i32* %{{vla|[0-9]}}
 extern void f(int *);
 int e(int m, int n) {
   int x[n];
diff --git a/test/CodeGen/2008-08-07-AlignPadding1.c b/test/CodeGen/2008-08-07-AlignPadding1.c
index 2bb2e61..74468a8 100644
--- a/test/CodeGen/2008-08-07-AlignPadding1.c
+++ b/test/CodeGen/2008-08-07-AlignPadding1.c
@@ -22,7 +22,7 @@
 
 // The idea is that there are 6 undefs in this structure initializer to cover
 // the padding between elements.
-// CHECK: @generations = global [3 x %struct.gc_generation] [%struct.gc_generation { %union._gc_head { %struct.anon { %union._gc_head* getelementptr inbounds ([3 x %struct.gc_generation]* @generations, i32 0, i32 0, i32 0), %union._gc_head* getelementptr inbounds ([3 x %struct.gc_generation]* @generations, i32 0, i32 0, i32 0), i64 0 }, [8 x i8] undef }, i32 700, i32 0, [8 x i8] undef }, %struct.gc_generation { %union._gc_head { %struct.anon { %union._gc_head* bitcast (i8* getelementptr (i8* bitcast ([3 x %struct.gc_generation]* @generations to i8*), i64 48) to %union._gc_head*), %union._gc_head* bitcast (i8* getelementptr (i8* bitcast ([3 x %struct.gc_generation]* @generations to i8*), i64 48) to %union._gc_head*), i64 0 }, [8 x i8] undef }, i32 10, i32 0, [8 x i8] undef }, %struct.gc_generation { %union._gc_head { %struct.anon { %union._gc_head* bitcast (i8* getelementptr (i8* bitcast ([3 x %struct.gc_generation]* @generations to i8*), i64 96) to %union._gc_head*), %union._gc_head* bitcast (i8* getelementptr (i8* bitcast ([3 x %struct.gc_generation]* @generations to i8*), i64 96) to %union._gc_head*), i64 0 }, [8 x i8] undef }, i32 10, i32 0, [8 x i8] undef }]
+// CHECK: @generations = global [3 x %struct.gc_generation] [%struct.gc_generation { %union._gc_head { %struct.anon { %union._gc_head* getelementptr inbounds ([3 x %struct.gc_generation], [3 x %struct.gc_generation]* @generations, i32 0, i32 0, i32 0), %union._gc_head* getelementptr inbounds ([3 x %struct.gc_generation], [3 x %struct.gc_generation]* @generations, i32 0, i32 0, i32 0), i64 0 }, [8 x i8] undef }, i32 700, i32 0, [8 x i8] undef }, %struct.gc_generation { %union._gc_head { %struct.anon { %union._gc_head* bitcast (i8* getelementptr (i8, i8* bitcast ([3 x %struct.gc_generation]* @generations to i8*), i64 48) to %union._gc_head*), %union._gc_head* bitcast (i8* getelementptr (i8, i8* bitcast ([3 x %struct.gc_generation]* @generations to i8*), i64 48) to %union._gc_head*), i64 0 }, [8 x i8] undef }, i32 10, i32 0, [8 x i8] undef }, %struct.gc_generation { %union._gc_head { %struct.anon { %union._gc_head* bitcast (i8* getelementptr (i8, i8* bitcast ([3 x %struct.gc_generation]* @generations to i8*), i64 96) to %union._gc_head*), %union._gc_head* bitcast (i8* getelementptr (i8, i8* bitcast ([3 x %struct.gc_generation]* @generations to i8*), i64 96) to %union._gc_head*), i64 0 }, [8 x i8] undef }, i32 10, i32 0, [8 x i8] undef }]
 /* linked lists of container objects */
 struct gc_generation generations[3] = {
         /* PyGC_Head,                           threshold,      count */
diff --git a/test/CodeGen/2009-02-13-zerosize-union-field.c b/test/CodeGen/2009-02-13-zerosize-union-field.c
index b39a231..e6f0247 100644
--- a/test/CodeGen/2009-02-13-zerosize-union-field.c
+++ b/test/CodeGen/2009-02-13-zerosize-union-field.c
@@ -5,12 +5,12 @@
 typedef union{int x; Foo:0;}b;
 extern int printf(const char*, ...);
 int main() {
-  // CHECK: getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 0
+  // CHECK: getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 0
   printf("%ld\n", sizeof(a));
-  // CHECK: getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 1
+  // CHECK: getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 1
   printf("%ld\n", __alignof__(a));
-  // CHECK: getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 4
+  // CHECK: getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 4
   printf("%ld\n", sizeof(b));
-  // CHECK: getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 4
+  // CHECK: getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 4
   printf("%ld\n", __alignof__(b));
 }
diff --git a/test/CodeGen/2009-10-20-GlobalDebug.c b/test/CodeGen/2009-10-20-GlobalDebug.c
index e56f227..44be13a 100644
--- a/test/CodeGen/2009-10-20-GlobalDebug.c
+++ b/test/CodeGen/2009-10-20-GlobalDebug.c
@@ -6,5 +6,11 @@
   return 0;
 }
 
-// CHECK:  !"0x34\00localstatic\00localstatic\00\005\001\001", !{{.*}}, !{{.*}}, !{{.*}}, i32* @main.localstatic, null} ; [ DW_TAG_variable ]
-// CHECK:  !"0x34\00global\00global\00\003\000\001", null, !{{.*}}, !{{.*}}, i32* @global, null} ; [ DW_TAG_variable ]
+// CHECK: !MDGlobalVariable(name: "localstatic"
+// CHECK-NOT:               linkageName:
+// CHECK-SAME:              line: 5,
+// CHECK-SAME:              variable: i32* @main.localstatic
+// CHECK: !MDGlobalVariable(name: "global"
+// CHECK-NOT:               linkageName:
+// CHECK-SAME:              line: 3,
+// CHECK-SAME:              variable: i32* @global
diff --git a/test/CodeGen/2010-02-15-DbgStaticVar.c b/test/CodeGen/2010-02-15-DbgStaticVar.c
index 8980b60..60302d6 100644
--- a/test/CodeGen/2010-02-15-DbgStaticVar.c
+++ b/test/CodeGen/2010-02-15-DbgStaticVar.c
@@ -11,4 +11,6 @@
 	int j = foo(1);
 	return 0;
 }
-// CHECK: !"0x34\00b\00b\00\00{{.*}}",
+// CHECK: !MDGlobalVariable(name: "b",
+// CHECK-NOT:               linkageName:
+// CHECK-SAME:              ){{$}}
diff --git a/test/CodeGen/2010-02-16-DbgScopes.c b/test/CodeGen/2010-02-16-DbgScopes.c
index 36484a4..daae53d 100644
--- a/test/CodeGen/2010-02-16-DbgScopes.c
+++ b/test/CodeGen/2010-02-16-DbgScopes.c
@@ -1,9 +1,9 @@
 // RUN: %clang_cc1 -emit-llvm -g < %s | FileCheck %s
 // Test to check number of lexical scope identified in debug info.
-// CHECK: DW_TAG_lexical_block
-// CHECK: DW_TAG_lexical_block
-// CHECK: DW_TAG_lexical_block
-// CHECK: DW_TAG_lexical_block
+// CHECK: !MDLexicalBlock(
+// CHECK: !MDLexicalBlock(
+// CHECK: !MDLexicalBlock(
+// CHECK: !MDLexicalBlock(
 
 extern int bar();
 extern void foobar();
diff --git a/test/CodeGen/2010-03-09-DbgInfo.c b/test/CodeGen/2010-03-09-DbgInfo.c
index 3541e5f..3a98e4c 100644
--- a/test/CodeGen/2010-03-09-DbgInfo.c
+++ b/test/CodeGen/2010-03-09-DbgInfo.c
@@ -1,2 +1,3 @@
-// RUN: %clang -emit-llvm -S -O0 -g %s -o - | grep DW_TAG_variable
+// RUN: %clang -emit-llvm -S -O0 -g %s -o - | FileCheck %s
+// CHECK: !MDGlobalVariable(
 unsigned char ctable1[1] = { 0001 };
diff --git a/test/CodeGen/2010-03-5-LexicalScope.c b/test/CodeGen/2010-03-5-LexicalScope.c
index 8dc68d7..bec7d84 100644
--- a/test/CodeGen/2010-03-5-LexicalScope.c
+++ b/test/CodeGen/2010-03-5-LexicalScope.c
@@ -1,6 +1,6 @@
 // RUN: %clang_cc1 -emit-llvm -g %s -o - | FileCheck %s
-// CHECK: DW_TAG_lexical_block
-// CHECK: DW_TAG_lexical_block
+// CHECK: !MDLexicalBlock(
+// CHECK: !MDLexicalBlock(
 int foo(int i) {
 	if (i) {
 		int j = 2;
diff --git a/test/CodeGen/2010-07-14-overconservative-align.c b/test/CodeGen/2010-07-14-overconservative-align.c
index 5c8c056..90e694d 100644
--- a/test/CodeGen/2010-07-14-overconservative-align.c
+++ b/test/CodeGen/2010-07-14-overconservative-align.c
@@ -9,6 +9,6 @@
 
 void func (struct s *s)
 {
-  // CHECK: load %struct.s**{{.*}}align 8
+  // CHECK: load %struct.s*, %struct.s**{{.*}}align 8
   s->word = 0;
 }
diff --git a/test/CodeGen/2010-07-14-ref-off-end.c b/test/CodeGen/2010-07-14-ref-off-end.c
index 580ae88..5ef4076 100644
--- a/test/CodeGen/2010-07-14-ref-off-end.c
+++ b/test/CodeGen/2010-07-14-ref-off-end.c
@@ -14,8 +14,8 @@
 }
 main()
 {
-// CHECK:  getelementptr inbounds [1 x %struct.T]* %s, i32 0, i32 0
-// CHECK:  getelementptr inbounds [1 x %struct.T]* %s, i32 0, i32 0
+// CHECK:  getelementptr inbounds [1 x %struct.T], [1 x %struct.T]* %s, i32 0, i32 0
+// CHECK:  getelementptr inbounds [1 x %struct.T], [1 x %struct.T]* %s, i32 0, i32 0
 struct T t;
 t.i=0xff;
 t.c=0xffff11;
diff --git a/test/CodeGen/2010-08-10-DbgConstant.c b/test/CodeGen/2010-08-10-DbgConstant.c
index 5b8f064..e07a184 100644
--- a/test/CodeGen/2010-08-10-DbgConstant.c
+++ b/test/CodeGen/2010-08-10-DbgConstant.c
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -S -emit-llvm -g  %s -o - | grep DW_TAG_variable
+// RUN: %clang_cc1 -S -emit-llvm -g  %s -o - | FileCheck %s
+// CHECK: !MDGlobalVariable(
 
 static const unsigned int ro = 201;
 void bar(int);
diff --git a/test/CodeGen/24-bit.c b/test/CodeGen/24-bit.c
index 9dd0157..ad3076a 100644
--- a/test/CodeGen/24-bit.c
+++ b/test/CodeGen/24-bit.c
@@ -9,6 +9,6 @@
 void callee_ibt0f(union ibtt2 ibtp5);
 
 void test(void) {
-// CHECK: = load i32*
+// CHECK: = load i32, i32*
   callee_ibt0f(ibt15);
 }
diff --git a/test/CodeGen/aarch64-inline-asm.c b/test/CodeGen/aarch64-inline-asm.c
index c7ce375..a1078f1 100644
--- a/test/CodeGen/aarch64-inline-asm.c
+++ b/test/CodeGen/aarch64-inline-asm.c
@@ -8,11 +8,11 @@
 
 void test_generic_constraints(int var32, long var64) {
     asm("add %0, %1, %1" : "=r"(var32) : "0"(var32));
-// CHECK: [[R32_ARG:%[a-zA-Z0-9]+]] = load i32*
+// CHECK: [[R32_ARG:%[a-zA-Z0-9]+]] = load i32, i32*
 // CHECK: call i32 asm "add $0, $1, $1", "=r,0"(i32 [[R32_ARG]])
 
     asm("add %0, %1, %1" : "=r"(var64) : "0"(var64));
-// CHECK: [[R32_ARG:%[a-zA-Z0-9]+]] = load i64*
+// CHECK: [[R32_ARG:%[a-zA-Z0-9]+]] = load i64, i64*
 // CHECK: call i64 asm "add $0, $1, $1", "=r,0"(i64 [[R32_ARG]])
 
     asm("ldr %0, %1" : "=r"(var32) : "m"(var));
@@ -25,11 +25,11 @@
 double d;
 void test_constraint_w() {
     asm("fadd %s0, %s1, %s1" : "=w"(f) : "w"(f));
-// CHECK: [[FLT_ARG:%[a-zA-Z_0-9]+]] = load float* @f
+// CHECK: [[FLT_ARG:%[a-zA-Z_0-9]+]] = load float, float* @f
 // CHECK: call float asm "fadd ${0:s}, ${1:s}, ${1:s}", "=w,w"(float [[FLT_ARG]])
 
     asm("fadd %d0, %d1, %d1" : "=w"(d) : "w"(d));
-// CHECK: [[DBL_ARG:%[a-zA-Z_0-9]+]] = load double* @d
+// CHECK: [[DBL_ARG:%[a-zA-Z_0-9]+]] = load double, double* @d
 // CHECK: call double asm "fadd ${0:d}, ${1:d}, ${1:d}", "=w,w"(double [[DBL_ARG]])
 }
 
diff --git a/test/CodeGen/aarch64-varargs.c b/test/CodeGen/aarch64-varargs.c
index 248f1c1..4343371 100644
--- a/test/CodeGen/aarch64-varargs.c
+++ b/test/CodeGen/aarch64-varargs.c
@@ -11,19 +11,19 @@
 int simple_int(void) {
 // CHECK-LABEL: define i32 @simple_int
   return va_arg(the_list, int);
-// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
 // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
 
 // CHECK: [[VAARG_MAYBE_REG]]
 // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8
-// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
 // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
 
 // CHECK: [[VAARG_IN_REG]]
-// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 1)
-// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8, i8* [[REG_TOP]], i32 [[GR_OFFS]]
 // CHECK-BE: [[REG_ADDR_VAL:%[0-9]+]] = ptrtoint i8* [[REG_ADDR]] to i64
 // CHECK-BE: [[REG_ADDR_VAL_ALIGNED:%[a-z_0-9]*]] = add i64 [[REG_ADDR_VAL]], 4
 // CHECK-BE: [[REG_ADDR:%[0-9]+]] = inttoptr i64 [[REG_ADDR_VAL_ALIGNED]] to i8*
@@ -31,9 +31,9 @@
 // CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
 
 // CHECK: [[VAARG_ON_STACK]]
-// CHECK: [[STACK:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
-// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8* [[STACK]], i32 8
-// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8, i8* [[STACK]], i32 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
 // CHECK-BE: [[STACK_VAL:%[0-9]+]] = ptrtoint i8* [[STACK]] to i64
 // CHECK-BE: [[STACK_VAL_ALIGNED:%[a-z_0-9]*]] = add i64 [[STACK_VAL]], 4
 // CHECK-BE: [[STACK:%[0-9]+]] = inttoptr i64 [[STACK_VAL_ALIGNED]] to i8*
@@ -42,14 +42,14 @@
 
 // CHECK: [[VAARG_END]]
 // CHECK: [[ADDR:%[a-z._0-9]+]] = phi i32* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
-// CHECK: [[RESULT:%[a-z_0-9]+]] = load i32* [[ADDR]]
+// CHECK: [[RESULT:%[a-z_0-9]+]] = load i32, i32* [[ADDR]]
 // CHECK: ret i32 [[RESULT]]
 }
 
 __int128 aligned_int(void) {
 // CHECK-LABEL: define i128 @aligned_int
   return va_arg(the_list, __int128);
-// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
 // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
 
@@ -57,30 +57,30 @@
 // CHECK: [[ALIGN_REGOFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 15
 // CHECK: [[ALIGNED_REGOFFS:%[a-z_0-9]+]] = and i32 [[ALIGN_REGOFFS]], -16
 // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[ALIGNED_REGOFFS]], 16
-// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
 // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
 
 // CHECK: [[VAARG_IN_REG]]
-// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 1)
-// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[REG_TOP]], i32 [[ALIGNED_REGOFFS]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8, i8* [[REG_TOP]], i32 [[ALIGNED_REGOFFS]]
 // CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to i128*
 // CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
 
 // CHECK: [[VAARG_ON_STACK]]
-// CHECK: [[STACK:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
 // CHECK: [[STACKINT:%[a-z_0-9]+]] = ptrtoint i8* [[STACK]] to i64
 // CHECK: [[ALIGN_STACK:%[a-z_0-9]+]] = add i64 [[STACKINT]], 15
 // CHECK: [[ALIGNED_STACK_INT:%[a-z_0-9]+]] = and i64 [[ALIGN_STACK]], -16
 // CHECK: [[ALIGNED_STACK_PTR:%[a-z_0-9]+]] = inttoptr i64 [[ALIGNED_STACK_INT]] to i8*
-// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8* [[ALIGNED_STACK_PTR]], i32 16
-// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8, i8* [[ALIGNED_STACK_PTR]], i32 16
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
 // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[ALIGNED_STACK_PTR]] to i128*
 // CHECK: br label %[[VAARG_END]]
 
 // CHECK: [[VAARG_END]]
 // CHECK: [[ADDR:%[a-z._0-9]+]] = phi i128* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
-// CHECK: [[RESULT:%[a-z_0-9]+]] = load i128* [[ADDR]]
+// CHECK: [[RESULT:%[a-z_0-9]+]] = load i128, i128* [[ADDR]]
 // CHECK: ret i128 [[RESULT]]
 }
 
@@ -91,34 +91,34 @@
 struct bigstruct simple_indirect(void) {
 // CHECK-LABEL: define void @simple_indirect
   return va_arg(the_list, struct bigstruct);
-// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
 // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
 
 // CHECK: [[VAARG_MAYBE_REG]]
 // CHECK-NOT: and i32
 // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8
-// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
 // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
 
 // CHECK: [[VAARG_IN_REG]]
-// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 1)
-// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8, i8* [[REG_TOP]], i32 [[GR_OFFS]]
 // CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.bigstruct**
 // CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
 
 // CHECK: [[VAARG_ON_STACK]]
-// CHECK: [[STACK:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
 // CHECK-NOT: and i64
-// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8* [[STACK]], i32 8
-// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8, i8* [[STACK]], i32 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
 // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.bigstruct**
 // CHECK: br label %[[VAARG_END]]
 
 // CHECK: [[VAARG_END]]
 // CHECK: [[ADDR:%[a-z._0-9]+]] = phi %struct.bigstruct** [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
-// CHECK: load %struct.bigstruct** [[ADDR]]
+// CHECK: load %struct.bigstruct*, %struct.bigstruct** [[ADDR]]
 }
 
 struct aligned_bigstruct {
@@ -129,50 +129,50 @@
 struct aligned_bigstruct simple_aligned_indirect(void) {
 // CHECK-LABEL: define void @simple_aligned_indirect
   return va_arg(the_list, struct aligned_bigstruct);
-// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
 // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
 
 // CHECK: [[VAARG_MAYBE_REG]]
 // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8
-// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
 // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
 
 // CHECK: [[VAARG_IN_REG]]
-// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 1)
-// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8, i8* [[REG_TOP]], i32 [[GR_OFFS]]
 // CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.aligned_bigstruct**
 // CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
 
 // CHECK: [[VAARG_ON_STACK]]
-// CHECK: [[STACK:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
-// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8* [[STACK]], i32 8
-// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8, i8* [[STACK]], i32 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
 // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.aligned_bigstruct**
 // CHECK: br label %[[VAARG_END]]
 
 // CHECK: [[VAARG_END]]
 // CHECK: [[ADDR:%[a-z._0-9]+]] = phi %struct.aligned_bigstruct** [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
-// CHECK: load %struct.aligned_bigstruct** [[ADDR]]
+// CHECK: load %struct.aligned_bigstruct*, %struct.aligned_bigstruct** [[ADDR]]
 }
 
 double simple_double(void) {
 // CHECK-LABEL: define double @simple_double
   return va_arg(the_list, double);
-// CHECK: [[VR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 4)
+// CHECK: [[VR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 4)
 // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[VR_OFFS]], 0
 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG]]
 
 // CHECK: [[VAARG_MAYBE_REG]]
 // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[VR_OFFS]], 16
-// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 4)
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 4)
 // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
 
 // CHECK: [[VAARG_IN_REG]]
-// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 2)
-// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[REG_TOP]], i32 [[VR_OFFS]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 2)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8, i8* [[REG_TOP]], i32 [[VR_OFFS]]
 // CHECK-BE: [[REG_ADDR_VAL:%[0-9]+]] = ptrtoint i8* [[REG_ADDR]] to i64
 // CHECK-BE: [[REG_ADDR_VAL_ALIGNED:%[a-z_0-9]*]] = add i64 [[REG_ADDR_VAL]], 8
 // CHECK-BE: [[REG_ADDR:%[0-9]+]] = inttoptr i64 [[REG_ADDR_VAL_ALIGNED]] to i8*
@@ -180,15 +180,15 @@
 // CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
 
 // CHECK: [[VAARG_ON_STACK]]
-// CHECK: [[STACK:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
-// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8* [[STACK]], i32 8
-// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8, i8* [[STACK]], i32 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
 // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to double*
 // CHECK: br label %[[VAARG_END]]
 
 // CHECK: [[VAARG_END]]
 // CHECK: [[ADDR:%[a-z._0-9]+]] = phi double* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
-// CHECK: [[RESULT:%[a-z_0-9]+]] = load double* [[ADDR]]
+// CHECK: [[RESULT:%[a-z_0-9]+]] = load double, double* [[ADDR]]
 // CHECK: ret double [[RESULT]]
 }
 
@@ -199,38 +199,38 @@
 struct hfa simple_hfa(void) {
 // CHECK-LABEL: define %struct.hfa @simple_hfa
   return va_arg(the_list, struct hfa);
-// CHECK: [[VR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 4)
+// CHECK: [[VR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 4)
 // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[VR_OFFS]], 0
 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
 
 // CHECK: [[VAARG_MAYBE_REG]]
 // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[VR_OFFS]], 32
-// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 4)
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 4)
 // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
 
 // CHECK: [[VAARG_IN_REG]]
-// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 2)
-// CHECK: [[FIRST_REG:%[a-z_0-9]+]] = getelementptr i8* [[REG_TOP]], i32 [[VR_OFFS]]
-// CHECK-LE: [[EL_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[FIRST_REG]], i32 0
-// CHECK-BE: [[EL_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[FIRST_REG]], i32 12
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 2)
+// CHECK: [[FIRST_REG:%[a-z_0-9]+]] = getelementptr i8, i8* [[REG_TOP]], i32 [[VR_OFFS]]
+// CHECK-LE: [[EL_ADDR:%[a-z_0-9]+]] = getelementptr i8, i8* [[FIRST_REG]], i32 0
+// CHECK-BE: [[EL_ADDR:%[a-z_0-9]+]] = getelementptr i8, i8* [[FIRST_REG]], i32 12
 // CHECK: [[EL_TYPED:%[a-z_0-9]+]] = bitcast i8* [[EL_ADDR]] to float*
-// CHECK: [[EL_TMPADDR:%[a-z_0-9]+]] = getelementptr inbounds [2 x float]* %[[TMP_HFA:[a-z_.0-9]+]], i32 0, i32 0
-// CHECK: [[EL:%[a-z_0-9]+]] = load float* [[EL_TYPED]]
+// CHECK: [[EL_TMPADDR:%[a-z_0-9]+]] = getelementptr inbounds [2 x float], [2 x float]* %[[TMP_HFA:[a-z_.0-9]+]], i32 0, i32 0
+// CHECK: [[EL:%[a-z_0-9]+]] = load float, float* [[EL_TYPED]]
 // CHECK: store float [[EL]], float* [[EL_TMPADDR]]
-// CHECK-LE: [[EL_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[FIRST_REG]], i32 16
-// CHECK-BE: [[EL_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[FIRST_REG]], i32 28
+// CHECK-LE: [[EL_ADDR:%[a-z_0-9]+]] = getelementptr i8, i8* [[FIRST_REG]], i32 16
+// CHECK-BE: [[EL_ADDR:%[a-z_0-9]+]] = getelementptr i8, i8* [[FIRST_REG]], i32 28
 // CHECK: [[EL_TYPED:%[a-z_0-9]+]] = bitcast i8* [[EL_ADDR]] to float*
-// CHECK: [[EL_TMPADDR:%[a-z_0-9]+]] = getelementptr inbounds [2 x float]* %[[TMP_HFA]], i32 0, i32 1
-// CHECK: [[EL:%[a-z_0-9]+]] = load float* [[EL_TYPED]]
+// CHECK: [[EL_TMPADDR:%[a-z_0-9]+]] = getelementptr inbounds [2 x float], [2 x float]* %[[TMP_HFA]], i32 0, i32 1
+// CHECK: [[EL:%[a-z_0-9]+]] = load float, float* [[EL_TYPED]]
 // CHECK: store float [[EL]], float* [[EL_TMPADDR]]
 // CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast [2 x float]* %[[TMP_HFA]] to %struct.hfa*
 // CHECK: br label %[[VAARG_END:[a-z_.0-9]+]]
 
 // CHECK: [[VAARG_ON_STACK]]
-// CHECK: [[STACK:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
-// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8* [[STACK]], i32 8
-// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8, i8* [[STACK]], i32 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
 // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.hfa*
 // CHECK: br label %[[VAARG_END]]
 
diff --git a/test/CodeGen/address-space-field1.c b/test/CodeGen/address-space-field1.c
index c6b3181..109c692 100644
--- a/test/CodeGen/address-space-field1.c
+++ b/test/CodeGen/address-space-field1.c
@@ -5,17 +5,17 @@
 // CHECK:  [[p2addr:%.*]] = alloca %struct.S addrspace(2)*
 // CHECK:  store %struct.S addrspace(1)* %p1, %struct.S addrspace(1)** [[p1addr]]
 // CHECK:  store %struct.S addrspace(2)* %p2, %struct.S addrspace(2)** [[p2addr]]
-// CHECK:  [[t0:%.*]] = load %struct.S addrspace(2)** [[p2addr]], align 8 
-// CHECK:  [[t1:%.*]] = getelementptr inbounds %struct.S addrspace(2)* [[t0]], i32 0, i32 1
-// CHECK:  [[t2:%.*]] = load i32 addrspace(2)* [[t1]], align 4
-// CHECK:  [[t3:%.*]] = load %struct.S addrspace(1)** [[p1addr]], align 8  
-// CHECK:  [[t4:%.*]] = getelementptr inbounds %struct.S addrspace(1)* [[t3]], i32 0, i32 0 
+// CHECK:  [[t0:%.*]] = load %struct.S addrspace(2)*, %struct.S addrspace(2)** [[p2addr]], align 8 
+// CHECK:  [[t1:%.*]] = getelementptr inbounds %struct.S, %struct.S addrspace(2)* [[t0]], i32 0, i32 1
+// CHECK:  [[t2:%.*]] = load i32, i32 addrspace(2)* [[t1]], align 4
+// CHECK:  [[t3:%.*]] = load %struct.S addrspace(1)*, %struct.S addrspace(1)** [[p1addr]], align 8  
+// CHECK:  [[t4:%.*]] = getelementptr inbounds %struct.S, %struct.S addrspace(1)* [[t3]], i32 0, i32 0 
 // CHECK:  store i32 [[t2]], i32 addrspace(1)* [[t4]], align 4
-// CHECK:  [[t5:%.*]] = load %struct.S addrspace(2)** [[p2addr]], align 8  
-// CHECK:  [[t6:%.*]] = getelementptr inbounds %struct.S addrspace(2)* [[t5]], i32 0, i32 0 
-// CHECK:  [[t7:%.*]] = load i32 addrspace(2)* [[t6]], align 4            
-// CHECK:  [[t8:%.*]] = load %struct.S addrspace(1)** [[p1addr]], align 8  
-// CHECK:  [[t9:%.*]] = getelementptr inbounds %struct.S addrspace(1)* [[t8]], i32 0, i32 1 
+// CHECK:  [[t5:%.*]] = load %struct.S addrspace(2)*, %struct.S addrspace(2)** [[p2addr]], align 8  
+// CHECK:  [[t6:%.*]] = getelementptr inbounds %struct.S, %struct.S addrspace(2)* [[t5]], i32 0, i32 0 
+// CHECK:  [[t7:%.*]] = load i32, i32 addrspace(2)* [[t6]], align 4            
+// CHECK:  [[t8:%.*]] = load %struct.S addrspace(1)*, %struct.S addrspace(1)** [[p1addr]], align 8  
+// CHECK:  [[t9:%.*]] = getelementptr inbounds %struct.S, %struct.S addrspace(1)* [[t8]], i32 0, i32 1 
 // CHECK:  store i32 [[t7]], i32 addrspace(1)* [[t9]], align 4
 // CHECK:  ret void
 // CHECK:}
diff --git a/test/CodeGen/address-space.c b/test/CodeGen/address-space.c
index 110406e..61deb26 100644
--- a/test/CodeGen/address-space.c
+++ b/test/CodeGen/address-space.c
@@ -7,11 +7,11 @@
 int ban[10] __attribute__((address_space(1)));
 
 // CHECK-LABEL: define i32 @test1() 
-// CHECK: load i32 addrspace(1)* @foo
+// CHECK: load i32, i32 addrspace(1)* @foo
 int test1() { return foo; }
 
 // CHECK-LABEL: define i32 @test2(i32 %i) 
-// CHECK: load i32 addrspace(1)*
+// CHECK: load i32, i32 addrspace(1)*
 // CHECK-NEXT: ret i32
 int test2(int i) { return ban[i]; }
 
@@ -19,9 +19,9 @@
 __attribute__((address_space(2))) int *A, *B;
 
 // CHECK-LABEL: define void @test3()
-// CHECK: load i32 addrspace(2)** @B
-// CHECK: load i32 addrspace(2)*
-// CHECK: load i32 addrspace(2)** @A
+// CHECK: load i32 addrspace(2)*, i32 addrspace(2)** @B
+// CHECK: load i32, i32 addrspace(2)*
+// CHECK: load i32 addrspace(2)*, i32 addrspace(2)** @A
 // CHECK: store i32 {{.*}}, i32 addrspace(2)*
 void test3() {
   *A = *B;
diff --git a/test/CodeGen/alignment.c b/test/CodeGen/alignment.c
index 04d6aac..0a59801 100644
--- a/test/CodeGen/alignment.c
+++ b/test/CodeGen/alignment.c
@@ -23,7 +23,7 @@
   return *p;
 }
 // CHECK: @test1a(
-// CHECK: load i32* {{.*}}, align 1
+// CHECK: load i32, i32* {{.*}}, align 1
 // CHECK: ret i32
 
 
diff --git a/test/CodeGen/annotations-builtin.c b/test/CodeGen/annotations-builtin.c
index 7938e49..8a3b3ff 100644
--- a/test/CodeGen/annotations-builtin.c
+++ b/test/CodeGen/annotations-builtin.c
@@ -28,7 +28,7 @@
 // CHECK: call i64 @llvm.annotation.i64
 
     int inta = __builtin_annotation(intfoo, "annotation_a");
-// CHECK: load i32* @intfoo
+// CHECK: load i32, i32* @intfoo
 // CHECK-NEXT: call i32 @llvm.annotation.i32
 // CHECK-NEXT: store
 
diff --git a/test/CodeGen/annotations-field.c b/test/CodeGen/annotations-field.c
index 6b44367..02bae15 100644
--- a/test/CodeGen/annotations-field.c
+++ b/test/CodeGen/annotations-field.c
@@ -13,7 +13,7 @@
 int main(int argc, char **argv) {
     struct foo f;
     f.v = argc;
-// CHECK: getelementptr inbounds %struct.foo* %f, i32 0, i32 0
+// CHECK: getelementptr inbounds %struct.foo, %struct.foo* %f, i32 0, i32 0
 // CHECK-NEXT: bitcast i32* {{.*}} to i8*
 // CHECK-NEXT: call i8* @llvm.ptr.annotation.p0i8({{.*}}str{{.*}}str{{.*}}i32 8)
 // CHECK-NEXT: bitcast i8* {{.*}} to i32*
@@ -21,7 +21,7 @@
 // CHECK-NEXT: call i8* @llvm.ptr.annotation.p0i8({{.*}}str{{.*}}str{{.*}}i32 8)
 // CHECK-NEXT: bitcast i8* {{.*}} to i32*
     gf.v = argc;
-// CHECK: bitcast i32* getelementptr inbounds (%struct.foo* @gf, i32 0, i32 0) to i8*
+// CHECK: bitcast i32* getelementptr inbounds (%struct.foo, %struct.foo* @gf, i32 0, i32 0) to i8*
 // CHECK-NEXT: call i8* @llvm.ptr.annotation.p0i8({{.*}}str{{.*}}str{{.*}}i32 8)
     return 0;
 }
diff --git a/test/CodeGen/annotations-var.c b/test/CodeGen/annotations-var.c
index da9e0b6..6e8ad34 100644
--- a/test/CodeGen/annotations-var.c
+++ b/test/CodeGen/annotations-var.c
@@ -34,9 +34,9 @@
 // LOCAL-LABEL: define void @local()
 // LOCAL:      [[LOCALVAR:%.*]] = alloca i32,
 // LOCAL-NEXT: [[T0:%.*]] = bitcast i32* [[LOCALVAR]] to i8*
-// LOCAL-NEXT: call void @llvm.var.annotation(i8* [[T0]], i8* getelementptr inbounds ([15 x i8]* @{{.*}}), i8* getelementptr inbounds ({{.*}}), i32 33)
+// LOCAL-NEXT: call void @llvm.var.annotation(i8* [[T0]], i8* getelementptr inbounds ([15 x i8], [15 x i8]* @{{.*}}), i8* getelementptr inbounds ({{.*}}), i32 33)
 // LOCAL-NEXT: [[T0:%.*]] = bitcast i32* [[LOCALVAR]] to i8*
-// LOCAL-NEXT: call void @llvm.var.annotation(i8* [[T0]], i8* getelementptr inbounds ([15 x i8]* @{{.*}}), i8* getelementptr inbounds ({{.*}}), i32 33)
+// LOCAL-NEXT: call void @llvm.var.annotation(i8* [[T0]], i8* getelementptr inbounds ([15 x i8], [15 x i8]* @{{.*}}), i8* getelementptr inbounds ({{.*}}), i32 33)
 }
 
 void undef(void) {
@@ -44,5 +44,5 @@
 // UNDEF-LABEL: define void @undef()
 // UNDEF:      [[UNDEFVAR:%.*]] = alloca i32,
 // UNDEF-NEXT: [[T0:%.*]] = bitcast i32* [[UNDEFVAR]] to i8*
-// UNDEF-NEXT: call void @llvm.var.annotation(i8* [[T0]], i8* getelementptr inbounds ([15 x i8]* @{{.*}}), i8* getelementptr inbounds ({{.*}}), i32 43)
+// UNDEF-NEXT: call void @llvm.var.annotation(i8* [[T0]], i8* getelementptr inbounds ([15 x i8], [15 x i8]* @{{.*}}), i8* getelementptr inbounds ({{.*}}), i32 43)
 }
diff --git a/test/CodeGen/arm-aapcs-vfp.c b/test/CodeGen/arm-aapcs-vfp.c
index 9c463fd..38044cb 100644
--- a/test/CodeGen/arm-aapcs-vfp.c
+++ b/test/CodeGen/arm-aapcs-vfp.c
@@ -118,7 +118,7 @@
   neon_callee(arg);
 }
 
-// CHECK-LABEL: define arm_aapcs_vfpcc void @f33(%struct.s33* byval align 1 %s)
+// CHECK-LABEL: define arm_aapcs_vfpcc void @f33(%struct.s33* byval align 4 %s)
 struct s33 { char buf[32*32]; };
 void f33(struct s33 s) { }
 
diff --git a/test/CodeGen/arm-abi-vector.c b/test/CodeGen/arm-abi-vector.c
index 12e38ba..468acdf 100644
--- a/test/CodeGen/arm-abi-vector.c
+++ b/test/CodeGen/arm-abi-vector.c
@@ -17,15 +17,15 @@
 // CHECK: alloca <2 x i32>, align 8
 // CHECK: [[ALIGN:%.*]] = and i32 [[VAR:%.*]], -8
 // CHECK: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8* [[AP_ALIGN]], i32 8
+// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_ALIGN]], i32 8
 // CHECK: bitcast i8* [[AP_ALIGN]] to <2 x i32>*
 // APCS-GNU: varargs_vec_2i
 // APCS-GNU: alloca <2 x i32>, align 8
 // APCS-GNU: [[VAR_ALIGN:%.*]] = alloca <2 x i32>
-// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr i8* {{%.*}}, i32 8
+// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr i8, i8* {{%.*}}, i32 8
 // APCS-GNU: bitcast <2 x i32>* [[VAR_ALIGN]] to i8*
 // APCS-GNU: call void @llvm.memcpy
-// APCS-GNU: load <2 x i32>* [[VAR_ALIGN]]
+// APCS-GNU: load <2 x i32>, <2 x i32>* [[VAR_ALIGN]]
   va_list ap;
   double sum = fixed;
   va_start(ap, fixed);
@@ -46,11 +46,11 @@
 double varargs_vec_3c(int fixed, ...) {
 // CHECK: varargs_vec_3c
 // CHECK: alloca <3 x i8>, align 4
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8* [[AP:%.*]], i32 4
+// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP:%.*]], i32 4
 // CHECK: bitcast i8* [[AP]] to <3 x i8>*
 // APCS-GNU: varargs_vec_3c
 // APCS-GNU: alloca <3 x i8>, align 4
-// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr i8* [[AP:%.*]], i32 4
+// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP:%.*]], i32 4
 // APCS-GNU: bitcast i8* [[AP]] to <3 x i8>*
   va_list ap;
   double sum = fixed;
@@ -74,15 +74,15 @@
 // CHECK: alloca <5 x i8>, align 8
 // CHECK: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
 // CHECK: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8* [[AP_ALIGN]], i32 8
+// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_ALIGN]], i32 8
 // CHECK: bitcast i8* [[AP_ALIGN]] to <5 x i8>*
 // APCS-GNU: varargs_vec_5c
 // APCS-GNU: alloca <5 x i8>, align 8
 // APCS-GNU: [[VAR_ALIGN:%.*]] = alloca <5 x i8>
-// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr i8* {{%.*}}, i32 8
+// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr i8, i8* {{%.*}}, i32 8
 // APCS-GNU: bitcast <5 x i8>* [[VAR_ALIGN]] to i8*
 // APCS-GNU: call void @llvm.memcpy
-// APCS-GNU: load <5 x i8>* [[VAR_ALIGN]]
+// APCS-GNU: load <5 x i8>, <5 x i8>* [[VAR_ALIGN]]
   va_list ap;
   double sum = fixed;
   va_start(ap, fixed);
@@ -106,17 +106,17 @@
 // CHECK: [[VAR_ALIGN:%.*]] = alloca <9 x i8>
 // CHECK: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
 // CHECK: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8* [[AP_ALIGN]], i32 16
+// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_ALIGN]], i32 16
 // CHECK: bitcast <9 x i8>* [[VAR_ALIGN]] to i8*
 // CHECK: call void @llvm.memcpy
-// CHECK: load <9 x i8>* [[VAR_ALIGN]]
+// CHECK: load <9 x i8>, <9 x i8>* [[VAR_ALIGN]]
 // APCS-GNU: varargs_vec_9c
 // APCS-GNU: alloca <9 x i8>, align 16
 // APCS-GNU: [[VAR_ALIGN:%.*]] = alloca <9 x i8>
-// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr i8* {{%.*}}, i32 16
+// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr i8, i8* {{%.*}}, i32 16
 // APCS-GNU: bitcast <9 x i8>* [[VAR_ALIGN]] to i8*
 // APCS-GNU: call void @llvm.memcpy
-// APCS-GNU: load <9 x i8>* [[VAR_ALIGN]]
+// APCS-GNU: load <9 x i8>, <9 x i8>* [[VAR_ALIGN]]
   va_list ap;
   double sum = fixed;
   va_start(ap, fixed);
@@ -136,14 +136,14 @@
 
 double varargs_vec_19c(int fixed, ...) {
 // CHECK: varargs_vec_19c
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8* [[AP:%.*]], i32 4
+// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP:%.*]], i32 4
 // CHECK: [[VAR:%.*]] = bitcast i8* [[AP]] to i8**
-// CHECK: [[VAR2:%.*]] = load i8** [[VAR]]
+// CHECK: [[VAR2:%.*]] = load i8*, i8** [[VAR]]
 // CHECK: bitcast i8* [[VAR2]] to <19 x i8>*
 // APCS-GNU: varargs_vec_19c
-// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr i8* [[AP:%.*]], i32 4
+// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP:%.*]], i32 4
 // APCS-GNU: [[VAR:%.*]] = bitcast i8* [[AP]] to i8**
-// APCS-GNU: [[VAR2:%.*]] = load i8** [[VAR]]
+// APCS-GNU: [[VAR2:%.*]] = load i8*, i8** [[VAR]]
 // APCS-GNU: bitcast i8* [[VAR2]] to <19 x i8>*
   va_list ap;
   double sum = fixed;
@@ -167,15 +167,15 @@
 // CHECK: alloca <3 x i16>, align 8
 // CHECK: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
 // CHECK: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8* [[AP_ALIGN]], i32 8
+// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_ALIGN]], i32 8
 // CHECK: bitcast i8* [[AP_ALIGN]] to <3 x i16>*
 // APCS-GNU: varargs_vec_3s
 // APCS-GNU: alloca <3 x i16>, align 8
 // APCS-GNU: [[VAR_ALIGN:%.*]] = alloca <3 x i16>
-// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr i8* {{%.*}}, i32 8
+// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr i8, i8* {{%.*}}, i32 8
 // APCS-GNU: bitcast <3 x i16>* [[VAR_ALIGN]] to i8*
 // APCS-GNU: call void @llvm.memcpy
-// APCS-GNU: load <3 x i16>* [[VAR_ALIGN]]
+// APCS-GNU: load <3 x i16>, <3 x i16>* [[VAR_ALIGN]]
   va_list ap;
   double sum = fixed;
   va_start(ap, fixed);
@@ -199,17 +199,17 @@
 // CHECK: [[VAR_ALIGN:%.*]] = alloca <5 x i16>
 // CHECK: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
 // CHECK: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8* [[AP_ALIGN]], i32 16
+// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_ALIGN]], i32 16
 // CHECK: bitcast <5 x i16>* [[VAR_ALIGN]] to i8*
 // CHECK: call void @llvm.memcpy
-// CHECK: load <5 x i16>* [[VAR_ALIGN]]
+// CHECK: load <5 x i16>, <5 x i16>* [[VAR_ALIGN]]
 // APCS-GNU: varargs_vec_5s
 // APCS-GNU: alloca <5 x i16>, align 16
 // APCS-GNU: [[VAR_ALIGN:%.*]] = alloca <5 x i16>
-// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr i8* {{%.*}}, i32 16
+// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr i8, i8* {{%.*}}, i32 16
 // APCS-GNU: bitcast <5 x i16>* [[VAR_ALIGN]] to i8*
 // APCS-GNU: call void @llvm.memcpy
-// APCS-GNU: load <5 x i16>* [[VAR_ALIGN]]
+// APCS-GNU: load <5 x i16>, <5 x i16>* [[VAR_ALIGN]]
   va_list ap;
   double sum = fixed;
   va_start(ap, fixed);
@@ -238,11 +238,11 @@
 // CHECK: varargs_struct
 // CHECK: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
 // CHECK: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8* [[AP_ALIGN]], i32 16
+// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_ALIGN]], i32 16
 // CHECK: bitcast i8* [[AP_ALIGN]] to %struct.StructWithVec*
 // APCS-GNU: varargs_struct
 // APCS-GNU: [[VAR_ALIGN:%.*]] = alloca %struct.StructWithVec
-// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr i8* {{%.*}}, i32 16
+// APCS-GNU: [[AP_NEXT:%.*]] = getelementptr i8, i8* {{%.*}}, i32 16
 // APCS-GNU: bitcast %struct.StructWithVec* [[VAR_ALIGN]] to i8*
 // APCS-GNU: call void @llvm.memcpy
   va_list ap;
diff --git a/test/CodeGen/arm-arguments.c b/test/CodeGen/arm-arguments.c
index e4a10fd..b671626 100644
--- a/test/CodeGen/arm-arguments.c
+++ b/test/CodeGen/arm-arguments.c
@@ -176,8 +176,8 @@
 // PR13350
 struct s33 { char buf[32*32]; };
 void f33(struct s33 s) { }
-// APCS-GNU-LABEL: define void @f33(%struct.s33* byval align 1 %s)
-// AAPCS-LABEL: define arm_aapcscc void @f33(%struct.s33* byval align 1 %s)
+// APCS-GNU-LABEL: define void @f33(%struct.s33* byval align 4 %s)
+// AAPCS-LABEL: define arm_aapcscc void @f33(%struct.s33* byval align 4 %s)
 
 // PR14048
 struct s34 { char c; };
@@ -185,7 +185,7 @@
 void g34(struct s34 *s) { f34(*s); }
 // AAPCS: @g34(%struct.s34* %s)
 // AAPCS: %[[a:.*]] = alloca [1 x i32]
-// AAPCS: load [1 x i32]* %[[a]]
+// AAPCS: load [1 x i32], [1 x i32]* %[[a]]
 
 // rdar://12596507
 struct s35
@@ -204,17 +204,17 @@
                             *(float32x4_t *)&s2);
   return v;
 }
-// APCS-GNU-LABEL: define <4 x float> @f35(i32 %i, %struct.s35* byval align 16, %struct.s35* byval align 16)
+// APCS-GNU-LABEL: define <4 x float> @f35(i32 %i, %struct.s35* byval align 4, %struct.s35* byval align 4)
 // APCS-GNU: %[[a:.*]] = alloca %struct.s35, align 16
 // APCS-GNU: %[[b:.*]] = bitcast %struct.s35* %[[a]] to i8*
 // APCS-GNU: %[[c:.*]] = bitcast %struct.s35* %0 to i8*
 // APCS-GNU: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[b]], i8* %[[c]]
 // APCS-GNU: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>*
-// APCS-GNU: load <4 x float>* %[[d]], align 16
-// AAPCS-LABEL: define arm_aapcscc <4 x float> @f35(i32 %i, %struct.s35* byval align 16, %struct.s35* byval align 16)
+// APCS-GNU: load <4 x float>, <4 x float>* %[[d]], align 16
+// AAPCS-LABEL: define arm_aapcscc <4 x float> @f35(i32 %i, %struct.s35* byval align 8, %struct.s35* byval align 8)
 // AAPCS: %[[a:.*]] = alloca %struct.s35, align 16
 // AAPCS: %[[b:.*]] = bitcast %struct.s35* %[[a]] to i8*
 // AAPCS: %[[c:.*]] = bitcast %struct.s35* %0 to i8*
 // AAPCS: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[b]], i8* %[[c]]
 // AAPCS: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>*
-// AAPCS: load <4 x float>* %[[d]], align 16
+// AAPCS: load <4 x float>, <4 x float>* %[[d]], align 16
diff --git a/test/CodeGen/arm-atomics-m.c b/test/CodeGen/arm-atomics-m.c
index 51e2d1d..cd9e71e 100644
--- a/test/CodeGen/arm-atomics-m.c
+++ b/test/CodeGen/arm-atomics-m.c
@@ -15,7 +15,7 @@
   __atomic_fetch_add(&i, 1, memory_order_seq_cst);
   // CHECK: atomicrmw sub i32* {{.*}} seq_cst
   __atomic_fetch_sub(&i, 1, memory_order_seq_cst);
-  // CHECK: load atomic i32* {{.*}} seq_cst
+  // CHECK: load atomic i32, i32* {{.*}} seq_cst
   int r;
   __atomic_load(&i, &r, memory_order_seq_cst);
   // CHECK: store atomic i32 {{.*}} seq_cst
diff --git a/test/CodeGen/arm-atomics.c b/test/CodeGen/arm-atomics.c
index b54e277..aa5a6ec 100644
--- a/test/CodeGen/arm-atomics.c
+++ b/test/CodeGen/arm-atomics.c
@@ -17,7 +17,7 @@
   __atomic_fetch_add(&i, 1, memory_order_seq_cst);
   // CHECK: atomicrmw sub i32* {{.*}} seq_cst
   __atomic_fetch_sub(&i, 1, memory_order_seq_cst);
-  // CHECK: load atomic i32* {{.*}} seq_cst
+  // CHECK: load atomic i32, i32* {{.*}} seq_cst
   int r;
   __atomic_load(&i, &r, memory_order_seq_cst);
   // CHECK: store atomic i32 {{.*}} seq_cst
@@ -28,7 +28,7 @@
   __atomic_fetch_add(&l, 1, memory_order_seq_cst);
   // CHECK: atomicrmw sub i64* {{.*}} seq_cst
   __atomic_fetch_sub(&l, 1, memory_order_seq_cst);
-  // CHECK: load atomic i64* {{.*}} seq_cst
+  // CHECK: load atomic i64, i64* {{.*}} seq_cst
   long long rl;
   __atomic_load(&l, &rl, memory_order_seq_cst);
   // CHECK: store atomic i64 {{.*}} seq_cst
diff --git a/test/CodeGen/arm-clear.c b/test/CodeGen/arm-clear.c
index 8ef3675..566d5da 100644
--- a/test/CodeGen/arm-clear.c
+++ b/test/CodeGen/arm-clear.c
@@ -3,7 +3,7 @@
 
 void clear(void *ptr, void *ptr2) {
   // CHECK: clear
-  // CHECK: load i8**
-  // CHECK: load i8**
+  // CHECK: load i8*, i8**
+  // CHECK: load i8*, i8**
   __clear_cache(ptr, ptr2);
 }
diff --git a/test/CodeGen/arm-vector-align.c b/test/CodeGen/arm-vector-align.c
index 9e1ae5d..15dd13e 100644
--- a/test/CodeGen/arm-vector-align.c
+++ b/test/CodeGen/arm-vector-align.c
@@ -23,7 +23,7 @@
 // Radar 10538555: Make sure unaligned load/stores do not gain alignment.
 void t2(char *addr) {
 // CHECK: @t2
-// CHECK: load i32* %{{.*}}, align 1
+// CHECK: load i32, i32* %{{.*}}, align 1
   int32x2_t vec = vld1_dup_s32(addr);
 // CHECK: store i32 %{{.*}}, i32* {{.*}}, align 1
   vst1_lane_s32(addr, vec, 1);
diff --git a/test/CodeGen/arm64-abi-vector.c b/test/CodeGen/arm64-abi-vector.c
index 502fb08..f4895c1 100644
--- a/test/CodeGen/arm64-abi-vector.c
+++ b/test/CodeGen/arm64-abi-vector.c
@@ -16,7 +16,7 @@
 double varargs_vec_3c(int fixed, ...) {
 // CHECK: varargs_vec_3c
 // CHECK: alloca <3 x i8>, align 4
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8* [[AP_CUR:%.*]], i32 8
+// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_CUR:%.*]], i32 8
 // CHECK: bitcast i8* [[AP_CUR]] to <3 x i8>*
   va_list ap;
   double sum = fixed;
@@ -36,7 +36,7 @@
 double varargs_vec_4c(int fixed, ...) {
 // CHECK: varargs_vec_4c
 // CHECK: alloca <4 x i8>, align 4
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8* [[AP_CUR:%.*]], i32 8
+// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_CUR:%.*]], i32 8
 // CHECK: bitcast i8* [[AP_CUR]] to <4 x i8>*
   va_list ap;
   double sum = fixed;
@@ -56,7 +56,7 @@
 double varargs_vec_5c(int fixed, ...) {
 // CHECK: varargs_vec_5c
 // CHECK: alloca <5 x i8>, align 8
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8* [[AP_CUR:%.*]], i32 8
+// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_CUR:%.*]], i32 8
 // CHECK: bitcast i8* [[AP_CUR]] to <5 x i8>*
   va_list ap;
   double sum = fixed;
@@ -78,7 +78,7 @@
 // CHECK: alloca <9 x i8>, align 16
 // CHECK: [[ALIGN:%.*]] = and i64 {{%.*}}, -16
 // CHECK: [[AP_ALIGN:%.*]] = inttoptr i64 [[ALIGN]] to i8*
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8* [[AP_ALIGN]], i32 16
+// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_ALIGN]], i32 16
 // CHECK: bitcast i8* [[AP_ALIGN]] to <9 x i8>*
   va_list ap;
   double sum = fixed;
@@ -97,9 +97,9 @@
 
 double varargs_vec_19c(int fixed, ...) {
 // CHECK: varargs_vec_19c
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8* [[AP_CUR:%.*]], i32 8
+// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_CUR:%.*]], i32 8
 // CHECK: [[VAR:%.*]] = bitcast i8* [[AP_CUR]] to i8**
-// CHECK: [[VAR2:%.*]] = load i8** [[VAR]]
+// CHECK: [[VAR2:%.*]] = load i8*, i8** [[VAR]]
 // CHECK: bitcast i8* [[VAR2]] to <19 x i8>*
   va_list ap;
   double sum = fixed;
@@ -119,7 +119,7 @@
 double varargs_vec_3s(int fixed, ...) {
 // CHECK: varargs_vec_3s
 // CHECK: alloca <3 x i16>, align 8
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8* [[AP_CUR:%.*]], i32 8
+// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_CUR:%.*]], i32 8
 // CHECK: bitcast i8* [[AP_CUR]] to <3 x i16>*
   va_list ap;
   double sum = fixed;
@@ -141,7 +141,7 @@
 // CHECK: alloca <5 x i16>, align 16
 // CHECK: [[ALIGN:%.*]] = and i64 {{%.*}}, -16
 // CHECK: [[AP_ALIGN:%.*]] = inttoptr i64 [[ALIGN]] to i8*
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8* [[AP_ALIGN]], i32 16
+// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_ALIGN]], i32 16
 // CHECK: bitcast i8* [[AP_ALIGN]] to <5 x i16>*
   va_list ap;
   double sum = fixed;
@@ -163,7 +163,7 @@
 // CHECK: alloca <3 x i32>, align 16
 // CHECK: [[ALIGN:%.*]] = and i64 {{%.*}}, -16
 // CHECK: [[AP_ALIGN:%.*]] = inttoptr i64 [[ALIGN]] to i8*
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8* [[AP_ALIGN]], i32 16
+// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_ALIGN]], i32 16
 // CHECK: bitcast i8* [[AP_ALIGN]] to <3 x i32>*
   va_list ap;
   double sum = fixed;
@@ -183,9 +183,9 @@
 double varargs_vec_5i(int fixed, ...) {
 // CHECK: varargs_vec_5i
 // CHECK: alloca <5 x i32>, align 16
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8* [[AP_CUR:%.*]], i32 8
+// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_CUR:%.*]], i32 8
 // CHECK: [[VAR:%.*]] = bitcast i8* [[AP_CUR]] to i8**
-// CHECK: [[VAR2:%.*]] = load i8** [[VAR]]
+// CHECK: [[VAR2:%.*]] = load i8*, i8** [[VAR]]
 // CHECK: bitcast i8* [[VAR2]] to <5 x i32>*
   va_list ap;
   double sum = fixed;
@@ -205,9 +205,9 @@
 double varargs_vec_3d(int fixed, ...) {
 // CHECK: varargs_vec_3d
 // CHECK: alloca <3 x double>, align 16
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8* [[AP_CUR:%.*]], i32 8
+// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_CUR:%.*]], i32 8
 // CHECK: [[VAR:%.*]] = bitcast i8* [[AP_CUR]] to i8**
-// CHECK: [[VAR2:%.*]] = load i8** [[VAR]]
+// CHECK: [[VAR2:%.*]] = load i8*, i8** [[VAR]]
 // CHECK: bitcast i8* [[VAR2]] to <3 x double>*
   va_list ap;
   double sum = fixed;
@@ -230,51 +230,51 @@
   double sum = fixed;
   va_start(ap, fixed);
   __char3 c3 = va_arg(ap, __char3);
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8* [[AP_CUR:%.*]], i32 8
+// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_CUR:%.*]], i32 8
 // CHECK: bitcast i8* [[AP_CUR]] to <3 x i8>*
   sum = sum + c3.x + c3.y;
   __char5 c5 = va_arg(ap, __char5);
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8* [[AP_CUR:%.*]], i32 8
+// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_CUR:%.*]], i32 8
 // CHECK: bitcast i8* [[AP_CUR]] to <5 x i8>*
   sum = sum + c5.x + c5.y;
   __char9 c9 = va_arg(ap, __char9);
 // CHECK: [[ALIGN:%.*]] = and i64 {{%.*}}, -16
 // CHECK: [[AP_ALIGN:%.*]] = inttoptr i64 [[ALIGN]] to i8*
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8* [[AP_ALIGN]], i32 16
+// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_ALIGN]], i32 16
 // CHECK: bitcast i8* [[AP_ALIGN]] to <9 x i8>*
   sum = sum + c9.x + c9.y;
   __char19 c19 = va_arg(ap, __char19);
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8* [[AP_CUR:%.*]], i32 8
+// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_CUR:%.*]], i32 8
 // CHECK: [[VAR:%.*]] = bitcast i8* [[AP_CUR]] to i8**
-// CHECK: [[VAR2:%.*]] = load i8** [[VAR]]
+// CHECK: [[VAR2:%.*]] = load i8*, i8** [[VAR]]
 // CHECK: bitcast i8* [[VAR2]] to <19 x i8>*
   sum = sum + c19.x + c19.y;
   __short3 s3 = va_arg(ap, __short3);
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8* [[AP_CUR:%.*]], i32 8
+// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_CUR:%.*]], i32 8
 // CHECK: bitcast i8* [[AP_CUR]] to <3 x i16>*
   sum = sum + s3.x + s3.y;
   __short5 s5 = va_arg(ap, __short5);
 // CHECK: [[ALIGN:%.*]] = and i64 {{%.*}}, -16
 // CHECK: [[AP_ALIGN:%.*]] = inttoptr i64 [[ALIGN]] to i8*
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8* [[AP_ALIGN]], i32 16
+// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_ALIGN]], i32 16
 // CHECK: bitcast i8* [[AP_ALIGN]] to <5 x i16>*
   sum = sum + s5.x + s5.y;
   __int3 i3 = va_arg(ap, __int3);
 // CHECK: [[ALIGN:%.*]] = and i64 {{%.*}}, -16
 // CHECK: [[AP_ALIGN:%.*]] = inttoptr i64 [[ALIGN]] to i8*
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8* [[AP_ALIGN]], i32 16
+// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_ALIGN]], i32 16
 // CHECK: bitcast i8* [[AP_ALIGN]] to <3 x i32>*
   sum = sum + i3.x + i3.y;
   __int5 i5 = va_arg(ap, __int5);
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8* [[AP_CUR:%.*]], i32 8
+// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_CUR:%.*]], i32 8
 // CHECK: [[VAR:%.*]] = bitcast i8* [[AP_CUR]] to i8**
-// CHECK: [[VAR2:%.*]] = load i8** [[VAR]]
+// CHECK: [[VAR2:%.*]] = load i8*, i8** [[VAR]]
 // CHECK: bitcast i8* [[VAR2]] to <5 x i32>*
   sum = sum + i5.x + i5.y;
   __double3 d3 = va_arg(ap, __double3);
-// CHECK: [[AP_NEXT:%.*]] = getelementptr i8* [[AP_CUR:%.*]], i32 8
+// CHECK: [[AP_NEXT:%.*]] = getelementptr i8, i8* [[AP_CUR:%.*]], i32 8
 // CHECK: [[VAR:%.*]] = bitcast i8* [[AP_CUR]] to i8**
-// CHECK: [[VAR2:%.*]] = load i8** [[VAR]]
+// CHECK: [[VAR2:%.*]] = load i8*, i8** [[VAR]]
 // CHECK: bitcast i8* [[VAR2]] to <3 x double>*
   sum = sum + d3.x + d3.y;
   va_end(ap);
@@ -339,7 +339,7 @@
 
 __attribute__((noinline)) double args_vec_19c(int fixed, __char19 c19) {
 // CHECK: args_vec_19c
-// CHECK: [[C19:%.*]] = load <19 x i8>* {{.*}}, align 16
+// CHECK: [[C19:%.*]] = load <19 x i8>, <19 x i8>* {{.*}}, align 16
   double sum = fixed;
   sum = sum + c19.x + c19.y;
   return sum;
@@ -401,7 +401,7 @@
 
 __attribute__((noinline)) double args_vec_5i(int fixed, __int5 c5) {
 // CHECK: args_vec_5i
-// CHECK: [[C5:%.*]] = load <5 x i32>* {{%.*}}, align 16
+// CHECK: [[C5:%.*]] = load <5 x i32>, <5 x i32>* {{%.*}}, align 16
   double sum = fixed;
   sum = sum + c5.x + c5.y;
   return sum;
@@ -416,7 +416,7 @@
 __attribute__((noinline)) double args_vec_3d(int fixed, __double3 c3) {
 // CHECK: args_vec_3d
 // CHECK: [[CAST:%.*]] = bitcast <3 x double>* {{%.*}} to <4 x double>*
-// CHECK: [[LOAD:%.*]] = load <4 x double>* [[CAST]]
+// CHECK: [[LOAD:%.*]] = load <4 x double>, <4 x double>* [[CAST]]
 // CHECK: shufflevector <4 x double> [[LOAD]], <4 x double> undef, <3 x i32> <i32 0, i32 1, i32 2>
   double sum = fixed;
   sum = sum + c3.x + c3.y;
diff --git a/test/CodeGen/arm64-arguments.c b/test/CodeGen/arm64-arguments.c
index ae1ff98..5c56fd4 100644
--- a/test/CodeGen/arm64-arguments.c
+++ b/test/CodeGen/arm64-arguments.c
@@ -134,7 +134,7 @@
 void f34(struct s34 s);
 void g34(struct s34 *s) { f34(*s); }
 // CHECK: @g34(%struct.s34* %s)
-// CHECK: %[[a:.*]] = load i8* %{{.*}}
+// CHECK: %[[a:.*]] = load i8, i8* %{{.*}}
 // CHECK: zext i8 %[[a]] to i64
 // CHECK: call void @f34(i64 %{{.*}})
 
@@ -200,9 +200,9 @@
 // CHECK: %s1 = alloca %struct.s35, align 16
 // CHECK: %s2 = alloca %struct.s35, align 16
 // CHECK: %[[a:.*]] = bitcast %struct.s35* %s1 to <4 x float>*
-// CHECK: load <4 x float>* %[[a]], align 16
+// CHECK: load <4 x float>, <4 x float>* %[[a]], align 16
 // CHECK: %[[b:.*]] = bitcast %struct.s35* %s2 to <4 x float>*
-// CHECK: load <4 x float>* %[[b]], align 16
+// CHECK: load <4 x float>, <4 x float>* %[[b]], align 16
   float32x4_t v = vaddq_f32(*(float32x4_t *)&s1,
                             *(float32x4_t *)&s2);
   return v;
@@ -222,9 +222,9 @@
 // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
 // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
 // CHECK: %[[a:.*]] = bitcast %struct.s36* %s1 to <4 x i32>*
-// CHECK: load <4 x i32>* %[[a]], align 16
+// CHECK: load <4 x i32>, <4 x i32>* %[[a]], align 16
 // CHECK: %[[b:.*]] = bitcast %struct.s36* %s2 to <4 x i32>*
-// CHECK: load <4 x i32>* %[[b]], align 16
+// CHECK: load <4 x i32>, <4 x i32>* %[[b]], align 16
   int32x4_t v = vaddq_s32(*(int32x4_t *)&s1,
                           *(int32x4_t *)&s2);
   return v;
@@ -239,9 +239,9 @@
 int32x4_t f37(int i, s37_with_align s1, s37_with_align s2) {
 // CHECK: define <4 x i32> @f37(i32 %i, %struct.s37* %s1, %struct.s37* %s2)
 // CHECK: %[[a:.*]] = bitcast %struct.s37* %s1 to <4 x i32>*
-// CHECK: load <4 x i32>* %[[a]], align 16
+// CHECK: load <4 x i32>, <4 x i32>* %[[a]], align 16
 // CHECK: %[[b:.*]] = bitcast %struct.s37* %s2 to <4 x i32>*
-// CHECK: load <4 x i32>* %[[b]], align 16
+// CHECK: load <4 x i32>, <4 x i32>* %[[b]], align 16
   int32x4_t v = vaddq_s32(*(int32x4_t *)&s1,
                           *(int32x4_t *)&s2);
   return v;
@@ -277,18 +277,18 @@
 // CHECK: %s2 = alloca %struct.s38, align 8
 // CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 1
 // CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 1
-// CHECK: getelementptr inbounds %struct.s38* %s1, i32 0, i32 0
-// CHECK: getelementptr inbounds %struct.s38* %s2, i32 0, i32 0
-// CHECK: getelementptr inbounds %struct.s38* %s1, i32 0, i32 1
-// CHECK: getelementptr inbounds %struct.s38* %s2, i32 0, i32 1
+// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 0
+// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 0
+// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 1
+// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 1
   return s1.i + s2.i + i + s1.s + s2.s;
 }
 s38_no_align g38;
 s38_no_align g38_2;
 int caller38() {
 // CHECK: define i32 @caller38()
-// CHECK: %[[a:.*]] = load i64* bitcast (%struct.s38* @g38 to i64*), align 1
-// CHECK: %[[b:.*]] = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 1
+// CHECK: %[[a:.*]] = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 1
+// CHECK: %[[b:.*]] = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 1
 // CHECK: call i32 @f38(i32 3, i64 %[[a]], i64 %[[b]])
   return f38(3, g38, g38_2);
 }
@@ -301,16 +301,16 @@
 // CHECK: %s2 = alloca %struct.s38, align 8
 // CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 1
 // CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 1
-// CHECK: getelementptr inbounds %struct.s38* %s1, i32 0, i32 0
-// CHECK: getelementptr inbounds %struct.s38* %s2, i32 0, i32 0
-// CHECK: getelementptr inbounds %struct.s38* %s1, i32 0, i32 1
-// CHECK: getelementptr inbounds %struct.s38* %s2, i32 0, i32 1
+// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 0
+// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 0
+// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s1, i32 0, i32 1
+// CHECK: getelementptr inbounds %struct.s38, %struct.s38* %s2, i32 0, i32 1
   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
 }
 int caller38_stack() {
 // CHECK: define i32 @caller38_stack()
-// CHECK: %[[a:.*]] = load i64* bitcast (%struct.s38* @g38 to i64*), align 1
-// CHECK: %[[b:.*]] = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 1
+// CHECK: %[[a:.*]] = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 1
+// CHECK: %[[b:.*]] = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 1
 // CHECK: call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i64 %[[a]], i64 %[[b]])
   return f38_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g38, g38_2);
 }
@@ -330,18 +330,18 @@
 // CHECK: %s2 = alloca %struct.s39, align 16
 // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
 // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
-// CHECK: getelementptr inbounds %struct.s39* %s1, i32 0, i32 0
-// CHECK: getelementptr inbounds %struct.s39* %s2, i32 0, i32 0
-// CHECK: getelementptr inbounds %struct.s39* %s1, i32 0, i32 1
-// CHECK: getelementptr inbounds %struct.s39* %s2, i32 0, i32 1
+// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 0
+// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 0
+// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 1
+// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 1
   return s1.i + s2.i + i + s1.s + s2.s;
 }
 s39_with_align g39;
 s39_with_align g39_2;
 int caller39() {
 // CHECK: define i32 @caller39()
-// CHECK: %[[a:.*]] = load i128* bitcast (%struct.s39* @g39 to i128*), align 1
-// CHECK: %[[b:.*]] = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 1
+// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 1
+// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 1
 // CHECK: call i32 @f39(i32 3, i128 %[[a]], i128 %[[b]])
   return f39(3, g39, g39_2);
 }
@@ -354,16 +354,16 @@
 // CHECK: %s2 = alloca %struct.s39, align 16
 // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
 // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
-// CHECK: getelementptr inbounds %struct.s39* %s1, i32 0, i32 0
-// CHECK: getelementptr inbounds %struct.s39* %s2, i32 0, i32 0
-// CHECK: getelementptr inbounds %struct.s39* %s1, i32 0, i32 1
-// CHECK: getelementptr inbounds %struct.s39* %s2, i32 0, i32 1
+// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 0
+// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 0
+// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s1, i32 0, i32 1
+// CHECK: getelementptr inbounds %struct.s39, %struct.s39* %s2, i32 0, i32 1
   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
 }
 int caller39_stack() {
 // CHECK: define i32 @caller39_stack()
-// CHECK: %[[a:.*]] = load i128* bitcast (%struct.s39* @g39 to i128*), align 1
-// CHECK: %[[b:.*]] = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 1
+// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 1
+// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 1
 // CHECK: call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]])
   return f39_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g39, g39_2);
 }
@@ -385,18 +385,18 @@
 // CHECK: %s2 = alloca %struct.s40, align 8
 // CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 1
 // CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 1
-// CHECK: getelementptr inbounds %struct.s40* %s1, i32 0, i32 0
-// CHECK: getelementptr inbounds %struct.s40* %s2, i32 0, i32 0
-// CHECK: getelementptr inbounds %struct.s40* %s1, i32 0, i32 1
-// CHECK: getelementptr inbounds %struct.s40* %s2, i32 0, i32 1
+// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 0
+// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 0
+// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 1
+// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 1
   return s1.i + s2.i + i + s1.s + s2.s;
 }
 s40_no_align g40;
 s40_no_align g40_2;
 int caller40() {
 // CHECK: define i32 @caller40()
-// CHECK: %[[a:.*]] = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 1
-// CHECK: %[[b:.*]] = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 1
+// CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 1
+// CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 1
 // CHECK: call i32 @f40(i32 3, [2 x i64] %[[a]], [2 x i64] %[[b]])
   return f40(3, g40, g40_2);
 }
@@ -409,16 +409,16 @@
 // CHECK: %s2 = alloca %struct.s40, align 8
 // CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 1
 // CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 1
-// CHECK: getelementptr inbounds %struct.s40* %s1, i32 0, i32 0
-// CHECK: getelementptr inbounds %struct.s40* %s2, i32 0, i32 0
-// CHECK: getelementptr inbounds %struct.s40* %s1, i32 0, i32 1
-// CHECK: getelementptr inbounds %struct.s40* %s2, i32 0, i32 1
+// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 0
+// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 0
+// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s1, i32 0, i32 1
+// CHECK: getelementptr inbounds %struct.s40, %struct.s40* %s2, i32 0, i32 1
   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
 }
 int caller40_stack() {
 // CHECK: define i32 @caller40_stack()
-// CHECK: %[[a:.*]] = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 1
-// CHECK: %[[b:.*]] = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 1
+// CHECK: %[[a:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 1
+// CHECK: %[[b:.*]] = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 1
 // CHECK: call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, [2 x i64] %[[a]], [2 x i64] %[[b]])
   return f40_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g40, g40_2);
 }
@@ -440,18 +440,18 @@
 // CHECK: %s2 = alloca %struct.s41, align 16
 // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
 // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
-// CHECK: getelementptr inbounds %struct.s41* %s1, i32 0, i32 0
-// CHECK: getelementptr inbounds %struct.s41* %s2, i32 0, i32 0
-// CHECK: getelementptr inbounds %struct.s41* %s1, i32 0, i32 1
-// CHECK: getelementptr inbounds %struct.s41* %s2, i32 0, i32 1
+// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 0
+// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 0
+// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 1
+// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 1
   return s1.i + s2.i + i + s1.s + s2.s;
 }
 s41_with_align g41;
 s41_with_align g41_2;
 int caller41() {
 // CHECK: define i32 @caller41()
-// CHECK: %[[a:.*]] = load i128* bitcast (%struct.s41* @g41 to i128*), align 1
-// CHECK: %[[b:.*]] = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 1
+// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 1
+// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 1
 // CHECK: call i32 @f41(i32 3, i128 %[[a]], i128 %[[b]])
   return f41(3, g41, g41_2);
 }
@@ -464,16 +464,16 @@
 // CHECK: %s2 = alloca %struct.s41, align 16
 // CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
 // CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
-// CHECK: getelementptr inbounds %struct.s41* %s1, i32 0, i32 0
-// CHECK: getelementptr inbounds %struct.s41* %s2, i32 0, i32 0
-// CHECK: getelementptr inbounds %struct.s41* %s1, i32 0, i32 1
-// CHECK: getelementptr inbounds %struct.s41* %s2, i32 0, i32 1
+// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 0
+// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 0
+// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s1, i32 0, i32 1
+// CHECK: getelementptr inbounds %struct.s41, %struct.s41* %s2, i32 0, i32 1
   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
 }
 int caller41_stack() {
 // CHECK: define i32 @caller41_stack()
-// CHECK: %[[a:.*]] = load i128* bitcast (%struct.s41* @g41 to i128*), align 1
-// CHECK: %[[b:.*]] = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 1
+// CHECK: %[[a:.*]] = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 1
+// CHECK: %[[b:.*]] = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 1
 // CHECK: call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]])
   return f41_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g41, g41_2);
 }
@@ -493,10 +493,10 @@
 __attribute__ ((noinline))
 int f42(int i, s42_no_align s1, s42_no_align s2) {
 // CHECK: define i32 @f42(i32 %i, %struct.s42* %s1, %struct.s42* %s2)
-// CHECK: getelementptr inbounds %struct.s42* %s1, i32 0, i32 0
-// CHECK: getelementptr inbounds %struct.s42* %s2, i32 0, i32 0
-// CHECK: getelementptr inbounds %struct.s42* %s1, i32 0, i32 1
-// CHECK: getelementptr inbounds %struct.s42* %s2, i32 0, i32 1
+// CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s1, i32 0, i32 0
+// CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s2, i32 0, i32 0
+// CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s1, i32 0, i32 1
+// CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s2, i32 0, i32 1
   return s1.i + s2.i + i + s1.s + s2.s;
 }
 s42_no_align g42;
@@ -517,10 +517,10 @@
 int f42_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
               int i9, s42_no_align s1, s42_no_align s2) {
 // CHECK: define i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, %struct.s42* %s1, %struct.s42* %s2)
-// CHECK: getelementptr inbounds %struct.s42* %s1, i32 0, i32 0
-// CHECK: getelementptr inbounds %struct.s42* %s2, i32 0, i32 0
-// CHECK: getelementptr inbounds %struct.s42* %s1, i32 0, i32 1
-// CHECK: getelementptr inbounds %struct.s42* %s2, i32 0, i32 1
+// CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s1, i32 0, i32 0
+// CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s2, i32 0, i32 0
+// CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s1, i32 0, i32 1
+// CHECK: getelementptr inbounds %struct.s42, %struct.s42* %s2, i32 0, i32 1
   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
 }
 int caller42_stack() {
@@ -550,10 +550,10 @@
 __attribute__ ((noinline))
 int f43(int i, s43_with_align s1, s43_with_align s2) {
 // CHECK: define i32 @f43(i32 %i, %struct.s43* %s1, %struct.s43* %s2)
-// CHECK: getelementptr inbounds %struct.s43* %s1, i32 0, i32 0
-// CHECK: getelementptr inbounds %struct.s43* %s2, i32 0, i32 0
-// CHECK: getelementptr inbounds %struct.s43* %s1, i32 0, i32 1
-// CHECK: getelementptr inbounds %struct.s43* %s2, i32 0, i32 1
+// CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s1, i32 0, i32 0
+// CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s2, i32 0, i32 0
+// CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s1, i32 0, i32 1
+// CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s2, i32 0, i32 1
   return s1.i + s2.i + i + s1.s + s2.s;
 }
 s43_with_align g43;
@@ -574,10 +574,10 @@
 int f43_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
               int i9, s43_with_align s1, s43_with_align s2) {
 // CHECK: define i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, %struct.s43* %s1, %struct.s43* %s2)
-// CHECK: getelementptr inbounds %struct.s43* %s1, i32 0, i32 0
-// CHECK: getelementptr inbounds %struct.s43* %s2, i32 0, i32 0
-// CHECK: getelementptr inbounds %struct.s43* %s1, i32 0, i32 1
-// CHECK: getelementptr inbounds %struct.s43* %s2, i32 0, i32 1
+// CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s1, i32 0, i32 0
+// CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s2, i32 0, i32 0
+// CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s1, i32 0, i32 1
+// CHECK: getelementptr inbounds %struct.s43, %struct.s43* %s2, i32 0, i32 1
   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
 }
 int caller43_stack() {
@@ -626,10 +626,10 @@
 float test_hfa(int n, ...) {
 // CHECK-LABEL: define float @test_hfa(i32 %n, ...)
 // CHECK: [[THELIST:%.*]] = alloca i8*
-// CHECK: [[CURLIST:%.*]] = load i8** [[THELIST]]
+// CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]]
 
   // HFA is not indirect, so occupies its full 16 bytes on the stack.
-// CHECK: [[NEXTLIST:%.*]] = getelementptr i8* [[CURLIST]], i32 16
+// CHECK: [[NEXTLIST:%.*]] = getelementptr i8, i8* [[CURLIST]], i32 16
 // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]
 
 // CHECK: bitcast i8* [[CURLIST]] to %struct.HFA*
@@ -652,15 +652,15 @@
 float test_toobig_hfa(int n, ...) {
 // CHECK-LABEL: define float @test_toobig_hfa(i32 %n, ...)
 // CHECK: [[THELIST:%.*]] = alloca i8*
-// CHECK: [[CURLIST:%.*]] = load i8** [[THELIST]]
+// CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]]
 
   // TooBigHFA is not actually an HFA, so gets passed indirectly. Only 8 bytes
   // of stack consumed.
-// CHECK: [[NEXTLIST:%.*]] = getelementptr i8* [[CURLIST]], i32 8
+// CHECK: [[NEXTLIST:%.*]] = getelementptr i8, i8* [[CURLIST]], i32 8
 // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]
 
 // CHECK: [[HFAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to i8**
-// CHECK: [[HFAPTR:%.*]] = load i8** [[HFAPTRPTR]]
+// CHECK: [[HFAPTR:%.*]] = load i8*, i8** [[HFAPTRPTR]]
 // CHECK: bitcast i8* [[HFAPTR]] to %struct.TooBigHFA*
   __builtin_va_list thelist;
   __builtin_va_start(thelist, n);
@@ -675,16 +675,16 @@
 int32x4_t test_hva(int n, ...) {
 // CHECK-LABEL: define <4 x i32> @test_hva(i32 %n, ...)
 // CHECK: [[THELIST:%.*]] = alloca i8*
-// CHECK: [[CURLIST:%.*]] = load i8** [[THELIST]]
+// CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]]
 
   // HVA is not indirect, so occupies its full 16 bytes on the stack. but it
   // must be properly aligned.
-// CHECK: [[ALIGN0:%.*]] = getelementptr i8* [[CURLIST]], i32 15
+// CHECK: [[ALIGN0:%.*]] = getelementptr i8, i8* [[CURLIST]], i32 15
 // CHECK: [[ALIGN1:%.*]] = ptrtoint i8* [[ALIGN0]] to i64
 // CHECK: [[ALIGN2:%.*]] = and i64 [[ALIGN1]], -16
 // CHECK: [[ALIGNED_LIST:%.*]] = inttoptr i64 [[ALIGN2]] to i8*
 
-// CHECK: [[NEXTLIST:%.*]] = getelementptr i8* [[ALIGNED_LIST]], i32 32
+// CHECK: [[NEXTLIST:%.*]] = getelementptr i8, i8* [[ALIGNED_LIST]], i32 32
 // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]
 
 // CHECK: bitcast i8* [[ALIGNED_LIST]] to %struct.HVA*
@@ -701,15 +701,15 @@
 int32x4_t test_toobig_hva(int n, ...) {
 // CHECK-LABEL: define <4 x i32> @test_toobig_hva(i32 %n, ...)
 // CHECK: [[THELIST:%.*]] = alloca i8*
-// CHECK: [[CURLIST:%.*]] = load i8** [[THELIST]]
+// CHECK: [[CURLIST:%.*]] = load i8*, i8** [[THELIST]]
 
   // TooBigHVA is not actually an HVA, so gets passed indirectly. Only 8 bytes
   // of stack consumed.
-// CHECK: [[NEXTLIST:%.*]] = getelementptr i8* [[CURLIST]], i32 8
+// CHECK: [[NEXTLIST:%.*]] = getelementptr i8, i8* [[CURLIST]], i32 8
 // CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]
 
 // CHECK: [[HVAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to i8**
-// CHECK: [[HVAPTR:%.*]] = load i8** [[HVAPTRPTR]]
+// CHECK: [[HVAPTR:%.*]] = load i8*, i8** [[HVAPTRPTR]]
 // CHECK: bitcast i8* [[HVAPTR]] to %struct.TooBigHVA*
   __builtin_va_list thelist;
   __builtin_va_start(thelist, n);
diff --git a/test/CodeGen/arm64_neon_high_half.c b/test/CodeGen/arm64_neon_high_half.c
index 577a09e..6008ba5 100644
--- a/test/CodeGen/arm64_neon_high_half.c
+++ b/test/CodeGen/arm64_neon_high_half.c
@@ -394,32 +394,32 @@
 }
 
 int8x16_t test_vaddhn_high_s16(int8x8_t lowpart, int16x8_t lhs, int16x8_t rhs) {
-  // CHECK: addhn2.16b v0, v1, v2
+  // CHECK: addhn2.16b v0, {{v1, v2|v2, v1}}
   return vaddhn_high_s16(lowpart, lhs, rhs);
 }
 
 int16x8_t test_vaddhn_high_s32(int16x4_t lowpart, int32x4_t lhs, int32x4_t rhs) {
-  // CHECK: addhn2.8h v0, v1, v2
+  // CHECK: addhn2.8h v0, {{v1, v2|v2, v1}}
   return vaddhn_high_s32(lowpart, lhs, rhs);
 }
 
 int32x4_t test_vaddhn_high_s64(int32x2_t lowpart, int64x2_t lhs, int64x2_t rhs) {
-  // CHECK: addhn2.4s v0, v1, v2
+  // CHECK: addhn2.4s v0, {{v1, v2|v2, v1}}
   return vaddhn_high_s64(lowpart, lhs, rhs);
 }
 
 uint8x16_t test_vaddhn_high_u16(uint8x8_t lowpart, uint16x8_t lhs, uint16x8_t rhs) {
-  // CHECK: addhn2.16b v0, v1, v2
+  // CHECK: addhn2.16b v0, {{v1, v2|v2, v1}}
   return vaddhn_high_s16(lowpart, lhs, rhs);
 }
 
 uint16x8_t test_vaddhn_high_u32(uint16x4_t lowpart, uint32x4_t lhs, uint32x4_t rhs) {
-  // CHECK: addhn2.8h v0, v1, v2
+  // CHECK: addhn2.8h v0, {{v1, v2|v2, v1}}
   return vaddhn_high_s32(lowpart, lhs, rhs);
 }
 
 uint32x4_t test_vaddhn_high_u64(uint32x2_t lowpart, uint64x2_t lhs, uint64x2_t rhs) {
-  // CHECK: addhn2.4s v0, v1, v2
+  // CHECK: addhn2.4s v0, {{v1, v2|v2, v1}}
   return vaddhn_high_s64(lowpart, lhs, rhs);
 }
 
diff --git a/test/CodeGen/arm64_vset_lane.c b/test/CodeGen/arm64_vset_lane.c
index 6fbaaa7..0508123 100644
--- a/test/CodeGen/arm64_vset_lane.c
+++ b/test/CodeGen/arm64_vset_lane.c
@@ -6,13 +6,15 @@
 float16x4_t test_vset_lane_f16(float16_t *a1, float16x4_t a2) {
   // CHECK-LABEL: test_vset_lane_f16
   return vset_lane_f16(*a1, a2, 1);
-  // CHECK insertelement <4 x i16> %a2, i16 %a1, i32 1
+  // CHECK: [[A1:%[0-9]+]] = load i16, i16* %a1
+  // CHECK: insertelement <4 x i16> %a2, i16 [[A1]], i32 1
 }
 
 float16x8_t test_vsetq_lane_f16(float16_t *a1, float16x8_t a2) {
   // CHECK-LABEL: test_vsetq_lane_f16
   return vsetq_lane_f16(*a1, a2, 4);
-  // CHECK insertelement <8 x i16> %a2, i16 %a1, i32 4
+  // CHECK: [[A1:%[0-9]+]] = load i16, i16* %a1
+  // CHECK: insertelement <8 x i16> %a2, i16 [[A1]], i32 4
 }
 
 // problem with scalar_to_vector in backend.  Punt for now
@@ -27,5 +29,5 @@
 float64x2_t test_vsetq_lane_f64(float64_t a1, float64x2_t a2) {
   // CHECK-LABEL: test_vsetq_lane_f64
   return vsetq_lane_f64(a1, a2, 0);
-  // CHECK insertelement <2 x double> %a2, double %a1, i32 0
+  // CHECK: insertelement <2 x double> %a2, double %a1, i32 0
 }
diff --git a/test/CodeGen/arm64_vtst.c b/test/CodeGen/arm64_vtst.c
index f40c62c..9f3ed84 100644
--- a/test/CodeGen/arm64_vtst.c
+++ b/test/CodeGen/arm64_vtst.c
@@ -4,18 +4,18 @@
 #include <arm_neon.h>
 
 uint64x2_t test_vtstq_s64(int64x2_t a1, int64x2_t a2) {
-  // CHECK: test_vtstq_s64
+  // CHECK-LABEL: test_vtstq_s64
   return vtstq_s64(a1, a2);
-  // CHECK: [[COMMONBITS:%[A-Za-z0-9.]+]] = and <2 x i64> %a1, %a2
+  // CHECK: [[COMMONBITS:%[A-Za-z0-9.]+]] = and <2 x i64> {{%a1, %a2|%a2, %a1}}
   // CHECK: [[MASK:%[A-Za-z0-9.]+]] = icmp ne <2 x i64> [[COMMONBITS]], zeroinitializer
   // CHECK: [[RES:%[A-Za-z0-9.]+]] = sext <2 x i1> [[MASK]] to <2 x i64>
   // CHECK: ret <2 x i64> [[RES]]
 }
 
 uint64x2_t test_vtstq_u64(uint64x2_t a1, uint64x2_t a2) {
-  // CHECK: test_vtstq_u64
+  // CHECK-LABEL: test_vtstq_u64
   return vtstq_u64(a1, a2);
-  // CHECK: [[COMMONBITS:%[A-Za-z0-9.]+]] = and <2 x i64> %a1, %a2
+  // CHECK: [[COMMONBITS:%[A-Za-z0-9.]+]] = and <2 x i64> {{%a1, %a2|%a2, %a1}}
   // CHECK: [[MASK:%[A-Za-z0-9.]+]] = icmp ne <2 x i64> [[COMMONBITS]], zeroinitializer
   // CHECK: [[RES:%[A-Za-z0-9.]+]] = sext <2 x i1> [[MASK]] to <2 x i64>
   // CHECK: ret <2 x i64> [[RES]]
diff --git a/test/CodeGen/asm-inout.c b/test/CodeGen/asm-inout.c
index c7d1aec..e5da5c5 100644
--- a/test/CodeGen/asm-inout.c
+++ b/test/CodeGen/asm-inout.c
@@ -12,7 +12,7 @@
 // CHECK: @test2
 void test2() {
   // CHECK: [[REGCALLRESULT:%[a-zA-Z0-9\.]+]] = call i32* @foo()
-  // CHECK: load i32* [[REGCALLRESULT]]
+  // CHECK: load i32, i32* [[REGCALLRESULT]]
   // CHECK: call i32 asm
   // CHECK: store i32 {{%[a-zA-Z0-9\.]+}}, i32* [[REGCALLRESULT]]
   asm ("foobar" : "+r"(*foo()));
diff --git a/test/CodeGen/asm-reg-var-local.c b/test/CodeGen/asm-reg-var-local.c
index 44417d4..56dcab4 100644
--- a/test/CodeGen/asm-reg-var-local.c
+++ b/test/CodeGen/asm-reg-var-local.c
@@ -16,11 +16,11 @@
 // CHECK:  store i32 42, i32* [[A]]
 
   asm volatile("; %0 This asm uses rsi" : : "r"(a));
-// CHECK:  [[TMP:%[a-zA-Z0-9]+]] = load i32* [[A]]
+// CHECK:  [[TMP:%[a-zA-Z0-9]+]] = load i32, i32* [[A]]
 // CHECK:  call void asm sideeffect "; $0 This asm uses rsi", "{rsi},~{dirflag},~{fpsr},~{flags}"(i32 [[TMP]])
 
   return a;
-// CHECK:  [[TMP1:%[a-zA-Z0-9]+]] = load i32* [[A]]
+// CHECK:  [[TMP1:%[a-zA-Z0-9]+]] = load i32, i32* [[A]]
 // CHECK:  ret i32 [[TMP1]]
 }
 
@@ -39,10 +39,10 @@
 // CHECK:  store i32 42, i32* [[A]]
 
   asm volatile("; %0 This asm uses rsi" : : "r"(a));
-// CHECK:  [[TMP:%[a-zA-Z0-9]+]] = load i32* [[A]]
+// CHECK:  [[TMP:%[a-zA-Z0-9]+]] = load i32, i32* [[A]]
 // CHECK:  call void asm sideeffect "; $0 This asm uses rsi", "{rsi},~{dirflag},~{fpsr},~{flags}"(i32 [[TMP]])
 
   return a;
-// CHECK:  [[TMP1:%[a-zA-Z0-9]+]] = load i32* [[A]]
+// CHECK:  [[TMP1:%[a-zA-Z0-9]+]] = load i32, i32* [[A]]
 // CHECK:  ret i32 [[TMP1]]
 }
diff --git a/test/CodeGen/atomic-arm64.c b/test/CodeGen/atomic-arm64.c
index 147b570..98f27ab 100644
--- a/test/CodeGen/atomic-arm64.c
+++ b/test/CodeGen/atomic-arm64.c
@@ -24,7 +24,7 @@
 // CHECK:    define void @test0()
 // CHECK:      [[TEMP:%.*]] = alloca i8, align 1
 // CHECK-NEXT: store i8 1, i8* [[TEMP]]
-// CHECK-NEXT: [[T0:%.*]] = load i8* [[TEMP]], align 1
+// CHECK-NEXT: [[T0:%.*]] = load i8, i8* [[TEMP]], align 1
 // CHECK-NEXT: store atomic i8 [[T0]], i8* @a_bool seq_cst, align 1
 void test0() {
   __c11_atomic_store(&a_bool, 1, memory_order_seq_cst);
@@ -34,7 +34,7 @@
 // CHECK:      [[TEMP:%.*]] = alloca float, align 4
 // CHECK-NEXT: store float 3.000000e+00, float* [[TEMP]]
 // CHECK-NEXT: [[T0:%.*]] = bitcast float* [[TEMP]] to i32*
-// CHECK-NEXT: [[T1:%.*]] = load i32* [[T0]], align 4
+// CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[T0]], align 4
 // CHECK-NEXT: store atomic i32 [[T1]], i32* bitcast (float* @a_float to i32*) seq_cst, align 4
 void test1() {
   __c11_atomic_store(&a_float, 3, memory_order_seq_cst);
@@ -44,7 +44,7 @@
 // CHECK:      [[TEMP:%.*]] = alloca i8*, align 8
 // CHECK-NEXT: store i8* @a_bool, i8** [[TEMP]]
 // CHECK-NEXT: [[T0:%.*]] = bitcast i8** [[TEMP]] to i64*
-// CHECK-NEXT: [[T1:%.*]] = load i64* [[T0]], align 8
+// CHECK-NEXT: [[T1:%.*]] = load i64, i64* [[T0]], align 8
 // CHECK-NEXT: store atomic i64 [[T1]], i64* bitcast (i8** @a_pointer to i64*) seq_cst, align 8
 void test2() {
   __c11_atomic_store(&a_pointer, &a_bool, memory_order_seq_cst);
@@ -55,7 +55,7 @@
 // CHECK-NEXT: [[TEMP:%.*]] = alloca [[PAIR_T]], align 8
 // CHECK:      llvm.memcpy
 // CHECK-NEXT: [[T0:%.*]] = bitcast [[PAIR_T]]* [[TEMP]] to i128*
-// CHECK-NEXT: [[T1:%.*]] = load i128* [[T0]], align 16
+// CHECK-NEXT: [[T1:%.*]] = load i128, i128* [[T0]], align 16
 // CHECK-NEXT: store atomic i128 [[T1]], i128* bitcast ([[PAIR_T]]* @a_pointer_pair to i128*) seq_cst, align 16
 void test3(pointer_pair_t pair) {
   __c11_atomic_store(&a_pointer_pair, pair, memory_order_seq_cst);
diff --git a/test/CodeGen/atomic-ops.c b/test/CodeGen/atomic-ops.c
index 559b135..733c60e 100644
--- a/test/CodeGen/atomic-ops.c
+++ b/test/CodeGen/atomic-ops.c
@@ -13,13 +13,13 @@
 
 int fi1(_Atomic(int) *i) {
   // CHECK-LABEL: @fi1
-  // CHECK: load atomic i32* {{.*}} seq_cst
+  // CHECK: load atomic i32, i32* {{.*}} seq_cst
   return __c11_atomic_load(i, memory_order_seq_cst);
 }
 
 int fi1a(int *i) {
   // CHECK-LABEL: @fi1a
-  // CHECK: load atomic i32* {{.*}} seq_cst
+  // CHECK: load atomic i32, i32* {{.*}} seq_cst
   int v;
   __atomic_load(i, &v, memory_order_seq_cst);
   return v;
@@ -27,13 +27,13 @@
 
 int fi1b(int *i) {
   // CHECK-LABEL: @fi1b
-  // CHECK: load atomic i32* {{.*}} seq_cst
+  // CHECK: load atomic i32, i32* {{.*}} seq_cst
   return __atomic_load_n(i, memory_order_seq_cst);
 }
 
 int fi1c(atomic_int *i) {
   // CHECK-LABEL: @fi1c
-  // CHECK: load atomic i32* {{.*}} seq_cst
+  // CHECK: load atomic i32, i32* {{.*}} seq_cst
   return atomic_load(i);
 }
 
@@ -148,7 +148,7 @@
 
 float ff1(_Atomic(float) *d) {
   // CHECK-LABEL: @ff1
-  // CHECK: load atomic i32* {{.*}} monotonic
+  // CHECK: load atomic i32, i32* {{.*}} monotonic
   return __c11_atomic_load(d, memory_order_relaxed);
 }
 
@@ -184,11 +184,11 @@
   // CHECK-NEXT: [[B_ADDR:%.*]] = alloca %struct.S*, align 4
   // CHECK-NEXT: store %struct.S* %a, %struct.S** [[A_ADDR]], align 4
   // CHECK-NEXT: store %struct.S* %b, %struct.S** [[B_ADDR]], align 4
-  // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load %struct.S** [[A_ADDR]], align 4
-  // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load %struct.S** [[B_ADDR]], align 4
+  // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load %struct.S*, %struct.S** [[A_ADDR]], align 4
+  // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load %struct.S*, %struct.S** [[B_ADDR]], align 4
   // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i8*
   // CHECK-NEXT: [[COERCED_B:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i64*
-  // CHECK-NEXT: [[LOAD_B:%.*]] = load i64* [[COERCED_B]], align 4
+  // CHECK-NEXT: [[LOAD_B:%.*]] = load i64, i64* [[COERCED_B]], align 4
   // CHECK-NEXT: call void @__atomic_store_8(i8* [[COERCED_A]], i64 [[LOAD_B]],
   // CHECK-NEXT: ret void
   __atomic_store(a, b, memory_order_seq_cst);
@@ -202,12 +202,12 @@
   // CHECK-NEXT: store %struct.S* %a, %struct.S** [[A_ADDR]], align 4
   // CHECK-NEXT: store %struct.S* %b, %struct.S** [[B_ADDR]], align 4
   // CHECK-NEXT: store %struct.S* %c, %struct.S** [[C_ADDR]], align 4
-  // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load %struct.S** [[A_ADDR]], align 4
-  // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load %struct.S** [[B_ADDR]], align 4
-  // CHECK-NEXT: [[LOAD_C_PTR:%.*]] = load %struct.S** [[C_ADDR]], align 4
+  // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load %struct.S*, %struct.S** [[A_ADDR]], align 4
+  // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load %struct.S*, %struct.S** [[B_ADDR]], align 4
+  // CHECK-NEXT: [[LOAD_C_PTR:%.*]] = load %struct.S*, %struct.S** [[C_ADDR]], align 4
   // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i8*
   // CHECK-NEXT: [[COERCED_B:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i64*
-  // CHECK-NEXT: [[LOAD_B:%.*]] = load i64* [[COERCED_B]], align 4
+  // CHECK-NEXT: [[LOAD_B:%.*]] = load i64, i64* [[COERCED_B]], align 4
   // CHECK-NEXT: [[CALL:%.*]] = call i64 @__atomic_exchange_8(i8* [[COERCED_A]], i64 [[LOAD_B]],
   // CHECK-NEXT: [[COERCED_C:%.*]] = bitcast %struct.S* [[LOAD_C_PTR]] to i64*
   // CHECK-NEXT: store i64 [[CALL]], i64* [[COERCED_C]], align 4
@@ -223,13 +223,13 @@
   // CHECK:      store %struct.S* %a, %struct.S** [[A_ADDR]], align 4
   // CHECK-NEXT: store %struct.S* %b, %struct.S** [[B_ADDR]], align 4
   // CHECK-NEXT: store %struct.S* %c, %struct.S** [[C_ADDR]], align 4
-  // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load %struct.S** [[A_ADDR]], align 4
-  // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load %struct.S** [[B_ADDR]], align 4
-  // CHECK-NEXT: [[LOAD_C_PTR:%.*]] = load %struct.S** [[C_ADDR]], align 4
+  // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load %struct.S*, %struct.S** [[A_ADDR]], align 4
+  // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load %struct.S*, %struct.S** [[B_ADDR]], align 4
+  // CHECK-NEXT: [[LOAD_C_PTR:%.*]] = load %struct.S*, %struct.S** [[C_ADDR]], align 4
   // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i8*
   // CHECK-NEXT: [[COERCED_B:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i8*
   // CHECK-NEXT: [[COERCED_C:%.*]] = bitcast %struct.S* [[LOAD_C_PTR]] to i64*
-  // CHECK-NEXT: [[LOAD_C:%.*]] = load i64* [[COERCED_C]], align 4
+  // CHECK-NEXT: [[LOAD_C:%.*]] = load i64, i64* [[COERCED_C]], align 4
   // CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange_8(i8* [[COERCED_A]], i8* [[COERCED_B]], i64 [[LOAD_C]]
   // CHECK-NEXT: ret i1 [[CALL]]
   return __atomic_compare_exchange(a, b, c, 1, 5, 5);
@@ -237,7 +237,7 @@
 
 int* fp1(_Atomic(int*) *p) {
   // CHECK-LABEL: @fp1
-  // CHECK: load atomic i32* {{.*}} seq_cst
+  // CHECK: load atomic i32, i32* {{.*}} seq_cst
   return __c11_atomic_load(p, memory_order_seq_cst);
 }
 
@@ -388,7 +388,7 @@
   // CHECK: %[[call1:.*]] = call zeroext i1 @__atomic_compare_exchange(i32 3, {{.*}} @smallThing{{.*}} @thing1{{.*}} @thing2
   // CHECK: %[[zext1:.*]] = zext i1 %[[call1]] to i8
   // CHECK: store i8 %[[zext1]], i8* %[[x_mem]], align 1
-  // CHECK: %[[x:.*]] = load i8* %[[x_mem]]
+  // CHECK: %[[x:.*]] = load i8, i8* %[[x_mem]]
   // CHECK: %[[x_bool:.*]] = trunc i8 %[[x]] to i1
   // CHECK: %[[conv1:.*]] = zext i1 %[[x_bool]] to i32
 
@@ -558,11 +558,11 @@
   // CHECK: %[[atomictmp:.*]] = alloca i32, align 4
   // CHECK: %[[atomicdst:.*]] = alloca i32, align 4
   // CHECK: store i32 1, i32* %[[atomictmp]]
-  // CHECK: %[[one:.*]] = load i32* %[[atomictmp]], align 4
+  // CHECK: %[[one:.*]] = load i32, i32* %[[atomictmp]], align 4
   // CHECK: %[[old:.*]] = atomicrmw or i32 addrspace(257)* inttoptr (i32 776 to i32 addrspace(257)*), i32 %[[one]] monotonic
   // CHECK: %[[new:.*]] = or i32 %[[old]], %[[one]]
   // CHECK: store i32 %[[new]], i32* %[[atomicdst]], align 4
-  // CHECK: %[[ret:.*]] = load i32* %[[atomicdst]], align 4
+  // CHECK: %[[ret:.*]] = load i32, i32* %[[atomicdst]], align 4
   // CHECK: ret i32 %[[ret]]
 }
 
@@ -571,10 +571,10 @@
   // CHECK:      %[[i_addr:.*]] = alloca i32
   // CHECK-NEXT: %[[atomicdst:.*]] = alloca i32
   // CHECK-NEXT: store i32* %i, i32** %[[i_addr]]
-  // CHECK-NEXT: %[[addr:.*]] = load i32** %[[i_addr]]
-  // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32* %[[addr]] seq_cst
+  // CHECK-NEXT: %[[addr:.*]] = load i32*, i32** %[[i_addr]]
+  // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, i32* %[[addr]] seq_cst
   // CHECK-NEXT: store i32 %[[res]], i32* %[[atomicdst]]
-  // CHECK-NEXT: %[[retval:.*]] = load i32* %[[atomicdst]]
+  // CHECK-NEXT: %[[retval:.*]] = load i32, i32* %[[atomicdst]]
   // CHECK-NEXT: ret i32 %[[retval]]
   return __c11_atomic_load(i, memory_order_seq_cst);
 }
@@ -587,14 +587,14 @@
   // CHECK-NEXT: %[[atomicdst:.*]] = alloca i32
   // CHECK-NEXT: store i32* %i, i32** %[[i_addr]]
   // CHECK-NEXT: store i32 %value, i32* %[[value_addr]]
-  // CHECK-NEXT: %[[i_lval:.*]] = load i32** %[[i_addr]]
-  // CHECK-NEXT: %[[value:.*]] = load i32* %[[value_addr]]
+  // CHECK-NEXT: %[[i_lval:.*]] = load i32*, i32** %[[i_addr]]
+  // CHECK-NEXT: %[[value:.*]] = load i32, i32* %[[value_addr]]
   // CHECK-NEXT: store i32 %[[value]], i32* %[[atomictmp]]
-  // CHECK-NEXT: %[[value_lval:.*]] = load i32* %[[atomictmp]]
+  // CHECK-NEXT: %[[value_lval:.*]] = load i32, i32* %[[atomictmp]]
   // CHECK-NEXT: %[[old_val:.*]] = atomicrmw volatile add i32* %[[i_lval]], i32 %[[value_lval]] seq_cst
   // CHECK-NEXT: %[[new_val:.*]] = add i32 %[[old_val]], %[[value_lval]]
   // CHECK-NEXT: store i32 %[[new_val]], i32* %[[atomicdst]]
-  // CHECK-NEXT: %[[retval:.*]] = load i32* %[[atomicdst]]
+  // CHECK-NEXT: %[[retval:.*]] = load i32, i32* %[[atomicdst]]
   // CHECK-NEXT: ret i32 %[[retval]]
   return __atomic_add_fetch(i, value, memory_order_seq_cst);
 }
diff --git a/test/CodeGen/atomic_ops.c b/test/CodeGen/atomic_ops.c
index 050b543..980ecd2 100644
--- a/test/CodeGen/atomic_ops.c
+++ b/test/CodeGen/atomic_ops.c
@@ -1,3 +1,4 @@
+// XFAIL: hexagon
 // RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -triple mips-linux-gnu -emit-llvm %s -o - | FileCheck %s
 
@@ -22,7 +23,7 @@
 
 _Bool bar() {
 // CHECK-LABEL: @bar
-// CHECK: %[[load:.*]] = load atomic i8* @b seq_cst
+// CHECK: %[[load:.*]] = load atomic i8, i8* @b seq_cst
 // CHECK: %[[tobool:.*]] = trunc i8 %[[load]] to i1
 // CHECK: ret i1 %[[tobool]]
   return b;
diff --git a/test/CodeGen/atomics-inlining.c b/test/CodeGen/atomics-inlining.c
index 9cd2802..9fdad4f 100644
--- a/test/CodeGen/atomics-inlining.c
+++ b/test/CodeGen/atomics-inlining.c
@@ -40,54 +40,54 @@
 // ARM: call{{.*}} void @__atomic_store_4(i8* bitcast (i32* @i1 to i8*), i32
 // ARM: = call{{.*}} i64 @__atomic_load_8(i8* bitcast (i64* @ll1 to i8*)
 // ARM: call{{.*}} void @__atomic_store_8(i8* bitcast (i64* @ll1 to i8*), i64
-// ARM: call{{.*}} void @__atomic_load(i32 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0)
-// ARM: call{{.*}} void @__atomic_store(i32 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0)
+// ARM: call{{.*}} void @__atomic_load(i32 100, i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a2, i32 0, i32 0)
+// ARM: call{{.*}} void @__atomic_store(i32 100, i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a2, i32 0, i32 0)
 
 // PPC32-LABEL: define void @test1
-// PPC32: = load atomic i8* @c1 seq_cst
+// PPC32: = load atomic i8, i8* @c1 seq_cst
 // PPC32: store atomic i8 {{.*}}, i8* @c1 seq_cst
-// PPC32: = load atomic i16* @s1 seq_cst
+// PPC32: = load atomic i16, i16* @s1 seq_cst
 // PPC32: store atomic i16 {{.*}}, i16* @s1 seq_cst
-// PPC32: = load atomic i32* @i1 seq_cst
+// PPC32: = load atomic i32, i32* @i1 seq_cst
 // PPC32: store atomic i32 {{.*}}, i32* @i1 seq_cst
 // PPC32: = call i64 @__atomic_load_8(i8* bitcast (i64* @ll1 to i8*)
 // PPC32: call void @__atomic_store_8(i8* bitcast (i64* @ll1 to i8*), i64
-// PPC32: call void @__atomic_load(i32 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0)
-// PPC32: call void @__atomic_store(i32 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0)
+// PPC32: call void @__atomic_load(i32 100, i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a2, i32 0, i32 0)
+// PPC32: call void @__atomic_store(i32 100, i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a2, i32 0, i32 0)
 
 // PPC64-LABEL: define void @test1
-// PPC64: = load atomic i8* @c1 seq_cst
+// PPC64: = load atomic i8, i8* @c1 seq_cst
 // PPC64: store atomic i8 {{.*}}, i8* @c1 seq_cst
-// PPC64: = load atomic i16* @s1 seq_cst
+// PPC64: = load atomic i16, i16* @s1 seq_cst
 // PPC64: store atomic i16 {{.*}}, i16* @s1 seq_cst
-// PPC64: = load atomic i32* @i1 seq_cst
+// PPC64: = load atomic i32, i32* @i1 seq_cst
 // PPC64: store atomic i32 {{.*}}, i32* @i1 seq_cst
-// PPC64: = load atomic i64* @ll1 seq_cst
+// PPC64: = load atomic i64, i64* @ll1 seq_cst
 // PPC64: store atomic i64 {{.*}}, i64* @ll1 seq_cst
-// PPC64: call void @__atomic_load(i64 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0)
-// PPC64: call void @__atomic_store(i64 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0)
+// PPC64: call void @__atomic_load(i64 100, i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a2, i32 0, i32 0)
+// PPC64: call void @__atomic_store(i64 100, i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a2, i32 0, i32 0)
 
 // MIPS32-LABEL: define void @test1
-// MIPS32: = load atomic i8* @c1 seq_cst
+// MIPS32: = load atomic i8, i8* @c1 seq_cst
 // MIPS32: store atomic i8 {{.*}}, i8* @c1 seq_cst
-// MIPS32: = load atomic i16* @s1 seq_cst
+// MIPS32: = load atomic i16, i16* @s1 seq_cst
 // MIPS32: store atomic i16 {{.*}}, i16* @s1 seq_cst
-// MIPS32: = load atomic i32* @i1 seq_cst
+// MIPS32: = load atomic i32, i32* @i1 seq_cst
 // MIPS32: store atomic i32 {{.*}}, i32* @i1 seq_cst
 // MIPS32: call i64 @__atomic_load_8(i8* bitcast (i64* @ll1 to i8*)
 // MIPS32: call void @__atomic_store_8(i8* bitcast (i64* @ll1 to i8*), i64
-// MIPS32: call void @__atomic_load(i32 zeroext 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0)
-// MIPS32: call void @__atomic_store(i32 zeroext 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0)
+// MIPS32: call void @__atomic_load(i32 zeroext 100, i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a2, i32 0, i32 0)
+// MIPS32: call void @__atomic_store(i32 zeroext 100, i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a2, i32 0, i32 0)
 
 // MIPS64-LABEL: define void @test1
-// MIPS64: = load atomic i8* @c1 seq_cst
+// MIPS64: = load atomic i8, i8* @c1 seq_cst
 // MIPS64: store atomic i8 {{.*}}, i8* @c1 seq_cst
-// MIPS64: = load atomic i16* @s1 seq_cst
+// MIPS64: = load atomic i16, i16* @s1 seq_cst
 // MIPS64: store atomic i16 {{.*}}, i16* @s1 seq_cst
-// MIPS64: = load atomic i32* @i1 seq_cst
+// MIPS64: = load atomic i32, i32* @i1 seq_cst
 // MIPS64: store atomic i32 {{.*}}, i32* @i1 seq_cst
-// MIPS64: = load atomic i64* @ll1 seq_cst
+// MIPS64: = load atomic i64, i64* @ll1 seq_cst
 // MIPS64: store atomic i64 {{.*}}, i64* @ll1 seq_cst
-// MIPS64: call void @__atomic_load(i64 zeroext 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0)
-// MIPS64: call void @__atomic_store(i64 zeroext 100, i8* getelementptr inbounds ([100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8]* @a2, i32 0, i32 0)
+// MIPS64: call void @__atomic_load(i64 zeroext 100, i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a1, i32 0, i32 0)
+// MIPS64: call void @__atomic_store(i64 zeroext 100, i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a1, i32 0, i32 0), i8* getelementptr inbounds ([100 x i8], [100 x i8]* @a2, i32 0, i32 0)
 }
diff --git a/test/CodeGen/attributes.c b/test/CodeGen/attributes.c
index 5c9c90d..4da3eca 100644
--- a/test/CodeGen/attributes.c
+++ b/test/CodeGen/attributes.c
@@ -79,7 +79,7 @@
 void t21(void) {
   fptr(10);
 }
-// CHECK: [[FPTRVAR:%[a-z0-9]+]] = load void (i32)** @fptr
+// CHECK: [[FPTRVAR:%[a-z0-9]+]] = load void (i32)*, void (i32)** @fptr
 // CHECK-NEXT: call x86_fastcallcc void [[FPTRVAR]](i32 inreg 10)
 
 
diff --git a/test/CodeGen/avx-builtins.c b/test/CodeGen/avx-builtins.c
index 28e11ba..99d0633 100644
--- a/test/CodeGen/avx-builtins.c
+++ b/test/CodeGen/avx-builtins.c
@@ -10,17 +10,17 @@
 //
 
 __m256 test__mm256_loadu_ps(void* p) {
-  // CHECK: load <8 x float>* %{{.*}}, align 1
+  // CHECK: load <8 x float>, <8 x float>* %{{.*}}, align 1
   return _mm256_loadu_ps(p);
 }
 
 __m256d test__mm256_loadu_pd(void* p) {
-  // CHECK: load <4 x double>* %{{.*}}, align 1
+  // CHECK: load <4 x double>, <4 x double>* %{{.*}}, align 1
   return _mm256_loadu_pd(p);
 }
 
 __m256i test__mm256_loadu_si256(void* p) {
-  // CHECK: load <4 x i64>* %{{.+}}, align 1
+  // CHECK: load <4 x i64>, <4 x i64>* %{{.+}}, align 1
   return _mm256_loadu_si256(p);
 }
 
diff --git a/test/CodeGen/avx-shuffle-builtins.c b/test/CodeGen/avx-shuffle-builtins.c
index 76e2395..966a87a 100644
--- a/test/CodeGen/avx-shuffle-builtins.c
+++ b/test/CodeGen/avx-shuffle-builtins.c
@@ -48,19 +48,19 @@
 
 __m256d test_mm256_permute2f128_pd(__m256d a, __m256d b) {
   // Check if the mask is correct
-  // CHECK: @llvm.x86.avx.vperm2f128.pd.256
+  // CHECK: shufflevector{{.*}}<i32 2, i32 3, i32 6, i32 7> 
   return _mm256_permute2f128_pd(a, b, 0x31);
 }
 
 __m256 test_mm256_permute2f128_ps(__m256 a, __m256 b) {
   // Check if the mask is correct
-  // CHECK: @llvm.x86.avx.vperm2f128.ps.256
+  // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
   return _mm256_permute2f128_ps(a, b, 0x13);
 }
 
 __m256i test_mm256_permute2f128_si256(__m256i a, __m256i b) {
   // Check if the mask is correct
-  // CHECK: @llvm.x86.avx.vperm2f128.si.256
+  // CHECK: shufflevector{{.*}} <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
   return _mm256_permute2f128_si256(a, b, 0x20);
 }
 
@@ -97,3 +97,80 @@
   // CHECK: insertelement <8 x float> {{.*}}, i32 7
   return _mm256_broadcast_ss(__a);
 }
+
+// Make sure we have the correct mask for each insertf128 case.
+
+__m256 test_mm256_insertf128_ps_0(__m256 a, __m128 b) {
+  // CHECK-LABEL: @test_mm256_insertf128_ps_0
+  // CHECK: shufflevector{{.*}}<i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+  return _mm256_insertf128_ps(a, b, 0);
+}
+
+__m256d test_mm256_insertf128_pd_0(__m256d a, __m128d b) {
+  // CHECK-LABEL: @test_mm256_insertf128_pd_0
+  // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 2, i32 3>
+  return _mm256_insertf128_pd(a, b, 0);
+}
+
+__m256i test_mm256_insertf128_si256_0(__m256i a, __m128i b) {
+  // CHECK-LABEL: @test_mm256_insertf128_si256_0
+  // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 2, i32 3>
+  return _mm256_insertf128_si256(a, b, 0);
+}
+
+__m256 test_mm256_insertf128_ps_1(__m256 a, __m128 b) {
+  // CHECK-LABEL: @test_mm256_insertf128_ps_1
+  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+  return _mm256_insertf128_ps(a, b, 1);
+}
+
+__m256d test_mm256_insertf128_pd_1(__m256d a, __m128d b) {
+  // CHECK-LABEL: @test_mm256_insertf128_pd_1
+  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 4, i32 5>
+  return _mm256_insertf128_pd(a, b, 1);
+}
+
+__m256i test_mm256_insertf128_si256_1(__m256i a, __m128i b) {
+  // CHECK-LABEL: @test_mm256_insertf128_si256_1
+  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 4, i32 5>
+  return _mm256_insertf128_si256(a, b, 1);
+}
+
+// Make sure we have the correct mask for each extractf128 case.
+
+__m128 test_mm256_extractf128_ps_0(__m256 a) {
+  // CHECK-LABEL: @test_mm256_extractf128_ps_0
+  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3>
+  return _mm256_extractf128_ps(a, 0);
+}
+
+__m128d test_mm256_extractf128_pd_0(__m256d a) {
+  // CHECK-LABEL: @test_mm256_extractf128_pd_0
+  // CHECK: shufflevector{{.*}}<i32 0, i32 1>
+  return _mm256_extractf128_pd(a, 0);
+}
+
+__m128i test_mm256_extractf128_si256_0(__m256i a) {
+  // CHECK-LABEL: @test_mm256_extractf128_si256_0
+  // CHECK: shufflevector{{.*}}<i32 0, i32 1>
+  return _mm256_extractf128_si256(a, 0);
+}
+
+__m128 test_mm256_extractf128_ps_1(__m256 a) {
+  // CHECK-LABEL: @test_mm256_extractf128_ps_1
+  // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 6, i32 7>
+  return _mm256_extractf128_ps(a, 1);
+}
+
+__m128d test_mm256_extractf128_pd_1(__m256d a) {
+  // CHECK-LABEL: @test_mm256_extractf128_pd_1
+  // CHECK: shufflevector{{.*}}<i32 2, i32 3>
+  return _mm256_extractf128_pd(a, 1);
+}
+
+__m128i test_mm256_extractf128_si256_1(__m256i a) {
+  // CHECK-LABEL: @test_mm256_extractf128_si256_1
+  // CHECK: shufflevector{{.*}}<i32 2, i32 3>
+  return _mm256_extractf128_si256(a, 1);
+}
+
diff --git a/test/CodeGen/avx2-builtins.c b/test/CodeGen/avx2-builtins.c
index 27ee91e..fa5a27c 100644
--- a/test/CodeGen/avx2-builtins.c
+++ b/test/CodeGen/avx2-builtins.c
@@ -612,7 +612,7 @@
 }
 
 __m256i test_mm256_broadcastsi128_si256(__m128i a) {
-  // CHECK: @llvm.x86.avx2.vbroadcasti128
+  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
   return _mm256_broadcastsi128_si256(a);
 }
 
@@ -695,16 +695,44 @@
   return _mm256_permute2x128_si256(a, b, 0x31);
 }
 
-__m128i test_mm256_extracti128_si256(__m256i a) {
-  // CHECK: @llvm.x86.avx2.vextracti128
+__m128i test_mm256_extracti128_si256_0(__m256i a) {
+  // CHECK-LABEL: @test_mm256_extracti128_si256_0
+  // CHECK: shufflevector{{.*}}<i32 0, i32 1>
+  return _mm256_extracti128_si256(a, 0);
+}
+
+__m128i test_mm256_extracti128_si256_1(__m256i a) {
+  // CHECK-LABEL: @test_mm256_extracti128_si256_1
+  // CHECK: shufflevector{{.*}}<i32 2, i32 3>
   return _mm256_extracti128_si256(a, 1);
 }
 
-__m256i test_mm256_inserti128_si256(__m256i a, __m128i b) {
-  // CHECK: @llvm.x86.avx2.vinserti128
+// Immediate should be truncated to one bit.
+__m128i test_mm256_extracti128_si256_2(__m256i a) {
+  // CHECK-LABEL: @test_mm256_extracti128_si256_2
+  // CHECK: shufflevector{{.*}}<i32 0, i32 1>
+  return _mm256_extracti128_si256(a, 2);
+}
+
+__m256i test_mm256_inserti128_si256_0(__m256i a, __m128i b) {
+  // CHECK-LABEL: @test_mm256_inserti128_si256_0
+  // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 2, i32 3>
+  return _mm256_inserti128_si256(a, b, 0);
+}
+
+__m256i test_mm256_inserti128_si256_1(__m256i a, __m128i b) {
+  // CHECK-LABEL: @test_mm256_inserti128_si256_1
+  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 4, i32 5>
   return _mm256_inserti128_si256(a, b, 1);
 }
 
+// Immediate should be truncated to one bit.
+__m256i test_mm256_inserti128_si256_2(__m256i a, __m128i b) {
+  // CHECK-LABEL: @test_mm256_inserti128_si256_2
+  // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 2, i32 3>
+  return _mm256_inserti128_si256(a, b, 2);
+}
+
 __m256i test_mm256_maskload_epi32(int const *a, __m256i m) {
   // CHECK: @llvm.x86.avx2.maskload.d.256
   return _mm256_maskload_epi32(a, m);
diff --git a/test/CodeGen/avx512f-builtins.c b/test/CodeGen/avx512f-builtins.c
index f7f7df5..89f8a53 100644
--- a/test/CodeGen/avx512f-builtins.c
+++ b/test/CodeGen/avx512f-builtins.c
@@ -106,14 +106,14 @@
 __m512 test_mm512_loadu_ps(void *p)
 {
   // CHECK-LABEL: @test_mm512_loadu_ps
-  // CHECK: load <16 x float>* {{.*}}, align 1{{$}}
+  // CHECK: load <16 x float>, <16 x float>* {{.*}}, align 1{{$}}
   return _mm512_loadu_ps(p);
 }
 
 __m512d test_mm512_loadu_pd(void *p)
 {
   // CHECK-LABEL: @test_mm512_loadu_pd
-  // CHECK: load <8 x double>* {{.*}}, align 1{{$}}
+  // CHECK: load <8 x double>, <8 x double>* {{.*}}, align 1{{$}}
   return _mm512_loadu_pd(p);
 }
 
diff --git a/test/CodeGen/big-atomic-ops.c b/test/CodeGen/big-atomic-ops.c
index 7409661..28b7b5d 100644
--- a/test/CodeGen/big-atomic-ops.c
+++ b/test/CodeGen/big-atomic-ops.c
@@ -16,13 +16,13 @@
 
 int fi1(_Atomic(int) *i) {
   // CHECK: @fi1
-  // CHECK: load atomic i32* {{.*}} seq_cst
+  // CHECK: load atomic i32, i32* {{.*}} seq_cst
   return __c11_atomic_load(i, memory_order_seq_cst);
 }
 
 int fi1a(int *i) {
   // CHECK: @fi1a
-  // CHECK: load atomic i32* {{.*}} seq_cst
+  // CHECK: load atomic i32, i32* {{.*}} seq_cst
   int v;
   __atomic_load(i, &v, memory_order_seq_cst);
   return v;
@@ -30,7 +30,7 @@
 
 int fi1b(int *i) {
   // CHECK: @fi1b
-  // CHECK: load atomic i32* {{.*}} seq_cst
+  // CHECK: load atomic i32, i32* {{.*}} seq_cst
   return __atomic_load_n(i, memory_order_seq_cst);
 }
 
@@ -113,7 +113,7 @@
 
 float ff1(_Atomic(float) *d) {
   // CHECK: @ff1
-  // CHECK: load atomic i32* {{.*}} monotonic
+  // CHECK: load atomic i32, i32* {{.*}} monotonic
   return __c11_atomic_load(d, memory_order_relaxed);
 }
 
@@ -129,7 +129,7 @@
 
 int* fp1(_Atomic(int*) *p) {
   // CHECK: @fp1
-  // CHECK: load atomic i64* {{.*}} seq_cst
+  // CHECK: load atomic i64, i64* {{.*}} seq_cst
   return __c11_atomic_load(p, memory_order_seq_cst);
 }
 
diff --git a/test/CodeGen/block-byref-aggr.c b/test/CodeGen/block-byref-aggr.c
index eed0239..910f6da 100644
--- a/test/CodeGen/block-byref-aggr.c
+++ b/test/CodeGen/block-byref-aggr.c
@@ -16,12 +16,12 @@
 // CHECK:      [[A:%.*]] = alloca [[BYREF:%.*]], align 8
 // CHECK-NEXT: [[TEMP:%.*]] = alloca [[AGG]], align 4
 // CHECK:      [[RESULT:%.*]] = call i32 @makeAgg()
-// CHECK-NEXT: [[T0:%.*]] = getelementptr [[AGG]]* [[TEMP]], i32 0, i32 0
+// CHECK-NEXT: [[T0:%.*]] = getelementptr [[AGG]], [[AGG]]* [[TEMP]], i32 0, i32 0
 // CHECK-NEXT: store i32 [[RESULT]], i32* [[T0]]
 //   Check that we properly assign into the forwarding pointer.
-// CHECK-NEXT: [[A_FORWARDING:%.*]] = getelementptr inbounds [[BYREF]]* [[A]], i32 0, i32 1
-// CHECK-NEXT: [[T0:%.*]] = load [[BYREF]]** [[A_FORWARDING]]
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[BYREF]]* [[T0]], i32 0, i32 4
+// CHECK-NEXT: [[A_FORWARDING:%.*]] = getelementptr inbounds [[BYREF]], [[BYREF]]* [[A]], i32 0, i32 1
+// CHECK-NEXT: [[T0:%.*]] = load [[BYREF]]*, [[BYREF]]** [[A_FORWARDING]]
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[BYREF]], [[BYREF]]* [[T0]], i32 0, i32 4
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[AGG]]* [[T1]] to i8*
 // CHECK-NEXT: [[T3:%.*]] = bitcast [[AGG]]* [[TEMP]] to i8*
 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T2]], i8* [[T3]], i64 4, i32 4, i1 false)
@@ -42,19 +42,19 @@
 // CHECK-NEXT: [[B:%.*]] = alloca [[B_BYREF:%.*]], align 8
 // CHECK-NEXT: [[TEMP:%.*]] = alloca [[AGG]], align 4
 // CHECK:      [[RESULT:%.*]] = call i32 @makeAgg()
-// CHECK-NEXT: [[T0:%.*]] = getelementptr [[AGG]]* [[TEMP]], i32 0, i32 0
+// CHECK-NEXT: [[T0:%.*]] = getelementptr [[AGG]], [[AGG]]* [[TEMP]], i32 0, i32 0
 // CHECK-NEXT: store i32 [[RESULT]], i32* [[T0]]
 //   Check that we properly assign into the forwarding pointer, first for b:
-// CHECK-NEXT: [[B_FORWARDING:%.*]] = getelementptr inbounds [[B_BYREF]]* [[B]], i32 0, i32 1
-// CHECK-NEXT: [[T0:%.*]] = load [[B_BYREF]]** [[B_FORWARDING]]
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[B_BYREF]]* [[T0]], i32 0, i32 4
+// CHECK-NEXT: [[B_FORWARDING:%.*]] = getelementptr inbounds [[B_BYREF]], [[B_BYREF]]* [[B]], i32 0, i32 1
+// CHECK-NEXT: [[T0:%.*]] = load [[B_BYREF]]*, [[B_BYREF]]** [[B_FORWARDING]]
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[B_BYREF]], [[B_BYREF]]* [[T0]], i32 0, i32 4
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[AGG]]* [[T1]] to i8*
 // CHECK-NEXT: [[T3:%.*]] = bitcast [[AGG]]* [[TEMP]] to i8*
 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T2]], i8* [[T3]], i64 4, i32 4, i1 false)
 //   Then for 'a':
-// CHECK-NEXT: [[A_FORWARDING:%.*]] = getelementptr inbounds [[A_BYREF]]* [[A]], i32 0, i32 1
-// CHECK-NEXT: [[T0:%.*]] = load [[A_BYREF]]** [[A_FORWARDING]]
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[A_BYREF]]* [[T0]], i32 0, i32 4
+// CHECK-NEXT: [[A_FORWARDING:%.*]] = getelementptr inbounds [[A_BYREF]], [[A_BYREF]]* [[A]], i32 0, i32 1
+// CHECK-NEXT: [[T0:%.*]] = load [[A_BYREF]]*, [[A_BYREF]]** [[A_FORWARDING]]
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[A_BYREF]], [[A_BYREF]]* [[T0]], i32 0, i32 4
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[AGG]]* [[T1]] to i8*
 // CHECK-NEXT: [[T3:%.*]] = bitcast [[AGG]]* [[TEMP]] to i8*
 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T2]], i8* [[T3]], i64 4, i32 4, i1 false)
diff --git a/test/CodeGen/blocks-opencl.cl b/test/CodeGen/blocks-opencl.cl
index ab80f5e..d356298 100644
--- a/test/CodeGen/blocks-opencl.cl
+++ b/test/CodeGen/blocks-opencl.cl
@@ -6,7 +6,7 @@
 {
 }
 
-// CHECK: i8 addrspace(3)* getelementptr inbounds ([9 x i8] addrspace(3)* @.str, i32 0, i32 0)
+// CHECK: i8 addrspace(3)* getelementptr inbounds ([9 x i8], [9 x i8] addrspace(3)* @.str, i32 0, i32 0)
 
 kernel void test_block()
 {
diff --git a/test/CodeGen/blocks-seq.c b/test/CodeGen/blocks-seq.c
index 8db9e60..a7851d6 100644
--- a/test/CodeGen/blocks-seq.c
+++ b/test/CodeGen/blocks-seq.c
@@ -1,11 +1,11 @@
 // RUN: %clang_cc1 -fblocks -triple x86_64-apple-darwin10 -emit-llvm -o - %s | FileCheck %s
 // CHECK: [[Vi:%.+]] = alloca %struct.__block_byref_i, align 8
 // CHECK: call i32 (...)* @rhs()
-// CHECK: [[V7:%.+]] = getelementptr inbounds %struct.__block_byref_i* [[Vi]], i32 0, i32 1
-// CHECK: load %struct.__block_byref_i** [[V7]]
+// CHECK: [[V7:%.+]] = getelementptr inbounds %struct.__block_byref_i, %struct.__block_byref_i* [[Vi]], i32 0, i32 1
+// CHECK: load %struct.__block_byref_i*, %struct.__block_byref_i** [[V7]]
 // CHECK: call i32 (...)* @rhs()
-// CHECK: [[V11:%.+]] = getelementptr inbounds %struct.__block_byref_i* [[Vi]], i32 0, i32 1
-// CHECK: load %struct.__block_byref_i** [[V11]]
+// CHECK: [[V11:%.+]] = getelementptr inbounds %struct.__block_byref_i, %struct.__block_byref_i* [[Vi]], i32 0, i32 1
+// CHECK: load %struct.__block_byref_i*, %struct.__block_byref_i** [[V11]]
 
 int rhs();
 
diff --git a/test/CodeGen/blocks.c b/test/CodeGen/blocks.c
index 5871e8c..2a81826 100644
--- a/test/CodeGen/blocks.c
+++ b/test/CodeGen/blocks.c
@@ -72,7 +72,7 @@
 int main() {
    (b?: ^{})();
 }
-// CHECK: [[ZERO:%.*]] = load void (...)** @b
+// CHECK: [[ZERO:%.*]] = load void (...)*, void (...)** @b
 // CHECK-NEXT: [[TB:%.*]] = icmp ne void (...)* [[ZERO]], null
 // CHECK-NEXT: br i1 [[TB]], label [[CT:%.*]], label [[CF:%.*]]
 // CHECK: [[ONE:%.*]] = bitcast void (...)* [[ZERO]] to void ()*
diff --git a/test/CodeGen/blocksignature.c b/test/CodeGen/blocksignature.c
index fd586eb..b3d2203 100644
--- a/test/CodeGen/blocksignature.c
+++ b/test/CodeGen/blocksignature.c
@@ -7,10 +7,10 @@
 // X64:   store i32 1073741824, i32*
 
 // X32: [[STR1:@.*]] = private unnamed_addr constant [6 x i8] c"v4@?0\00" 
-// X32: @__block_descriptor_tmp = internal constant [[FULL_DESCRIPTOR_T:.*]] { i32 0, i32 20, i8* getelementptr inbounds ([6 x i8]* [[STR1]], i32 0, i32 0), i8* null }
+// X32: @__block_descriptor_tmp = internal constant [[FULL_DESCRIPTOR_T:.*]] { i32 0, i32 20, i8* getelementptr inbounds ([6 x i8], [6 x i8]* [[STR1]], i32 0, i32 0), i8* null }
 // X32: @__block_literal_global = internal constant [[GLOBAL_LITERAL_T:.*]] { i8** @_NSConcreteGlobalBlock, i32 1342177280, i32 0, i8* bitcast (void (i8*)* @global_block_invoke{{.*}} to i8*), [[DESCRIPTOR_T:%.*]]* bitcast ([[FULL_DESCRIPTOR_T]]* @__block_descriptor_tmp to {{%.*}}*) }
 // X32: [[STR2:@.*]] = private unnamed_addr constant [11 x i8] c"i12@?0c4f8\00"
-// X32: @__block_descriptor_tmp{{.*}} = internal constant [[FULL_DESCRIPTOR_T]] { i32 0, i32 24, i8* getelementptr inbounds ([11 x i8]* [[STR2]], i32 0, i32 0), i8* null }
+// X32: @__block_descriptor_tmp{{.*}} = internal constant [[FULL_DESCRIPTOR_T]] { i32 0, i32 24, i8* getelementptr inbounds ([11 x i8], [11 x i8]* [[STR2]], i32 0, i32 0), i8* null }
 // X32:   store i32 1073741824, i32*
 
 // rdar://7635294
diff --git a/test/CodeGen/bool_test.c b/test/CodeGen/bool_test.c
index cf62dba..b48da37 100644
--- a/test/CodeGen/bool_test.c
+++ b/test/CodeGen/bool_test.c
@@ -9,7 +9,7 @@
 }
 
 // CHECK-LABEL: define void @f(
-// CHECK: [[FROMMEM:%.*]] = load i32* %
+// CHECK: [[FROMMEM:%.*]] = load i32, i32* %
 // CHECK: [[BOOLVAL:%.*]] = trunc i32 [[FROMMEM]] to i1
 // CHECK: [[TOMEM:%.*]] = zext i1 [[BOOLVAL]] to i32
 // CHECK: store i32 [[TOMEM]]
diff --git a/test/CodeGen/builtin-assume.c b/test/CodeGen/builtin-assume.c
index 8411b72..8f83f17 100644
--- a/test/CodeGen/builtin-assume.c
+++ b/test/CodeGen/builtin-assume.c
@@ -4,7 +4,7 @@
 // CHECK-LABEL: @test1
 int test1(int *a, int i) {
 // CHECK: store i32* %a, i32** [[A_ADDR:%.+]], align
-// CHECK: [[A:%.+]] = load i32** [[A_ADDR]]
+// CHECK: [[A:%.+]] = load i32*, i32** [[A_ADDR]]
 // CHECK: [[CMP:%.+]] = icmp ne i32* [[A]], null
 // CHECK: call void @llvm.assume(i1 [[CMP]])
 #ifdef _MSC_VER
@@ -14,7 +14,7 @@
 #endif
 
 // Nothing is generated for an assume with side effects...
-// CHECK-NOT: load i32** %i.addr
+// CHECK-NOT: load i32*, i32** %i.addr
 // CHECK-NOT: call void @llvm.assume
 #ifdef _MSC_VER
   __assume(++i != 0)
diff --git a/test/CodeGen/builtin-longjmp.c b/test/CodeGen/builtin-longjmp.c
deleted file mode 100644
index 75c91b8..0000000
--- a/test/CodeGen/builtin-longjmp.c
+++ /dev/null
@@ -1,36 +0,0 @@
-// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm < %s| FileCheck %s -check-prefix=SUPPORTED
-// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm < %s| FileCheck %s -check-prefix=SUPPORTED
-// RUN: %clang_cc1 -triple powerpc-unknown-unknown -emit-llvm < %s| FileCheck %s -check-prefix=SUPPORTED
-// RUN: %clang_cc1 -triple powerpc64-unknown-unknown -emit-llvm < %s| FileCheck %s -check-prefix=SUPPORTED
-// RUN: %clang_cc1 -triple arm-unknown-unknown -emit-llvm < %s| FileCheck %s -check-prefix=UNSUPPORTED
-// RUN: %clang_cc1 -triple aarch64-unknown-unknown -emit-llvm < %s| FileCheck %s -check-prefix=UNSUPPORTED
-// RUN: %clang_cc1 -triple mips-unknown-unknown -emit-llvm < %s| FileCheck %s -check-prefix=UNSUPPORTED
-// RUN: %clang_cc1 -triple mips64-unknown-unknown -emit-llvm < %s| FileCheck %s -check-prefix=UNSUPPORTED
-
-// Check that __builtin_longjmp and __builtin_setjmp are lowerd into
-// IR intrinsics on those architectures that can handle them.
-// Check that they are lowered to the libcalls on other architectures.
-
-typedef void *jmp_buf;
-jmp_buf buf;
-
-// SUPPORTED:   define{{.*}} void @do_jump()
-// SUPPORTED:   call{{.*}} void @llvm.eh.sjlj.longjmp
-// UNSUPPORTED: define{{.*}} void @do_jump()
-// UNSUPPORTED: call{{.*}} void @longjmp
-
-// SUPPORTED:   define{{.*}} void @do_setjmp()
-// SUPPORTED:   call{{.*}} i32 @llvm.eh.sjlj.setjmp
-// UNSUPPORTED: define{{.*}} void @do_setjmp()
-// UNSUPPORTED: call{{.*}} i32 @setjmp
-
-void do_jump(void) {
-  __builtin_longjmp(buf, 1);
-}
-
-void f(void);
-
-void do_setjmp(void) {
-  if (!__builtin_setjmp(buf))
-    f();
-}
diff --git a/test/CodeGen/builtins-ppc-crypto-diag.c b/test/CodeGen/builtins-ppc-crypto-diag.c
new file mode 100644
index 0000000..4408ab4
--- /dev/null
+++ b/test/CodeGen/builtins-ppc-crypto-diag.c
@@ -0,0 +1,47 @@
+// REQUIRES: powerpc-registered-target
+// RUN: not %clang_cc1 -faltivec -triple powerpc64le-unknown-unknown -D_T1LW -target-feature +crypto -emit-llvm %s -o - 2>&1 | FileCheck %s -check-prefix=CHECK-T1
+// RUN: not %clang_cc1 -faltivec -triple powerpc64le-unknown-unknown -D_T1MW -target-feature +crypto -emit-llvm %s -o - 2>&1 | FileCheck %s -check-prefix=CHECK-T1
+// RUN: not %clang_cc1 -faltivec -triple powerpc64le-unknown-unknown -D_T2LW -target-feature +crypto -emit-llvm %s -o - 2>&1 | FileCheck %s -check-prefix=CHECK-T2
+// RUN: not %clang_cc1 -faltivec -triple powerpc64le-unknown-unknown -D_T2MW -target-feature +crypto -emit-llvm %s -o - 2>&1 | FileCheck %s -check-prefix=CHECK-T2
+// RUN: not %clang_cc1 -faltivec -triple powerpc64le-unknown-unknown -D_T1LD -target-feature +crypto -emit-llvm %s -o - 2>&1 | FileCheck %s -check-prefix=CHECK-T1
+// RUN: not %clang_cc1 -faltivec -triple powerpc64le-unknown-unknown -D_T1MD -target-feature +crypto -emit-llvm %s -o - 2>&1 | FileCheck %s -check-prefix=CHECK-T1
+// RUN: not %clang_cc1 -faltivec -triple powerpc64le-unknown-unknown -D_T2LD -target-feature +crypto -emit-llvm %s -o - 2>&1 | FileCheck %s -check-prefix=CHECK-T2
+// RUN: not %clang_cc1 -faltivec -triple powerpc64le-unknown-unknown -D_T2MD -target-feature +crypto -emit-llvm %s -o - 2>&1 | FileCheck %s -check-prefix=CHECK-T2
+#include <altivec.h>
+
+#define W_INIT { 0x01020304, 0x05060708, \
+                  0x090A0B0C, 0x0D0E0F10 };
+#define D_INIT { 0x0102030405060708, \
+                  0x090A0B0C0D0E0F10 };
+vector unsigned int test_vshasigmaw_or(void)
+{
+  vector unsigned int a = W_INIT
+#ifdef _T1LW // Arg1 too large
+  vector unsigned int b = __builtin_crypto_vshasigmaw(a, 2, 15);
+#elif defined(_T1MW) // Arg1 negative
+  vector unsigned int c = __builtin_crypto_vshasigmaw(a, -1, 15);
+#elif defined(_T2LW) // Arg2 too large
+  vector unsigned int d = __builtin_crypto_vshasigmaw(a, 0, 85);
+#elif defined(_T2MW) // Arg1 negative
+  vector unsigned int e = __builtin_crypto_vshasigmaw(a, 1, -15);
+#endif
+  return __builtin_crypto_vshasigmaw(a, 1, 15);
+}
+
+vector unsigned long long test_vshasigmad_or(void)
+{
+  vector unsigned long long a = D_INIT
+#ifdef _T1LD // Arg1 too large
+  vector unsigned long long b = __builtin_crypto_vshasigmad(a, 2, 15);
+#elif defined(_T1MD) // Arg1 negative
+  vector unsigned long long c = __builtin_crypto_vshasigmad(a, -1, 15);
+#elif defined(_T2LD) // Arg2 too large
+  vector unsigned long long d = __builtin_crypto_vshasigmad(a, 0, 85);
+#elif defined(_T2MD) // Arg1 negative
+  vector unsigned long long e = __builtin_crypto_vshasigmad(a, 1, -15);
+#endif
+  return __builtin_crypto_vshasigmad(a, 0, 15);
+}
+
+// CHECK-T1: error: argument out of range (should be 0-1).
+// CHECK-T2: error: argument out of range (should be 0-15).
diff --git a/test/CodeGen/builtins-ppc-crypto-disabled.c b/test/CodeGen/builtins-ppc-crypto-disabled.c
new file mode 100644
index 0000000..e6a8a93
--- /dev/null
+++ b/test/CodeGen/builtins-ppc-crypto-disabled.c
@@ -0,0 +1,53 @@
+// REQUIRES: powerpc-registered-target
+// RUN: not %clang_cc1 -faltivec -triple powerpc64le-unknown-unknown \
+// RUN: -target-cpu pwr8 -target-feature -crypto -emit-llvm %s -o - 2>&1 \
+// RUN: | FileCheck %s
+
+// RUN: not %clang_cc1 -faltivec -triple powerpc64-unknown-unknown \
+// RUN: -target-cpu pwr8 -target-feature -crypto -emit-llvm %s -o - 2>&1 \
+// RUN: | FileCheck %s
+
+// RUN: not %clang_cc1 -faltivec -triple powerpc64-unknown-unknown \
+// RUN: -target-cpu pwr8 -target-feature -power8-vector \
+// RUN: -target-feature -crypto -emit-llvm %s -o - 2>&1 \
+// RUN: | FileCheck %s -check-prefix=CHECK-P8V
+#include <altivec.h>
+
+#define W_INIT1 { 0x01020304, 0x05060708, \
+                  0x090A0B0C, 0x0D0E0F10 };
+#define D_INIT1 { 0x0102030405060708, \
+                  0x090A0B0C0D0E0F10 };
+#define D_INIT2 { 0x7172737475767778, \
+                  0x797A7B7C7D7E7F70 };
+
+// Test cases for the builtins the way they are exposed to
+// users through altivec.h
+void call_crypto_intrinsics(void)
+{
+  vector unsigned int aw = W_INIT1
+  vector unsigned long long ad = D_INIT1
+  vector unsigned long long bd = D_INIT2
+  vector unsigned long long cd = D_INIT2
+
+  vector unsigned long long r1 = __builtin_crypto_vsbox(ad);
+  vector unsigned long long r2 = __builtin_crypto_vcipher(ad, bd);
+  vector unsigned long long r3 = __builtin_crypto_vcipherlast(ad, bd);
+  vector unsigned long long r4 = __builtin_crypto_vncipher(ad, bd);
+  vector unsigned long long r5 = __builtin_crypto_vncipherlast(ad, bd);
+  vector unsigned int       r6 = __builtin_crypto_vshasigmaw(aw, 1, 15);
+  vector unsigned long long r7 = __builtin_crypto_vshasigmad(ad, 0, 15);
+
+  // The ones that do not require -mcrypto, but require -mpower8-vector
+  vector unsigned long long r8 = __builtin_crypto_vpmsumb(ad, bd);
+  vector unsigned long long r9 = __builtin_crypto_vpermxor(ad, bd, cd);
+}
+
+// CHECK: use of unknown builtin '__builtin_crypto_vsbox'
+// CHECK: use of unknown builtin '__builtin_crypto_vcipher'
+// CHECK: use of unknown builtin '__builtin_crypto_vcipherlast'
+// CHECK: use of unknown builtin '__builtin_crypto_vncipher'
+// CHECK: use of unknown builtin '__builtin_crypto_vncipherlast'
+// CHECK: use of unknown builtin '__builtin_crypto_vshasigmaw'
+// CHECK: use of unknown builtin '__builtin_crypto_vshasigmad'
+// CHECK-P8V: use of unknown builtin '__builtin_crypto_vpmsumb'
+// CHECK-P8V: use of unknown builtin '__builtin_crypto_vpermxor'
diff --git a/test/CodeGen/builtins-ppc-crypto.c b/test/CodeGen/builtins-ppc-crypto.c
new file mode 100644
index 0000000..0ade413
--- /dev/null
+++ b/test/CodeGen/builtins-ppc-crypto.c
@@ -0,0 +1,303 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_cc1 -faltivec -triple powerpc64le-unknown-unknown \
+// RUN: -target-feature +crypto -target-feature +power8-vector \
+// RUN: -emit-llvm %s -o - | FileCheck %s
+
+// RUN: %clang_cc1 -faltivec -triple powerpc64-unknown-unknown \
+// RUN: -target-feature +crypto -target-feature +power8-vector \
+// RUN: -emit-llvm %s -o - | FileCheck %s
+
+// RUN: %clang_cc1 -faltivec -triple powerpc-unknown-unknown \
+// RUN: -target-feature +crypto -target-feature +power8-vector \
+// RUN: -emit-llvm %s -o - | FileCheck %s
+#include <altivec.h>
+#define B_INIT1 { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, \
+                  0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10 };
+#define B_INIT2 { 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, \
+                  0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 0x70 };
+#define H_INIT1 { 0x0102, 0x0304, 0x0506, 0x0708, \
+                  0x090A, 0x0B0C, 0x0D0E, 0x0F10 };
+#define H_INIT2 { 0x7172, 0x7374, 0x7576, 0x7778, \
+                  0x797A, 0x7B7C, 0x7D7E, 0x7F70 };
+#define W_INIT1 { 0x01020304, 0x05060708, \
+                  0x090A0B0C, 0x0D0E0F10 };
+#define W_INIT2 { 0x71727374, 0x75767778, \
+                  0x797A7B7C, 0x7D7E7F70 };
+#define D_INIT1 { 0x0102030405060708, \
+                  0x090A0B0C0D0E0F10 };
+#define D_INIT2 { 0x7172737475767778, \
+                  0x797A7B7C7D7E7F70 };
+
+// CHECK-LABEL: define <16 x i8> @test_vpmsumb
+vector unsigned char test_vpmsumb(void)
+{
+  vector unsigned char a = B_INIT1
+  vector unsigned char b = B_INIT2
+  return __builtin_altivec_crypto_vpmsumb(a, b);
+// CHECK @llvm.ppc.altivec.crypto.vpmsumb
+}
+
+// CHECK-LABEL: define <8 x i16> @test_vpmsumh
+vector unsigned short test_vpmsumh(void)
+{
+  vector unsigned short a = H_INIT1
+  vector unsigned short b = H_INIT2
+  return __builtin_altivec_crypto_vpmsumh(a, b);
+// CHECK @llvm.ppc.altivec.crypto.vpmsumh
+}
+
+// CHECK-LABEL: define <4 x i32> @test_vpmsumw
+vector unsigned int test_vpmsumw(void)
+{
+  vector unsigned int a = W_INIT1
+  vector unsigned int b = W_INIT2
+  return __builtin_altivec_crypto_vpmsumw(a, b);
+// CHECK @llvm.ppc.altivec.crypto.vpmsumw
+}
+
+// CHECK-LABEL: define <2 x i64> @test_vpmsumd
+vector unsigned long long test_vpmsumd(void)
+{
+  vector unsigned long long a = D_INIT1
+  vector unsigned long long b = D_INIT2
+  return __builtin_altivec_crypto_vpmsumd(a, b);
+// CHECK @llvm.ppc.altivec.crypto.vpmsumd
+}
+
+// CHECK-LABEL: define <2 x i64> @test_vsbox
+vector unsigned long long test_vsbox(void)
+{
+  vector unsigned long long a = D_INIT1
+  return __builtin_altivec_crypto_vsbox(a);
+// CHECK: @llvm.ppc.altivec.crypto.vsbox
+}
+
+// CHECK-LABEL: define <16 x i8> @test_vpermxorb
+vector unsigned char test_vpermxorb(void)
+{
+  vector unsigned char a = B_INIT1
+  vector unsigned char b = B_INIT2
+  vector unsigned char c = B_INIT2
+  return __builtin_altivec_crypto_vpermxor(a, b, c);
+// CHECK: @llvm.ppc.altivec.crypto.vpermxor
+}
+
+// CHECK-LABEL: define <8 x i16> @test_vpermxorh
+vector unsigned short test_vpermxorh(void)
+{
+  vector unsigned short a = H_INIT1
+  vector unsigned short b = H_INIT2
+  vector unsigned short c = H_INIT2
+  return __builtin_altivec_crypto_vpermxor(a, b, c);
+// CHECK: @llvm.ppc.altivec.crypto.vpermxor
+}
+
+// CHECK-LABEL: define <4 x i32> @test_vpermxorw
+vector unsigned int test_vpermxorw(void)
+{
+  vector unsigned int a = W_INIT1
+  vector unsigned int b = W_INIT2
+  vector unsigned int c = W_INIT2
+  return __builtin_altivec_crypto_vpermxor(a, b, c);
+// CHECK: @llvm.ppc.altivec.crypto.vpermxor
+}
+
+// CHECK-LABEL: define <2 x i64> @test_vpermxord
+vector unsigned long long test_vpermxord(void)
+{
+  vector unsigned long long a = D_INIT1
+  vector unsigned long long b = D_INIT2
+  vector unsigned long long c = D_INIT2
+  return __builtin_altivec_crypto_vpermxor(a, b, c);
+// CHECK: @llvm.ppc.altivec.crypto.vpermxor
+}
+
+// CHECK-LABEL: define <2 x i64> @test_vcipher
+vector unsigned long long test_vcipher(void)
+{
+  vector unsigned long long a = D_INIT1
+  vector unsigned long long b = D_INIT2
+  return __builtin_altivec_crypto_vcipher(a, b);
+// CHECK: @llvm.ppc.altivec.crypto.vcipher
+}
+
+// CHECK-LABEL: define <2 x i64> @test_vcipherlast
+vector unsigned long long test_vcipherlast(void)
+{
+  vector unsigned long long a = D_INIT1
+  vector unsigned long long b = D_INIT2
+  return __builtin_altivec_crypto_vcipherlast(a, b);
+// CHECK: @llvm.ppc.altivec.crypto.vcipherlast
+}
+
+// CHECK: @llvm.ppc.altivec.crypto.vncipher
+vector unsigned long long test_vncipher(void)
+{
+  vector unsigned long long a = D_INIT1
+  vector unsigned long long b = D_INIT2
+  return __builtin_altivec_crypto_vncipher(a, b);
+// CHECK: @llvm.ppc.altivec.crypto.vncipher
+}
+
+// CHECK-LABEL: define <2 x i64> @test_vncipherlast
+vector unsigned long long test_vncipherlast(void)
+{
+  vector unsigned long long a = D_INIT1
+  vector unsigned long long b = D_INIT2
+  return __builtin_altivec_crypto_vncipherlast(a, b);
+// CHECK: @llvm.ppc.altivec.crypto.vncipherlast
+}
+
+// CHECK-LABEL: define <4 x i32> @test_vshasigmaw
+vector unsigned int test_vshasigmaw(void)
+{
+  vector unsigned int a = W_INIT1
+  return __builtin_altivec_crypto_vshasigmaw(a, 1, 15);
+// CHECK: @llvm.ppc.altivec.crypto.vshasigmaw
+}
+
+// CHECK-LABEL: define <2 x i64> @test_vshasigmad
+vector unsigned long long test_vshasigmad(void)
+{
+  vector unsigned long long a = D_INIT2
+  return __builtin_altivec_crypto_vshasigmad(a, 1, 15);
+// CHECK: @llvm.ppc.altivec.crypto.vshasigmad
+}
+
+// Test cases for the builtins the way they are exposed to
+// users through altivec.h
+// CHECK-LABEL: define <16 x i8> @test_vpmsumb_e
+vector unsigned char test_vpmsumb_e(void)
+{
+  vector unsigned char a = B_INIT1
+  vector unsigned char b = B_INIT2
+  return __builtin_crypto_vpmsumb(a, b);
+// CHECK @llvm.ppc.altivec.crypto.vpmsumb
+}
+
+// CHECK-LABEL: define <8 x i16> @test_vpmsumh_e
+vector unsigned short test_vpmsumh_e(void)
+{
+  vector unsigned short a = H_INIT1
+  vector unsigned short b = H_INIT2
+  return __builtin_crypto_vpmsumb(a, b);
+// CHECK @llvm.ppc.altivec.crypto.vpmsumh
+}
+
+// CHECK-LABEL: define <4 x i32> @test_vpmsumw_e
+vector unsigned int test_vpmsumw_e(void)
+{
+  vector unsigned int a = W_INIT1
+  vector unsigned int b = W_INIT2
+  return __builtin_crypto_vpmsumb(a, b);
+// CHECK @llvm.ppc.altivec.crypto.vpmsumw
+}
+
+// CHECK-LABEL: define <2 x i64> @test_vpmsumd_e
+vector unsigned long long test_vpmsumd_e(void)
+{
+  vector unsigned long long a = D_INIT1
+  vector unsigned long long b = D_INIT2
+  return __builtin_crypto_vpmsumb(a, b);
+// CHECK @llvm.ppc.altivec.crypto.vpmsumd
+}
+
+// CHECK-LABEL: define <2 x i64> @test_vsbox_e
+vector unsigned long long test_vsbox_e(void)
+{
+  vector unsigned long long a = D_INIT1
+  return __builtin_crypto_vsbox(a);
+// CHECK: @llvm.ppc.altivec.crypto.vsbox
+}
+
+// CHECK-LABEL: define <16 x i8> @test_vpermxorb_e
+vector unsigned char test_vpermxorb_e(void)
+{
+  vector unsigned char a = B_INIT1
+  vector unsigned char b = B_INIT2
+  vector unsigned char c = B_INIT2
+  return __builtin_crypto_vpermxor(a, b, c);
+// CHECK: @llvm.ppc.altivec.crypto.vpermxor
+}
+
+// CHECK-LABEL: define <8 x i16> @test_vpermxorh_e
+vector unsigned short test_vpermxorh_e(void)
+{
+  vector unsigned short a = H_INIT1
+  vector unsigned short b = H_INIT2
+  vector unsigned short c = H_INIT2
+  return __builtin_crypto_vpermxor(a, b, c);
+}
+
+// CHECK-LABEL: define <4 x i32> @test_vpermxorw_e
+vector unsigned int test_vpermxorw_e(void)
+{
+  vector unsigned int a = W_INIT1
+  vector unsigned int b = W_INIT2
+  vector unsigned int c = W_INIT2
+  return __builtin_crypto_vpermxor(a, b, c);
+// CHECK: @llvm.ppc.altivec.crypto.vpermxor
+}
+
+// CHECK-LABEL: define <2 x i64> @test_vpermxord_e
+vector unsigned long long test_vpermxord_e(void)
+{
+  vector unsigned long long a = D_INIT1
+  vector unsigned long long b = D_INIT2
+  vector unsigned long long c = D_INIT2
+  return __builtin_crypto_vpermxor(a, b, c);
+// CHECK: @llvm.ppc.altivec.crypto.vpermxor
+}
+
+// CHECK-LABEL: define <2 x i64> @test_vcipher_e
+vector unsigned long long test_vcipher_e(void)
+{
+  vector unsigned long long a = D_INIT1
+  vector unsigned long long b = D_INIT2
+  return __builtin_crypto_vcipher(a, b);
+// CHECK: @llvm.ppc.altivec.crypto.vcipher
+}
+
+// CHECK-LABEL: define <2 x i64> @test_vcipherlast_e
+vector unsigned long long test_vcipherlast_e(void)
+{
+  vector unsigned long long a = D_INIT1
+  vector unsigned long long b = D_INIT2
+  return __builtin_crypto_vcipherlast(a, b);
+// CHECK: @llvm.ppc.altivec.crypto.vcipherlast
+}
+
+// CHECK-LABEL: define <2 x i64> @test_vncipher_e
+vector unsigned long long test_vncipher_e(void)
+{
+  vector unsigned long long a = D_INIT1
+  vector unsigned long long b = D_INIT2
+  return __builtin_crypto_vncipher(a, b);
+// CHECK: @llvm.ppc.altivec.crypto.vncipher
+}
+
+// CHECK-LABEL: define <2 x i64> @test_vncipherlast_e
+vector unsigned long long test_vncipherlast_e(void)
+{
+  vector unsigned long long a = D_INIT1
+  vector unsigned long long b = D_INIT2
+  return __builtin_crypto_vncipherlast(a, b);
+// CHECK: @llvm.ppc.altivec.crypto.vncipherlast
+}
+
+// CHECK-LABEL: define <4 x i32> @test_vshasigmaw_e
+vector unsigned int test_vshasigmaw_e(void)
+{
+  vector unsigned int a = W_INIT1
+  return __builtin_crypto_vshasigmaw(a, 1, 15);
+// CHECK: @llvm.ppc.altivec.crypto.vshasigmaw
+}
+
+// CHECK-LABEL: define <2 x i64> @test_vshasigmad_e
+vector unsigned long long test_vshasigmad_e(void)
+{
+  vector unsigned long long a = D_INIT2
+  return __builtin_crypto_vshasigmad(a, 0, 15);
+// CHECK: @llvm.ppc.altivec.crypto.vshasigmad
+}
+
diff --git a/test/CodeGen/builtins-ppc-htm.c b/test/CodeGen/builtins-ppc-htm.c
new file mode 100644
index 0000000..87baa77
--- /dev/null
+++ b/test/CodeGen/builtins-ppc-htm.c
@@ -0,0 +1,62 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_cc1 -faltivec -target-feature +htm -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+
+void test1(long int *r, int code, long int *a, long int *b) {
+// CHECK-LABEL: define void @test1
+
+  r[0] = __builtin_tbegin (0);
+// CHECK: @llvm.ppc.tbegin
+  r[1] = __builtin_tbegin (1);
+// CHECK: @llvm.ppc.tbegin
+  r[2] = __builtin_tend (0);
+// CHECK: @llvm.ppc.tend
+  r[3] = __builtin_tendall ();
+// CHECK: @llvm.ppc.tendall
+
+  r[4] = __builtin_tabort (code);
+// CHECK: @llvm.ppc.tabort
+  r[5] = __builtin_tabort (0x1);
+// CHECK: @llvm.ppc.tabort
+  r[6] = __builtin_tabortdc (0xf, a[0], b[0]);
+// CHECK: @llvm.ppc.tabortdc
+  r[7] = __builtin_tabortdci (0xf, a[1], 0x1);
+// CHECK: @llvm.ppc.tabortdc
+  r[8] = __builtin_tabortwc (0xf, a[2], b[2]);
+// CHECK: @llvm.ppc.tabortwc
+  r[9] = __builtin_tabortwci (0xf, a[3], 0x1);
+// CHECK: @llvm.ppc.tabortwc
+
+  r[10] = __builtin_tcheck ();
+// CHECK: @llvm.ppc.tcheck
+  r[11] = __builtin_trechkpt ();
+// CHECK: @llvm.ppc.trechkpt
+  r[12] = __builtin_treclaim (0);
+// CHECK: @llvm.ppc.treclaim
+  r[13] = __builtin_tresume ();
+// CHECK: @llvm.ppc.tresume
+  r[14] = __builtin_tsuspend ();
+// CHECK: @llvm.ppc.tsuspend
+  r[15] = __builtin_tsr (0);
+// CHECK: @llvm.ppc.tsr
+
+  r[16] = __builtin_ttest ();
+// CHECK: @llvm.ppc.ttest
+
+  r[17] = __builtin_get_texasr ();
+// CHECK: @llvm.ppc.get.texasr
+  r[18] = __builtin_get_texasru ();
+// CHECK: @llvm.ppc.get.texasru
+  r[19] = __builtin_get_tfhar ();
+// CHECK: @llvm.ppc.get.tfhar
+  r[20] = __builtin_get_tfiar ();
+// CHECK: @llvm.ppc.get.tfiar
+
+  __builtin_set_texasr (a[21]);
+// CHECK: @llvm.ppc.set.texasr
+  __builtin_set_texasru (a[22]);
+// CHECK: @llvm.ppc.set.texasru
+  __builtin_set_tfhar (a[23]);
+// CHECK: @llvm.ppc.set.tfhar
+  __builtin_set_tfiar (a[24]);
+// CHECK: @llvm.ppc.set.tfiar
+}
diff --git a/test/CodeGen/builtins-ppc-p8vector.c b/test/CodeGen/builtins-ppc-p8vector.c
new file mode 100644
index 0000000..d3391b5
--- /dev/null
+++ b/test/CodeGen/builtins-ppc-p8vector.c
@@ -0,0 +1,605 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_cc1 -faltivec -target-feature +power8-vector -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -faltivec -target-feature +power8-vector -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-LE
+// RUN: not %clang_cc1 -faltivec -triple powerpc64-unknown-unknown -emit-llvm %s -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PPC
+
+vector int vi = { -1, 2, -3, 4 };
+vector unsigned int vui = { 1, 2, 3, 4 };
+vector bool long long vbll = { 1, 0 };
+vector long long vll = { 1, 2 };
+vector unsigned long long vull = { 1, 2 };
+
+int res_i;
+vector int res_vi;
+vector unsigned int res_vui;
+vector long long res_vll;
+vector unsigned long long res_vull;
+vector bool long long res_vbll;
+
+// CHECK-LABEL: define void @test1
+void test1() {
+
+  /* vec_cmpeq */
+  res_vbll = vec_cmpeq(vll, vll);
+// CHECK: @llvm.ppc.altivec.vcmpequd
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd
+// CHECK-PPC: error: call to 'vec_cmpeq' is ambiguous
+
+  res_vbll = vec_cmpeq(vull, vull);
+// CHECK: @llvm.ppc.altivec.vcmpequd
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd
+// CHECK-PPC: error: call to 'vec_cmpeq' is ambiguous
+
+  /* vec_cmpgt */
+  res_vbll = vec_cmpgt(vll, vll);
+// CHECK: @llvm.ppc.altivec.vcmpgtsd
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd
+// CHECK-PPC: error: call to 'vec_cmpgt' is ambiguous
+
+  res_vbll = vec_cmpgt(vull, vull);
+// CHECK: @llvm.ppc.altivec.vcmpgtud
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud
+// CHECK-PPC: error: call to 'vec_cmpgt' is ambiguous
+
+  /* ----------------------- predicates --------------------------- */
+  /* vec_all_eq */
+  res_i = vec_all_eq(vll, vll);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_all_eq' is ambiguous
+
+  res_i = vec_all_eq(vll, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_all_eq' is ambiguous
+
+  res_i = vec_all_eq(vull, vull);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_all_eq' is ambiguous
+
+  res_i = vec_all_eq(vull, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_all_eq' is ambiguous
+
+  res_i = vec_all_eq(vbll, vll);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_all_eq' is ambiguous
+
+  res_i = vec_all_eq(vbll, vull);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_all_eq' is ambiguous
+
+  res_i = vec_all_eq(vbll, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_all_eq' is ambiguous
+
+  /* vec_all_ne */
+  res_i = vec_all_ne(vll, vll);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_all_ne' is ambiguous
+
+  res_i = vec_all_ne(vll, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_all_ne' is ambiguous
+
+  res_i = vec_all_ne(vull, vull);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_all_ne' is ambiguous
+
+  res_i = vec_all_ne(vull, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_all_ne' is ambiguous
+
+  res_i = vec_all_ne(vbll, vll);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_all_ne' is ambiguous
+
+  res_i = vec_all_ne(vbll, vull);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_all_ne' is ambiguous
+
+  res_i = vec_all_ne(vbll, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_all_ne' is ambiguous
+
+  /* vec_any_eq */
+  res_i = vec_any_eq(vll, vll);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_any_eq' is ambiguous
+
+  res_i = vec_any_eq(vll, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_any_eq' is ambiguous
+
+  res_i = vec_any_eq(vull, vull);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_any_eq' is ambiguous
+
+  res_i = vec_any_eq(vull, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_any_eq' is ambiguous
+
+  res_i = vec_any_eq(vbll, vll);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_any_eq' is ambiguous
+
+  res_i = vec_any_eq(vbll, vull);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_any_eq' is ambiguous
+
+  res_i = vec_any_eq(vbll, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_any_eq' is ambiguous
+
+  /* vec_any_ne */
+  res_i = vec_any_ne(vll, vll);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_any_ne' is ambiguous
+
+  res_i = vec_any_ne(vll, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_any_ne' is ambiguous
+
+  res_i = vec_any_ne(vull, vull);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_any_ne' is ambiguous
+
+  res_i = vec_any_ne(vull, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_any_ne' is ambiguous
+
+  res_i = vec_any_ne(vbll, vll);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_any_ne' is ambiguous
+
+  res_i = vec_any_ne(vbll, vull);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_any_ne' is ambiguous
+
+  res_i = vec_any_ne(vbll, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+// CHECK-PPC: error: call to 'vec_any_ne' is ambiguous
+
+  /* vec_all_ge */
+  res_i = vec_all_ge(vll, vll);
+// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-PPC: error: call to 'vec_all_ge' is ambiguous
+
+  res_i = vec_all_ge(vll, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-PPC: error: call to 'vec_all_ge' is ambiguous
+
+  res_i = vec_all_ge(vull, vull);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_all_ge' is ambiguous
+
+  res_i = vec_all_ge(vull, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_all_ge' is ambiguous
+
+  res_i = vec_all_ge(vbll, vll);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_all_ge' is ambiguous
+
+  res_i = vec_all_ge(vbll, vull);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_all_ge' is ambiguous
+
+  res_i = vec_all_ge(vbll, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_all_ge' is ambiguous
+
+  /* vec_all_gt */
+  res_i = vec_all_gt(vll, vll);
+// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-PPC: error: call to 'vec_all_gt' is ambiguous
+
+  res_i = vec_all_gt(vll, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-PPC: error: call to 'vec_all_gt' is ambiguous
+
+  res_i = vec_all_gt(vull, vull);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_all_gt' is ambiguous
+
+  res_i = vec_all_gt(vull, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_all_gt' is ambiguous
+
+  res_i = vec_all_gt(vbll, vll);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_all_gt' is ambiguous
+
+  res_i = vec_all_gt(vbll, vull);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_all_gt' is ambiguous
+
+  res_i = vec_all_gt(vbll, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_all_gt' is ambiguous
+
+  /* vec_all_le */
+  res_i = vec_all_le(vll, vll);
+// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-PPC: error: call to 'vec_all_le' is ambiguous
+
+  res_i = vec_all_le(vll, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-PPC: error: call to 'vec_all_le' is ambiguous
+
+  res_i = vec_all_le(vull, vull);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_all_le' is ambiguous
+
+  res_i = vec_all_le(vull, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_all_le' is ambiguous
+
+  res_i = vec_all_le(vbll, vll);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_all_le' is ambiguous
+
+  res_i = vec_all_le(vbll, vull);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_all_le' is ambiguous
+
+  res_i = vec_all_le(vbll, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_all_le' is ambiguous
+
+  /* vec_all_lt */
+  res_i = vec_all_lt(vll, vll);
+// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-PPC: error: call to 'vec_all_lt' is ambiguous
+
+  res_i = vec_all_lt(vll, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-PPC: error: call to 'vec_all_lt' is ambiguous
+
+  res_i = vec_all_lt(vull, vull);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_all_lt' is ambiguous
+
+  res_i = vec_all_lt(vull, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_all_lt' is ambiguous
+
+  res_i = vec_all_lt(vbll, vll);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_all_lt' is ambiguous
+
+  res_i = vec_all_lt(vbll, vull);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_all_lt' is ambiguous
+
+  res_i = vec_all_lt(vbll, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_all_lt' is ambiguous
+
+  /* vec_any_ge */
+  res_i = vec_any_ge(vll, vll);
+// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-PPC: error: call to 'vec_any_ge' is ambiguous
+
+  res_i = vec_any_ge(vll, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-PPC: error: call to 'vec_any_ge' is ambiguous
+
+  res_i = vec_any_ge(vull, vull);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_any_ge' is ambiguous
+
+  res_i = vec_any_ge(vull, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_any_ge' is ambiguous
+
+  res_i = vec_any_ge(vbll, vll);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_any_ge' is ambiguous
+
+  res_i = vec_any_ge(vbll, vull);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_any_ge' is ambiguous
+
+  res_i = vec_any_ge(vbll, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_any_ge' is ambiguous
+
+  /* vec_any_gt */
+  res_i = vec_any_gt(vll, vll);
+// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-PPC: error: call to 'vec_any_gt' is ambiguous
+
+  res_i = vec_any_gt(vll, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-PPC: error: call to 'vec_any_gt' is ambiguous
+
+  res_i = vec_any_gt(vull, vull);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_any_gt' is ambiguous
+
+  res_i = vec_any_gt(vull, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_any_gt' is ambiguous
+
+  res_i = vec_any_gt(vbll, vll);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_any_gt' is ambiguous
+
+  res_i = vec_any_gt(vbll, vull);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_any_gt' is ambiguous
+
+  res_i = vec_any_gt(vbll, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_any_gt' is ambiguous
+
+  /* vec_any_le */
+  res_i = vec_any_le(vll, vll);
+// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-PPC: error: call to 'vec_any_le' is ambiguous
+
+  res_i = vec_any_le(vll, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-PPC: error: call to 'vec_any_le' is ambiguous
+
+  res_i = vec_any_le(vull, vull);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_any_le' is ambiguous
+
+  res_i = vec_any_le(vull, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_any_le' is ambiguous
+
+  res_i = vec_any_le(vbll, vll);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_any_le' is ambiguous
+
+  res_i = vec_any_le(vbll, vull);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_any_le' is ambiguous
+
+  res_i = vec_any_le(vbll, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_any_le' is ambiguous
+
+  /* vec_any_lt */
+  res_i = vec_any_lt(vll, vll);
+// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-PPC: error: call to 'vec_any_lt' is ambiguous
+
+  res_i = vec_any_lt(vll, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
+// CHECK-PPC: error: call to 'vec_any_lt' is ambiguous
+
+  res_i = vec_any_lt(vull, vull);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_any_lt' is ambiguous
+
+  res_i = vec_any_lt(vull, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_any_lt' is ambiguous
+
+  res_i = vec_any_lt(vbll, vll);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_any_lt' is ambiguous
+
+  res_i = vec_any_lt(vbll, vull);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_any_lt' is ambiguous
+
+  res_i = vec_any_lt(vbll, vbll);
+// CHECK: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
+// CHECK-PPC: error: call to 'vec_any_lt' is ambiguous
+
+  /* vec_max */
+  res_vll = vec_max(vll, vll);
+// CHECK: @llvm.ppc.altivec.vmaxsd
+// CHECK-LE: @llvm.ppc.altivec.vmaxsd
+// CHECK-PPC: error: call to 'vec_max' is ambiguous
+
+  res_vll = vec_max(vbll, vll);
+// CHECK: @llvm.ppc.altivec.vmaxsd
+// CHECK-LE: @llvm.ppc.altivec.vmaxsd
+// CHECK-PPC: error: call to 'vec_max' is ambiguous
+
+  res_vll = vec_max(vll, vbll);
+// CHECK: @llvm.ppc.altivec.vmaxsd
+// CHECK-LE: @llvm.ppc.altivec.vmaxsd
+// CHECK-PPC: error: call to 'vec_max' is ambiguous
+
+  res_vull = vec_max(vull, vull);
+// CHECK: @llvm.ppc.altivec.vmaxud
+// CHECK-LE: @llvm.ppc.altivec.vmaxud
+// CHECK-PPC: error: call to 'vec_max' is ambiguous
+
+  res_vull = vec_max(vbll, vull);
+// CHECK: @llvm.ppc.altivec.vmaxud
+// CHECK-LE: @llvm.ppc.altivec.vmaxud
+// CHECK-PPC: error: call to 'vec_max' is ambiguous
+
+  res_vull = vec_max(vull, vbll);
+// CHECK: @llvm.ppc.altivec.vmaxud
+// CHECK-LE: @llvm.ppc.altivec.vmaxud
+// CHECK-PPC: error: call to 'vec_max' is ambiguous
+
+  /* vec_min */
+  res_vll = vec_min(vll, vll);
+// CHECK: @llvm.ppc.altivec.vminsd
+// CHECK-LE: @llvm.ppc.altivec.vminsd
+// CHECK-PPC: error: call to 'vec_min' is ambiguous
+
+  res_vll = vec_min(vbll, vll);
+// CHECK: @llvm.ppc.altivec.vminsd
+// CHECK-LE: @llvm.ppc.altivec.vminsd
+// CHECK-PPC: error: call to 'vec_min' is ambiguous
+
+  res_vll = vec_min(vll, vbll);
+// CHECK: @llvm.ppc.altivec.vminsd
+// CHECK-LE: @llvm.ppc.altivec.vminsd
+// CHECK-PPC: error: call to 'vec_min' is ambiguous
+
+  res_vull = vec_min(vull, vull);
+// CHECK: @llvm.ppc.altivec.vminud
+// CHECK-LE: @llvm.ppc.altivec.vminud
+// CHECK-PPC: error: call to 'vec_min' is ambiguous
+
+  res_vull = vec_min(vbll, vull);
+// CHECK: @llvm.ppc.altivec.vminud
+// CHECK-LE: @llvm.ppc.altivec.vminud
+// CHECK-PPC: error: call to 'vec_min' is ambiguous
+
+  res_vull = vec_min(vull, vbll);
+// CHECK: @llvm.ppc.altivec.vminud
+// CHECK-LE: @llvm.ppc.altivec.vminud
+// CHECK-PPC: error: call to 'vec_min' is ambiguous
+
+  /* vec_mule */
+  res_vll = vec_mule(vi, vi);
+// CHECK: @llvm.ppc.altivec.vmulesw
+// CHECK-LE: @llvm.ppc.altivec.vmulosw
+// CHECK-PPC: error: call to 'vec_mule' is ambiguous
+
+  res_vull = vec_mule(vui , vui);
+// CHECK: @llvm.ppc.altivec.vmuleuw
+// CHECK-LE: @llvm.ppc.altivec.vmulouw
+// CHECK-PPC: error: call to 'vec_mule' is ambiguous
+
+  /* vec_mulo */
+  res_vll = vec_mulo(vi, vi);
+// CHECK: @llvm.ppc.altivec.vmulosw
+// CHECK-LE: @llvm.ppc.altivec.vmulesw
+// CHECK-PPC: error: call to 'vec_mulo' is ambiguous
+
+  res_vull = vec_mulo(vui, vui);
+// CHECK: @llvm.ppc.altivec.vmulouw
+// CHECK-LE: @llvm.ppc.altivec.vmuleuw
+// CHECK-PPC: error: call to 'vec_mulo' is ambiguous
+
+  /* vec_rl */
+  res_vll = vec_rl(vll, vull);
+// CHECK: @llvm.ppc.altivec.vrld
+// CHECK-LE: @llvm.ppc.altivec.vrld
+// CHECK-PPC: error: call to 'vec_rl' is ambiguous
+
+  res_vull = vec_rl(vull, vull);
+// CHECK: @llvm.ppc.altivec.vrld
+// CHECK-LE: @llvm.ppc.altivec.vrld
+// CHECK-PPC: error: call to 'vec_rl' is ambiguous
+
+  /* vec_sl */
+  res_vll = vec_sl(vll, vull);
+// CHECK: shl <2 x i64>
+// CHECK-LE: shl <2 x i64>
+// CHECK-PPC: error: call to 'vec_sl' is ambiguous
+
+  res_vull = vec_sl(vull, vull);
+// CHECK: shl <2 x i64>
+// CHECK-LE: shl <2 x i64>
+// CHECK-PPC: error: call to 'vec_sl' is ambiguous
+
+  /* vec_sr */
+  res_vll = vec_sr(vll, vull);
+// CHECK: ashr <2 x i64>
+// CHECK-LE: ashr <2 x i64>
+// CHECK-PPC: error: call to 'vec_sr' is ambiguous
+
+  res_vull = vec_sr(vull, vull);
+// CHECK: lshr <2 x i64>
+// CHECK-LE: lshr <2 x i64>
+// CHECK-PPC: error: call to 'vec_sr' is ambiguous
+
+  /* vec_sra */
+  res_vll = vec_sra(vll, vull);
+// CHECK: ashr <2 x i64>
+// CHECK-LE: ashr <2 x i64>
+// CHECK-PPC: error: call to 'vec_sra' is ambiguous
+
+  res_vull = vec_sra(vull, vull);
+// CHECK: ashr <2 x i64>
+// CHECK-LE: ashr <2 x i64>
+// CHECK-PPC: error: call to 'vec_sra' is ambiguous
+
+}
diff --git a/test/CodeGen/builtins-x86.c b/test/CodeGen/builtins-x86.c
index e965f0c..8a5b5a2 100644
--- a/test/CodeGen/builtins-x86.c
+++ b/test/CodeGen/builtins-x86.c
@@ -354,7 +354,6 @@
   tmp_V1LLi = __builtin_ia32_palignr(tmp_V1LLi, tmp_V1LLi, imm_i);
 #ifdef USE_SSE4
   tmp_V16c = __builtin_ia32_pblendvb128(tmp_V16c, tmp_V16c, tmp_V16c);
-  tmp_V8s = __builtin_ia32_pblendw128(tmp_V8s, tmp_V8s, imm_i_0_256);
   tmp_V2d = __builtin_ia32_blendvpd(tmp_V2d, tmp_V2d, tmp_V2d);
   tmp_V4f = __builtin_ia32_blendvps(tmp_V4f, tmp_V4f, tmp_V4f);
   tmp_V8s = __builtin_ia32_packusdw128(tmp_V4i, tmp_V4i);
@@ -406,9 +405,6 @@
   tmp_V8f = __builtin_ia32_dpps256(tmp_V8f, tmp_V8f, 0x7);
   tmp_V4d = __builtin_ia32_cmppd256(tmp_V4d, tmp_V4d, 0);
   tmp_V8f = __builtin_ia32_cmpps256(tmp_V8f, tmp_V8f, 0);
-  tmp_V2d = __builtin_ia32_vextractf128_pd256(tmp_V4d, 0x1);
-  tmp_V4f = __builtin_ia32_vextractf128_ps256(tmp_V8f, 0x1);
-  tmp_V4i = __builtin_ia32_vextractf128_si256(tmp_V8i, 0x1);
   tmp_V4d = __builtin_ia32_cvtdq2pd256(tmp_V4i);
   tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i);
   tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d);
@@ -420,9 +416,6 @@
   tmp_V4d = __builtin_ia32_vperm2f128_pd256(tmp_V4d, tmp_V4d, 0x7);
   tmp_V8f = __builtin_ia32_vperm2f128_ps256(tmp_V8f, tmp_V8f, 0x7);
   tmp_V8i = __builtin_ia32_vperm2f128_si256(tmp_V8i, tmp_V8i, 0x7);
-  tmp_V4d = __builtin_ia32_vinsertf128_pd256(tmp_V4d, tmp_V2d, 0x1);
-  tmp_V8f = __builtin_ia32_vinsertf128_ps256(tmp_V8f, tmp_V4f, 0x1);
-  tmp_V8i = __builtin_ia32_vinsertf128_si256(tmp_V8i, tmp_V4i, 0x1);
   tmp_V4d = __builtin_ia32_sqrtpd256(tmp_V4d);
   tmp_V8f = __builtin_ia32_sqrtps256(tmp_V8f);
   tmp_V8f = __builtin_ia32_rsqrtps256(tmp_V8f);
diff --git a/test/CodeGen/builtins.c b/test/CodeGen/builtins.c
index 1ab29a6..39b2c12 100644
--- a/test/CodeGen/builtins.c
+++ b/test/CodeGen/builtins.c
@@ -42,8 +42,6 @@
   P(fpclassify, (0, 1, 2, 3, 4, 1.0));
   P(fpclassify, (0, 1, 2, 3, 4, 1.0f));
   P(fpclassify, (0, 1, 2, 3, 4, 1.0l));
-  // FIXME:
-  //  P(isinf_sign, (1.0));
 
   Q(nan, (""));
   Q(nanf, (""));
@@ -61,6 +59,8 @@
   P(islessgreater, (1., 2.));
   P(isunordered, (1., 2.));
 
+  P(isinf, (1.));
+  P(isinf_sign, (1.));
   P(isnan, (1.));
 
   // Bitwise & Numeric Functions
@@ -177,11 +177,35 @@
   res = __builtin_isinf(D);
   // CHECK:  call double @llvm.fabs.f64(double
   // CHECK:  fcmp oeq double {{.*}}, 0x7FF0000000000000
-  
+
   res = __builtin_isinf(LD);
   // CHECK:  call x86_fp80 @llvm.fabs.f80(x86_fp80
   // CHECK:  fcmp oeq x86_fp80 {{.*}}, 0xK7FFF8000000000000000
-  
+
+  res = __builtin_isinf_sign(F);
+  // CHECK:  %[[ABS:.*]] = call float @llvm.fabs.f32(float %[[ARG:.*]])
+  // CHECK:  %[[ISINF:.*]] = fcmp oeq float %[[ABS]], 0x7FF0000000000000
+  // CHECK:  %[[BITCAST:.*]] = bitcast float %[[ARG]] to i32
+  // CHECK:  %[[ISNEG:.*]] = icmp slt i32 %[[BITCAST]], 0
+  // CHECK:  %[[SIGN:.*]] = select i1 %[[ISNEG]], i32 -1, i32 1
+  // CHECK:  select i1 %[[ISINF]], i32 %[[SIGN]], i32 0
+
+  res = __builtin_isinf_sign(D);
+  // CHECK:  %[[ABS:.*]] = call double @llvm.fabs.f64(double %[[ARG:.*]])
+  // CHECK:  %[[ISINF:.*]] = fcmp oeq double %[[ABS]], 0x7FF0000000000000
+  // CHECK:  %[[BITCAST:.*]] = bitcast double %[[ARG]] to i64
+  // CHECK:  %[[ISNEG:.*]] = icmp slt i64 %[[BITCAST]], 0
+  // CHECK:  %[[SIGN:.*]] = select i1 %[[ISNEG]], i32 -1, i32 1
+  // CHECK:  select i1 %[[ISINF]], i32 %[[SIGN]], i32 0
+
+  res = __builtin_isinf_sign(LD);
+  // CHECK:  %[[ABS:.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 %[[ARG:.*]])
+  // CHECK:  %[[ISINF:.*]] = fcmp oeq x86_fp80 %[[ABS]], 0xK7FFF8000000000000000
+  // CHECK:  %[[BITCAST:.*]] = bitcast x86_fp80 %[[ARG]] to i80
+  // CHECK:  %[[ISNEG:.*]] = icmp slt i80 %[[BITCAST]], 0
+  // CHECK:  %[[SIGN:.*]] = select i1 %[[ISNEG]], i32 -1, i32 1
+  // CHECK:  select i1 %[[ISINF]], i32 %[[SIGN]], i32 0
+
   res = __builtin_isfinite(F);
   // CHECK: fcmp oeq float 
   // CHECK: call float @llvm.fabs.f32(float
@@ -220,6 +244,8 @@
   // CHECK: call x86_fp80 @llvm.fabs.f80(x86_fp80
 }
 
+// __builtin_longjmp isn't supported on all platforms, so only test it on X86.
+#ifdef __x86_64__
 // CHECK-LABEL: define void @test_builtin_longjmp
 void test_builtin_longjmp(void **buffer) {
   // CHECK: [[BITCAST:%.*]] = bitcast
@@ -227,6 +253,7 @@
   __builtin_longjmp(buffer, 1);
   // CHECK-NEXT: unreachable
 }
+#endif
 
 // CHECK-LABEL: define i64 @test_builtin_readcyclecounter
 long long test_builtin_readcyclecounter() {
diff --git a/test/CodeGen/c-strings.c b/test/CodeGen/c-strings.c
index 36934e8..588a716 100644
--- a/test/CodeGen/c-strings.c
+++ b/test/CodeGen/c-strings.c
@@ -7,12 +7,12 @@
 // CHECK: @align = global i8 [[ALIGN:[0-9]+]]
 // ITANIUM: @.str = private unnamed_addr constant [6 x i8] c"hello\00"
 // MSABI: @"\01??_C@_05CJBACGMB@hello?$AA@" = linkonce_odr unnamed_addr constant [6 x i8] c"hello\00", comdat, align 1
-// ITANIUM: @f1.x = internal global i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0)
-// MSABI: @f1.x = internal global i8* getelementptr inbounds ([6 x i8]* @"\01??_C@_05CJBACGMB@hello?$AA@", i32 0, i32 0)
+// ITANIUM: @f1.x = internal global i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0)
+// MSABI: @f1.x = internal global i8* getelementptr inbounds ([6 x i8], [6 x i8]* @"\01??_C@_05CJBACGMB@hello?$AA@", i32 0, i32 0)
 // CHECK: @f2.x = internal global [6 x i8] c"hello\00", align [[ALIGN]]
 // CHECK: @f3.x = internal global [8 x i8] c"hello\00\00\00", align [[ALIGN]]
-// ITANIUM: @f4.x = internal global %struct.s { i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0) }
-// MSABI: @f4.x = internal global %struct.s { i8* getelementptr inbounds ([6 x i8]* @"\01??_C@_05CJBACGMB@hello?$AA@", i32 0, i32 0) }
+// ITANIUM: @f4.x = internal global %struct.s { i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0) }
+// MSABI: @f4.x = internal global %struct.s { i8* getelementptr inbounds ([6 x i8], [6 x i8]* @"\01??_C@_05CJBACGMB@hello?$AA@", i32 0, i32 0) }
 // CHECK: @x = global [3 x i8] c"ola", align [[ALIGN]]
 
 #if defined(__s390x__)
@@ -34,7 +34,7 @@
 void f1() {
   static char *x = "hello";
   bar(x);
-  // CHECK: [[T1:%.*]] = load i8** @f1.x
+  // CHECK: [[T1:%.*]] = load i8*, i8** @f1.x
   // CHECK: call void @bar(i8* [[T1:%.*]])
 }
 
diff --git a/test/CodeGen/c11atomics-ios.c b/test/CodeGen/c11atomics-ios.c
index ad57550..a869982 100644
--- a/test/CodeGen/c11atomics-ios.c
+++ b/test/CodeGen/c11atomics-ios.c
@@ -13,22 +13,22 @@
 // CHECK-NEXT: [[F:%.*]] = alloca float
 // CHECK-NEXT: store float* {{%.*}}, float** [[FP]]
 
-// CHECK-NEXT: [[T0:%.*]] = load float** [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = load float*, float** [[FP]]
 // CHECK-NEXT: store float 1.000000e+00, float* [[T0]], align 4
   __c11_atomic_init(fp, 1.0f);
 
 // CHECK-NEXT: store float 2.000000e+00, float* [[X]], align 4
   _Atomic(float) x = 2.0f;
 
-// CHECK-NEXT: [[T0:%.*]] = load float** [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = load float*, float** [[FP]]
 // CHECK-NEXT: [[T1:%.*]] = bitcast float* [[T0]] to i32*
-// CHECK-NEXT: [[T2:%.*]] = load atomic i32* [[T1]] seq_cst, align 4
+// CHECK-NEXT: [[T2:%.*]] = load atomic i32, i32* [[T1]] seq_cst, align 4
 // CHECK-NEXT: [[T3:%.*]] = bitcast i32 [[T2]] to float
 // CHECK-NEXT: store float [[T3]], float* [[F]]
   float f = *fp;
 
-// CHECK-NEXT: [[T0:%.*]] = load float* [[F]], align 4
-// CHECK-NEXT: [[T1:%.*]] = load float** [[FP]], align 4
+// CHECK-NEXT: [[T0:%.*]] = load float, float* [[F]], align 4
+// CHECK-NEXT: [[T1:%.*]] = load float*, float** [[FP]], align 4
 // CHECK-NEXT: [[T2:%.*]] = bitcast float [[T0]] to i32
 // CHECK-NEXT: [[T3:%.*]] = bitcast float* [[T1]] to i32*
 // CHECK-NEXT: store atomic i32 [[T2]], i32* [[T3]] seq_cst, align 4
@@ -46,45 +46,45 @@
 // CHECK-NEXT: [[TMP1:%.*]] = alloca [[CF]], align 8
 // CHECK-NEXT: store [[CF]]*
 
-// CHECK-NEXT: [[P:%.*]] = load [[CF]]** [[FP]]
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[P]], i32 0, i32 0
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[P]], i32 0, i32 1
+// CHECK-NEXT: [[P:%.*]] = load [[CF]]*, [[CF]]** [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[P]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[P]], i32 0, i32 1
 // CHECK-NEXT: store float 1.000000e+00, float* [[T0]]
 // CHECK-NEXT: store float 0.000000e+00, float* [[T1]]
   __c11_atomic_init(fp, 1.0f);
 
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[X]], i32 0, i32 0
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[X]], i32 0, i32 1
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[X]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[X]], i32 0, i32 1
 // CHECK-NEXT: store float 2.000000e+00, float* [[T0]]
 // CHECK-NEXT: store float 0.000000e+00, float* [[T1]]
   _Atomic(_Complex float) x = 2.0f;
 
-// CHECK-NEXT: [[T0:%.*]] = load [[CF]]** [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = load [[CF]]*, [[CF]]** [[FP]]
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[CF]]* [[T0]] to i64*
-// CHECK-NEXT: [[T2:%.*]] = load atomic i64* [[T1]] seq_cst, align 8
+// CHECK-NEXT: [[T2:%.*]] = load atomic i64, i64* [[T1]] seq_cst, align 8
 // CHECK-NEXT: [[T3:%.*]] = bitcast [[CF]]* [[TMP0]] to i64*
 // CHECK-NEXT: store i64 [[T2]], i64* [[T3]], align 8
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP0]], i32 0, i32 0
-// CHECK-NEXT: [[R:%.*]] = load float* [[T0]]
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP0]], i32 0, i32 1
-// CHECK-NEXT: [[I:%.*]] = load float* [[T0]]
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 0
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 1
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[TMP0]], i32 0, i32 0
+// CHECK-NEXT: [[R:%.*]] = load float, float* [[T0]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[TMP0]], i32 0, i32 1
+// CHECK-NEXT: [[I:%.*]] = load float, float* [[T0]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[F]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[F]], i32 0, i32 1
 // CHECK-NEXT: store float [[R]], float* [[T0]]
 // CHECK-NEXT: store float [[I]], float* [[T1]]
   _Complex float f = *fp;
 
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 0
-// CHECK-NEXT: [[R:%.*]] = load float* [[T0]]
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 1
-// CHECK-NEXT: [[I:%.*]] = load float* [[T0]]
-// CHECK-NEXT: [[DEST:%.*]] = load [[CF]]** [[FP]], align 4
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP1]], i32 0, i32 0
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[TMP1]], i32 0, i32 1
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[F]], i32 0, i32 0
+// CHECK-NEXT: [[R:%.*]] = load float, float* [[T0]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[F]], i32 0, i32 1
+// CHECK-NEXT: [[I:%.*]] = load float, float* [[T0]]
+// CHECK-NEXT: [[DEST:%.*]] = load [[CF]]*, [[CF]]** [[FP]], align 4
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[TMP1]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[TMP1]], i32 0, i32 1
 // CHECK-NEXT: store float [[R]], float* [[T0]]
 // CHECK-NEXT: store float [[I]], float* [[T1]]
 // CHECK-NEXT: [[T0:%.*]] = bitcast [[CF]]* [[TMP1]] to i64*
-// CHECK-NEXT: [[T1:%.*]] = load i64* [[T0]], align 8
+// CHECK-NEXT: [[T1:%.*]] = load i64, i64* [[T0]], align 8
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[CF]]* [[DEST]] to i64*
 // CHECK-NEXT: store atomic i64 [[T1]], i64* [[T2]] seq_cst, align 8
   *fp = f;
@@ -101,40 +101,40 @@
 // CHECK-NEXT: [[TMP0:%.*]] = alloca [[S]], align 8
 // CHECK-NEXT: store [[S]]*
 
-// CHECK-NEXT: [[P:%.*]] = load [[S]]** [[FP]]
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 0
+// CHECK-NEXT: [[P:%.*]] = load [[S]]*, [[S]]** [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[P]], i32 0, i32 0
 // CHECK-NEXT: store i16 1, i16* [[T0]], align 2
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 1
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[P]], i32 0, i32 1
 // CHECK-NEXT: store i16 2, i16* [[T0]], align 2
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 2
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[P]], i32 0, i32 2
 // CHECK-NEXT: store i16 3, i16* [[T0]], align 2
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 3
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[P]], i32 0, i32 3
 // CHECK-NEXT: store i16 4, i16* [[T0]], align 2
   __c11_atomic_init(fp, (S){1,2,3,4});
 
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 0
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[X]], i32 0, i32 0
 // CHECK-NEXT: store i16 1, i16* [[T0]], align 2
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 1
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[X]], i32 0, i32 1
 // CHECK-NEXT: store i16 2, i16* [[T0]], align 2
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 2
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[X]], i32 0, i32 2
 // CHECK-NEXT: store i16 3, i16* [[T0]], align 2
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 3
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[X]], i32 0, i32 3
 // CHECK-NEXT: store i16 4, i16* [[T0]], align 2
   _Atomic(S) x = (S){1,2,3,4};
 
-// CHECK-NEXT: [[T0:%.*]] = load [[S]]** [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = load [[S]]*, [[S]]** [[FP]]
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[T0]] to i64*
-// CHECK-NEXT: [[T2:%.*]] = load atomic i64* [[T1]] seq_cst, align 8
+// CHECK-NEXT: [[T2:%.*]] = load atomic i64, i64* [[T1]] seq_cst, align 8
 // CHECK-NEXT: [[T3:%.*]] = bitcast [[S]]* [[F]] to i64*
 // CHECK-NEXT: store i64 [[T2]], i64* [[T3]], align 2
   S f = *fp;
 
-// CHECK-NEXT: [[T0:%.*]] = load [[S]]** [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = load [[S]]*, [[S]]** [[FP]]
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[TMP0]] to i8*
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8*
 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 8, i32 2, i1 false)
 // CHECK-NEXT: [[T3:%.*]] = bitcast [[S]]* [[TMP0]] to i64*
-// CHECK-NEXT: [[T4:%.*]] = load i64* [[T3]], align 8
+// CHECK-NEXT: [[T4:%.*]] = load i64, i64* [[T3]], align 8
 // CHECK-NEXT: [[T5:%.*]] = bitcast [[S]]* [[T0]] to i64*
 // CHECK-NEXT: store atomic i64 [[T4]], i64* [[T5]] seq_cst, align 8
   *fp = f;
@@ -152,49 +152,49 @@
 // CHECK-NEXT: [[TMP1:%.*]] = alloca [[APS]], align 8
 // CHECK-NEXT: store [[APS]]*
 
-// CHECK-NEXT: [[P:%.*]] = load [[APS]]** [[FP]]
+// CHECK-NEXT: [[P:%.*]] = load [[APS]]*, [[APS]]** [[FP]]
 // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[P]] to i8*
 // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false)
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[P]], i32 0, i32 0
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[P]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 0
 // CHECK-NEXT: store i16 1, i16* [[T1]], align 2
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 1
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 1
 // CHECK-NEXT: store i16 2, i16* [[T1]], align 2
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 2
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 2
 // CHECK-NEXT: store i16 3, i16* [[T1]], align 2
   __c11_atomic_init(fp, (PS){1,2,3});
 
 // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[X]] to i8*
 // CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T0]], i8 0, i32 8, i32 8, i1 false)
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[X]], i32 0, i32 0
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[X]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 0
 // CHECK-NEXT: store i16 1, i16* [[T1]], align 2
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 1
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 1
 // CHECK-NEXT: store i16 2, i16* [[T1]], align 2
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 2
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 2
 // CHECK-NEXT: store i16 3, i16* [[T1]], align 2
   _Atomic(PS) x = (PS){1,2,3};
 
-// CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = load [[APS]]*, [[APS]]** [[FP]]
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i64*
-// CHECK-NEXT: [[T2:%.*]] = load atomic i64* [[T1]] seq_cst, align 8
+// CHECK-NEXT: [[T2:%.*]] = load atomic i64, i64* [[T1]] seq_cst, align 8
 // CHECK-NEXT: [[T3:%.*]] = bitcast [[APS]]* [[TMP0]] to i64*
 // CHECK-NEXT: store i64 [[T2]], i64* [[T3]], align 2
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[TMP0]], i32 0, i32 0
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[TMP0]], i32 0, i32 0
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[PS]]* [[F]] to i8*
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T0]] to i8*
 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false)
   PS f = *fp;
 
-// CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = load [[APS]]*, [[APS]]** [[FP]]
 // CHECK-NEXT: [[T1:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[TMP1]] to i8*
 // CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T1]], i8 0, i32 8, i32 8, i1 false)
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]]* [[TMP1]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[TMP1]], i32 0, i32 0
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T1]] to i8*
 // CHECK-NEXT: [[T3:%.*]] = bitcast [[PS]]* [[F]] to i8*
 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T2]], i8* [[T3]], i32 6, i32 2, i1 false)
 // CHECK-NEXT: [[T4:%.*]] = bitcast [[APS]]* [[TMP1]] to i64*
-// CHECK-NEXT: [[T5:%.*]] = load i64* [[T4]], align 8
+// CHECK-NEXT: [[T5:%.*]] = load i64, i64* [[T4]], align 8
 // CHECK-NEXT: [[T6:%.*]] = bitcast [[APS]]* [[T0]] to i64*
 // CHECK-NEXT: store atomic i64 [[T5]], i64* [[T6]] seq_cst, align 8
   *fp = f;
diff --git a/test/CodeGen/c11atomics.c b/test/CodeGen/c11atomics.c
index 376c582..a35eef9 100644
--- a/test/CodeGen/c11atomics.c
+++ b/test/CodeGen/c11atomics.c
@@ -147,23 +147,23 @@
 // CHECK-NEXT: [[TMP1:%.*]] = alloca float
 // CHECK-NEXT: store float* {{%.*}}, float** [[FP]]
 
-// CHECK-NEXT: [[T0:%.*]] = load float** [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = load float*, float** [[FP]]
 // CHECK-NEXT: store float 1.000000e+00, float* [[T0]], align 4
   __c11_atomic_init(fp, 1.0f);
 
 // CHECK-NEXT: store float 2.000000e+00, float* [[X]], align 4
   _Atomic(float) x = 2.0f;
 
-// CHECK-NEXT: [[T0:%.*]] = load float** [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = load float*, float** [[FP]]
 // CHECK-NEXT: [[T1:%.*]] = bitcast float* [[T0]] to i8*
 // CHECK-NEXT: [[T2:%.*]] = bitcast float* [[TMP0]] to i8*
 // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 4, i8* [[T1]], i8* [[T2]], i32 5)
-// CHECK-NEXT: [[T3:%.*]] = load float* [[TMP0]], align 4
+// CHECK-NEXT: [[T3:%.*]] = load float, float* [[TMP0]], align 4
 // CHECK-NEXT: store float [[T3]], float* [[F]]
   float f = *fp;
 
-// CHECK-NEXT: [[T0:%.*]] = load float* [[F]], align 4
-// CHECK-NEXT: [[T1:%.*]] = load float** [[FP]], align 4
+// CHECK-NEXT: [[T0:%.*]] = load float, float* [[F]], align 4
+// CHECK-NEXT: [[T1:%.*]] = load float*, float** [[FP]], align 4
 // CHECK-NEXT: store float [[T0]], float* [[TMP1]], align 4
 // CHECK-NEXT: [[T2:%.*]] = bitcast float* [[T1]] to i8*
 // CHECK-NEXT: [[T3:%.*]] = bitcast float* [[TMP1]] to i8*
@@ -182,40 +182,40 @@
 // CHECK-NEXT: [[TMP1:%.*]] = alloca [[CF]], align 8
 // CHECK-NEXT: store [[CF]]*
 
-// CHECK-NEXT: [[P:%.*]] = load [[CF]]** [[FP]]
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[P]], i32 0, i32 0
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[P]], i32 0, i32 1
+// CHECK-NEXT: [[P:%.*]] = load [[CF]]*, [[CF]]** [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[P]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[P]], i32 0, i32 1
 // CHECK-NEXT: store float 1.000000e+00, float* [[T0]]
 // CHECK-NEXT: store float 0.000000e+00, float* [[T1]]
   __c11_atomic_init(fp, 1.0f);
 
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[X]], i32 0, i32 0
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[X]], i32 0, i32 1
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[X]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[X]], i32 0, i32 1
 // CHECK-NEXT: store float 2.000000e+00, float* [[T0]]
 // CHECK-NEXT: store float 0.000000e+00, float* [[T1]]
   _Atomic(_Complex float) x = 2.0f;
 
-// CHECK-NEXT: [[T0:%.*]] = load [[CF]]** [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = load [[CF]]*, [[CF]]** [[FP]]
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[CF]]* [[T0]] to i8*
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[CF]]* [[TMP0]] to i8*
 // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP0]], i32 0, i32 0
-// CHECK-NEXT: [[R:%.*]] = load float* [[T0]]
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP0]], i32 0, i32 1
-// CHECK-NEXT: [[I:%.*]] = load float* [[T0]]
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 0
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 1
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[TMP0]], i32 0, i32 0
+// CHECK-NEXT: [[R:%.*]] = load float, float* [[T0]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[TMP0]], i32 0, i32 1
+// CHECK-NEXT: [[I:%.*]] = load float, float* [[T0]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[F]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[F]], i32 0, i32 1
 // CHECK-NEXT: store float [[R]], float* [[T0]]
 // CHECK-NEXT: store float [[I]], float* [[T1]]
   _Complex float f = *fp;
 
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 0
-// CHECK-NEXT: [[R:%.*]] = load float* [[T0]]
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 1
-// CHECK-NEXT: [[I:%.*]] = load float* [[T0]]
-// CHECK-NEXT: [[DEST:%.*]] = load [[CF]]** [[FP]], align 4
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP1]], i32 0, i32 0
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[TMP1]], i32 0, i32 1
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[F]], i32 0, i32 0
+// CHECK-NEXT: [[R:%.*]] = load float, float* [[T0]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[F]], i32 0, i32 1
+// CHECK-NEXT: [[I:%.*]] = load float, float* [[T0]]
+// CHECK-NEXT: [[DEST:%.*]] = load [[CF]]*, [[CF]]** [[FP]], align 4
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[TMP1]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]], [[CF]]* [[TMP1]], i32 0, i32 1
 // CHECK-NEXT: store float [[R]], float* [[T0]]
 // CHECK-NEXT: store float [[I]], float* [[T1]]
 // CHECK-NEXT: [[T0:%.*]] = bitcast [[CF]]* [[DEST]] to i8*
@@ -236,34 +236,34 @@
 // CHECK-NEXT: [[TMP0:%.*]] = alloca [[S]], align 8
 // CHECK-NEXT: store [[S]]*
 
-// CHECK-NEXT: [[P:%.*]] = load [[S]]** [[FP]]
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 0
+// CHECK-NEXT: [[P:%.*]] = load [[S]]*, [[S]]** [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[P]], i32 0, i32 0
 // CHECK-NEXT: store i16 1, i16* [[T0]], align 2
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 1
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[P]], i32 0, i32 1
 // CHECK-NEXT: store i16 2, i16* [[T0]], align 2
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 2
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[P]], i32 0, i32 2
 // CHECK-NEXT: store i16 3, i16* [[T0]], align 2
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 3
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[P]], i32 0, i32 3
 // CHECK-NEXT: store i16 4, i16* [[T0]], align 2
   __c11_atomic_init(fp, (S){1,2,3,4});
 
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 0
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[X]], i32 0, i32 0
 // CHECK-NEXT: store i16 1, i16* [[T0]], align 2
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 1
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[X]], i32 0, i32 1
 // CHECK-NEXT: store i16 2, i16* [[T0]], align 2
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 2
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[X]], i32 0, i32 2
 // CHECK-NEXT: store i16 3, i16* [[T0]], align 2
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 3
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]], [[S]]* [[X]], i32 0, i32 3
 // CHECK-NEXT: store i16 4, i16* [[T0]], align 2
   _Atomic(S) x = (S){1,2,3,4};
 
-// CHECK-NEXT: [[T0:%.*]] = load [[S]]** [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = load [[S]]*, [[S]]** [[FP]]
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[T0]] to i8*
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8*
 // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
   S f = *fp;
 
-// CHECK-NEXT: [[T0:%.*]] = load [[S]]** [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = load [[S]]*, [[S]]** [[FP]]
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[TMP0]] to i8*
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8*
 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 8, i32 2, i1 false)
@@ -289,43 +289,43 @@
 // CHECK-NEXT: [[TMP3:%.*]] = alloca [[APS]], align 8
 // CHECK-NEXT: store [[APS]]*
 
-// CHECK-NEXT: [[P:%.*]] = load [[APS]]** [[FP]]
+// CHECK-NEXT: [[P:%.*]] = load [[APS]]*, [[APS]]** [[FP]]
 // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[P]] to i8*
 // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false)
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[P]], i32 0, i32 0
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[P]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 0
 // CHECK-NEXT: store i16 1, i16* [[T1]], align 2
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 1
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 1
 // CHECK-NEXT: store i16 2, i16* [[T1]], align 2
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 2
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 2
 // CHECK-NEXT: store i16 3, i16* [[T1]], align 2
   __c11_atomic_init(fp, (PS){1,2,3});
 
 // CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[X]] to i8*
 // CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T0]], i8 0, i32 8, i32 8, i1 false)
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[X]], i32 0, i32 0
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[X]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 0
 // CHECK-NEXT: store i16 1, i16* [[T1]], align 2
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 1
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 1
 // CHECK-NEXT: store i16 2, i16* [[T1]], align 2
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 2
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]], [[PS]]* [[T0]], i32 0, i32 2
 // CHECK-NEXT: store i16 3, i16* [[T1]], align 2
   _Atomic(PS) x = (PS){1,2,3};
 
-// CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = load [[APS]]*, [[APS]]** [[FP]]
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i8*
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[APS]]* [[TMP0]] to i8*
 // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[TMP0]], i32 0, i32 0
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[TMP0]], i32 0, i32 0
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[PS]]* [[F]] to i8*
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T0]] to i8*
 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false)
   PS f = *fp;
 
-// CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]]
+// CHECK-NEXT: [[T0:%.*]] = load [[APS]]*, [[APS]]** [[FP]]
 // CHECK-NEXT: [[T1:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[TMP1]] to i8*
 // CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T1]], i8 0, i32 8, i32 8, i1 false)
-// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]]* [[TMP1]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[TMP1]], i32 0, i32 0
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T1]] to i8*
 // CHECK-NEXT: [[T3:%.*]] = bitcast [[PS]]* [[F]] to i8*
 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T2]], i8* [[T3]], i32 6, i32 2, i1 false)
@@ -334,16 +334,16 @@
 // CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T4]], i8* [[T5]], i32 5)
   *fp = f;
 
-// CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]], align 4
+// CHECK-NEXT: [[T0:%.*]] = load [[APS]]*, [[APS]]** [[FP]], align 4
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i8*
 // CHECK-NEXT: [[T2:%.*]] = bitcast [[APS]]* [[TMP3]] to i8*
 // CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[TMP3]], i32 0, i32 0
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]], [[APS]]* [[TMP3]], i32 0, i32 0
 // CHECK-NEXT: [[T1:%.*]] = bitcast %struct.PS* [[TMP2]] to i8*
 // CHECK-NEXT: [[T2:%.*]] = bitcast %struct.PS* [[T0]] to i8*
 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false)
-// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds %struct.PS* [[TMP2]], i32 0, i32 0
-// CHECK-NEXT: [[T1:%.*]] = load i16* [[T0]], align 2
+// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds %struct.PS, %struct.PS* [[TMP2]], i32 0, i32 0
+// CHECK-NEXT: [[T1:%.*]] = load i16, i16* [[T0]], align 2
 // CHECK-NEXT: [[T2:%.*]] = sext i16 [[T1]] to i32
 // CHECK-NEXT: store i32 [[T2]], i32* [[A]], align 4
   int a = ((PS)*fp).x;
diff --git a/test/CodeGen/capture-complex-expr-in-block.c b/test/CodeGen/capture-complex-expr-in-block.c
index 83695a8..20e078e 100644
--- a/test/CodeGen/capture-complex-expr-in-block.c
+++ b/test/CodeGen/capture-complex-expr-in-block.c
@@ -14,7 +14,7 @@
 
 // CHECK-LABEL: define internal void @__main_block_invoke
 // CHECK:  [[C1:%.*]] = alloca { double, double }, align 8
-// CHECK:  [[RP:%.*]] = getelementptr inbounds { double, double }* [[C1]], i32 0, i32 0
-// CHECK-NEXT:  [[R:%.*]] = load double* [[RP]]
-// CHECK-NEXT:  [[IP:%.*]] = getelementptr inbounds { double, double }* [[C1]], i32 0, i32 1
-// CHECK-NEXT:  [[I:%.*]] = load double* [[IP]]
+// CHECK:  [[RP:%.*]] = getelementptr inbounds { double, double }, { double, double }* [[C1]], i32 0, i32 0
+// CHECK-NEXT:  [[R:%.*]] = load double, double* [[RP]]
+// CHECK-NEXT:  [[IP:%.*]] = getelementptr inbounds { double, double }, { double, double }* [[C1]], i32 0, i32 1
+// CHECK-NEXT:  [[I:%.*]] = load double, double* [[IP]]
diff --git a/test/CodeGen/captured-statements-nested.c b/test/CodeGen/captured-statements-nested.c
index cd20b5a..6464243 100644
--- a/test/CodeGen/captured-statements-nested.c
+++ b/test/CodeGen/captured-statements-nested.c
@@ -31,63 +31,63 @@
         arr[10][z.a] = 12;
 
         // CHECK1: define internal void @__captured_stmt{{.*}}([[T]]
-        // CHECK1: [[PARAM_ARR_SIZE_REF:%.+]] = getelementptr inbounds [[T]]* {{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 5
-        // CHECK1: [[PARAM_ARR_SIZE:%.+]] = load [[SIZE_TYPE]]* [[PARAM_ARR_SIZE_REF]]
-        // CHECK1: [[ARR_SIZE1_REF:%.+]] = getelementptr inbounds [[T]]* {{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 8
-        // CHECK1: [[ARR_SIZE1:%.+]] = load [[SIZE_TYPE]]* [[ARR_SIZE1_REF]]
-        // CHECK1: [[ARR_SIZE2_REF:%.+]] = getelementptr inbounds [[T]]* {{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 9
-        // CHECK1: [[ARR_SIZE2:%.+]] = load [[SIZE_TYPE]]* [[ARR_SIZE2_REF]]
+        // CHECK1: [[PARAM_ARR_SIZE_REF:%.+]] = getelementptr inbounds [[T]], [[T]]* {{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 5
+        // CHECK1: [[PARAM_ARR_SIZE:%.+]] = load [[SIZE_TYPE]], [[SIZE_TYPE]]* [[PARAM_ARR_SIZE_REF]]
+        // CHECK1: [[ARR_SIZE1_REF:%.+]] = getelementptr inbounds [[T]], [[T]]* {{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 8
+        // CHECK1: [[ARR_SIZE1:%.+]] = load [[SIZE_TYPE]], [[SIZE_TYPE]]* [[ARR_SIZE1_REF]]
+        // CHECK1: [[ARR_SIZE2_REF:%.+]] = getelementptr inbounds [[T]], [[T]]* {{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 9
+        // CHECK1: [[ARR_SIZE2:%.+]] = load [[SIZE_TYPE]], [[SIZE_TYPE]]* [[ARR_SIZE2_REF]]
         //
-        // CHECK1: getelementptr inbounds [[T]]* {{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2
-        // CHECK1-NEXT: load %struct.A**
-        // CHECK1-NEXT: getelementptr inbounds %struct.A*
+        // CHECK1: getelementptr inbounds [[T]], [[T]]* {{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2
+        // CHECK1-NEXT: load %struct.A*, %struct.A**
+        // CHECK1-NEXT: getelementptr inbounds %struct.A, %struct.A*
         // CHECK1-NEXT: store i{{.+}} 1
         //
-        // CHECK1: getelementptr inbounds [[T]]* {{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1
-        // CHECK1-NEXT: load i{{[0-9]+}}**
+        // CHECK1: getelementptr inbounds [[T]], [[T]]* {{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1
+        // CHECK1-NEXT: load i{{[0-9]+}}*, i{{[0-9]+}}**
         // CHECK1-NEXT: store i{{[0-9]+}} 1
         //
-        // CHECK1: getelementptr inbounds [[T]]* {{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
-        // CHECK1-NEXT: load i{{[0-9]+}}**
+        // CHECK1: getelementptr inbounds [[T]], [[T]]* {{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
+        // CHECK1-NEXT: load i{{[0-9]+}}*, i{{[0-9]+}}**
         // CHECK1-NEXT: store i{{[0-9]+}} 1
         //
-        // CHECK1: getelementptr inbounds [[T]]* {{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4
-        // CHECK1-NEXT: load i{{[0-9]+}}**
-        // CHECK1-NEXT: load i{{[0-9]+}}*
-        // CHECK1-NEXT: getelementptr inbounds [[T]]* {{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3
-        // CHECK1-NEXT: load i{{[0-9]+}}***
-        // CHECK1-NEXT: load i{{[0-9]+}}**
+        // CHECK1: getelementptr inbounds [[T]], [[T]]* {{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4
+        // CHECK1-NEXT: load i{{[0-9]+}}*, i{{[0-9]+}}**
+        // CHECK1-NEXT: load i{{[0-9]+}}, i{{[0-9]+}}*
+        // CHECK1-NEXT: getelementptr inbounds [[T]], [[T]]* {{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3
+        // CHECK1-NEXT: load i{{[0-9]+}}**, i{{[0-9]+}}***
+        // CHECK1-NEXT: load i{{[0-9]+}}*, i{{[0-9]+}}**
         // CHECK1-NEXT: store i{{[0-9]+}}
         //
-        // CHECK1: getelementptr inbounds [[T]]* {{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2
-        // CHECK1-NEXT: load %struct.A**
-        // CHECK1-NEXT: getelementptr inbounds %struct.A*
+        // CHECK1: getelementptr inbounds [[T]], [[T]]* {{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2
+        // CHECK1-NEXT: load %struct.A*, %struct.A**
+        // CHECK1-NEXT: getelementptr inbounds %struct.A, %struct.A*
         // CHECK1-NEXT: store float
         //
-        // CHECK1: getelementptr inbounds [[T]]* {{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2
-        // CHECK1-NEXT: load %struct.A**
-        // CHECK1-NEXT: getelementptr inbounds %struct.A*
+        // CHECK1: getelementptr inbounds [[T]], [[T]]* {{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2
+        // CHECK1-NEXT: load %struct.A*, %struct.A**
+        // CHECK1-NEXT: getelementptr inbounds %struct.A, %struct.A*
         // CHECK1-NEXT: store i8 99
         //
-        // CHECK1: [[SIZE_ADDR_REF:%.*]] = getelementptr inbounds [[T]]* {{.*}}, i{{.+}} 0, i{{.+}} 7
-        // CHECK1-DAG: [[SIZE_ADDR:%.*]] = load i{{.+}}** [[SIZE_ADDR_REF]]
-        // CHECK1-DAG: [[SIZE:%.*]] = load i{{.+}}* [[SIZE_ADDR]]
+        // CHECK1: [[SIZE_ADDR_REF:%.*]] = getelementptr inbounds [[T]], [[T]]* {{.*}}, i{{.+}} 0, i{{.+}} 7
+        // CHECK1-DAG: [[SIZE_ADDR:%.*]] = load i{{.+}}*, i{{.+}}** [[SIZE_ADDR_REF]]
+        // CHECK1-DAG: [[SIZE:%.*]] = load i{{.+}}, i{{.+}}* [[SIZE_ADDR]]
         // CHECK1-DAG: [[PARAM_ARR_IDX:%.*]] = sub nsw i{{.+}} [[SIZE]], 1
-        // CHECK1-DAG: [[PARAM_ARR_ADDR_REF:%.*]] = getelementptr inbounds [[T]]* {{.*}}, i{{.+}} 0, i{{.+}} 6
-        // CHECK1-DAG: [[PARAM_ARR_ADDR:%.*]] = load i{{.+}}*** [[PARAM_ARR_ADDR_REF]]
-        // CHECK1-DAG: [[PARAM_ARR:%.*]] = load i{{.+}}** [[PARAM_ARR_ADDR]]
-        // CHECK1-DAG: [[PARAM_ARR_SIZE_MINUS_1_ADDR:%.*]] = getelementptr inbounds i{{.+}}* [[PARAM_ARR]], i{{.*}}
+        // CHECK1-DAG: [[PARAM_ARR_ADDR_REF:%.*]] = getelementptr inbounds [[T]], [[T]]* {{.*}}, i{{.+}} 0, i{{.+}} 6
+        // CHECK1-DAG: [[PARAM_ARR_ADDR:%.*]] = load i{{.+}}**, i{{.+}}*** [[PARAM_ARR_ADDR_REF]]
+        // CHECK1-DAG: [[PARAM_ARR:%.*]] = load i{{.+}}*, i{{.+}}** [[PARAM_ARR_ADDR]]
+        // CHECK1-DAG: [[PARAM_ARR_SIZE_MINUS_1_ADDR:%.*]] = getelementptr inbounds i{{.+}}, i{{.+}}* [[PARAM_ARR]], i{{.*}}
         // CHECK1: store i{{.+}} 2, i{{.+}}* [[PARAM_ARR_SIZE_MINUS_1_ADDR]]
         //
-        // CHECK1: [[Z_ADDR_REF:%.*]] = getelementptr inbounds [[T]]* {{.*}}, i{{.+}} 0, i{{.+}} 2
-        // CHECK1-DAG: [[Z_ADDR:%.*]] = load %struct.A** [[Z_ADDR_REF]]
-        // CHECK1-DAG: [[Z_A_ADDR:%.*]] = getelementptr inbounds %struct.A* [[Z_ADDR]], i{{.+}} 0, i{{.+}} 0
-        // CHECK1-DAG: [[ARR_IDX_2:%.*]] = load i{{.+}}* [[Z_A_ADDR]]
-        // CHECK1-DAG: [[ARR_ADDR_REF:%.*]] = getelementptr inbounds [[T]]* {{.*}}, i{{.+}} 0, i{{.+}} 10
-        // CHECK1-DAG: [[ARR_ADDR:%.*]] = load i{{.+}}** [[ARR_ADDR_REF]]
+        // CHECK1: [[Z_ADDR_REF:%.*]] = getelementptr inbounds [[T]], [[T]]* {{.*}}, i{{.+}} 0, i{{.+}} 2
+        // CHECK1-DAG: [[Z_ADDR:%.*]] = load %struct.A*, %struct.A** [[Z_ADDR_REF]]
+        // CHECK1-DAG: [[Z_A_ADDR:%.*]] = getelementptr inbounds %struct.A, %struct.A* [[Z_ADDR]], i{{.+}} 0, i{{.+}} 0
+        // CHECK1-DAG: [[ARR_IDX_2:%.*]] = load i{{.+}}, i{{.+}}* [[Z_A_ADDR]]
+        // CHECK1-DAG: [[ARR_ADDR_REF:%.*]] = getelementptr inbounds [[T]], [[T]]* {{.*}}, i{{.+}} 0, i{{.+}} 10
+        // CHECK1-DAG: [[ARR_ADDR:%.*]] = load i{{.+}}*, i{{.+}}** [[ARR_ADDR_REF]]
         // CHECK1-DAG: [[ARR_IDX_1:%.*]] = mul {{.*}} 10
-        // CHECK1-DAG: [[ARR_10_ADDR:%.*]] = getelementptr inbounds i{{.+}}* [[ARR_ADDR]], i{{.*}} [[ARR_IDX_1]]
-        // CHECK1-DAG: [[ARR_10_Z_A_ADDR:%.*]] = getelementptr inbounds i{{.+}}* [[ARR_10_ADDR]], i{{.*}}
+        // CHECK1-DAG: [[ARR_10_ADDR:%.*]] = getelementptr inbounds i{{.+}}, i{{.+}}* [[ARR_ADDR]], i{{.*}} [[ARR_IDX_1]]
+        // CHECK1-DAG: [[ARR_10_Z_A_ADDR:%.*]] = getelementptr inbounds i{{.+}}, i{{.+}}* [[ARR_10_ADDR]], i{{.*}}
         // CHECK1: store i{{.+}} 12, i{{.+}}* [[ARR_10_Z_A_ADDR]]
       }
     }
@@ -143,12 +143,12 @@
   // CHECK2: [[CapA:%[0-9a-z_.]*]] = getelementptr inbounds {{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 7
   //
   // CHECK2: getelementptr inbounds %struct.anon{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK2: load i{{[0-9]+}}**
-  // CHECK2: load i{{[0-9]+}}*
+  // CHECK2: load i{{[0-9]+}}*, i{{[0-9]+}}**
+  // CHECK2: load i{{[0-9]+}}, i{{[0-9]+}}*
   // CHECK2: store i{{[0-9]+}} {{.*}}, i{{[0-9]+}}* [[CapA]]
   //
   // CHECK2: [[CapC:%[0-9a-z_.]*]] = getelementptr inbounds {{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 8
-  // CHECK2-NEXT: [[Val:%[0-9a-z_]*]] = load i{{[0-9]+}}* [[C]]
+  // CHECK2-NEXT: [[Val:%[0-9a-z_]*]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[C]]
   // CHECK2-NEXT: store i{{[0-9]+}} [[Val]], i{{[0-9]+}}* [[CapC]]
   //
   // CHECK2: bitcast %struct.__block_byref_d*
diff --git a/test/CodeGen/captured-statements.c b/test/CodeGen/captured-statements.c
index 52747fb..64af3c0 100644
--- a/test/CodeGen/captured-statements.c
+++ b/test/CodeGen/captured-statements.c
@@ -20,15 +20,15 @@
   //
   // CHECK-1: test1
   // CHECK-1: alloca %struct.anon
-  // CHECK-1: getelementptr inbounds %struct.anon*
+  // CHECK-1: getelementptr inbounds %struct.anon, %struct.anon*
   // CHECK-1: store i32* %i
   // CHECK-1: call void @[[HelperName:__captured_stmt[0-9]+]]
 }
 
 // CHECK-1: define internal void @[[HelperName]](%struct.anon
 // CHECK-1:   getelementptr inbounds %struct.anon{{.*}}, i32 0, i32 0
-// CHECK-1:   load i32**
-// CHECK-1:   load i32*
+// CHECK-1:   load i32*, i32**
+// CHECK-1:   load i32, i32*
 // CHECK-1:   add nsw i32
 // CHECK-1:   store i32
 
@@ -70,7 +70,7 @@
   }
   // CHECK-3: test4([[INTPTR_T:i.+]] {{.*}}[[SIZE_ARG:%.+]], [[INTPTR_T]]*
   // CHECK-3: store [[INTPTR_T]] {{.*}}[[SIZE_ARG]], [[INTPTR_T]]* [[SIZE_ADDR:%.+]],
-  // CHECK-3: [[SIZE:%.+]] = load [[INTPTR_T]]* [[SIZE_ADDR]],
+  // CHECK-3: [[SIZE:%.+]] = load [[INTPTR_T]], [[INTPTR_T]]* [[SIZE_ADDR]],
   // CHECK-3: [[REF:%.+]] = getelementptr inbounds
   // CHECK-3: store [[INTPTR_T]] [[SIZE]], [[INTPTR_T]]* [[REF]]
   // CHECK-3: call void @__captured_stmt
@@ -92,6 +92,6 @@
 
 // CHECK-GLOBALS: define internal void @__captured_stmt[[HelperName]]
 // CHECK-GLOBALS-NOT: ret
-// CHECK-GLOBALS:   load i32* @global
-// CHECK-GLOBALS:   load i32* @
-// CHECK-GLOBALS:   load i32* @e
+// CHECK-GLOBALS:   load i32, i32* @global
+// CHECK-GLOBALS:   load i32, i32* @
+// CHECK-GLOBALS:   load i32, i32* @e
diff --git a/test/CodeGen/catch-undef-behavior.c b/test/CodeGen/catch-undef-behavior.c
index c41b37c..1ed288b 100644
--- a/test/CodeGen/catch-undef-behavior.c
+++ b/test/CodeGen/catch-undef-behavior.c
@@ -1,7 +1,8 @@
-// RUN: %clang_cc1 -fsanitize=alignment,null,object-size,shift,return,signed-integer-overflow,vla-bound,float-cast-overflow,integer-divide-by-zero,bool,returns-nonnull-attribute,nonnull-attribute -fsanitize-recover=alignment,null,object-size,shift,signed-integer-overflow,vla-bound,float-cast-overflow,integer-divide-by-zero,bool,returns-nonnull-attribute,nonnull-attribute -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-UBSAN
-// RUN: %clang_cc1 -fsanitize-undefined-trap-on-error -fsanitize=alignment,null,object-size,shift,return,signed-integer-overflow,vla-bound,float-cast-overflow,integer-divide-by-zero,bool,returns-nonnull-attribute,nonnull-attribute -fsanitize-recover=alignment,null,object-size,shift,signed-integer-overflow,vla-bound,float-cast-overflow,integer-divide-by-zero,bool,returns-nonnull-attribute,nonnull-attribute -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-TRAP
+// RUN: %clang_cc1 -fsanitize=alignment,null,object-size,shift-base,shift-exponent,return,signed-integer-overflow,vla-bound,float-cast-overflow,integer-divide-by-zero,bool,returns-nonnull-attribute,nonnull-attribute -fsanitize-recover=alignment,null,object-size,shift-base,shift-exponent,signed-integer-overflow,vla-bound,float-cast-overflow,integer-divide-by-zero,bool,returns-nonnull-attribute,nonnull-attribute -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-UBSAN
+// RUN: %clang_cc1 -fsanitize-undefined-trap-on-error -fsanitize=alignment,null,object-size,shift-base,shift-exponent,return,signed-integer-overflow,vla-bound,float-cast-overflow,integer-divide-by-zero,bool,returns-nonnull-attribute,nonnull-attribute -fsanitize-recover=alignment,null,object-size,shift-base,shift-exponent,signed-integer-overflow,vla-bound,float-cast-overflow,integer-divide-by-zero,bool,returns-nonnull-attribute,nonnull-attribute -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-TRAP
 // RUN: %clang_cc1 -fsanitize=null -fsanitize-recover=null -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefix=CHECK-NULL
 // RUN: %clang_cc1 -fsanitize=signed-integer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefix=CHECK-OVERFLOW
+// REQUIRES: asserts
 
 // CHECK-UBSAN: @[[INT:.*]] = private unnamed_addr constant { i16, i16, [6 x i8] } { i16 0, i16 11, [6 x i8] c"'int'\00" }
 
@@ -83,15 +84,19 @@
 
 // CHECK-COMMON-LABEL: @lsh_overflow
 int lsh_overflow(int a, int b) {
-  // CHECK-COMMON:      %[[INBOUNDS:.*]] = icmp ule i32 %[[RHS:.*]], 31
-  // CHECK-COMMON-NEXT: br i1 %[[INBOUNDS]], label %[[CHECKBB:.*]], label %[[CONTBB:.*]]
+  // CHECK-COMMON:      %[[RHS_INBOUNDS:.*]] = icmp ule i32 %[[RHS:.*]], 31
+  // CHECK-COMMON-NEXT: br i1 %[[RHS_INBOUNDS]], label %[[CHECK_BB:.*]], label %[[CONT_BB:.*]],
 
-  // CHECK-COMMON:      %[[SHIFTED_OUT_WIDTH:.*]] = sub nuw nsw i32 31, %[[RHS]]
+  // CHECK-COMMON:      [[CHECK_BB]]:
+  // CHECK-COMMON-NEXT: %[[SHIFTED_OUT_WIDTH:.*]] = sub nuw nsw i32 31, %[[RHS]]
   // CHECK-COMMON-NEXT: %[[SHIFTED_OUT:.*]] = lshr i32 %[[LHS:.*]], %[[SHIFTED_OUT_WIDTH]]
   // CHECK-COMMON-NEXT: %[[NO_OVERFLOW:.*]] = icmp eq i32 %[[SHIFTED_OUT]], 0
-  // CHECK-COMMON-NEXT: br label %[[CONTBB]]
+  // CHECK-COMMON-NEXT: br label %[[CONT_BB]]
 
-  // CHECK-COMMON:      %[[VALID:.*]] = phi i1 [ %[[INBOUNDS]], {{.*}} ], [ %[[NO_OVERFLOW]], %[[CHECKBB]] ]
+  // CHECK-COMMON:      [[CONT_BB]]:
+  // CHECK-COMMON-NEXT: %[[VALID_BASE:.*]] = phi i1 [ true, {{.*}} ], [ %[[NO_OVERFLOW]], %[[CHECK_BB]] ]
+  // CHECK-COMMON-NEXT: %[[VALID:.*]] = and i1 %[[RHS_INBOUNDS]], %[[VALID_BASE]]
+
   // CHECK-UBSAN: br i1 %[[VALID]], {{.*}} !prof ![[WEIGHT_MD]]
   // CHECK-TRAP:  br i1 %[[VALID]]
 
@@ -122,7 +127,7 @@
   // CHECK-TRAP:      call void @llvm.trap() [[NR_NUW]]
   // CHECK-TRAP-NEXT: unreachable
 
-  // CHECK-COMMON:      %[[RET:.*]] = ashr i32 %[[LHS]], %[[RHS]]
+  // CHECK-COMMON:      %[[RET:.*]] = ashr i32 {{.*}}, %[[RHS]]
   // CHECK-COMMON-NEXT: ret i32 %[[RET]]
 #line 400
   return a >> b;
diff --git a/test/CodeGen/clear_cache.c b/test/CodeGen/clear_cache.c
index ec88c90..7bbcc03 100644
--- a/test/CodeGen/clear_cache.c
+++ b/test/CodeGen/clear_cache.c
@@ -7,6 +7,6 @@
 
 int main() {
   __builtin___clear_cache(buffer, buffer+32);
-// CHECK: @llvm.clear_cache(i8* getelementptr inbounds ({{.*}}, i8* getelementptr inbounds (i8* getelementptr inbounds ({{.*}} 32))
+// CHECK: @llvm.clear_cache(i8* getelementptr inbounds ({{.*}}, i8* getelementptr inbounds (i8, i8* getelementptr inbounds ({{.*}} 32))
   return 0;
 }
diff --git a/test/CodeGen/complex-convert.c b/test/CodeGen/complex-convert.c
index 6ecb884..0db2588 100644
--- a/test/CodeGen/complex-convert.c
+++ b/test/CodeGen/complex-convert.c
@@ -32,690 +32,690 @@
   // CHECK: alloca i[[LLSIZE]], align [[LLALIGN:[0-9]+]]
 
   sc1 = csc;
-  // CHECK: %[[VAR1:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]]  }* %[[CSC:[A-Za-z0-9.]+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR2:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR1]]
+  // CHECK: %[[VAR1:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]]  }, { i[[CHSIZE]], i[[CHSIZE]]  }* %[[CSC:[A-Za-z0-9.]+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR2:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR1]]
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR2]], i[[CHSIZE]]* %[[SC1:[A-Za-z0-9.]+]], align [[CHALIGN]]
 
   sc1 = cuc;
-  // CHECK-NEXT: %[[VAR3:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]]  }* %[[CUC:[A-Za-z0-9.]+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR4:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR3]]
+  // CHECK-NEXT: %[[VAR3:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]]  }, { i[[CHSIZE]], i[[CHSIZE]]  }* %[[CUC:[A-Za-z0-9.]+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR4:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR3]]
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR4]], i[[CHSIZE]]* %[[SC1]], align [[CHALIGN]]
 
   sc1 = csll;
-  // CHECK-NEXT: %[[VAR5:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]]  }* %[[CSLL:[A-Za-z0-9.]+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR6:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR5]]
+  // CHECK-NEXT: %[[VAR5:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]]  }, { i[[LLSIZE]], i[[LLSIZE]]  }* %[[CSLL:[A-Za-z0-9.]+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR6:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR5]]
   // CHECK-NEXT: %[[VAR7:[A-Za-z0-9.]+]] = trunc i[[LLSIZE]] %[[VAR6]] to i[[CHSIZE]]
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR7]], i[[CHSIZE]]* %[[SC1]], align [[CHALIGN]]
 
   sc1 = cull;
-  // CHECK-NEXT: %[[VAR8:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]]  }* %[[CULL:[A-Za-z0-9.]+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR9:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR8]]
+  // CHECK-NEXT: %[[VAR8:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]]  }, { i[[LLSIZE]], i[[LLSIZE]]  }* %[[CULL:[A-Za-z0-9.]+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR9:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR8]]
   // CHECK-NEXT: %[[VAR10:[A-Za-z0-9.]+]] = trunc i[[LLSIZE]] %[[VAR9]] to i[[CHSIZE]]
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR10]], i[[CHSIZE]]* %[[SC1]], align [[CHALIGN]]
   
   uc1 = csc;
-  // CHECK-NEXT: %[[VAR11:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]]  }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR12:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR11]]
+  // CHECK-NEXT: %[[VAR11:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]]  }, { i[[CHSIZE]], i[[CHSIZE]]  }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR12:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR11]]
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR12]], i[[CHSIZE]]* %[[UC1:[A-Za-z0-9.]+]], align [[CHALIGN]]
 
   uc1 = cuc;
-  // CHECK-NEXT: %[[VAR13:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]]  }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR14:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR13]]
+  // CHECK-NEXT: %[[VAR13:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]]  }, { i[[CHSIZE]], i[[CHSIZE]]  }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR14:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR13]]
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR14]], i[[CHSIZE]]* %[[UC1]], align [[CHALIGN]]
 
   uc1 = csll;
-  // CHECK-NEXT: %[[VAR15:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]]  }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR16:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR15]]
+  // CHECK-NEXT: %[[VAR15:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]]  }, { i[[LLSIZE]], i[[LLSIZE]]  }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR16:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR15]]
   // CHECK-NEXT: %[[VAR17:[A-Za-z0-9.]+]] = trunc i[[LLSIZE]] %[[VAR16]] to i[[CHSIZE]]
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR17]], i[[CHSIZE]]* %[[UC1]], align [[CHALIGN]]
 
   uc1 = cull;
-  // CHECK-NEXT: %[[VAR18:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]]  }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR19:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR18]]
+  // CHECK-NEXT: %[[VAR18:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]]  }, { i[[LLSIZE]], i[[LLSIZE]]  }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR19:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR18]]
   // CHECK-NEXT: %[[VAR20:[A-Za-z0-9.]+]] = trunc i[[LLSIZE]] %[[VAR19]] to i[[CHSIZE]]
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR20]], i[[CHSIZE]]* %[[UC1]], align [[CHALIGN]]
 
   sll1 = csc;
-  // CHECK-NEXT: %[[VAR21:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]]  }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR22:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR21]]
+  // CHECK-NEXT: %[[VAR21:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]]  }, { i[[CHSIZE]], i[[CHSIZE]]  }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR22:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR21]]
   // CHECK-NEXT: %[[VAR23:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR22]] to i[[LLSIZE]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR23]], i[[LLSIZE]]* %[[SLL1:[A-Za-z0-9]+]], align [[LLALIGN]]
 
   sll1 = cuc;
-  // CHECK-NEXT: %[[VAR24:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]]  }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR25:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR24]]
+  // CHECK-NEXT: %[[VAR24:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]]  }, { i[[CHSIZE]], i[[CHSIZE]]  }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR25:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR24]]
   // CHECK-NEXT: %[[VAR26:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR25]] to i[[LLSIZE]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR26]], i[[LLSIZE]]* %[[SLL1]], align [[LLALIGN]]
 
   sll1 = csll;
-  // CHECK-NEXT: %[[VAR27:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR28:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR27]]
+  // CHECK-NEXT: %[[VAR27:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR28:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR27]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR28]], i[[LLSIZE]]* %[[SLL1]], align [[LLALIGN]]
 
   sll1 = cull;
-  // CHECK-NEXT: %[[VAR29:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR30:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR29]]
+  // CHECK-NEXT: %[[VAR29:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR30:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR29]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR30]], i[[LLSIZE]]* %[[SLL1]], align [[LLALIGN]]
   
   ull1 = csc;
-  // CHECK-NEXT: %[[VAR31:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR32:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR31]]
+  // CHECK-NEXT: %[[VAR31:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR32:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR31]]
   // CHECK-NEXT: %[[VAR33:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR32]] to i[[LLSIZE]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR33]], i[[LLSIZE]]* %[[ULL1:[A-Za-z0-9]+]], align [[LLALIGN]]
 
   ull1 = cuc;
-  // CHECK-NEXT: %[[VAR34:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR35:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR34]]
+  // CHECK-NEXT: %[[VAR34:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR35:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR34]]
   // CHECK-NEXT: %[[VAR36:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR35]] to i[[LLSIZE]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR36]], i[[LLSIZE]]* %[[ULL1]], align [[LLALIGN]]
 
   ull1 = csll;
-  // CHECK-NEXT: %[[VAR37:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR38:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR37]]
+  // CHECK-NEXT: %[[VAR37:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR38:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR37]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR38]], i[[LLSIZE]]* %[[ULL1]], align [[LLALIGN]]
 
   ull1 = cull;
-  // CHECK-NEXT: %[[VAR39:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR40:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR39]]
+  // CHECK-NEXT: %[[VAR39:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR40:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR39]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR40]], i[[LLSIZE]]* %[[ULL1]], align [[LLALIGN]]
 
   csc1 = sc;
-  // CHECK-NEXT: %[[VAR41:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[SCADDR:[A-Za-z0-9.]+]], align [[CHALIGN]]
-  // CHECK-NEXT: %[[VAR42:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1:[A-Za-z0-9.]+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR43:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR41:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[SCADDR:[A-Za-z0-9.]+]], align [[CHALIGN]]
+  // CHECK-NEXT: %[[VAR42:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1:[A-Za-z0-9.]+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR43:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR41]], i[[CHSIZE]]* %[[VAR42]]
   // CHECK-NEXT: store i[[CHSIZE]] 0, i[[CHSIZE]]* %[[VAR43]]
 
   csc1 = uc;
-  // CHECK-NEXT: %[[VAR44:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[UCADDR:[A-Za-z0-9.]+]], align [[CHALIGN]]
-  // CHECK-NEXT: %[[VAR45:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR46:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR44:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[UCADDR:[A-Za-z0-9.]+]], align [[CHALIGN]]
+  // CHECK-NEXT: %[[VAR45:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR46:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR44]], i[[CHSIZE]]* %[[VAR45]]
   // CHECK-NEXT: store i[[CHSIZE]] 0, i[[CHSIZE]]* %[[VAR46]]
 
   csc1 = sll;
-  // CHECK-NEXT: %[[VAR47:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[SLLADDR:[A-Za-z0-9.]+]], align [[LLALIGN]]
+  // CHECK-NEXT: %[[VAR47:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[SLLADDR:[A-Za-z0-9.]+]], align [[LLALIGN]]
   // CHECK-NEXT: %[[VAR48:[A-Za-z0-9.]+]] = trunc i[[LLSIZE]] %[[VAR47]] to i[[CHSIZE]]
-  // CHECK-NEXT: %[[VAR49:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR50:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR49:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR50:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR48]], i[[CHSIZE]]* %[[VAR49]]
   // CHECK-NEXT: store i[[CHSIZE]] 0, i[[CHSIZE]]* %[[VAR50]]
 
   csc1 = ull;
-  // CHECK-NEXT: %[[VAR51:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[ULLADDR:[A-Za-z0-9.]+]], align [[LLALIGN]]
+  // CHECK-NEXT: %[[VAR51:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[ULLADDR:[A-Za-z0-9.]+]], align [[LLALIGN]]
   // CHECK-NEXT: %[[VAR52:[A-Za-z0-9.]+]] = trunc i[[LLSIZE]] %[[VAR51]] to i[[CHSIZE]]
-  // CHECK-NEXT: %[[VAR53:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR54:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR53:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR54:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR52]], i[[CHSIZE]]* %[[VAR53]]
   // CHECK-NEXT: store i[[CHSIZE]] 0, i[[CHSIZE]]* %[[VAR54]]
   
   cuc1 = sc;
-  // CHECK-NEXT: %[[VAR55:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[SCADDR]], align [[CHALIGN]]
-  // CHECK-NEXT: %[[VAR56:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1:[A-Za-z0-9.]+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR57:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR55:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[SCADDR]], align [[CHALIGN]]
+  // CHECK-NEXT: %[[VAR56:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1:[A-Za-z0-9.]+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR57:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR55]], i[[CHSIZE]]* %[[VAR56]]
   // CHECK-NEXT: store i[[CHSIZE]] 0, i[[CHSIZE]]* %[[VAR57]]
 
   cuc1 = uc;
-  // CHECK-NEXT: %[[VAR58:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[UCADDR]], align [[CHALIGN]]
-  // CHECK-NEXT: %[[VAR59:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR60:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR58:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[UCADDR]], align [[CHALIGN]]
+  // CHECK-NEXT: %[[VAR59:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR60:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR58]], i[[CHSIZE]]* %[[VAR59]]
   // CHECK-NEXT: store i[[CHSIZE]] 0, i[[CHSIZE]]* %[[VAR60]]
 
   cuc1 = sll;
-  // CHECK-NEXT: %[[VAR61:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[SLLADDR]], align [[LLALIGN]]
+  // CHECK-NEXT: %[[VAR61:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[SLLADDR]], align [[LLALIGN]]
   // CHECK-NEXT: %[[VAR62:[A-Za-z0-9.]+]] = trunc i[[LLSIZE]] %[[VAR61]] to i[[CHSIZE]]
-  // CHECK-NEXT: %[[VAR63:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR64:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR63:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR64:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR62]], i[[CHSIZE]]* %[[VAR63]]
   // CHECK-NEXT: store i[[CHSIZE]] 0, i[[CHSIZE]]* %[[VAR64]]
 
   cuc1 = ull;
-  // CHECK-NEXT: %[[VAR65:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[ULLADDR]], align [[LLALIGN]]
+  // CHECK-NEXT: %[[VAR65:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[ULLADDR]], align [[LLALIGN]]
   // CHECK-NEXT: %[[VAR66:[A-Za-z0-9.]+]] = trunc i[[LLSIZE]] %[[VAR65]] to i[[CHSIZE]]
-  // CHECK-NEXT: %[[VAR67:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR68:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR67:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR68:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR66]], i[[CHSIZE]]* %[[VAR67]]
   // CHECK-NEXT: store i[[CHSIZE]] 0, i[[CHSIZE]]* %[[VAR68]]
 
   csll1 = sc;
-  // CHECK-NEXT: %[[VAR69:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[SCADDR]], align [[CHALIGN]]
+  // CHECK-NEXT: %[[VAR69:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[SCADDR]], align [[CHALIGN]]
   // CHECK-NEXT: %[[VAR70:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR69]] to i[[LLSIZE]]
-  // CHECK-NEXT: %[[VAR71:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1:[A-Za-z0-9.]+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR72:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR71:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1:[A-Za-z0-9.]+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR72:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR70]], i[[LLSIZE]]* %[[VAR71]]
   // CHECK-NEXT: store i[[LLSIZE]] 0, i[[LLSIZE]]* %[[VAR72]]
 
   csll1 = uc;
-  // CHECK-NEXT: %[[VAR73:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[UCADDR]], align [[CHALIGN]]
+  // CHECK-NEXT: %[[VAR73:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[UCADDR]], align [[CHALIGN]]
   // CHECK-NEXT: %[[VAR74:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR73]] to i[[LLSIZE]]
-  // CHECK-NEXT: %[[VAR75:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR76:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR75:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR76:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR74]], i[[LLSIZE]]* %[[VAR75]]
   // CHECK-NEXT: store i[[LLSIZE]] 0, i[[LLSIZE]]* %[[VAR76]]
 
   csll1 = sll;
-  // CHECK-NEXT: %[[VAR77:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[SLLADDR]], align [[LLALIGN]]
-  // CHECK-NEXT: %[[VAR78:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR79:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR77:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[SLLADDR]], align [[LLALIGN]]
+  // CHECK-NEXT: %[[VAR78:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR79:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR77]], i[[LLSIZE]]* %[[VAR78]]
   // CHECK-NEXT: store i[[LLSIZE]] 0, i[[LLSIZE]]* %[[VAR79]]
 
   csll1 = ull;
-  // CHECK-NEXT: %[[VAR77:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[ULLADDR]], align [[LLALIGN]]
-  // CHECK-NEXT: %[[VAR78:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR79:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR77:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[ULLADDR]], align [[LLALIGN]]
+  // CHECK-NEXT: %[[VAR78:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR79:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR77]], i[[LLSIZE]]* %[[VAR78]]
   // CHECK-NEXT: store i[[LLSIZE]] 0, i[[LLSIZE]]* %[[VAR79]]
 
   cull1 = sc;
-  // CHECK-NEXT: %[[VAR80:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[SCADDR]], align [[CHALIGN]]
+  // CHECK-NEXT: %[[VAR80:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[SCADDR]], align [[CHALIGN]]
   // CHECK-NEXT: %[[VAR81:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR80]] to i[[LLSIZE]]
-  // CHECK-NEXT: %[[VAR82:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1:[A-Za-z0-9.]+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR83:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR82:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1:[A-Za-z0-9.]+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR83:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR81]], i[[LLSIZE]]* %[[VAR82]]
   // CHECK-NEXT: store i[[LLSIZE]] 0, i[[LLSIZE]]* %[[VAR83]]
 
   cull1 = uc;
-  // CHECK-NEXT: %[[VAR84:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[UCADDR]], align [[CHALIGN]]
+  // CHECK-NEXT: %[[VAR84:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[UCADDR]], align [[CHALIGN]]
   // CHECK-NEXT: %[[VAR85:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR84]] to i[[LLSIZE]]
-  // CHECK-NEXT: %[[VAR86:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR87:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR86:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR87:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR85]], i[[LLSIZE]]* %[[VAR86]]
   // CHECK-NEXT: store i[[LLSIZE]] 0, i[[LLSIZE]]* %[[VAR87]]
 
   cull1 = sll;
-  // CHECK-NEXT: %[[VAR88:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[SLLADDR]], align [[LLALIGN]]
-  // CHECK-NEXT: %[[VAR89:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR90:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR88:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[SLLADDR]], align [[LLALIGN]]
+  // CHECK-NEXT: %[[VAR89:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR90:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR88]], i[[LLSIZE]]* %[[VAR89]]
   // CHECK-NEXT: store i[[LLSIZE]] 0, i[[LLSIZE]]* %[[VAR90]]
 
   cull1 = ull;
-  // CHECK-NEXT: %[[VAR91:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[ULLADDR]], align [[LLALIGN]]
-  // CHECK-NEXT: %[[VAR92:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR93:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR91:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[ULLADDR]], align [[LLALIGN]]
+  // CHECK-NEXT: %[[VAR92:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR93:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR91]], i[[LLSIZE]]* %[[VAR92]]
   // CHECK-NEXT: store i[[LLSIZE]] 0, i[[LLSIZE]]* %[[VAR93]]
 
   csc1 = sc + csc;
-  // CHECK-NEXT: %[[VAR94:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[SCADDR]], align [[CHALIGN]]
+  // CHECK-NEXT: %[[VAR94:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[SCADDR]], align [[CHALIGN]]
   // CHECK-NEXT: %[[VAR95:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR94]] to i[[ARSIZE:[0-9]+]]
-  // CHECK-NEXT: %[[VAR96:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR97:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR96]]
-  // CHECK-NEXT: %[[VAR98:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR99:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR98]]
+  // CHECK-NEXT: %[[VAR96:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR97:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR96]]
+  // CHECK-NEXT: %[[VAR98:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR99:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR98]]
   // CHECK-NEXT: %[[VAR100:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR97]] to i[[ARSIZE]]
   // CHECK-NEXT: %[[VAR101:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR99]] to i[[ARSIZE]]
   // CHECK-NEXT: %[[VAR102:[A-Za-z0-9.]+]] = add i[[ARSIZE]] %[[VAR95]], %[[VAR100]]
   // CHECK-NEXT: %[[VAR103:[A-Za-z0-9.]+]] = add i[[ARSIZE]] 0, %[[VAR101]]
   // CHECK-NEXT: %[[VAR104:[A-Za-z0-9.]+]] = trunc i[[ARSIZE]] %[[VAR102]] to i[[CHSIZE]]
   // CHECK-NEXT: %[[VAR105:[A-Za-z0-9.]+]] = trunc i[[ARSIZE]] %[[VAR103]] to i[[CHSIZE]]
-  // CHECK-NEXT: %[[VAR106:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR107:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR106:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR107:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR104]], i[[CHSIZE]]* %[[VAR106]]
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR105]], i[[CHSIZE]]* %[[VAR107]]
 
   cuc1 = sc + cuc;
-  // CHECK-NEXT: %[[VAR108:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[SCADDR]], align [[CHALIGN]]
+  // CHECK-NEXT: %[[VAR108:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[SCADDR]], align [[CHALIGN]]
   // CHECK-NEXT: %[[VAR109:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR108]] to i[[ARSIZE]]
-  // CHECK-NEXT: %[[VAR110:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR111:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR110]]
-  // CHECK-NEXT: %[[VAR112:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR113:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR112]]
+  // CHECK-NEXT: %[[VAR110:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR111:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR110]]
+  // CHECK-NEXT: %[[VAR112:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR113:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR112]]
   // CHECK-NEXT: %[[VAR114:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR111]] to i[[ARSIZE]]
   // CHECK-NEXT: %[[VAR115:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR113]] to i[[ARSIZE]]
   // CHECK-NEXT: %[[VAR116:[A-Za-z0-9.]+]] = add i[[ARSIZE]] %[[VAR109]], %[[VAR114]]
   // CHECK-NEXT: %[[VAR117:[A-Za-z0-9.]+]] = add i[[ARSIZE]] 0, %[[VAR115]]
   // CHECK-NEXT: %[[VAR118:[A-Za-z0-9.]+]] = trunc i[[ARSIZE]] %[[VAR116]] to i[[CHSIZE]]
   // CHECK-NEXT: %[[VAR119:[A-Za-z0-9.]+]] = trunc i[[ARSIZE]] %[[VAR117]] to i[[CHSIZE]]
-  // CHECK-NEXT: %[[VAR120:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR121:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR120:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR121:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR118]], i[[CHSIZE]]* %[[VAR120]]
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR119]], i[[CHSIZE]]* %[[VAR121]]
 
   csll1 = sc + csll;
-  // CHECK-NEXT: %[[VAR122:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[SCADDR]], align [[CHALIGN]]
+  // CHECK-NEXT: %[[VAR122:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[SCADDR]], align [[CHALIGN]]
   // CHECK-NEXT: %[[VAR123:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR122]] to i[[LLSIZE]]
-  // CHECK-NEXT: %[[VAR124:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR125:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR124]]
-  // CHECK-NEXT: %[[VAR126:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR127:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR126]]
+  // CHECK-NEXT: %[[VAR124:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR125:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR124]]
+  // CHECK-NEXT: %[[VAR126:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR127:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR126]]
   // CHECK-NEXT: %[[VAR128:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR123]], %[[VAR125]]
   // CHECK-NEXT: %[[VAR129:[A-Za-z0-9.]+]] = add i[[LLSIZE]] 0, %[[VAR127]]
-  // CHECK-NEXT: %[[VAR130:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR131:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR130:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR131:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR128]], i[[LLSIZE]]* %[[VAR130]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR129]], i[[LLSIZE]]* %[[VAR131]]
 
   cull1 = sc + cull;
-  // CHECK-NEXT: %[[VAR132:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[SCADDR]], align [[CHALIGN]]
+  // CHECK-NEXT: %[[VAR132:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[SCADDR]], align [[CHALIGN]]
   // CHECK-NEXT: %[[VAR133:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR132]] to i[[LLSIZE]]
-  // CHECK-NEXT: %[[VAR134:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR135:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR134]]
-  // CHECK-NEXT: %[[VAR136:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR137:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR136]]
+  // CHECK-NEXT: %[[VAR134:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR135:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR134]]
+  // CHECK-NEXT: %[[VAR136:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR137:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR136]]
   // CHECK-NEXT: %[[VAR138:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR133]], %[[VAR135]]
   // CHECK-NEXT: %[[VAR139:[A-Za-z0-9.]+]] = add i[[LLSIZE]] 0, %[[VAR137]]
-  // CHECK-NEXT: %[[VAR140:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR141:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR140:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR141:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR138]], i[[LLSIZE]]* %[[VAR140]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR139]], i[[LLSIZE]]* %[[VAR141]]
   
   csc1 = uc + csc;
-  // CHECK-NEXT: %[[VAR142:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[UCADDR]], align [[CHALIGN]]
+  // CHECK-NEXT: %[[VAR142:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[UCADDR]], align [[CHALIGN]]
   // CHECK-NEXT: %[[VAR143:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR142]] to i[[ARSIZE]]
-  // CHECK-NEXT: %[[VAR144:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR145:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR144]]
-  // CHECK-NEXT: %[[VAR146:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR147:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR146]]
+  // CHECK-NEXT: %[[VAR144:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR145:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR144]]
+  // CHECK-NEXT: %[[VAR146:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR147:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR146]]
   // CHECK-NEXT: %[[VAR148:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR145]] to i[[ARSIZE]]
   // CHECK-NEXT: %[[VAR149:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR147]] to i[[ARSIZE]]
   // CHECK-NEXT: %[[VAR150:[A-Za-z0-9.]+]] = add i[[ARSIZE]] %[[VAR143]], %[[VAR148]]
   // CHECK-NEXT: %[[VAR151:[A-Za-z0-9.]+]] = add i[[ARSIZE]] 0, %[[VAR149]]
   // CHECK-NEXT: %[[VAR152:[A-Za-z0-9.]+]] = trunc i[[ARSIZE]] %[[VAR150]] to i[[CHSIZE]]
   // CHECK-NEXT: %[[VAR153:[A-Za-z0-9.]+]] = trunc i[[ARSIZE]] %[[VAR151]] to i[[CHSIZE]]
-  // CHECK-NEXT: %[[VAR154:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR155:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR154:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR155:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR152]], i[[CHSIZE]]* %[[VAR154]]
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR153]], i[[CHSIZE]]* %[[VAR155]]
 
   cuc1 = uc + cuc;
-  // CHECK-NEXT: %[[VAR156:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[UCADDR]], align [[CHALIGN]]
+  // CHECK-NEXT: %[[VAR156:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[UCADDR]], align [[CHALIGN]]
   // CHECK-NEXT: %[[VAR157:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR156]] to i[[ARSIZE]]
-  // CHECK-NEXT: %[[VAR158:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR159:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR158]]
-  // CHECK-NEXT: %[[VAR160:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR161:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR160]]
+  // CHECK-NEXT: %[[VAR158:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR159:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR158]]
+  // CHECK-NEXT: %[[VAR160:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR161:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR160]]
   // CHECK-NEXT: %[[VAR162:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR159]] to i[[ARSIZE]]
   // CHECK-NEXT: %[[VAR163:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR161]] to i[[ARSIZE]]
   // CHECK-NEXT: %[[VAR164:[A-Za-z0-9.]+]] = add i[[ARSIZE]] %[[VAR157]], %[[VAR162]]
   // CHECK-NEXT: %[[VAR165:[A-Za-z0-9.]+]] = add i[[ARSIZE]] 0, %[[VAR163]]
   // CHECK-NEXT: %[[VAR166:[A-Za-z0-9.]+]] = trunc i[[ARSIZE]] %[[VAR164]] to i[[CHSIZE]]
   // CHECK-NEXT: %[[VAR167:[A-Za-z0-9.]+]] = trunc i[[ARSIZE]] %[[VAR165]] to i[[CHSIZE]]
-  // CHECK-NEXT: %[[VAR168:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR169:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR168:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR169:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR166]], i[[CHSIZE]]* %[[VAR168]]
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR167]], i[[CHSIZE]]* %[[VAR169]]
 
   csll1 = uc + csll;
-  // CHECK-NEXT: %[[VAR170:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[UCADDR]], align [[CHALIGN]]
+  // CHECK-NEXT: %[[VAR170:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[UCADDR]], align [[CHALIGN]]
   // CHECK-NEXT: %[[VAR171:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR170]] to i[[LLSIZE]]
-  // CHECK-NEXT: %[[VAR172:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR173:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR172]]
-  // CHECK-NEXT: %[[VAR174:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR175:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR174]]
+  // CHECK-NEXT: %[[VAR172:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR173:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR172]]
+  // CHECK-NEXT: %[[VAR174:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR175:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR174]]
   // CHECK-NEXT: %[[VAR176:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR171]], %[[VAR173]]
   // CHECK-NEXT: %[[VAR177:[A-Za-z0-9.]+]] = add i[[LLSIZE]] 0, %[[VAR175]]
-  // CHECK-NEXT: %[[VAR178:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR179:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR178:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR179:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR176]], i[[LLSIZE]]* %[[VAR178]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR177]], i[[LLSIZE]]* %[[VAR179]]
 
   cull1 = uc + cull;
-  // CHECK-NEXT: %[[VAR180:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[UCADDR]], align [[CHALIGN]]
+  // CHECK-NEXT: %[[VAR180:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[UCADDR]], align [[CHALIGN]]
   // CHECK-NEXT: %[[VAR181:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR180]] to i[[LLSIZE]]
-  // CHECK-NEXT: %[[VAR182:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR183:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR182]]
-  // CHECK-NEXT: %[[VAR184:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR185:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR184]]
+  // CHECK-NEXT: %[[VAR182:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR183:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR182]]
+  // CHECK-NEXT: %[[VAR184:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR185:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR184]]
   // CHECK-NEXT: %[[VAR186:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR181]], %[[VAR183]]
   // CHECK-NEXT: %[[VAR187:[A-Za-z0-9.]+]] = add i[[LLSIZE]] 0, %[[VAR185]]
-  // CHECK-NEXT: %[[VAR188:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR189:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR188:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR189:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR186]], i[[LLSIZE]]* %[[VAR188]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR187]], i[[LLSIZE]]* %[[VAR189]]
 
   csll1 = sll + csc;
-  // CHECK-NEXT: %[[VAR190:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[SLLADDR]], align [[LLALIGN]]
-  // CHECK-NEXT: %[[VAR191:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR192:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR191]]
-  // CHECK-NEXT: %[[VAR193:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR194:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR193]]
+  // CHECK-NEXT: %[[VAR190:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[SLLADDR]], align [[LLALIGN]]
+  // CHECK-NEXT: %[[VAR191:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR192:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR191]]
+  // CHECK-NEXT: %[[VAR193:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR194:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR193]]
   // CHECK-NEXT: %[[VAR195:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR192]] to i[[LLSIZE]]
   // CHECK-NEXT: %[[VAR196:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR194]] to i[[LLSIZE]]
   // CHECK-NEXT: %[[VAR197:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR190]], %[[VAR195]]
   // CHECK-NEXT: %[[VAR198:[A-Za-z0-9.]+]] = add i[[LLSIZE]] 0, %[[VAR196]]
-  // CHECK-NEXT: %[[VAR199:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR200:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR199:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR200:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR197]], i[[LLSIZE]]* %[[VAR199]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR198]], i[[LLSIZE]]* %[[VAR200]]
 
   csll1 = sll + cuc;
-  // CHECK-NEXT: %[[VAR201:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[SLLADDR]], align [[LLALIGN]]
-  // CHECK-NEXT: %[[VAR202:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR203:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR202]]
-  // CHECK-NEXT: %[[VAR204:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR205:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR204]]
+  // CHECK-NEXT: %[[VAR201:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[SLLADDR]], align [[LLALIGN]]
+  // CHECK-NEXT: %[[VAR202:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR203:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR202]]
+  // CHECK-NEXT: %[[VAR204:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR205:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR204]]
   // CHECK-NEXT: %[[VAR206:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR203]] to i[[LLSIZE]]
   // CHECK-NEXT: %[[VAR207:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR205]] to i[[LLSIZE]]
   // CHECK-NEXT: %[[VAR208:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR201]], %[[VAR206]]
   // CHECK-NEXT: %[[VAR209:[A-Za-z0-9.]+]] = add i[[LLSIZE]] 0, %[[VAR207]]
-  // CHECK-NEXT: %[[VAR210:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR211:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR210:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR211:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR208]], i[[LLSIZE]]* %[[VAR210]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR209]], i[[LLSIZE]]* %[[VAR211]]
 
   csll1 = sll + csll;
-  // CHECK-NEXT: %[[VAR212:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[SLLADDR]], align [[LLALIGN]]
-  // CHECK-NEXT: %[[VAR213:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR214:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR213]]
-  // CHECK-NEXT: %[[VAR215:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR216:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR215]]
+  // CHECK-NEXT: %[[VAR212:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[SLLADDR]], align [[LLALIGN]]
+  // CHECK-NEXT: %[[VAR213:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR214:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR213]]
+  // CHECK-NEXT: %[[VAR215:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR216:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR215]]
   // CHECK-NEXT: %[[VAR217:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR212]], %[[VAR214]]
   // CHECK-NEXT: %[[VAR218:[A-Za-z0-9.]+]] = add i[[LLSIZE]] 0, %[[VAR216]]
-  // CHECK-NEXT: %[[VAR219:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR220:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR219:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR220:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR217]], i[[LLSIZE]]* %[[VAR219]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR218]], i[[LLSIZE]]* %[[VAR220]]
 
   csll1 = sll + cull;
-  // CHECK-NEXT: %[[VAR221:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[SLLADDR]], align [[LLALIGN]]
-  // CHECK-NEXT: %[[VAR222:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR223:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR222]]
-  // CHECK-NEXT: %[[VAR224:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR225:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR224]]
+  // CHECK-NEXT: %[[VAR221:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[SLLADDR]], align [[LLALIGN]]
+  // CHECK-NEXT: %[[VAR222:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR223:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR222]]
+  // CHECK-NEXT: %[[VAR224:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR225:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR224]]
   // CHECK-NEXT: %[[VAR226:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR221]], %[[VAR223]]
   // CHECK-NEXT: %[[VAR227:[A-Za-z0-9.]+]] = add i[[LLSIZE]] 0, %[[VAR225]]
-  // CHECK-NEXT: %[[VAR228:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR229:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR228:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR229:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR226]], i[[LLSIZE]]* %[[VAR228]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR227]], i[[LLSIZE]]* %[[VAR229]]
   
   csll1 = ull + csc;
-  // CHECK-NEXT: %[[VAR230:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[ULLADDR]], align [[LLALIGN]]
-  // CHECK-NEXT: %[[VAR231:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR232:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR231]]
-  // CHECK-NEXT: %[[VAR233:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR234:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR233]]
+  // CHECK-NEXT: %[[VAR230:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[ULLADDR]], align [[LLALIGN]]
+  // CHECK-NEXT: %[[VAR231:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR232:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR231]]
+  // CHECK-NEXT: %[[VAR233:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR234:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR233]]
   // CHECK-NEXT: %[[VAR235:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR232]] to i[[LLSIZE]]
   // CHECK-NEXT: %[[VAR236:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR234]] to i[[LLSIZE]]
   // CHECK-NEXT: %[[VAR237:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR230]], %[[VAR235]]
   // CHECK-NEXT: %[[VAR238:[A-Za-z0-9.]+]] = add i[[LLSIZE]] 0, %[[VAR236]]
-  // CHECK-NEXT: %[[VAR239:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR240:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR239:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR240:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR237]], i[[LLSIZE]]* %[[VAR239]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR238]], i[[LLSIZE]]* %[[VAR240]]
 
   cull1 = ull + cuc;
-  // CHECK-NEXT: %[[VAR241:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[ULLADDR]], align [[LLALIGN]]
-  // CHECK-NEXT: %[[VAR242:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR243:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR242]]
-  // CHECK-NEXT: %[[VAR244:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR245:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR244]]
+  // CHECK-NEXT: %[[VAR241:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[ULLADDR]], align [[LLALIGN]]
+  // CHECK-NEXT: %[[VAR242:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR243:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR242]]
+  // CHECK-NEXT: %[[VAR244:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR245:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR244]]
   // CHECK-NEXT: %[[VAR246:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR243]] to i[[LLSIZE]]
   // CHECK-NEXT: %[[VAR247:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR245]] to i[[LLSIZE]]
   // CHECK-NEXT: %[[VAR248:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR241]], %[[VAR246]]
   // CHECK-NEXT: %[[VAR249:[A-Za-z0-9.]+]] = add i[[LLSIZE]] 0, %[[VAR247]]
-  // CHECK-NEXT: %[[VAR250:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR251:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR250:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR251:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR248]], i[[LLSIZE]]* %[[VAR250]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR249]], i[[LLSIZE]]* %[[VAR251]]
 
   csll1 = ull + csll;
-  // CHECK-NEXT: %[[VAR252:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[ULLADDR]], align [[LLALIGN]]
-  // CHECK-NEXT: %[[VAR253:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR254:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR253]]
-  // CHECK-NEXT: %[[VAR255:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR256:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR255]]
+  // CHECK-NEXT: %[[VAR252:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[ULLADDR]], align [[LLALIGN]]
+  // CHECK-NEXT: %[[VAR253:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR254:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR253]]
+  // CHECK-NEXT: %[[VAR255:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR256:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR255]]
   // CHECK-NEXT: %[[VAR257:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR252]], %[[VAR254]]
   // CHECK-NEXT: %[[VAR258:[A-Za-z0-9.]+]] = add i[[LLSIZE]] 0, %[[VAR256]]
-  // CHECK-NEXT: %[[VAR259:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR260:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR259:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR260:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR257]], i[[LLSIZE]]* %[[VAR259]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR258]], i[[LLSIZE]]* %[[VAR260]]
 
   cull1 = ull + cull;
-  // CHECK-NEXT: %[[VAR261:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[ULLADDR]], align [[LLALIGN]]
-  // CHECK-NEXT: %[[VAR262:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR263:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR262]]
-  // CHECK-NEXT: %[[VAR264:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR265:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR264]]
+  // CHECK-NEXT: %[[VAR261:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[ULLADDR]], align [[LLALIGN]]
+  // CHECK-NEXT: %[[VAR262:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR263:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR262]]
+  // CHECK-NEXT: %[[VAR264:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR265:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR264]]
   // CHECK-NEXT: %[[VAR266:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR261]], %[[VAR263]]
   // CHECK-NEXT: %[[VAR267:[A-Za-z0-9.]+]] = add i[[LLSIZE]] 0, %[[VAR265]]
-  // CHECK-NEXT: %[[VAR268:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR269:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR268:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR269:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR266]], i[[LLSIZE]]* %[[VAR268]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR267]], i[[LLSIZE]]* %[[VAR269]]
 
   csc1 = csc + sc;
-  // CHECK-NEXT: %[[VAR270:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR271:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR270]]
-  // CHECK-NEXT: %[[VAR272:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR273:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR272]]
+  // CHECK-NEXT: %[[VAR270:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR271:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR270]]
+  // CHECK-NEXT: %[[VAR272:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR273:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR272]]
   // CHECK-NEXT: %[[VAR274:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR271]] to i[[ARSIZE]]
   // CHECK-NEXT: %[[VAR275:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR273]] to i[[ARSIZE]]
-  // CHECK-NEXT: %[[VAR276:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[SCADDR]], align [[CHALIGN]]
+  // CHECK-NEXT: %[[VAR276:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[SCADDR]], align [[CHALIGN]]
   // CHECK-NEXT: %[[VAR277:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR276]] to i[[ARSIZE]]
   // CHECK-NEXT: %[[VAR278:[A-Za-z0-9.]+]] = add i[[ARSIZE]] %[[VAR274]], %[[VAR277]]
   // CHECK-NEXT: %[[VAR279:[A-Za-z0-9.]+]] = add i[[ARSIZE]] %[[VAR275]], 0
   // CHECK-NEXT: %[[VAR280:[A-Za-z0-9.]+]] = trunc i[[ARSIZE]] %[[VAR278]] to i[[CHSIZE]]
   // CHECK-NEXT: %[[VAR281:[A-Za-z0-9.]+]] = trunc i[[ARSIZE]] %[[VAR279]] to i[[CHSIZE]]
-  // CHECK-NEXT: %[[VAR282:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR283:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR282:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR283:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR280]], i[[CHSIZE]]* %[[VAR282]]
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR281]], i[[CHSIZE]]* %[[VAR283]]
 
   csc1 = csc + uc;
-  // CHECK-NEXT: %[[VAR284:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR285:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR284]]
-  // CHECK-NEXT: %[[VAR286:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR287:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR286]]
+  // CHECK-NEXT: %[[VAR284:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR285:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR284]]
+  // CHECK-NEXT: %[[VAR286:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR287:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR286]]
   // CHECK-NEXT: %[[VAR288:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR285]] to i[[ARSIZE]]
   // CHECK-NEXT: %[[VAR289:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR287]] to i[[ARSIZE]]
-  // CHECK-NEXT: %[[VAR290:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[UCADDR]], align [[CHALIGN]]
+  // CHECK-NEXT: %[[VAR290:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[UCADDR]], align [[CHALIGN]]
   // CHECK-NEXT: %[[VAR291:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR290]] to i[[ARSIZE]]
   // CHECK-NEXT: %[[VAR292:[A-Za-z0-9.]+]] = add i[[ARSIZE]] %[[VAR288]], %[[VAR291]]
   // CHECK-NEXT: %[[VAR293:[A-Za-z0-9.]+]] = add i[[ARSIZE]] %[[VAR289]], 0
   // CHECK-NEXT: %[[VAR294:[A-Za-z0-9.]+]] = trunc i[[ARSIZE]] %[[VAR292]] to i[[CHSIZE]]
   // CHECK-NEXT: %[[VAR295:[A-Za-z0-9.]+]] = trunc i[[ARSIZE]] %[[VAR293]] to i[[CHSIZE]]
-  // CHECK-NEXT: %[[VAR296:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR297:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR296:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR297:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR294]], i[[CHSIZE]]* %[[VAR296]]
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR295]], i[[CHSIZE]]* %[[VAR297]]
 
   csll1 = csc + sll;
-  // CHECK-NEXT: %[[VAR298:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR299:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR298]]
-  // CHECK-NEXT: %[[VAR300:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR301:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR300]]
+  // CHECK-NEXT: %[[VAR298:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR299:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR298]]
+  // CHECK-NEXT: %[[VAR300:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR301:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR300]]
   // CHECK-NEXT: %[[VAR302:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR299]] to i[[LLSIZE]]
   // CHECK-NEXT: %[[VAR303:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR301]] to i[[LLSIZE]]
-  // CHECK-NEXT: %[[VAR304:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[SLLADDR]], align [[LLALIGN]]
+  // CHECK-NEXT: %[[VAR304:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[SLLADDR]], align [[LLALIGN]]
   // CHECK-NEXT: %[[VAR305:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR302]], %[[VAR304]]
   // CHECK-NEXT: %[[VAR306:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR303]], 0
-  // CHECK-NEXT: %[[VAR307:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR308:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR307:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR308:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR305]], i[[LLSIZE]]* %[[VAR307]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR306]], i[[LLSIZE]]* %[[VAR308]]
 
   csll1 = csc + ull;
-  // CHECK-NEXT: %[[VAR309:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR310:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR309]]
-  // CHECK-NEXT: %[[VAR311:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR312:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR311]]
+  // CHECK-NEXT: %[[VAR309:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR310:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR309]]
+  // CHECK-NEXT: %[[VAR311:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR312:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR311]]
   // CHECK-NEXT: %[[VAR313:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR310]] to i[[LLSIZE]]
   // CHECK-NEXT: %[[VAR314:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR312]] to i[[LLSIZE]]
-  // CHECK-NEXT: %[[VAR315:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[ULLADDR]], align [[LLALIGN]]
+  // CHECK-NEXT: %[[VAR315:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[ULLADDR]], align [[LLALIGN]]
   // CHECK-NEXT: %[[VAR316:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR313]], %[[VAR315]]
   // CHECK-NEXT: %[[VAR317:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR314]], 0
-  // CHECK-NEXT: %[[VAR318:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR319:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR318:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR319:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR316]], i[[LLSIZE]]* %[[VAR318]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR317]], i[[LLSIZE]]* %[[VAR319]]
   
   csc1 = cuc + sc;
-  // CHECK-NEXT: %[[VAR320:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR321:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR320]]
-  // CHECK-NEXT: %[[VAR322:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR323:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR322]]
+  // CHECK-NEXT: %[[VAR320:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR321:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR320]]
+  // CHECK-NEXT: %[[VAR322:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR323:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR322]]
   // CHECK-NEXT: %[[VAR324:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR321]] to i[[ARSIZE]]
   // CHECK-NEXT: %[[VAR325:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR323]] to i[[ARSIZE]]
-  // CHECK-NEXT: %[[VAR326:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[SCADDR]], align [[CHALIGN]]
+  // CHECK-NEXT: %[[VAR326:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[SCADDR]], align [[CHALIGN]]
   // CHECK-NEXT: %[[VAR327:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR326]] to i[[ARSIZE]]
   // CHECK-NEXT: %[[VAR328:[A-Za-z0-9.]+]] = add i[[ARSIZE]] %[[VAR324]], %[[VAR327]]
   // CHECK-NEXT: %[[VAR329:[A-Za-z0-9.]+]] = add i[[ARSIZE]] %[[VAR325]], 0
   // CHECK-NEXT: %[[VAR330:[A-Za-z0-9.]+]] = trunc i[[ARSIZE]] %[[VAR328]] to i[[CHSIZE]]
   // CHECK-NEXT: %[[VAR331:[A-Za-z0-9.]+]] = trunc i[[ARSIZE]] %[[VAR329]] to i[[CHSIZE]]
-  // CHECK-NEXT: %[[VAR332:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR333:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR332:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR333:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CSC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR330]], i[[CHSIZE]]* %[[VAR332]]
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR331]], i[[CHSIZE]]* %[[VAR333]]
 
   cuc1 = cuc + uc;
-  // CHECK-NEXT: %[[VAR334:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR335:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR334]]
-  // CHECK-NEXT: %[[VAR336:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR337:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR336]]
+  // CHECK-NEXT: %[[VAR334:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR335:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR334]]
+  // CHECK-NEXT: %[[VAR336:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR337:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR336]]
   // CHECK-NEXT: %[[VAR338:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR335]] to i[[ARSIZE]]
   // CHECK-NEXT: %[[VAR339:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR337]] to i[[ARSIZE]]
-  // CHECK-NEXT: %[[VAR340:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[UCADDR]], align [[CHALIGN]]
+  // CHECK-NEXT: %[[VAR340:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[UCADDR]], align [[CHALIGN]]
   // CHECK-NEXT: %[[VAR341:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR340]] to i[[ARSIZE]]
   // CHECK-NEXT: %[[VAR342:[A-Za-z0-9.]+]] = add i[[ARSIZE]] %[[VAR338]], %[[VAR341]]
   // CHECK-NEXT: %[[VAR343:[A-Za-z0-9.]+]] = add i[[ARSIZE]] %[[VAR339]], 0
   // CHECK-NEXT: %[[VAR344:[A-Za-z0-9.]+]] = trunc i[[ARSIZE]] %[[VAR342]] to i[[CHSIZE]]
   // CHECK-NEXT: %[[VAR345:[A-Za-z0-9.]+]] = trunc i[[ARSIZE]] %[[VAR343]] to i[[CHSIZE]]
-  // CHECK-NEXT: %[[VAR346:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR347:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR346:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR347:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR344]], i[[CHSIZE]]* %[[VAR346]]
   // CHECK-NEXT: store i[[CHSIZE]] %[[VAR345]], i[[CHSIZE]]* %[[VAR347]]
 
   csll1 = cuc + sll;
-  // CHECK-NEXT: %[[VAR348:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR349:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR348]]
-  // CHECK-NEXT: %[[VAR350:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR351:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR350]]
+  // CHECK-NEXT: %[[VAR348:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR349:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR348]]
+  // CHECK-NEXT: %[[VAR350:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR351:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR350]]
   // CHECK-NEXT: %[[VAR352:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR349]] to i[[LLSIZE]]
   // CHECK-NEXT: %[[VAR353:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR351]] to i[[LLSIZE]]
-  // CHECK-NEXT: %[[VAR354:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[SLLADDR]], align [[LLALIGN]]
+  // CHECK-NEXT: %[[VAR354:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[SLLADDR]], align [[LLALIGN]]
   // CHECK-NEXT: %[[VAR355:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR352]], %[[VAR354]]
   // CHECK-NEXT: %[[VAR356:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR353]], 0
-  // CHECK-NEXT: %[[VAR357:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR358:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR357:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR358:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR355]], i[[LLSIZE]]* %[[VAR357]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR356]], i[[LLSIZE]]* %[[VAR358]]
 
   cull1 = cuc + ull;
-  // CHECK-NEXT: %[[VAR357:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR358:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR357]]
-  // CHECK-NEXT: %[[VAR359:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR360:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[VAR359]]
+  // CHECK-NEXT: %[[VAR357:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR358:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR357]]
+  // CHECK-NEXT: %[[VAR359:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[CHSIZE]], i[[CHSIZE]] }, { i[[CHSIZE]], i[[CHSIZE]] }* %[[CUC]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR360:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[VAR359]]
   // CHECK-NEXT: %[[VAR361:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR358]] to i[[LLSIZE]]
   // CHECK-NEXT: %[[VAR362:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR360]] to i[[LLSIZE]]
-  // CHECK-NEXT: %[[VAR363:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[ULLADDR]], align [[LLALIGN]]
+  // CHECK-NEXT: %[[VAR363:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[ULLADDR]], align [[LLALIGN]]
   // CHECK-NEXT: %[[VAR364:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR361]], %[[VAR363]]
   // CHECK-NEXT: %[[VAR365:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR362]], 0
-  // CHECK-NEXT: %[[VAR366:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR367:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR366:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR367:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR364]], i[[LLSIZE]]* %[[VAR366]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR365]], i[[LLSIZE]]* %[[VAR367]]
 
   csll1 = csll + sc;
-  // CHECK-NEXT: %[[VAR368:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR369:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR368]]
-  // CHECK-NEXT: %[[VAR370:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR371:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR370]]
-  // CHECK-NEXT: %[[VAR372:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[SCADDR]], align [[CHALIGN]]
+  // CHECK-NEXT: %[[VAR368:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR369:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR368]]
+  // CHECK-NEXT: %[[VAR370:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR371:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR370]]
+  // CHECK-NEXT: %[[VAR372:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[SCADDR]], align [[CHALIGN]]
   // CHECK-NEXT: %[[VAR373:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR372]] to i[[LLSIZE]]
   // CHECK-NEXT: %[[VAR374:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR369]], %[[VAR373]]
   // CHECK-NEXT: %[[VAR375:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR371]], 0
-  // CHECK-NEXT: %[[VAR376:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR377:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR376:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR377:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR374]], i[[LLSIZE]]* %[[VAR376]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR375]], i[[LLSIZE]]* %[[VAR377]]
 
   csll1 = csll + uc;
-  // CHECK-NEXT: %[[VAR378:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR379:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR378]]
-  // CHECK-NEXT: %[[VAR380:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR381:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR380]]
-  // CHECK-NEXT: %[[VAR382:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[UCADDR]], align [[CHALIGN]]
+  // CHECK-NEXT: %[[VAR378:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR379:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR378]]
+  // CHECK-NEXT: %[[VAR380:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR381:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR380]]
+  // CHECK-NEXT: %[[VAR382:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[UCADDR]], align [[CHALIGN]]
   // CHECK-NEXT: %[[VAR383:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR382]] to i[[LLSIZE]]
   // CHECK-NEXT: %[[VAR384:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR379]], %[[VAR383]]
   // CHECK-NEXT: %[[VAR385:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR381]], 0
-  // CHECK-NEXT: %[[VAR386:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR387:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR386:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR387:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR384]], i[[LLSIZE]]* %[[VAR386]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR385]], i[[LLSIZE]]* %[[VAR387]]
 
   csll1 = csll + sll;
-  // CHECK-NEXT: %[[VAR388:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR389:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR388]]
-  // CHECK-NEXT: %[[VAR390:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR391:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR390]]
-  // CHECK-NEXT: %[[VAR392:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[SLLADDR]], align [[LLALIGN]]
+  // CHECK-NEXT: %[[VAR388:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR389:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR388]]
+  // CHECK-NEXT: %[[VAR390:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR391:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR390]]
+  // CHECK-NEXT: %[[VAR392:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[SLLADDR]], align [[LLALIGN]]
   // CHECK-NEXT: %[[VAR393:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR389]], %[[VAR392]]
   // CHECK-NEXT: %[[VAR394:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR391]], 0
-  // CHECK-NEXT: %[[VAR395:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR396:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR395:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR396:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR393]], i[[LLSIZE]]* %[[VAR395]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR394]], i[[LLSIZE]]* %[[VAR396]]
 
   csll1 = csll + ull;
-  // CHECK-NEXT: %[[VAR397:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR398:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR397]]
-  // CHECK-NEXT: %[[VAR399:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR400:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR399]]
-  // CHECK-NEXT: %[[VAR401:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[ULLADDR]], align [[LLALIGN]]
+  // CHECK-NEXT: %[[VAR397:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR398:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR397]]
+  // CHECK-NEXT: %[[VAR399:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR400:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR399]]
+  // CHECK-NEXT: %[[VAR401:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[ULLADDR]], align [[LLALIGN]]
   // CHECK-NEXT: %[[VAR402:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR398]], %[[VAR401]]
   // CHECK-NEXT: %[[VAR403:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR400]], 0
-  // CHECK-NEXT: %[[VAR404:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR405:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR404:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR405:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR402]], i[[LLSIZE]]* %[[VAR404]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR403]], i[[LLSIZE]]* %[[VAR405]]
   
   csll1 = cull + sc;
-  // CHECK-NEXT: %[[VAR406:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR407:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR406]]
-  // CHECK-NEXT: %[[VAR408:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR409:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR408]]
-  // CHECK-NEXT: %[[VAR410:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[SCADDR]], align [[CHALIGN]]
+  // CHECK-NEXT: %[[VAR406:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR407:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR406]]
+  // CHECK-NEXT: %[[VAR408:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR409:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR408]]
+  // CHECK-NEXT: %[[VAR410:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[SCADDR]], align [[CHALIGN]]
   // CHECK-NEXT: %[[VAR411:[A-Za-z0-9.]+]] = sext i[[CHSIZE]] %[[VAR410]] to i[[LLSIZE]]
   // CHECK-NEXT: %[[VAR412:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR407]], %[[VAR411]]
   // CHECK-NEXT: %[[VAR413:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR409]], 0
-  // CHECK-NEXT: %[[VAR414:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR415:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR414:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR415:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR412]], i[[LLSIZE]]* %[[VAR414]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR413]], i[[LLSIZE]]* %[[VAR415]]
 
   cull1 = cull + uc;
-  // CHECK-NEXT: %[[VAR416:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR417:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR416]]
-  // CHECK-NEXT: %[[VAR418:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR419:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR418]]
-  // CHECK-NEXT: %[[VAR420:[A-Za-z0-9.]+]] = load i[[CHSIZE]]* %[[UCADDR]], align [[CHALIGN]]
+  // CHECK-NEXT: %[[VAR416:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR417:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR416]]
+  // CHECK-NEXT: %[[VAR418:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR419:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR418]]
+  // CHECK-NEXT: %[[VAR420:[A-Za-z0-9.]+]] = load i[[CHSIZE]], i[[CHSIZE]]* %[[UCADDR]], align [[CHALIGN]]
   // CHECK-NEXT: %[[VAR421:[A-Za-z0-9.]+]] = zext i[[CHSIZE]] %[[VAR420]] to i[[LLSIZE]]
   // CHECK-NEXT: %[[VAR422:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR417]], %[[VAR421]]
   // CHECK-NEXT: %[[VAR423:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR419]], 0
-  // CHECK-NEXT: %[[VAR424:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR425:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR424:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR425:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR422]], i[[LLSIZE]]* %[[VAR424]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR423]], i[[LLSIZE]]* %[[VAR425]]
 
   csll1 = cull + sll;
-  // CHECK-NEXT: %[[VAR426:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR427:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR426]]
-  // CHECK-NEXT: %[[VAR428:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR429:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR428]]
-  // CHECK-NEXT: %[[VAR430:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[SLLADDR]], align [[LLALIGN]]
+  // CHECK-NEXT: %[[VAR426:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR427:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR426]]
+  // CHECK-NEXT: %[[VAR428:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR429:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR428]]
+  // CHECK-NEXT: %[[VAR430:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[SLLADDR]], align [[LLALIGN]]
   // CHECK-NEXT: %[[VAR431:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR427]], %[[VAR430]]
   // CHECK-NEXT: %[[VAR432:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR429]], 0
-  // CHECK-NEXT: %[[VAR433:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR434:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR433:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR434:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CSLL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR431]], i[[LLSIZE]]* %[[VAR433]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR432]], i[[LLSIZE]]* %[[VAR434]]
 
   cull1 = cull + ull;
-  // CHECK-NEXT: %[[VAR435:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR436:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR435]]
-  // CHECK-NEXT: %[[VAR437:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK-NEXT: %[[VAR438:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[VAR437]]
-  // CHECK-NEXT: %[[VAR439:[A-Za-z0-9.]+]] = load i[[LLSIZE]]* %[[ULLADDR]], align [[LLALIGN]]
+  // CHECK-NEXT: %[[VAR435:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR436:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR435]]
+  // CHECK-NEXT: %[[VAR437:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR438:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[VAR437]]
+  // CHECK-NEXT: %[[VAR439:[A-Za-z0-9.]+]] = load i[[LLSIZE]], i[[LLSIZE]]* %[[ULLADDR]], align [[LLALIGN]]
   // CHECK-NEXT: %[[VAR440:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR436]], %[[VAR439]]
   // CHECK-NEXT: %[[VAR441:[A-Za-z0-9.]+]] = add i[[LLSIZE]] %[[VAR438]], 0
-  // CHECK-NEXT: %[[VAR442:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK-NEXT: %[[VAR443:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+  // CHECK-NEXT: %[[VAR442:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+  // CHECK-NEXT: %[[VAR443:[A-Za-z0-9.]+]] = getelementptr inbounds { i[[LLSIZE]], i[[LLSIZE]] }, { i[[LLSIZE]], i[[LLSIZE]] }* %[[CULL1]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR440]], i[[LLSIZE]]* %[[VAR442]]
   // CHECK-NEXT: store i[[LLSIZE]] %[[VAR441]], i[[LLSIZE]]* %[[VAR443]]
 }
diff --git a/test/CodeGen/compound-literal.c b/test/CodeGen/compound-literal.c
index 458a78e..85138bf 100644
--- a/test/CodeGen/compound-literal.c
+++ b/test/CodeGen/compound-literal.c
@@ -18,13 +18,13 @@
   // CHECK: [[S:%[a-zA-Z0-9.]+]] = alloca [[STRUCT:%[a-zA-Z0-9.]+]],
   struct S s;
   // CHECK-NEXT: [[COMPOUNDLIT:%[a-zA-Z0-9.]+]] = alloca [[STRUCT]]
-  // CHECK-NEXT: [[CX:%[a-zA-Z0-9.]+]] = getelementptr inbounds [[STRUCT]]* [[COMPOUNDLIT]], i32 0, i32 0
-  // CHECK-NEXT: [[SY:%[a-zA-Z0-9.]+]] = getelementptr inbounds [[STRUCT]]* [[S]], i32 0, i32 1
-  // CHECK-NEXT: [[TMP:%[a-zA-Z0-9.]+]] = load i32* [[SY]]
+  // CHECK-NEXT: [[CX:%[a-zA-Z0-9.]+]] = getelementptr inbounds [[STRUCT]], [[STRUCT]]* [[COMPOUNDLIT]], i32 0, i32 0
+  // CHECK-NEXT: [[SY:%[a-zA-Z0-9.]+]] = getelementptr inbounds [[STRUCT]], [[STRUCT]]* [[S]], i32 0, i32 1
+  // CHECK-NEXT: [[TMP:%[a-zA-Z0-9.]+]] = load i32, i32* [[SY]]
   // CHECK-NEXT: store i32 [[TMP]], i32* [[CX]]
-  // CHECK-NEXT: [[CY:%[a-zA-Z0-9.]+]] = getelementptr inbounds [[STRUCT]]* [[COMPOUNDLIT]], i32 0, i32 1
-  // CHECK-NEXT: [[SX:%[a-zA-Z0-9.]+]] = getelementptr inbounds [[STRUCT]]* [[S]], i32 0, i32 0
-  // CHECK-NEXT: [[TMP:%[a-zA-Z0-9.]+]] = load i32* [[SX]]
+  // CHECK-NEXT: [[CY:%[a-zA-Z0-9.]+]] = getelementptr inbounds [[STRUCT]], [[STRUCT]]* [[COMPOUNDLIT]], i32 0, i32 1
+  // CHECK-NEXT: [[SX:%[a-zA-Z0-9.]+]] = getelementptr inbounds [[STRUCT]], [[STRUCT]]* [[S]], i32 0, i32 0
+  // CHECK-NEXT: [[TMP:%[a-zA-Z0-9.]+]] = load i32, i32* [[SX]]
   // CHECK-NEXT: store i32 [[TMP]], i32* [[CY]]
   // CHECK-NEXT: [[SI8:%[a-zA-Z0-9.]+]] = bitcast [[STRUCT]]* [[S]] to i8*
   // CHECK-NEXT: [[COMPOUNDLITI8:%[a-zA-Z0-9.]+]] = bitcast [[STRUCT]]* [[COMPOUNDLIT]] to i8*
@@ -46,16 +46,16 @@
   // CHECK-NEXT: store i32
 
   // Evaluate the compound literal directly in the result value slot.
-  // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[G]]* [[RESULT]], i32 0, i32 0
-  // CHECK-NEXT: [[T1:%.*]] = load i32* [[X]], align 4
+  // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[G]], [[G]]* [[RESULT]], i32 0, i32 0
+  // CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[X]], align 4
   // CHECK-NEXT: [[T2:%.*]] = trunc i32 [[T1]] to i16
   // CHECK-NEXT: store i16 [[T2]], i16* [[T0]], align 2
-  // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[G]]* [[RESULT]], i32 0, i32 1
-  // CHECK-NEXT: [[T1:%.*]] = load i32* [[Y]], align 4
+  // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[G]], [[G]]* [[RESULT]], i32 0, i32 1
+  // CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[Y]], align 4
   // CHECK-NEXT: [[T2:%.*]] = trunc i32 [[T1]] to i16
   // CHECK-NEXT: store i16 [[T2]], i16* [[T0]], align 2
-  // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[G]]* [[RESULT]], i32 0, i32 2
-  // CHECK-NEXT: [[T1:%.*]] = load i32* [[Z]], align 4
+  // CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[G]], [[G]]* [[RESULT]], i32 0, i32 2
+  // CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[Z]], align 4
   // CHECK-NEXT: [[T2:%.*]] = trunc i32 [[T1]] to i16
   // CHECK-NEXT: store i16 [[T2]], i16* [[T0]], align 2
   return (struct G) { x, y, z };
@@ -63,6 +63,6 @@
   // CHECK-NEXT: [[T0:%.*]] = bitcast i48* [[COERCE_TEMP]] to i8*
   // CHECK-NEXT: [[T1:%.*]] = bitcast [[G]]* [[RESULT]] to i8*
   // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 6
-  // CHECK-NEXT: [[T0:%.*]] = load i48* [[COERCE_TEMP]]
+  // CHECK-NEXT: [[T0:%.*]] = load i48, i48* [[COERCE_TEMP]]
   // CHECK-NEXT: ret i48 [[T0]]
 }
diff --git a/test/CodeGen/const-arithmetic.c b/test/CodeGen/const-arithmetic.c
index a28f73f..564ed9b 100644
--- a/test/CodeGen/const-arithmetic.c
+++ b/test/CodeGen/const-arithmetic.c
@@ -1,7 +1,7 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s
 
-// CHECK: @g1 = global [2 x i8*] [i8* getelementptr (i8* getelementptr inbounds ([0 x %struct.anon]* @g0, i32 0, i32 0, i32 0), i64 -2), i8* getelementptr (i8* getelementptr inbounds ([0 x %struct.anon]* @g0, i32 0, i32 0, i32 0), i64 -46)], align 16
-// CHECK: @g2 = global [2 x i8*] [i8* getelementptr (i8* getelementptr inbounds ([0 x %struct.anon]* @g0, i32 0, i32 0, i32 0), i64 -2), i8* getelementptr (i8* getelementptr inbounds ([0 x %struct.anon]* @g0, i32 0, i32 0, i32 0), i64 -46)], align 16
+// CHECK: @g1 = global [2 x i8*] [i8* getelementptr (i8, i8* getelementptr inbounds ([0 x %struct.anon], [0 x %struct.anon]* @g0, i32 0, i32 0, i32 0), i64 -2), i8* getelementptr (i8, i8* getelementptr inbounds ([0 x %struct.anon], [0 x %struct.anon]* @g0, i32 0, i32 0, i32 0), i64 -46)], align 16
+// CHECK: @g2 = global [2 x i8*] [i8* getelementptr (i8, i8* getelementptr inbounds ([0 x %struct.anon], [0 x %struct.anon]* @g0, i32 0, i32 0, i32 0), i64 -2), i8* getelementptr (i8, i8* getelementptr inbounds ([0 x %struct.anon], [0 x %struct.anon]* @g0, i32 0, i32 0, i32 0), i64 -46)], align 16
 
 extern struct { unsigned char a, b; } g0[];
 void *g1[] = {g0 + -1, g0 + -23 };
diff --git a/test/CodeGen/const-init.c b/test/CodeGen/const-init.c
index b86274f..9434f1d 100644
--- a/test/CodeGen/const-init.c
+++ b/test/CodeGen/const-init.c
@@ -102,7 +102,7 @@
   static int *p[] = { &g19 };
 }
 
-// CHECK: @g20.l0 = internal global %struct.g20_s1 { %struct.g20_s0* null, %struct.g20_s0** getelementptr inbounds (%struct.g20_s1* @g20.l0, i32 0, i32 0) }
+// CHECK: @g20.l0 = internal global %struct.g20_s1 { %struct.g20_s0* null, %struct.g20_s0** getelementptr inbounds (%struct.g20_s1, %struct.g20_s1* @g20.l0, i32 0, i32 0) }
 struct g20_s0;
 struct g20_s1 {
   struct g20_s0 *f0, **f1;
@@ -121,7 +121,7 @@
 struct g23 {char a; short b; char c; struct g22 d;};
 struct g23 g24 = {1,2,3,4};
 
-// CHECK: @g25.g26 = internal global i8* getelementptr inbounds ([4 x i8]* @__func__.g25, i32 0, i32 0)
+// CHECK: @g25.g26 = internal global i8* getelementptr inbounds ([4 x i8], [4 x i8]* @__func__.g25, i32 0, i32 0)
 // CHECK: @__func__.g25 = private unnamed_addr constant [4 x i8] c"g25\00"
 int g25() {
   static const char *g26 = __func__;
diff --git a/test/CodeGen/debug-info-args.c b/test/CodeGen/debug-info-args.c
index 33cf5bc..9389047 100644
--- a/test/CodeGen/debug-info-args.c
+++ b/test/CodeGen/debug-info-args.c
@@ -2,7 +2,7 @@
 
 int somefunc(char *x, int y, double z) {
   
-  // CHECK: ![[NUM:[^,]*]], null, null, null} ; [ DW_TAG_subroutine_type
+  // CHECK: !MDSubroutineType(types: ![[NUM:[0-9]+]])
   // CHECK: ![[NUM]] = {{!{![^,]*, ![^,]*, ![^,]*, ![^,]*}}}
   
   return y;
diff --git a/test/CodeGen/debug-info-block-out-return.c b/test/CodeGen/debug-info-block-out-return.c
index 47b90ce..282fa4f 100644
--- a/test/CodeGen/debug-info-block-out-return.c
+++ b/test/CodeGen/debug-info-block-out-return.c
@@ -11,8 +11,8 @@
 // out of order or not at all (the latter would occur if they were both assigned
 // the same argument number by mistake).
 
-// CHECK: !"0x101\00.block_descriptor\0016777218\00{{[0-9]+}}", {{.*}} ; [ DW_TAG_arg_variable ] [.block_descriptor]
-// CHECK: !"0x101\00param\0033554434\00{{[0-9]+}}", {{.*}} ; [ DW_TAG_arg_variable ] [param]
+// CHECK: !MDLocalVariable(tag: DW_TAG_arg_variable, name: ".block_descriptor", arg: 1,{{.*}}line: 2,
+// CHECK: !MDLocalVariable(tag: DW_TAG_arg_variable, name: "param", arg: 2,{{.*}}line: 2,
 
 // Line directive so we don't have to worry about how many lines preceed the
 // test code (as the line number is mangled in with the argument number as shown
diff --git a/test/CodeGen/debug-info-block.c b/test/CodeGen/debug-info-block.c
index 35ee0dd..4b5706b 100644
--- a/test/CodeGen/debug-info-block.c
+++ b/test/CodeGen/debug-info-block.c
@@ -2,8 +2,8 @@
 // Verify that the desired debugging type is generated for a structure
 //  member that is a pointer to a block. 
 
-// CHECK: __block_literal_generic{{.*}}DW_TAG_structure_type
-// CHECK: __block_descriptor{{.*}}DW_TAG_structure_type
+// CHECK: !MDCompositeType(tag: DW_TAG_structure_type, name: "__block_literal_generic"
+// CHECK: !MDCompositeType(tag: DW_TAG_structure_type, name: "__block_descriptor"
 struct inStruct {
   void (^genericBlockPtr)();
 } is;
diff --git a/test/CodeGen/debug-info-enum.c b/test/CodeGen/debug-info-enum.c
index 1f6b384..e32c731 100644
--- a/test/CodeGen/debug-info-enum.c
+++ b/test/CodeGen/debug-info-enum.c
@@ -1,8 +1,9 @@
 // RUN: %clang_cc1 -emit-llvm -g %s -o - | FileCheck %s
 
-// CHECK: [[TEST3_ENUMS:![0-9]*]], null, null, null} ; [ DW_TAG_enumeration_type ] [e]
+// CHECK: !MDCompositeType(tag: DW_TAG_enumeration_type, name: "e"
+// CHECK-SAME:             elements: [[TEST3_ENUMS:![0-9]*]]
 // CHECK: [[TEST3_ENUMS]] = !{[[TEST3_E:![0-9]*]]}
-// CHECK: [[TEST3_E]] = !{!"0x28\00E\00-1"} ; [ DW_TAG_enumerator ] [E :: -1]
+// CHECK: [[TEST3_E]] = !MDEnumerator(name: "E", value: -1)
 
 enum e;
 void func(enum e *p) {
diff --git a/test/CodeGen/debug-info-file-change.c b/test/CodeGen/debug-info-file-change.c
index 8f869d0..f4c1251 100644
--- a/test/CodeGen/debug-info-file-change.c
+++ b/test/CodeGen/debug-info-file-change.c
@@ -14,8 +14,9 @@
   return i + j;
 }
 
-// CHECK: DW_TAG_lexical_block
-// CHECK: !"m.h"
-// CHECK: DW_TAG_lexical_block
-// CHECK: !"m.c"
-// CHECK-NOT: DW_TAG_lexical_block
+// CHECK-NOT: !MDLexicalBlock
+// CHECK: !MDLexicalBlockFile({{.*}}file: ![[MH:[0-9]+]]
+// CHECK: !MDFile(filename: "m.h"
+// CHECK: !MDLexicalBlockFile({{.*}}file: ![[MC:[0-9]+]]
+// CHECK: !MDFile(filename: "m.c"
+// CHECK-NOT: !MDLexicalBlock
diff --git a/test/CodeGen/debug-info-gline-tables-only2.c b/test/CodeGen/debug-info-gline-tables-only2.c
index 8e9cc64..e28856f 100644
--- a/test/CodeGen/debug-info-gline-tables-only2.c
+++ b/test/CodeGen/debug-info-gline-tables-only2.c
@@ -8,6 +8,6 @@
 }
 
 // CHECK: !llvm.dbg.cu = !{!0}
-// CHECK: DW_TAG_compile_unit
-// CHECK: {{.*main.* DW_TAG_subprogram}}
-// CHECK: DW_TAG_file_type
+// CHECK: !MDCompileUnit(
+// CHECK: !MDSubprogram(
+// CHECK: !MDFile(
diff --git a/test/CodeGen/debug-info-limited.c b/test/CodeGen/debug-info-limited.c
index 7333452..72f9fb7 100644
--- a/test/CodeGen/debug-info-limited.c
+++ b/test/CodeGen/debug-info-limited.c
@@ -3,7 +3,9 @@
 // Ensure we emit the full definition of 'foo' even though only its declaration
 // is needed, since C has no ODR to ensure that the definition will be the same
 // in whatever TU actually uses/requires the definition of 'foo'.
-// CHECK: ; [ DW_TAG_structure_type ] [foo] {{.*}} [def]
+// CHECK: !MDCompositeType(tag: DW_TAG_structure_type, name: "foo",
+// CHECK-NOT:              DIFlagFwdDecl
+// CHECK-SAME:             ){{$}}
 
 struct foo {
 };
diff --git a/test/CodeGen/debug-info-same-line.c b/test/CodeGen/debug-info-same-line.c
index dddabda..e9fe3a7 100644
--- a/test/CodeGen/debug-info-same-line.c
+++ b/test/CodeGen/debug-info-same-line.c
@@ -2,6 +2,6 @@
 // Here two temporary nodes are identical (but should not get uniqued) while
 // building the full debug type.
 typedef struct { long x; } foo; typedef struct {  foo *x; } bar;
-// CHECK: [ DW_TAG_structure_type ] [line 4, size 64,
-// CHECK: [ DW_TAG_structure_type ] [line 4, size 64,
+// CHECK: !MDCompositeType(tag: DW_TAG_structure_type,{{.*}} line: 4, size: 64,
+// CHECK: !MDCompositeType(tag: DW_TAG_structure_type,{{.*}} line: 4, size: 64,
 bar b;
diff --git a/test/CodeGen/debug-info-scope-file.c b/test/CodeGen/debug-info-scope-file.c
index 226fb27..74456a0 100644
--- a/test/CodeGen/debug-info-scope-file.c
+++ b/test/CodeGen/debug-info-scope-file.c
@@ -5,8 +5,8 @@
 
 // CHECK: ret void, !dbg [[F1_LINE:![0-9]*]]
 // CHECK: ret void, !dbg [[F2_LINE:![0-9]*]]
-// CHECK: [[F1:![0-9]*]] = {{.*}} ; [ DW_TAG_subprogram ] {{.*}} [def] [f1]
-// CHECK: [[F2:![0-9]*]] = {{.*}} ; [ DW_TAG_subprogram ] {{.*}} [def] [f2]
+// CHECK: [[F1:![0-9]*]] = !MDSubprogram(name: "f1",{{.*}} isDefinition: true
+// CHECK: [[F2:![0-9]*]] = !MDSubprogram(name: "f2",{{.*}} isDefinition: true
 // CHECK: [[F1_LINE]] = !MDLocation({{.*}}, scope: [[F1]])
 // CHECK: [[F2_LINE]] = !MDLocation({{.*}}, scope: [[F2]])
 
diff --git a/test/CodeGen/debug-info-scope.c b/test/CodeGen/debug-info-scope.c
index d84fafd..5709e3e 100644
--- a/test/CodeGen/debug-info-scope.c
+++ b/test/CodeGen/debug-info-scope.c
@@ -5,23 +5,23 @@
 int main() {
 	int j = 0;
 	int k = 0;
-// CHECK: DW_TAG_auto_variable ] [i]
-// CHECK-NEXT: DW_TAG_lexical_block
+// CHECK: !MDLocalVariable(tag: DW_TAG_auto_variable, name: "i"
+// CHECK-NEXT: !MDLexicalBlock(
 
 // FIXME: Looks like we don't actually need both these lexical blocks (disc 2
 // just refers to disc 1, nothing actually uses disc 2).
-// GMLT-NOT: DW_TAG_lexical_block
-// GMLT: "0xb\002", {{.*}}} ; [ DW_TAG_lexical_block ]
-// GMLT-NOT: DW_TAG_lexical_block
-// GMLT: "0xb\001", {{.*}}} ; [ DW_TAG_lexical_block ]
+// GMLT-NOT: !MDLexicalBlock
+// GMLT: !MDLexicalBlockFile({{.*}}, discriminator: 2)
+// GMLT-NOT: !MDLexicalBlock
+// GMLT: !MDLexicalBlockFile({{.*}}, discriminator: 1)
 // Make sure we don't have any more lexical blocks because we don't need them in
 // -gmlt.
-// GMLT-NOT: DW_TAG_lexical_block
+// GMLT-NOT: !MDLexicalBlock
 	for (int i = 0; i < 10; i++)
 		j++;
-// CHECK: DW_TAG_auto_variable ] [i]
-// CHECK-NEXT: DW_TAG_lexical_block
-// GMLT-NOT: DW_TAG_lexical_block
+// CHECK: !MDLocalVariable(tag: DW_TAG_auto_variable, name: "i"
+// CHECK-NEXT: !MDLexicalBlock(
+// GMLT-NOT: !MDLexicalBlock
 	for (int i = 0; i < 10; i++)
 		k++;
 	return 0;
diff --git a/test/CodeGen/debug-info-static.c b/test/CodeGen/debug-info-static.c
index 931c9e2..cd4526a 100644
--- a/test/CodeGen/debug-info-static.c
+++ b/test/CodeGen/debug-info-static.c
@@ -1,6 +1,6 @@
 // RUN: %clang_cc1  -g -emit-llvm -o - %s | FileCheck %s
 
-// CHECK:  xyzzy, null} ; [ DW_TAG_variable ]
+// CHECK: !MDGlobalVariable({{.*}}variable: i32* @f.xyzzy
 void f(void)
 {
    static int xyzzy;
diff --git a/test/CodeGen/debug-info-typedef.c b/test/CodeGen/debug-info-typedef.c
index 3db7d53..a50b7f1 100644
--- a/test/CodeGen/debug-info-typedef.c
+++ b/test/CodeGen/debug-info-typedef.c
@@ -7,5 +7,5 @@
 
 MyType a;
 
-// CHECK:  !"0x16\00MyType\002\00{{.*}}", ![[HEADER:[0-9]+]], null{{.*}}} ; [ DW_TAG_typedef ] [MyType] [line 2, size 0, align 0, offset 0] [from int]
-// CHECK: ![[HEADER]] = !{!"b.h",
+// CHECK: !MDDerivedType(tag: DW_TAG_typedef, name: "MyType", file: ![[HEADER:[0-9]+]], line: 2,
+// CHECK: ![[HEADER]] = !MDFile(filename: "b.h",
diff --git a/test/CodeGen/debug-info-var-location.c b/test/CodeGen/debug-info-var-location.c
deleted file mode 100644
index 41da7d3..0000000
--- a/test/CodeGen/debug-info-var-location.c
+++ /dev/null
@@ -1,22 +0,0 @@
-// FIXME: Check IR rather than asm, then triple is not needed.
-// RUN: %clang -Xclang -triple=%itanium_abi_triple -S -g -fverbose-asm %s -o - | FileCheck %s
-// Radar 8461032
-// CHECK: DW_AT_location
-// CHECK-NEXT: byte 145
-
-// 145 is DW_OP_fbreg
-struct s {
-  int a;
-  struct s *next;
-};
-
-int foo(struct  s *s) {
-  switch (s->a) {
-  case 1:
-  case 2: {
-    struct s *sp = s->next;
-  }
-    break;
-  }
-  return 1;
-}
diff --git a/test/CodeGen/debug-info-vector.c b/test/CodeGen/debug-info-vector.c
index b7135af..047f1b2 100644
--- a/test/CodeGen/debug-info-vector.c
+++ b/test/CodeGen/debug-info-vector.c
@@ -4,4 +4,8 @@
 v4si a;
 
 // Test that we get an array type that's also a vector out of debug.
-// CHECK: [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [vector] [from int]
+// CHECK: !MDCompositeType(tag: DW_TAG_array_type,
+// CHECK-SAME:             baseType: ![[INT:[0-9]+]]
+// CHECK-SAME:             size: 128, align: 128
+// CHECK-SAME:             DIFlagVector
+// CHECK: ![[INT]] = !MDBasicType(name: "int"
diff --git a/test/CodeGen/debug-info-version.c b/test/CodeGen/debug-info-version.c
index 5fe2e4e..fa7e20e 100644
--- a/test/CodeGen/debug-info-version.c
+++ b/test/CodeGen/debug-info-version.c
@@ -4,5 +4,5 @@
   return 0;
 }
 
-// CHECK:  i32 2, !"Debug Info Version", i32 2}
+// CHECK:  i32 2, !"Debug Info Version", i32 3}
 // NO_DEBUG-NOT: !"Debug Info Version"
diff --git a/test/CodeGen/debug-info-vla.c b/test/CodeGen/debug-info-vla.c
index 7d321cf..d58dc91 100644
--- a/test/CodeGen/debug-info-vla.c
+++ b/test/CodeGen/debug-info-vla.c
@@ -4,8 +4,8 @@
 {
 // CHECK: dbg.declare
 // CHECK: dbg.declare({{.*}}, metadata ![[VAR:.*]], metadata ![[EXPR:.*]])
-// CHECK: ![[VAR]] = {{.*}} ; [ DW_TAG_auto_variable ] [vla] [line [[@LINE+2]]]
-// CHECK: ![[EXPR]] = {{.*}} ; [ DW_TAG_expression ] [DW_OP_deref]
+// CHECK: ![[VAR]] = !MDLocalVariable(tag: DW_TAG_auto_variable, name: "vla",{{.*}} line: [[@LINE+2]]
+// CHECK: ![[EXPR]] = !MDExpression(DW_OP_deref)
   int vla[s];
   int i;
   for (i = 0; i < s; i++) {
diff --git a/test/CodeGen/debug-info.c b/test/CodeGen/debug-info.c
index bf08c58..e1ee087 100644
--- a/test/CodeGen/debug-info.c
+++ b/test/CodeGen/debug-info.c
@@ -42,7 +42,7 @@
 
 
 // Radar 7325611
-// CHECK: !"0x16\00barfoo\00{{.*}}"
+// CHECK: !MDDerivedType(tag: DW_TAG_typedef, name: "barfoo"
 typedef int barfoo;
 barfoo foo() {
 }
diff --git a/test/CodeGen/exceptions-seh-finally.c b/test/CodeGen/exceptions-seh-finally.c
index 1bb60d8..eeadf0d 100644
--- a/test/CodeGen/exceptions-seh-finally.c
+++ b/test/CodeGen/exceptions-seh-finally.c
@@ -22,7 +22,7 @@
 //
 // CHECK: [[finally]]
 // CHECK: call void @cleanup()
-// CHECK: load i8* %[[abnormal]]
+// CHECK: load i8, i8* %[[abnormal]]
 // CHECK: icmp eq
 // CHECK: br i1 %{{.*}}, label %[[finallycont:[^ ]*]], label %[[resumecont:[^ ]*]]
 //
@@ -97,10 +97,10 @@
 // CHECK: br label %[[finally:[^ ]*]]
 //
 // CHECK: [[finally]]
-// CHECK: load i8* %[[abnormal]]
+// CHECK: load i8, i8* %[[abnormal]]
 // CHECK: zext i8 %{{.*}} to i32
 // CHECK: store i32 %{{.*}}, i32* @crashed
-// CHECK: load i8* %[[abnormal]]
+// CHECK: load i8, i8* %[[abnormal]]
 // CHECK: icmp eq
 // CHECK: br i1 %{{.*}}, label %[[finallycont:[^ ]*]], label %[[resumecont:[^ ]*]]
 //
@@ -209,7 +209,7 @@
 // CHECK-NEXT: br label %[[finallycont:[^ ]*]]
 //
 // CHECK: [[finallycont]]
-// CHECK-NEXT: %[[dest:[^ ]*]] = load i32* %
+// CHECK-NEXT: %[[dest:[^ ]*]] = load i32, i32* %
 // CHECK-NEXT: switch i32 %[[dest]]
 // CHECK-NEXT: i32 0, label %[[cleanupcont:[^ ]*]]
 //
@@ -218,7 +218,7 @@
 // CHECK-NEXT: br label %[[return:[^ ]*]]
 //
 // CHECK: [[return]]
-// CHECK-NEXT: %[[reg:[^ ]*]] = load i32* %
+// CHECK-NEXT: %[[reg:[^ ]*]] = load i32, i32* %
 // CHECK-NEXT: ret i32 %[[reg]]
 
 int nested___finally___finally_with_eh_edge() {
@@ -255,12 +255,12 @@
 // CHECK-NEXT: br label %[[outerfinally:[^ ]*]]
 //
 // CHECK: [[outerfinally]]
-// CHECK-NEXT: %[[abnormallocal:[^ ]*]] = load i8* %[[abnormal]]
+// CHECK-NEXT: %[[abnormallocal:[^ ]*]] = load i8, i8* %[[abnormal]]
 // CHECK-NEXT: %[[reg:[^ ]*]] = icmp eq i8 %[[abnormallocal]], 0
 // CHECK-NEXT: br i1 %[[reg]], label %[[finallycont:[^ ]*]], label %[[finallyresume:[^ ]*]]
 //
 // CHECK: [[finallycont]]
-// CHECK-NEXT: %[[dest:[^ ]*]] = load i32* %
+// CHECK-NEXT: %[[dest:[^ ]*]] = load i32, i32* %
 // CHECK-NEXT: switch i32 %[[dest]]
 // CHECK-NEXT: i32 0, label %[[cleanupcont:[^ ]*]]
 //
@@ -283,7 +283,7 @@
 // CHECK-NEXT: br label %[[ehresume:[^ ]*]]
 //
 // CHECK: [[return]]
-// CHECK-NEXT: %[[reg:[^ ]*]] = load i32* %
+// CHECK-NEXT: %[[reg:[^ ]*]] = load i32, i32* %
 // CHECK-NEXT: ret i32 %[[reg]]
 //
 // The ehresume block, not reachable either.
diff --git a/test/CodeGen/exceptions-seh-leave.c b/test/CodeGen/exceptions-seh-leave.c
index 7901c8c..0d38439 100644
--- a/test/CodeGen/exceptions-seh-leave.c
+++ b/test/CodeGen/exceptions-seh-leave.c
@@ -125,8 +125,6 @@
 //////////////////////////////////////////////////////////////////////////////
 // Mixed, nested cases.
 
-// FIXME: Test with outer __finally once PR22553 is fixed.
-
 int nested___except___finally() {
   int myres = 0;
   __try {
@@ -237,3 +235,136 @@
 
 // CHECK: [[tryleave]]
 // CHECK-NEXT: br label %[[trycont4]]
+
+int nested___finally___except() {
+  int myres = 0;
+  __try {
+    __try {
+      g();
+    } __except (1) {
+      g();
+      __leave;  // Refers to the outer __try, not the __except!
+      myres = 23;
+      return 0;
+    }
+
+    myres = 51;
+  } __finally {
+  }
+  return 1;
+}
+// The order of basic blocks in the below doesn't matter.
+// CHECK-LABEL: define i32 @nested___finally___except()
+
+// CHECK-LABEL: invoke void bitcast (void (...)* @g to void ()*)()
+// CHECK-NEXT:       to label %[[g1_cont:.*]] unwind label %[[g1_lpad:.*]]
+
+// CHECK: [[g1_cont]]
+// CHECK-NEXT: br label %[[trycont:[^ ]*]]
+
+// CHECK: [[g1_lpad]]
+// CHECK:  br label %[[except:[^ ]*]]
+
+// CHECK: [[except]]
+// CHECK-NEXT: invoke void bitcast (void (...)* @g to void ()*)()
+// CHECK-NEXT:       to label %[[g2_cont:.*]] unwind label %[[g2_lpad:.*]]
+
+// CHECK: [[g2_cont]]
+// CHECK-NEXT: br label %[[tryleave:[^ ]*]]
+// CHECK-NOT: 23
+
+// CHECK: [[g2_lpad]]
+// CHECK: store i8 1, i8* %[[abnormal:[^ ]*]]
+// CHECK-NEXT: br label %[[finally:[^ ]*]]
+
+// CHECK: [[trycont]]
+// CHECK: store i32 51, i32* %
+// CHECK-NEXT: br label %[[tryleave]]
+
+// CHECK: [[tryleave]]
+// CHECK-NEXT: store i8 0, i8* %[[abnormal]]
+// CHECK-NEXT: br label %[[finally:[^ ]*]]
+
+// CHECK: [[finally]]
+// CHECK-NEXT: %[[abnormallocal:[^ ]*]] = load i8, i8* %[[abnormal]]
+// CHECK-NEXT: %[[reg:[^ ]*]] = icmp eq i8 %[[abnormallocal]], 0
+// CHECK-NEXT: br i1 %[[reg]], label %[[finallycont:[^ ]*]], label %[[finallyresume:[^ ]*]]
+
+// CHECK: [[finallycont]]
+// CHECK-NEXT: ret i32 1
+
+// CHECK: [[finallyresume]]
+// CHECK-NEXT: br label %[[ehresume:[^ ]*]]
+
+// CHECK: [[ehresume]]
+// CHECK: resume
+
+int nested___finally___finally() {
+  int myres = 0;
+  __try {
+    __try {
+      g();
+      myres = 16;
+    } __finally {
+      g();
+      __leave;  // Refers to the outer __try, not the __finally we're in!
+      myres = 23;
+      return 0;
+    }
+
+    myres = 51;
+  } __finally {
+  }
+  return 1;
+}
+// The order of basic blocks in the below doesn't matter.
+// CHECK-LABEL: define i32 @nested___finally___finally()
+
+// CHECK-LABEL: invoke void bitcast (void (...)* @g to void ()*)()
+// CHECK-NEXT:       to label %[[g1_cont:.*]] unwind label %[[g1_lpad:.*]]
+
+// CHECK: [[g1_cont]]
+// CHECK: store i32 16, i32* %[[myres:[^ ]*]],
+// CHECK: store i8 0, i8* %[[abnormal:[^ ]*]]
+// CHECK-NEXT: br label %[[finally:[^ ]*]]
+
+// CHECK: [[finally]]
+// CHECK-NEXT: invoke void bitcast (void (...)* @g to void ()*)() #3
+// CHECK-NEXT:       to label %[[g2_cont:.*]] unwind label %[[g2_lpad:.*]]
+
+// CHECK: [[g2_cont]]
+// CHECK-NEXT: br label %[[tryleave:[^ ]*]]
+// CHECK-NOT: store i32 23
+
+// There's an unreachable block for the skipped `myres = 51`.
+// CHECK: store i32 51, i32* %[[myres]]
+// CHECK-NEXT: br label %[[tryleave]]
+
+// CHECK: [[tryleave]]
+// CHECK-NEXT: store i8 0, i8* %[[abnormal]]
+// CHECK-NEXT: br label %[[outerfinally:[^ ]*]]
+
+// CHECK: [[outerfinally]]
+// CHECK-NEXT: %[[abnormallocal:[^ ]*]] = load i8, i8* %[[abnormal]]
+// CHECK-NEXT: %[[reg:[^ ]*]] = icmp eq i8 %[[abnormallocal]], 0
+// CHECK-NEXT: br i1 %[[reg]], label %[[finallycont:[^ ]*]], label %[[finallyresume:[^ ]*]]
+
+// CHECK: [[finallycont]]
+// CHECK-NEXT: ret i32 1
+
+// CHECK: [[g1_lpad]]
+// CHECK: store i8 1, i8* %[[abnormal]]
+// CHECK-NEXT: br label %[[finally:[^ ]*]]
+
+// CHECK: [[g2_lpad]]
+// CHECK: br label %[[ehcleanup:[^ ]*]]
+
+// CHECK: [[ehcleanup]]
+// CHECK-NEXT: store i8 1, i8* %[[abnormal]]
+// CHECK-NEXT: br label %[[outerfinally]]
+
+// CHECK: [[finallyresume]]
+// CHECK-NEXT: br label %[[ehresume:[^ ]*]]
+
+// CHECK: [[ehresume]]
+// CHECK: resume
diff --git a/test/CodeGen/exceptions-seh.c b/test/CodeGen/exceptions-seh.c
index ebe97be..2d9d4b1 100644
--- a/test/CodeGen/exceptions-seh.c
+++ b/test/CodeGen/exceptions-seh.c
@@ -32,7 +32,7 @@
 // CHECK: [[except]]
 // CHECK-NEXT: store i32 -42, i32* %[[success:[^ ]*]]
 //
-// CHECK: %[[res:[^ ]*]] = load i32* %[[success]]
+// CHECK: %[[res:[^ ]*]] = load i32, i32* %[[success]]
 // CHECK: ret i32 %[[res]]
 
 void j(void);
@@ -57,7 +57,7 @@
 // CHECK-NEXT: catch i8* bitcast (i32 (i8*, i8*)* @"\01?filt$0@0@filter_expr_capture@@" to i8*)
 // CHECK: store i32 13, i32* %[[r]]
 //
-// CHECK: %[[rv:[^ ]*]] = load i32* %[[r]]
+// CHECK: %[[rv:[^ ]*]] = load i32, i32* %[[r]]
 // CHECK: ret i32 %[[rv]]
 
 // CHECK-LABEL: define internal i32 @"\01?filt$0@0@filter_expr_capture@@"(i8* %exception_pointers, i8* %frame_pointer)
@@ -95,12 +95,12 @@
 // CHECK: store i8* %{{.*}}, i8** %[[ehptr_slot:[^ ]*]]
 // CHECK: store i32 %{{.*}}, i32* %[[sel_slot:[^ ]*]]
 //
-// CHECK: load i32* %[[sel_slot]]
+// CHECK: load i32, i32* %[[sel_slot]]
 // CHECK: call i32 @llvm.eh.typeid.for(i8* bitcast (i32 (i8*, i8*)* @"\01?filt$1@0@nested_try@@" to i8*))
 // CHECK: icmp eq i32
 // CHECK: br i1
 //
-// CHECK: load i32* %[[sel_slot]]
+// CHECK: load i32, i32* %[[sel_slot]]
 // CHECK: call i32 @llvm.eh.typeid.for(i8* bitcast (i32 (i8*, i8*)* @"\01?filt$0@0@nested_try@@" to i8*))
 // CHECK: icmp eq i32
 // CHECK: br i1
@@ -109,7 +109,7 @@
 // CHECK: br label %[[outer_try_cont:[^ ]*]]
 //
 // CHECK: [[outer_try_cont]]
-// CHECK: %[[r_load:[^ ]*]] = load i32* %[[r]]
+// CHECK: %[[r_load:[^ ]*]] = load i32, i32* %[[r]]
 // CHECK: ret i32 %[[r_load]]
 //
 // CHECK: store i32 123, i32* %[[r]]
@@ -130,7 +130,7 @@
   }
 }
 // CHECK-LABEL: define void @basic_finally()
-// CHECK: load i32* @g
+// CHECK: load i32, i32* @g
 // CHECK: add i32 %{{.*}}, 1
 // CHECK: store i32 %{{.*}}, i32* @g
 //
@@ -141,7 +141,7 @@
 // CHECK: br label %[[finally:[^ ]*]]
 //
 // CHECK: [[finally]]
-// CHECK: load i32* @g
+// CHECK: load i32, i32* @g
 // CHECK: add i32 %{{.*}}, -1
 // CHECK: store i32 %{{.*}}, i32* @g
 // CHECK: icmp eq
@@ -177,7 +177,7 @@
 // CHECK: br label %[[retbb]]
 //
 // CHECK: [[retbb]]
-// CHECK: %[[r:[^ ]*]] = load i32* %[[rv]]
+// CHECK: %[[r:[^ ]*]] = load i32, i32* %[[rv]]
 // CHECK: ret i32 %[[r]]
 
 // CHECK: attributes #[[NOINLINE]] = { {{.*noinline.*}} }
diff --git a/test/CodeGen/exprs.c b/test/CodeGen/exprs.c
index 2a22169..f7b6ab8 100644
--- a/test/CodeGen/exprs.c
+++ b/test/CodeGen/exprs.c
@@ -127,9 +127,9 @@
   return A[X];
 
 // CHECK: [[Xaddr:%[^ ]+]] = alloca i64, align 8
-// CHECK: load {{.*}}* [[Xaddr]]
-// CHECK-NEXT: getelementptr inbounds [100 x i32]* %A, i32 0, 
-// CHECK-NEXT: load i32*
+// CHECK: load {{.*}}, {{.*}}* [[Xaddr]]
+// CHECK-NEXT: getelementptr inbounds [100 x i32], [100 x i32]* %A, i32 0, 
+// CHECK-NEXT: load i32, i32*
 }
 
 int f12() {
diff --git a/test/CodeGen/ext-vector-indexing.c b/test/CodeGen/ext-vector-indexing.c
index 28c0e1e..fff1fa4 100644
--- a/test/CodeGen/ext-vector-indexing.c
+++ b/test/CodeGen/ext-vector-indexing.c
@@ -8,7 +8,7 @@
   va.hi[0] = 3.0;
 // CHECK:  [[VA:%.*]] = alloca <4 x float>
 // CHECK:  [[CONV:%.*]] = bitcast <4 x float>* [[VA]] to float*
-// CHECK:  [[ADD:%.*]] = getelementptr inbounds float* [[CONV]], i64 2
-// CHECK:  [[ARRIDX:%.*]] = getelementptr inbounds float* [[ADD]], i64 0
+// CHECK:  [[ADD:%.*]] = getelementptr inbounds float, float* [[CONV]], i64 2
+// CHECK:  [[ARRIDX:%.*]] = getelementptr inbounds float, float* [[ADD]], i64 0
 // CHECK:   store float 3.000000e+00, float* [[ARRIDX]]
 }
diff --git a/test/CodeGen/ext-vector-member-alignment.c b/test/CodeGen/ext-vector-member-alignment.c
index 49e6997..5f044b8 100644
--- a/test/CodeGen/ext-vector-member-alignment.c
+++ b/test/CodeGen/ext-vector-member-alignment.c
@@ -17,11 +17,11 @@
   // FIXME: We should be able to come up with a more aggressive alignment
   // estimate.
   // CHECK: @func
-  // CHECK: load <4 x float>* {{%.*}}, align 1
+  // CHECK: load <4 x float>, <4 x float>* {{%.*}}, align 1
   // CHECK: store <4 x float> {{%.*}}, <4 x float>* {{%.*}}, align 1
-  // CHECK: load <4 x float>* {{%.*}}, align 1
-  // CHECK: load <4 x float>* {{%.*}}, align 1
-  // CHECK: load <4 x float>* {{%.*}}, align 1
+  // CHECK: load <4 x float>, <4 x float>* {{%.*}}, align 1
+  // CHECK: load <4 x float>, <4 x float>* {{%.*}}, align 1
+  // CHECK: load <4 x float>, <4 x float>* {{%.*}}, align 1
   // CHECK: store <4 x float> {{%.*}}, <4 x float>* {{%.*}}, align 1
   // CHECK: ret void
 }
diff --git a/test/CodeGen/extern-inline.c b/test/CodeGen/extern-inline.c
index 77cb270..2c0fde9 100644
--- a/test/CodeGen/extern-inline.c
+++ b/test/CodeGen/extern-inline.c
@@ -8,7 +8,7 @@
 int g(void) {return f(0);}
 // CHECK: call i32 @f
 int f(int b) {return 1+b;}
-// CHECK: load i32* %{{.*}}
+// CHECK: load i32, i32* %{{.*}}
 // CHECK: add nsw i32 1, %{{.*}}
 int h(void) {return f(1);}
 // CHECK: call i32 @f
@@ -18,8 +18,8 @@
 int g2(void) {return f2(0,1);}
 // CHECK: call i32 @f2
 static int f2(int a, int b) {return a*b;}
-// CHECK: load i32* %{{.*}}
-// CHECK: load i32* %{{.*}}
+// CHECK: load i32, i32* %{{.*}}
+// CHECK: load i32, i32* %{{.*}}
 // CHECK: mul nsw i32 %{{.*}}, %{{.*}}
 int h2(void) {return f2(1,2);}
 // CHECK: call i32 @f2
diff --git a/test/CodeGen/fp16-ops.c b/test/CodeGen/fp16-ops.c
index b269cf8..0c218be 100644
--- a/test/CodeGen/fp16-ops.c
+++ b/test/CodeGen/fp16-ops.c
@@ -1,284 +1,314 @@
 // REQUIRES: arm-registered-target
-// RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi %s | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi %s | FileCheck %s --check-prefix=NOHALF --check-prefix=CHECK
+// RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-none-linux-gnueabi %s | FileCheck %s --check-prefix=NOHALF --check-prefix=CHECK
+// RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=HALF --check-prefix=CHECK
+// RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-none-linux-gnueabi -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=HALF --check-prefix=CHECK
 typedef unsigned cond_t;
 
 volatile cond_t test;
 volatile __fp16 h0 = 0.0, h1 = 1.0, h2;
 volatile float f0, f1, f2;
+volatile double d0;
 
 void foo(void) {
   // CHECK-LABEL: define void @foo()
 
   // Check unary ops
 
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK fptoi float
+  // NOHALF: [[F16TOF32:call float @llvm.convert.from.fp16.f32]]
+  // HALF: [[F16TOF32:fpext half]]
+  // CHECK: fptoui float
   test = (h0);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: uitofp i32
+  // NOHALF: [[F32TOF16:call i16 @llvm.convert.to.fp16.f32]]
+  // HALF: [[F32TOF16:fptrunc float]]
+  h0 = (test);
+  // CHECK: [[F16TOF32]]
   // CHECK: fcmp une float
   test = (!h1);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
   // CHECK: fsub float
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // NOHALF: [[F32TOF16]]
+  // HALF: [[F32TOF16]]
   h1 = -h1;
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // CHECK: [[F32TOF16]]
   h1 = +h1;
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
   // CHECK: fadd float
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   h1++;
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
   // CHECK: fadd float
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   ++h1;
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
   // CHECK: fadd float
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   --h1;
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
   // CHECK: fadd float
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   h1--;
 
   // Check binary ops with various operands
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // CHECK: [[F16TOF32]]
   // CHECK: fmul float
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   h1 = h0 * h2;
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // NOHALF: [[F32TOF16]]
+  // NOHALF: [[F16TOF32]]
   // CHECK: fmul float
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
-  h1 = h0 * (__fp16) -2.0;
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F32TOF16]]
+  h1 = h0 * (__fp16) -2.0f;
+  // CHECK: [[F16TOF32]]
   // CHECK: fmul float
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   h1 = h0 * f2;
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
   // CHECK: fmul float
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   h1 = f0 * h2;
 
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // CHECK: [[F16TOF32]]
   // CHECK: fdiv float
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   h1 = (h0 / h2);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // NOHALF: [[F16TOF32]]
   // CHECK: fdiv float
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
-  h1 = (h0 / (__fp16) -2.0);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F32TOF16]]
+  h1 = (h0 / (__fp16) -2.0f);
+  // CHECK: [[F16TOF32]]
   // CHECK: fdiv float
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   h1 = (h0 / f2);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
   // CHECK: fdiv float
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   h1 = (f0 / h2);
 
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // CHECK: [[F16TOF32]]
   // CHECK: fadd float
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   h1 = (h2 + h0);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // NOHALF: [[F16TOF32]]
   // CHECK: fadd float
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   h1 = ((__fp16)-2.0 + h0);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
   // CHECK: fadd float
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   h1 = (h2 + f0);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
   // CHECK: fadd float
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   h1 = (f2 + h0);
 
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // CHECK: [[F16TOF32]]
   // CHECK: fsub float
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   h1 = (h2 - h0);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // NOHALF: [[F16TOF32]]
   // CHECK: fsub float
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
-  h1 = ((__fp16)-2.0 - h0);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F32TOF16]]
+  h1 = ((__fp16)-2.0f - h0);
+  // CHECK: [[F16TOF32]]
   // CHECK: fsub float
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   h1 = (h2 - f0);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
   // CHECK: fsub float
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   h1 = (f2 - h0);
 
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // CHECK: [[F16TOF32]]
   // CHECK: fcmp olt
   test = (h2 < h0);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // NOHALF: [[F16TOF32]]
   // CHECK: fcmp olt
   test = (h2 < (__fp16)42.0);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
   // CHECK: fcmp olt
   test = (h2 < f0);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
   // CHECK: fcmp olt
   test = (f2 < h0);
 
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // CHECK: [[F16TOF32]]
   // CHECK: fcmp ogt
   test = (h0 > h2);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // NOHALF: [[F16TOF32]]
   // CHECK: fcmp ogt
   test = ((__fp16)42.0 > h2);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
   // CHECK: fcmp ogt
   test = (h0 > f2);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
   // CHECK: fcmp ogt
   test = (f0 > h2);
 
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // CHECK: [[F16TOF32]]
   // CHECK: fcmp ole
   test = (h2 <= h0);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // NOHALF: [[F16TOF32]]
   // CHECK: fcmp ole
   test = (h2 <= (__fp16)42.0);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
   // CHECK: fcmp ole
   test = (h2 <= f0);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
   // CHECK: fcmp ole
   test = (f2 <= h0);
 
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // CHECK: [[F16TOF32]]
   // CHECK: fcmp oge
   test = (h0 >= h2);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // NOHALF: [[F16TOF32]]
   // CHECK: fcmp oge
   test = (h0 >= (__fp16)-2.0);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
   // CHECK: fcmp oge
   test = (h0 >= f2);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
   // CHECK: fcmp oge
   test = (f0 >= h2);
 
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // CHECK: [[F16TOF32]]
   // CHECK: fcmp oeq
   test = (h1 == h2);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // NOHALF: [[F16TOF32]]
   // CHECK: fcmp oeq
   test = (h1 == (__fp16)1.0);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
   // CHECK: fcmp oeq
   test = (h1 == f1);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
   // CHECK: fcmp oeq
   test = (f1 == h1);
 
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // CHECK: [[F16TOF32]]
   // CHECK: fcmp une
   test = (h1 != h2);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // NOHALF: [[F16TOF32]]
   // CHECK: fcmp une
   test = (h1 != (__fp16)1.0);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
   // CHECK: fcmp une
   test = (h1 != f1);
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
   // CHECK: fcmp une
   test = (f1 != h1);
 
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
   // CHECK: fcmp une
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // CHECK: [[F16TOF32]]
+  // CHECK: [[F32TOF16]]
   h1 = (h1 ? h2 : h0);
   // Check assignments (inc. compound)
   h0 = h1;
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
-  h0 = (__fp16)-2.0;
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // NOHALF: [[F32TOF16]]
+  // HALF: store {{.*}} half 0xHC000
+  h0 = (__fp16)-2.0f;
+  // CHECK: [[F32TOF16]]
   h0 = f0;
 
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // CHECK: [[F16TOF32]]
   // CHECK: fadd float
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   h0 += h1;
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // NOHALF: [[F16TOF32]]
   // CHECK: fadd
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
-  h0 += (__fp16)1.0;
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F32TOF16]]
+  h0 += (__fp16)1.0f;
+  // CHECK: [[F16TOF32]]
   // CHECK: fadd
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   h0 += f2;
 
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // CHECK: [[F16TOF32]]
   // CHECK: fsub
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   h0 -= h1;
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // NOHALF: [[F16TOF32]]
   // CHECK: fsub
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   h0 -= (__fp16)1.0;
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
   // CHECK: fsub
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   h0 -= f2;
 
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // CHECK: [[F16TOF32]]
   // CHECK: fmul
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   h0 *= h1;
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // NOHALF: [[F16TOF32]]
   // CHECK: fmul
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   h0 *= (__fp16)1.0;
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
   // CHECK: fmul
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   h0 *= f2;
 
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // CHECK: [[F16TOF32]]
   // CHECK: fdiv
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   h0 /= h1;
-  // CHECK: call float @llvm.convert.from.fp16.f32(
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
+  // NOHALF: [[F16TOF32]]
   // CHECK: fdiv
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   h0 /= (__fp16)1.0;
-  // CHECK: call float @llvm.convert.from.fp16.f32(
+  // CHECK: [[F16TOF32]]
   // CHECK: fdiv
-  // CHECK: call i16 @llvm.convert.to.fp16.f32(
+  // CHECK: [[F32TOF16]]
   h0 /= f2;
+
+  // Check conversions to/from double
+  // NOHALF: call i16 @llvm.convert.to.fp16.f64(
+  // HALF: fptrunc double {{.*}} to half
+  h0 = d0;
+
+  // CHECK: [[MID:%.*]] = fptrunc double {{%.*}} to float
+  // NOHALF: call i16 @llvm.convert.to.fp16.f32(float [[MID]])
+  // HALF: fptrunc float [[MID]] to half
+  h0 = (float)d0;
+
+  // NOHALF: call double @llvm.convert.from.fp16.f64(
+  // HALF: fpext half {{.*}} to double
+  d0 = h0;
+
+  // NOHALF: [[MID:%.*]] = call float @llvm.convert.from.fp16.f32(
+  // HALF: [[MID:%.*]] = fpext half {{.*}} to float
+  // CHECK: fpext float [[MID]] to double
+  d0 = (float)h0;
 }
diff --git a/test/CodeGen/function-target-features.c b/test/CodeGen/function-target-features.c
new file mode 100644
index 0000000..5665b1f
--- /dev/null
+++ b/test/CodeGen/function-target-features.c
@@ -0,0 +1,21 @@
+// This test verifies that we produce target-cpu and target-features attributes
+// on functions when they're different from the standard cpu and have written
+// features.
+
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx | FileCheck %s -check-prefix=AVX-FEATURE
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx | FileCheck %s -check-prefix=AVX-NO-CPU
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx512f -target-feature +avx512er | FileCheck %s -check-prefix=TWO-AVX
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-cpu corei7 | FileCheck %s -check-prefix=CORE-CPU
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-cpu corei7 -target-feature +avx | FileCheck %s -check-prefix=CORE-CPU-AND-FEATURES
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-cpu x86-64 | FileCheck %s -check-prefix=X86-64-CPU-NOT
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-cpu corei7-avx -target-feature -avx | FileCheck %s -check-prefix=AVX-MINUS-FEATURE
+
+void foo() {}
+
+// AVX-FEATURE: "target-features"="+avx"
+// AVX-NO-CPU-NOT: target-cpu
+// TWO-AVX: "target-features"="+avx512f,+avx512er"
+// CORE-CPU: "target-cpu"="corei7"
+// CORE-CPU-AND-FEATURES: "target-cpu"="corei7" "target-features"="+avx"
+// X86-64-CPU-NOT: "target-cpu"
+// AVX-MINUS-FEATURE: "target-features"="-avx"
diff --git a/test/CodeGen/init.c b/test/CodeGen/init.c
index b396c3b..a2b4920 100644
--- a/test/CodeGen/init.c
+++ b/test/CodeGen/init.c
@@ -135,8 +135,8 @@
 
 // CHECK-LABEL: @PR20473
 void PR20473() {
-  // CHECK: memcpy{{.*}}([2 x i8]* @
+  // CHECK: memcpy{{.*}}getelementptr inbounds ([2 x i8], [2 x i8]* @
   bar((char[2]) {""});
-  // CHECK: memcpy{{.*}}([3 x i8]* @
+  // CHECK: memcpy{{.*}}getelementptr inbounds ([3 x i8], [3 x i8]* @
   bar((char[3]) {""});
 }
diff --git a/test/CodeGen/integer-overflow.c b/test/CodeGen/integer-overflow.c
index a007960..eee1da7 100644
--- a/test/CodeGen/integer-overflow.c
+++ b/test/CodeGen/integer-overflow.c
@@ -60,10 +60,10 @@
   // -fwrapv should turn off inbounds for GEP's, PR9256
   extern int* P;
   ++P;
-  // DEFAULT: getelementptr inbounds i32*
-  // WRAPV: getelementptr i32*
-  // TRAPV: getelementptr inbounds i32*
-  // CATCH_UB: getelementptr inbounds i32*
+  // DEFAULT: getelementptr inbounds i32, i32*
+  // WRAPV: getelementptr i32, i32*
+  // TRAPV: getelementptr inbounds i32, i32*
+  // CATCH_UB: getelementptr inbounds i32, i32*
 
   // PR9350: char increment never overflows.
   extern volatile signed char PR9350;
diff --git a/test/CodeGen/invalid_global_asm.c b/test/CodeGen/invalid_global_asm.c
new file mode 100644
index 0000000..5b7e8b4
--- /dev/null
+++ b/test/CodeGen/invalid_global_asm.c
@@ -0,0 +1,5 @@
+// REQUIRES: arm-registered-target
+// RUN: not %clang_cc1 -emit-obj -triple armv6-unknown-unknown -o %t %s 2>&1 | FileCheck %s
+#pragma clang diagnostic ignored "-Wmissing-noreturn"
+__asm__(".Lfoo: movw r2, #:lower16:.Lbar - .Lfoo");
+// CHECK: <inline asm>:1:8: error: instruction requires: armv6t2
diff --git a/test/CodeGen/lineno-dbginfo.c b/test/CodeGen/lineno-dbginfo.c
index 28c7243..1a0b701 100644
--- a/test/CodeGen/lineno-dbginfo.c
+++ b/test/CodeGen/lineno-dbginfo.c
@@ -1,6 +1,9 @@
 // RUN: echo "#include <stddef.h>" > %t.h
 // RUN: %clang_cc1 -S -g -include %t.h %s -emit-llvm -o - | FileCheck %s
 
-// CHECK: !"0x34\00outer\00outer\00\00[[@LINE+1]]\000\001"
+// CHECK: !MDGlobalVariable(name: "outer",
+// CHECK-NOT:               linkageName:
+// CHECK-SAME:              line: [[@LINE+2]]
+// CHECK-SAME:              isDefinition: true
 int outer = 42;
 
diff --git a/test/CodeGen/mangle-blocks.c b/test/CodeGen/mangle-blocks.c
index c5e08e9..bc30e35 100644
--- a/test/CodeGen/mangle-blocks.c
+++ b/test/CodeGen/mangle-blocks.c
@@ -18,6 +18,6 @@
 // CHECK: define internal void @__mangle_block_invoke(i8* %.block_descriptor)
 
 // CHECK: define internal void @__mangle_block_invoke_2(i8* %.block_descriptor){{.*}}{
-// CHECK:   call void @__assert_rtn(i8* getelementptr inbounds ([24 x i8]* @__func__.__mangle_block_invoke_2, i32 0, i32 0), i8* getelementptr inbounds {{.*}}, i32 9, i8* getelementptr inbounds ([7 x i8]* @.str1, i32 0, i32 0))
+// CHECK:   call void @__assert_rtn(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @__func__.__mangle_block_invoke_2, i32 0, i32 0), i8* getelementptr inbounds {{.*}}, i32 9, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str1, i32 0, i32 0))
 // CHECK: }
 
diff --git a/test/CodeGen/mips-inline-asm-modifiers.c b/test/CodeGen/mips-inline-asm-modifiers.c
index 9437dbe..3116e76 100644
--- a/test/CodeGen/mips-inline-asm-modifiers.c
+++ b/test/CodeGen/mips-inline-asm-modifiers.c
@@ -7,8 +7,8 @@
 
 typedef int v4i32 __attribute__((vector_size(16)));
 
-  // CHECK: %{{[0-9]+}} = call i32 asm ".set noreorder;\0Alw    $0,$1;\0A.set reorder;\0A", "=r,*m,~{$1}"(i32* getelementptr inbounds ([8 x i32]* @b, i32 {{[0-9]+}}, i32 {{[0-9]+}})) #2,
-  // CHECK: %{{[0-9]+}} = call i32 asm "lw    $0,${1:D};\0A", "=r,*m,~{$1}"(i32* getelementptr inbounds ([8 x i32]* @b, i32 {{[0-9]+}}, i32 {{[0-9]+}})) #2,
+  // CHECK: %{{[0-9]+}} = call i32 asm ".set noreorder;\0Alw    $0,$1;\0A.set reorder;\0A", "=r,*m,~{$1}"(i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i32 {{[0-9]+}}, i32 {{[0-9]+}})) #2,
+  // CHECK: %{{[0-9]+}} = call i32 asm "lw    $0,${1:D};\0A", "=r,*m,~{$1}"(i32* getelementptr inbounds ([8 x i32], [8 x i32]* @b, i32 {{[0-9]+}}, i32 {{[0-9]+}})) #2,
   // CHECK: %{{[0-9]+}} = call <4 x i32> asm "ldi.w ${0:w},1", "=f,~{$1}"
 int b[8] = {0,1,2,3,4,5,6,7};
 int  main()
diff --git a/test/CodeGen/mips-varargs.c b/test/CodeGen/mips-varargs.c
index 383831f..8fd1df6 100644
--- a/test/CodeGen/mips-varargs.c
+++ b/test/CodeGen/mips-varargs.c
@@ -29,18 +29,18 @@
 // ALL:   call void @llvm.va_start(i8* [[VA1]])
 //
 // O32:   [[TMP0:%.+]] = bitcast i8** %va to i32**
-// O32:   [[AP_CUR:%.+]] = load i32** [[TMP0]], align [[PTRALIGN]]
+// O32:   [[AP_CUR:%.+]] = load i32*, i32** [[TMP0]], align [[PTRALIGN]]
 // NEW:   [[TMP0:%.+]] = bitcast i8** %va to i64**
-// NEW:   [[AP_CUR:%.+]] = load i64** [[TMP0]], align [[PTRALIGN]]
+// NEW:   [[AP_CUR:%.+]] = load i64*, i64** [[TMP0]], align [[PTRALIGN]]
 //
-// O32:   [[AP_NEXT:%.+]] = getelementptr i32* [[AP_CUR]], i32 1
-// NEW:   [[AP_NEXT:%.+]] = getelementptr i64* [[AP_CUR]], {{i32|i64}} 1
+// O32:   [[AP_NEXT:%.+]] = getelementptr i32, i32* [[AP_CUR]], i32 1
+// NEW:   [[AP_NEXT:%.+]] = getelementptr i64, i64* [[AP_CUR]], {{i32|i64}} 1
 //
 // O32:   store i32* [[AP_NEXT]], i32** [[TMP0]], align [[PTRALIGN]]
 // NEW:   store i64* [[AP_NEXT]], i64** [[TMP0]], align [[PTRALIGN]]
 //
-// O32:   [[ARG1:%.+]] = load i32* [[AP_CUR]], align 4
-// NEW:   [[TMP2:%.+]] = load i64* [[AP_CUR]], align 8
+// O32:   [[ARG1:%.+]] = load i32, i32* [[AP_CUR]], align 4
+// NEW:   [[TMP2:%.+]] = load i64, i64* [[AP_CUR]], align 8
 // NEW:   [[ARG1:%.+]] = trunc i64 [[TMP2]] to i32
 //
 // ALL:   call void @llvm.va_end(i8* [[VA1]])
@@ -65,29 +65,29 @@
 // ALL:   call void @llvm.va_start(i8* [[VA1]])
 //
 // O32:   [[TMP0:%.+]] = bitcast i8** %va to i32**
-// O32:   [[AP_CUR:%.+]] = load i32** [[TMP0]], align [[PTRALIGN]]
+// O32:   [[AP_CUR:%.+]] = load i32*, i32** [[TMP0]], align [[PTRALIGN]]
 // NEW:   [[TMP0:%.+]] = bitcast i8** %va to i64**
-// NEW:   [[AP_CUR:%.+]] = load i64** [[TMP0]], align [[PTRALIGN]]
+// NEW:   [[AP_CUR:%.+]] = load i64*, i64** [[TMP0]], align [[PTRALIGN]]
 //
-// O32:   [[AP_NEXT1:%.+]] = getelementptr i32* [[AP_CUR]], i32 1
-// NEW:   [[AP_NEXT1:%.+]] = getelementptr i64* [[AP_CUR]], [[INTPTR_T:i32|i64]] 1
+// O32:   [[AP_NEXT1:%.+]] = getelementptr i32, i32* [[AP_CUR]], i32 1
+// NEW:   [[AP_NEXT1:%.+]] = getelementptr i64, i64* [[AP_CUR]], [[INTPTR_T:i32|i64]] 1
 //
 // O32:   store i32* [[AP_NEXT1]], i32** [[TMP0]], align [[PTRALIGN]]
 // FIXME: N32 optimised this store out. Why only for this ABI?
 // N64:   store i64* [[AP_NEXT1]], i64** [[TMP0]], align [[PTRALIGN]]
 //
-// O32:   [[ARG1:%.+]] = load i32* [[AP_CUR]], align 4
-// NEW:   [[TMP3:%.+]] = load i64* [[AP_CUR]], align 8
+// O32:   [[ARG1:%.+]] = load i32, i32* [[AP_CUR]], align 4
+// NEW:   [[TMP3:%.+]] = load i64, i64* [[AP_CUR]], align 8
 // NEW:   [[ARG1:%.+]] = trunc i64 [[TMP3]] to i32
 //
-// O32:   [[AP_NEXT2:%.+]] = getelementptr i32* [[AP_CUR]], i32 2
-// NEW:   [[AP_NEXT2:%.+]] = getelementptr i64* [[AP_CUR]], [[INTPTR_T]] 2
+// O32:   [[AP_NEXT2:%.+]] = getelementptr i32, i32* [[AP_CUR]], i32 2
+// NEW:   [[AP_NEXT2:%.+]] = getelementptr i64, i64* [[AP_CUR]], [[INTPTR_T]] 2
 //
 // O32:   store i32* [[AP_NEXT2]], i32** [[TMP0]], align [[PTRALIGN]]
 // NEW:   store i64* [[AP_NEXT2]], i64** [[TMP0]], align [[PTRALIGN]]
 //
-// O32:   [[ARG2:%.+]] = load i32* [[AP_NEXT1]], align 4
-// NEW:   [[TMP4:%.+]] = load i64* [[AP_NEXT1]], align 8
+// O32:   [[ARG2:%.+]] = load i32, i32* [[AP_NEXT1]], align 4
+// NEW:   [[TMP4:%.+]] = load i64, i64* [[AP_NEXT1]], align 8
 // NEW:   [[ARG2:%.+]] = trunc i64 [[TMP4]] to i32
 //
 // ALL:   call void @llvm.va_end(i8* [[VA1]])
@@ -111,9 +111,9 @@
 // ALL:   [[VA1:%.+]] = bitcast i8** %va to i8*
 // ALL:   call void @llvm.va_start(i8* [[VA1]])
 //
-// O32:   [[AP_CUR:%.+]] = load i8** %va, align [[PTRALIGN]]
+// O32:   [[AP_CUR:%.+]] = load i8*, i8** %va, align [[PTRALIGN]]
 // NEW:   [[TMP0:%.+]] = bitcast i8** %va to i64**
-// NEW:   [[AP_CUR:%.+]] = load i64** [[TMP0]], align [[PTRALIGN]]
+// NEW:   [[AP_CUR:%.+]] = load i64*, i64** [[TMP0]], align [[PTRALIGN]]
 //
 // i64 is 8-byte aligned, while this is within O32's stack alignment there's no
 // guarantee that the offset is still 8-byte aligned after earlier reads.
@@ -123,14 +123,14 @@
 // O32:   [[PTR3:%.+]] = inttoptr [[INTPTR_T]] [[PTR2]] to i64*
 // O32:   [[PTR4:%.+]] = inttoptr [[INTPTR_T]] [[PTR2]] to i8*
 //
-// O32:   [[AP_NEXT:%.+]] = getelementptr i8* [[PTR4]], [[INTPTR_T]] 8
-// NEW:   [[AP_NEXT:%.+]] = getelementptr i64* [[AP_CUR]], [[INTPTR_T:i32|i64]] 1
+// O32:   [[AP_NEXT:%.+]] = getelementptr i8, i8* [[PTR4]], [[INTPTR_T]] 8
+// NEW:   [[AP_NEXT:%.+]] = getelementptr i64, i64* [[AP_CUR]], [[INTPTR_T:i32|i64]] 1
 //
 // O32:   store i8* [[AP_NEXT]], i8** %va, align [[PTRALIGN]]
 // NEW:   store i64* [[AP_NEXT]], i64** [[TMP0]], align [[PTRALIGN]]
 //
-// O32:   [[ARG1:%.+]] = load i64* [[PTR3]], align 8
-// NEW:   [[ARG1:%.+]] = load i64* [[AP_CUR]], align 8
+// O32:   [[ARG1:%.+]] = load i64, i64* [[PTR3]], align 8
+// NEW:   [[ARG1:%.+]] = load i64, i64* [[AP_CUR]], align 8
 //
 // ALL:   call void @llvm.va_end(i8* [[VA1]])
 // ALL:   ret i64 [[ARG1]]
@@ -156,30 +156,30 @@
 // ALL:   call void @llvm.va_start(i8* [[VA1]])
 //
 // O32:   [[TMP0:%.+]] = bitcast i8** %va to i8***
-// O32:   [[AP_CUR:%.+]] = load i8*** [[TMP0]], align [[PTRALIGN]]
+// O32:   [[AP_CUR:%.+]] = load i8**, i8*** [[TMP0]], align [[PTRALIGN]]
 // N32 differs because the vararg is not a N32 pointer. It's been promoted to 64-bit.
 // N32:   [[TMP0:%.+]] = bitcast i8** %va to i64**
-// N32:   [[AP_CUR:%.+]] = load i64** [[TMP0]], align [[PTRALIGN]]
+// N32:   [[AP_CUR:%.+]] = load i64*, i64** [[TMP0]], align [[PTRALIGN]]
 // N64:   [[TMP0:%.+]] = bitcast i8** %va to i8***
-// N64:   [[AP_CUR:%.+]] = load i8*** [[TMP0]], align [[PTRALIGN]]
+// N64:   [[AP_CUR:%.+]] = load i8**, i8*** [[TMP0]], align [[PTRALIGN]]
 //
-// O32:   [[AP_NEXT:%.+]] = getelementptr i8** [[AP_CUR]], i32 1
+// O32:   [[AP_NEXT:%.+]] = getelementptr i8*, i8** [[AP_CUR]], i32 1
 // N32 differs because the vararg is not a N32 pointer. It's been promoted to 64-bit.
-// N32:   [[AP_NEXT:%.+]] = getelementptr i64* [[AP_CUR]], {{i32|i64}} 1
-// N64:   [[AP_NEXT:%.+]] = getelementptr i8** [[AP_CUR]], {{i32|i64}} 1
+// N32:   [[AP_NEXT:%.+]] = getelementptr i64, i64* [[AP_CUR]], {{i32|i64}} 1
+// N64:   [[AP_NEXT:%.+]] = getelementptr i8*, i8** [[AP_CUR]], {{i32|i64}} 1
 //
 // O32:   store i8** [[AP_NEXT]], i8*** [[TMP0]], align [[PTRALIGN]]
 // N32 differs because the vararg is not a N32 pointer. It's been promoted to 64-bit.
 // N32:   store i64* [[AP_NEXT]], i64** [[TMP0]], align [[PTRALIGN]]
 // N64:   store i8** [[AP_NEXT]], i8*** [[TMP0]], align [[PTRALIGN]]
 //
-// O32:   [[ARG1:%.+]] = load i8** [[AP_CUR]], align 4
+// O32:   [[ARG1:%.+]] = load i8*, i8** [[AP_CUR]], align 4
 // N32 differs because the vararg is not a N32 pointer. It's been promoted to
 // 64-bit so we must truncate the excess and bitcast to a N32 pointer.
-// N32:   [[TMP2:%.+]] = load i64* [[AP_CUR]], align 8
+// N32:   [[TMP2:%.+]] = load i64, i64* [[AP_CUR]], align 8
 // N32:   [[TMP3:%.+]] = trunc i64 [[TMP2]] to i32
 // N32:   [[ARG1:%.+]] = inttoptr i32 [[TMP3]] to i8*
-// N64:   [[ARG1:%.+]] = load i8** [[AP_CUR]], align 8
+// N64:   [[ARG1:%.+]] = load i8*, i8** [[AP_CUR]], align 8
 //
 // ALL:   call void @llvm.va_end(i8* [[VA1]])
 // ALL:   ret i8* [[ARG1]]
@@ -200,7 +200,7 @@
 // ALL:   %va = alloca i8*, align [[PTRALIGN]]
 // ALL:   [[VA1:%.+]] = bitcast i8** %va to i8*
 // ALL:   call void @llvm.va_start(i8* [[VA1]])
-// ALL:   [[AP_CUR:%.+]] = load i8** %va, align [[PTRALIGN]]
+// ALL:   [[AP_CUR:%.+]] = load i8*, i8** %va, align [[PTRALIGN]]
 //
 // O32:   [[PTR0:%.+]] = ptrtoint i8* [[AP_CUR]] to [[INTPTR_T:i32]]
 // N32:   [[PTR0:%.+]] = ptrtoint i8* [[AP_CUR]] to [[INTPTR_T:i32]]
@@ -216,9 +216,9 @@
 //
 // ALL:   [[PTR3:%.+]] = inttoptr [[INTPTR_T]] [[PTR2]] to <4 x i32>*
 // ALL:   [[PTR4:%.+]] = inttoptr [[INTPTR_T]] [[PTR2]] to i8*
-// ALL:   [[AP_NEXT:%.+]] = getelementptr i8* [[PTR4]], [[INTPTR_T]] 16
+// ALL:   [[AP_NEXT:%.+]] = getelementptr i8, i8* [[PTR4]], [[INTPTR_T]] 16
 // ALL:   store i8* [[AP_NEXT]], i8** %va, align [[PTRALIGN]]
-// ALL:   [[PTR5:%.+]] = load <4 x i32>* [[PTR3]], align 16
+// ALL:   [[PTR5:%.+]] = load <4 x i32>, <4 x i32>* [[PTR3]], align 16
 // ALL:   call void @llvm.va_end(i8* [[VA1]])
 // ALL:   [[VECEXT:%.+]] = extractelement <4 x i32> [[PTR5]], i32 0
 // ALL:   ret i32 [[VECEXT]]
diff --git a/test/CodeGen/ms-anonymous-struct.c b/test/CodeGen/ms-anonymous-struct.c
index b41caab..bf33406 100644
--- a/test/CodeGen/ms-anonymous-struct.c
+++ b/test/CodeGen/ms-anonymous-struct.c
@@ -26,34 +26,34 @@
   // CHECK: %var = alloca %struct.test, align 4
   struct test var;
 
-  // CHECK: getelementptr inbounds %struct.test* %var, i32 0, i32 1
-  // CHECK-NEXT: getelementptr inbounds %struct.nested2* %{{.*}}, i32 0, i32 0
-  // CHECK-NEXT: load i32* %{{.*}}, align 4
+  // CHECK: getelementptr inbounds %struct.test, %struct.test* %var, i32 0, i32 1
+  // CHECK-NEXT: getelementptr inbounds %struct.nested2, %struct.nested2* %{{.*}}, i32 0, i32 0
+  // CHECK-NEXT: load i32, i32* %{{.*}}, align 4
   var.a;
 
-  // CHECK-NEXT: getelementptr inbounds %struct.test* %var, i32 0, i32 1
-  // CHECK-NEXT: getelementptr inbounds %struct.nested2* %{{.*}}, i32 0, i32 2
-  // CHECK-NEXT: load i32* %{{.*}}, align 4
+  // CHECK-NEXT: getelementptr inbounds %struct.test, %struct.test* %var, i32 0, i32 1
+  // CHECK-NEXT: getelementptr inbounds %struct.nested2, %struct.nested2* %{{.*}}, i32 0, i32 2
+  // CHECK-NEXT: load i32, i32* %{{.*}}, align 4
   var.b;
 
-  // CHECK-NEXT: getelementptr inbounds %struct.test* %var, i32 0, i32 1
-  // CHECK-NEXT: getelementptr inbounds %struct.nested2* %{{.*}}, i32 0, i32 1
-  // CHECK-NEXT: getelementptr inbounds %struct.nested1* %{{.*}}, i32 0, i32 0
-  // CHECK-NEXT: load i32* %{{.*}}, align 4
+  // CHECK-NEXT: getelementptr inbounds %struct.test, %struct.test* %var, i32 0, i32 1
+  // CHECK-NEXT: getelementptr inbounds %struct.nested2, %struct.nested2* %{{.*}}, i32 0, i32 1
+  // CHECK-NEXT: getelementptr inbounds %struct.nested1, %struct.nested1* %{{.*}}, i32 0, i32 0
+  // CHECK-NEXT: load i32, i32* %{{.*}}, align 4
   var.a1;
 
-  // CHECK-NEXT: getelementptr inbounds %struct.test* %{{.*}}var, i32 0, i32 1
-  // CHECK-NEXT: getelementptr inbounds %struct.nested2* %{{.*}}, i32 0, i32 1
-  // CHECK-NEXT: getelementptr inbounds %struct.nested1* %{{.*}}, i32 0, i32 1
-  // CHECK-NEXT: load i32* %{{.*}}, align 4
+  // CHECK-NEXT: getelementptr inbounds %struct.test, %struct.test* %{{.*}}var, i32 0, i32 1
+  // CHECK-NEXT: getelementptr inbounds %struct.nested2, %struct.nested2* %{{.*}}, i32 0, i32 1
+  // CHECK-NEXT: getelementptr inbounds %struct.nested1, %struct.nested1* %{{.*}}, i32 0, i32 1
+  // CHECK-NEXT: load i32, i32* %{{.*}}, align 4
   var.b1;
 
-  // CHECK-NEXT: getelementptr inbounds %struct.test* %var, i32 0, i32 0
-  // CHECK-NEXT: load i32* %{{.*}}, align 4
+  // CHECK-NEXT: getelementptr inbounds %struct.test, %struct.test* %var, i32 0, i32 0
+  // CHECK-NEXT: load i32, i32* %{{.*}}, align 4
   var.x;
 
-  // CHECK-NEXT: getelementptr inbounds %struct.test* %var, i32 0, i32 2
-  // CHECK-NEXT: load i32* %{{.*}}, align 4
+  // CHECK-NEXT: getelementptr inbounds %struct.test, %struct.test* %var, i32 0, i32 2
+  // CHECK-NEXT: load i32, i32* %{{.*}}, align 4
   var.y;
 }
 
@@ -61,39 +61,39 @@
 {
   // CHECK: alloca %struct.test*, align
   // CHECK-NEXT: store %struct.test* %var, %struct.test** %{{.*}}, align
-  // CHECK-NEXT: load %struct.test** %{{.*}}, align
-  // CHECK-NEXT: getelementptr inbounds %struct.test* %{{.*}}, i32 0, i32 1
-  // CHECK-NEXT: getelementptr inbounds %struct.nested2* %{{.*}}, i32 0, i32 0
-  // CHECK-NEXT: load i32* %{{.*}}, align 4
+  // CHECK-NEXT: load %struct.test*, %struct.test** %{{.*}}, align
+  // CHECK-NEXT: getelementptr inbounds %struct.test, %struct.test* %{{.*}}, i32 0, i32 1
+  // CHECK-NEXT: getelementptr inbounds %struct.nested2, %struct.nested2* %{{.*}}, i32 0, i32 0
+  // CHECK-NEXT: load i32, i32* %{{.*}}, align 4
   var->a;
 
-  // CHECK-NEXT: load %struct.test** %{{.*}}, align
-  // CHECK-NEXT: getelementptr inbounds %struct.test* %{{.*}}, i32 0, i32 1
-  // CHECK-NEXT: getelementptr inbounds %struct.nested2* %{{.*}}, i32 0, i32 2
-  // CHECK-NEXT: load i32* %{{.*}}, align 4
+  // CHECK-NEXT: load %struct.test*, %struct.test** %{{.*}}, align
+  // CHECK-NEXT: getelementptr inbounds %struct.test, %struct.test* %{{.*}}, i32 0, i32 1
+  // CHECK-NEXT: getelementptr inbounds %struct.nested2, %struct.nested2* %{{.*}}, i32 0, i32 2
+  // CHECK-NEXT: load i32, i32* %{{.*}}, align 4
   var->b;
 
-  // CHECK-NEXT: load %struct.test** %{{.*}}, align
-  // CHECK-NEXT: getelementptr inbounds %struct.test* %{{.*}}, i32 0, i32 1
-  // CHECK-NEXT: getelementptr inbounds %struct.nested2* %{{.*}}, i32 0, i32 1
-  // CHECK-NEXT: getelementptr inbounds %struct.nested1* %{{.*}}, i32 0, i32 0
-  // CHECK-NEXT: load i32* %{{.*}}, align 4
+  // CHECK-NEXT: load %struct.test*, %struct.test** %{{.*}}, align
+  // CHECK-NEXT: getelementptr inbounds %struct.test, %struct.test* %{{.*}}, i32 0, i32 1
+  // CHECK-NEXT: getelementptr inbounds %struct.nested2, %struct.nested2* %{{.*}}, i32 0, i32 1
+  // CHECK-NEXT: getelementptr inbounds %struct.nested1, %struct.nested1* %{{.*}}, i32 0, i32 0
+  // CHECK-NEXT: load i32, i32* %{{.*}}, align 4
   var->a1;
 
-  // CHECK-NEXT: load %struct.test** %{{.*}}, align
-  // CHECK-NEXT: getelementptr inbounds %struct.test* %{{.*}}, i32 0, i32 1
-  // CHECK-NEXT: getelementptr inbounds %struct.nested2* %{{.*}}, i32 0, i32 1
-  // CHECK-NEXT: getelementptr inbounds %struct.nested1* %{{.*}}, i32 0, i32 1
-  // CHECK-NEXT: load i32* %{{.*}}, align 4
+  // CHECK-NEXT: load %struct.test*, %struct.test** %{{.*}}, align
+  // CHECK-NEXT: getelementptr inbounds %struct.test, %struct.test* %{{.*}}, i32 0, i32 1
+  // CHECK-NEXT: getelementptr inbounds %struct.nested2, %struct.nested2* %{{.*}}, i32 0, i32 1
+  // CHECK-NEXT: getelementptr inbounds %struct.nested1, %struct.nested1* %{{.*}}, i32 0, i32 1
+  // CHECK-NEXT: load i32, i32* %{{.*}}, align 4
   var->b1;
 
-  // CHECK-NEXT: load %struct.test** %{{.*}}, align
-  // CHECK-NEXT: getelementptr inbounds %struct.test* %{{.*}}, i32 0, i32 0
-  // CHECK-NEXT: load i32* %{{.*}}, align 4
+  // CHECK-NEXT: load %struct.test*, %struct.test** %{{.*}}, align
+  // CHECK-NEXT: getelementptr inbounds %struct.test, %struct.test* %{{.*}}, i32 0, i32 0
+  // CHECK-NEXT: load i32, i32* %{{.*}}, align 4
   var->x;
 
-  // CHECK-NEXT: load %struct.test** %{{.*}}, align
-  // CHECK-NEXT: getelementptr inbounds %struct.test* %{{.*}}, i32 0, i32 2
-  // CHECK-NEXT: load i32* %{{.*}}, align 4
+  // CHECK-NEXT: load %struct.test*, %struct.test** %{{.*}}, align
+  // CHECK-NEXT: getelementptr inbounds %struct.test, %struct.test* %{{.*}}, i32 0, i32 2
+  // CHECK-NEXT: load i32, i32* %{{.*}}, align 4
   var->y;
 }
diff --git a/test/CodeGen/ms-inline-asm.c b/test/CodeGen/ms-inline-asm.c
index 59ff202..a6f1b71 100644
--- a/test/CodeGen/ms-inline-asm.c
+++ b/test/CodeGen/ms-inline-asm.c
@@ -93,7 +93,7 @@
 // CHECK: [[J:%[a-zA-Z0-9]+]] = alloca i32, align 4
 // CHECK: store i32 1, i32* [[I]], align 4
 // CHECK: call i32 asm sideeffect inteldialect "mov eax, dword ptr $2\0A\09mov dword ptr $0, eax", "=*m,={eax},*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}, i32* %{{.*}})
-// CHECK: [[RET:%[a-zA-Z0-9]+]] = load i32* [[J]], align 4
+// CHECK: [[RET:%[a-zA-Z0-9]+]] = load i32, i32* [[J]], align 4
 // CHECK: ret i32 [[RET]]
 }
 
diff --git a/test/CodeGen/ms-inline-asm.cpp b/test/CodeGen/ms-inline-asm.cpp
index 03d971e..123a0e3 100644
--- a/test/CodeGen/ms-inline-asm.cpp
+++ b/test/CodeGen/ms-inline-asm.cpp
@@ -63,8 +63,8 @@
 // CHECK-LABEL: define void @_ZN2T44testEv(
 void T4::test() {
 // CHECK: [[T0:%.*]] = alloca [[T4:%.*]]*,
-// CHECK: [[THIS:%.*]] = load [[T4]]** [[T0]]
-// CHECK: [[X:%.*]] = getelementptr inbounds [[T4]]* [[THIS]], i32 0, i32 0
+// CHECK: [[THIS:%.*]] = load [[T4]]*, [[T4]]** [[T0]]
+// CHECK: [[X:%.*]] = getelementptr inbounds [[T4]], [[T4]]* [[THIS]], i32 0, i32 0
   __asm mov eax, x;
   __asm mov y, eax;
 // CHECK: call void asm sideeffect inteldialect "mov eax, dword ptr $1\0A\09mov dword ptr $0, eax", "=*m,*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* @_ZN2T41yE, i32* {{.*}})
diff --git a/test/CodeGen/ms-intrinsics.c b/test/CodeGen/ms-intrinsics.c
index 4498b34..9103622 100644
--- a/test/CodeGen/ms-intrinsics.c
+++ b/test/CodeGen/ms-intrinsics.c
@@ -57,7 +57,7 @@
 
 // CHECK-I386: define i32 @test__readfsdword(i32 %Offset){{.*}}{
 // CHECK-I386:   [[PTR:%[0-9]+]] = inttoptr i32 %Offset to i32 addrspace(257)*
-// CHECK-I386:   [[VALUE:%[0-9]+]] = load volatile i32 addrspace(257)* [[PTR]], align 4
+// CHECK-I386:   [[VALUE:%[0-9]+]] = load volatile i32, i32 addrspace(257)* [[PTR]], align 4
 // CHECK-I386:   ret i32 [[VALUE:%[0-9]+]]
 // CHECK-I386: }
 #endif
diff --git a/test/CodeGen/ms-setjmp.c b/test/CodeGen/ms-setjmp.c
index 5073e08..78928bb 100644
--- a/test/CodeGen/ms-setjmp.c
+++ b/test/CodeGen/ms-setjmp.c
@@ -1,22 +1,25 @@
+// RUN: %clang_cc1 -fms-extensions -DDECLARE_SETJMP -triple i686-windows-msvc   -emit-llvm %s -o - | FileCheck --check-prefix=I386 %s
+// RUN: %clang_cc1 -fms-extensions -DDECLARE_SETJMP -triple x86_64-windows-msvc -emit-llvm %s -o - | FileCheck --check-prefix=X64 %s
 // RUN: %clang_cc1 -fms-extensions -triple i686-windows-msvc   -emit-llvm %s -o - | FileCheck --check-prefix=I386 %s
 // RUN: %clang_cc1 -fms-extensions -triple x86_64-windows-msvc -emit-llvm %s -o - | FileCheck --check-prefix=X64 %s
-
 typedef char jmp_buf[1];
 
+#ifdef DECLARE_SETJMP
 int _setjmp(jmp_buf env);
 int _setjmpex(jmp_buf env);
+#endif
 
 jmp_buf jb;
 
 int test_setjmp() {
   return _setjmp(jb);
   // I386-LABEL: define i32 @test_setjmp
-  // I386:       %[[call:.*]] = call i32 (i8*, i32, ...)* @_setjmp3(i8* getelementptr inbounds ([1 x i8]* @jb, i32 0, i32 0), i32 0)
+  // I386:       %[[call:.*]] = call i32 (i8*, i32, ...)* @_setjmp3(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @jb, i32 0, i32 0), i32 0)
   // I386-NEXT:  ret i32 %[[call]]
 
   // X64-LABEL: define i32 @test_setjmp
   // X64:       %[[addr:.*]] = call i8* @llvm.frameaddress(i32 0)
-  // X64:       %[[call:.*]] = call i32 @_setjmp(i8* getelementptr inbounds ([1 x i8]* @jb, i32 0, i32 0), i8* %[[addr]])
+  // X64:       %[[call:.*]] = call i32 @_setjmp(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @jb, i32 0, i32 0), i8* %[[addr]])
   // X64-NEXT:  ret i32 %[[call]]
 }
 
@@ -24,6 +27,6 @@
   return _setjmpex(jb);
   // X64-LABEL: define i32 @test_setjmpex
   // X64:       %[[addr:.*]] = call i8* @llvm.frameaddress(i32 0)
-  // X64:       %[[call:.*]] = call i32 @_setjmpex(i8* getelementptr inbounds ([1 x i8]* @jb, i32 0, i32 0), i8* %[[addr]])
+  // X64:       %[[call:.*]] = call i32 @_setjmpex(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @jb, i32 0, i32 0), i8* %[[addr]])
   // X64-NEXT:  ret i32 %[[call]]
 }
diff --git a/test/CodeGen/mult-alt-generic.c b/test/CodeGen/mult-alt-generic.c
index 303edfc..6e7b11e 100644
--- a/test/CodeGen/mult-alt-generic.c
+++ b/test/CodeGen/mult-alt-generic.c
@@ -130,7 +130,7 @@
   asm("foo %1,%0" : "=r" (out0) : "X" (min1));
   // CHECK: call i32 asm "foo $1,$0", "=r,X[[CLOBBERS]](i32 1)
   asm("foo %1,%0" : "=r" (out0) : "X" (1));
-  // CHECK: call i32 asm "foo $1,$0", "=r,X[[CLOBBERS]](i32* getelementptr inbounds ([2 x i32]* {{[a-zA-Z0-9@%]+}}, i32 0, i32 0))
+  // CHECK: call i32 asm "foo $1,$0", "=r,X[[CLOBBERS]](i32* getelementptr inbounds ([2 x i32], [2 x i32]* {{[a-zA-Z0-9@%]+}}, i32 0, i32 0))
   asm("foo %1,%0" : "=r" (out0) : "X" (marray));
   // CHECK: call i32 asm "foo $1,$0", "=r,X[[CLOBBERS]](double {{[0-9.eE+-]+}})
   asm("foo %1,%0" : "=r" (out0) : "X" (1.0e+01));
@@ -143,7 +143,7 @@
 {
   register int out0 = 0;
   // Constraint converted differently on different platforms moved to platform-specific.
-  // : call i32 asm "foo $1,$0", "=r,im[[CLOBBERS]](i32* getelementptr inbounds ([2 x i32]* {{[a-zA-Z0-9@%]+}}, i32 0, i32 0))
+  // : call i32 asm "foo $1,$0", "=r,im[[CLOBBERS]](i32* getelementptr inbounds ([2 x i32], [2 x i32]* {{[a-zA-Z0-9@%]+}}, i32 0, i32 0))
   asm("foo %1,%0" : "=r" (out0) : "p" (marray));
 }
 
@@ -263,7 +263,7 @@
   asm("foo %1,%0" : "=r,r" (out0) : "r,X" (min1));
   // CHECK: call i32 asm "foo $1,$0", "=r|r,r|X[[CLOBBERS]](i32 1)
   asm("foo %1,%0" : "=r,r" (out0) : "r,X" (1));
-  // CHECK: call i32 asm "foo $1,$0", "=r|r,r|X[[CLOBBERS]](i32* getelementptr inbounds ([2 x i32]* {{[a-zA-Z0-9@%]+}}, i32 0, i32 0))
+  // CHECK: call i32 asm "foo $1,$0", "=r|r,r|X[[CLOBBERS]](i32* getelementptr inbounds ([2 x i32], [2 x i32]* {{[a-zA-Z0-9@%]+}}, i32 0, i32 0))
   asm("foo %1,%0" : "=r,r" (out0) : "r,X" (marray));
   // CHECK: call i32 asm "foo $1,$0", "=r|r,r|X[[CLOBBERS]](double {{[0-9.eE+-]+}})
   asm("foo %1,%0" : "=r,r" (out0) : "r,X" (1.0e+01));
@@ -276,6 +276,6 @@
 {
   register int out0 = 0;
   // Constraint converted differently on different platforms moved to platform-specific.
-  // : call i32 asm "foo $1,$0", "=r|r,r|im[[CLOBBERS]](i32* getelementptr inbounds ([2 x i32]* {{[a-zA-Z0-9@%]+}}, i32 0, i32 0))
+  // : call i32 asm "foo $1,$0", "=r|r,r|im[[CLOBBERS]](i32* getelementptr inbounds ([2 x i32], [2 x i32]* {{[a-zA-Z0-9@%]+}}, i32 0, i32 0))
   asm("foo %1,%0" : "=r,r" (out0) : "r,p" (marray));
 }
diff --git a/test/CodeGen/named_reg_global.c b/test/CodeGen/named_reg_global.c
index d888a3f..8f9a9c6 100644
--- a/test/CodeGen/named_reg_global.c
+++ b/test/CodeGen/named_reg_global.c
@@ -21,7 +21,7 @@
 // CHECK: declare{{.*}} i[[bits]] @llvm.read_register.i[[bits]](metadata)
 
 // CHECK: define{{.*}} void @set_stack_pointer_addr(i[[bits]] %addr) #0 {
-// CHECK: [[sto:%[0-9]+]] = load i[[bits]]* %
+// CHECK: [[sto:%[0-9]+]] = load i[[bits]], i[[bits]]* %
 // CHECK: call void @llvm.write_register.i[[bits]](metadata !0, i[[bits]] [[sto]])
 // CHECK: ret void
 void set_stack_pointer_addr(unsigned long addr) {
diff --git a/test/CodeGen/no-opt-volatile-memcpy.c b/test/CodeGen/no-opt-volatile-memcpy.c
index 718d704..bf98df3 100644
--- a/test/CodeGen/no-opt-volatile-memcpy.c
+++ b/test/CodeGen/no-opt-volatile-memcpy.c
@@ -19,9 +19,9 @@
 // CHECK-NEXT: %[[ZERO:.*]] = bitcast %struct.s* %[[LS]] to i8*
 // CHECK-NEXT: %[[ONE:.*]] = bitcast %struct.s* %[[LS]] to i8*
 // CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* %[[ZERO]], i8* %[[ONE]], i64 132, i32 4, i1 true)
-// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* getelementptr inbounds (%struct.s* @gs, i32 0, i32 0, i32 0), i8* getelementptr inbounds (%struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true)
+// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i8* getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true)
 // CHECK-NEXT: %[[TWO:.*]] = bitcast %struct.s* %[[LS]] to i8*
-// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* %[[TWO]], i8* getelementptr inbounds (%struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true)
+// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* %[[TWO]], i8* getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true)
 
 
 struct s1 {
@@ -35,6 +35,6 @@
   s.y = gs;
 }
 // CHECK-LABEL: define void @fee()
-// CHECK: call void @llvm.memcpy.{{.*}}(i8* getelementptr inbounds (%struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i8* getelementptr inbounds (%struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true)
-// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* getelementptr inbounds (%struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i8* getelementptr inbounds (%struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true)
+// CHECK: call void @llvm.memcpy.{{.*}}(i8* getelementptr inbounds (%struct.s1, %struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i8* getelementptr inbounds (%struct.s1, %struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true)
+// CHECK-NEXT: call void @llvm.memcpy.{{.*}}(i8* getelementptr inbounds (%struct.s1, %struct.s1* @s, i32 0, i32 0, i32 0, i32 0), i8* getelementptr inbounds (%struct.s, %struct.s* @gs, i32 0, i32 0, i32 0), i64 132, i32 4, i1 true)
 
diff --git a/test/CodeGen/object-size.c b/test/CodeGen/object-size.c
index 5a4dc99..3fa038a 100644
--- a/test/CodeGen/object-size.c
+++ b/test/CodeGen/object-size.c
@@ -15,31 +15,31 @@
 
 // CHECK-LABEL: define void @test1
 void test1() {
-  // CHECK:     = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8]* @gbuf, i32 0, i64 4), i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i64 59)
+  // CHECK:     = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8], [63 x i8]* @gbuf, i32 0, i64 4), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i64 59)
   strcpy(&gbuf[4], "Hi there");
 }
 
 // CHECK-LABEL: define void @test2
 void test2() {
-  // CHECK:     = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8]* @gbuf, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i64 63)
+  // CHECK:     = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8], [63 x i8]* @gbuf, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i64 63)
   strcpy(gbuf, "Hi there");
 }
 
 // CHECK-LABEL: define void @test3
 void test3() {
-  // CHECK:     = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8]* @gbuf, i64 1, i64 37), i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i64 0)
+  // CHECK:     = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8], [63 x i8]* @gbuf, i64 1, i64 37), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i64 0)
   strcpy(&gbuf[100], "Hi there");
 }
 
 // CHECK-LABEL: define void @test4
 void test4() {
-  // CHECK:     = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8]* @gbuf, i32 0, i64 -1), i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i64 0)
+  // CHECK:     = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8], [63 x i8]* @gbuf, i32 0, i64 -1), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i64 0)
   strcpy((char*)(void*)&gbuf[-1], "Hi there");
 }
 
 // CHECK-LABEL: define void @test5
 void test5() {
-  // CHECK:     = load i8** @gp
+  // CHECK:     = load i8*, i8** @gp
   // CHECK-NEXT:= call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
   strcpy(gp, "Hi there");
 }
@@ -48,7 +48,7 @@
 void test6() {
   char buf[57];
 
-  // CHECK:       = call i8* @__strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i64 53)
+  // CHECK:       = call i8* @__strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i64 53)
   strcpy(&buf[4], "Hi there");
 }
 
@@ -58,7 +58,7 @@
   // Ensure we only evaluate the side-effect once.
   // CHECK:     = add
   // CHECK-NOT: = add
-  // CHECK:     = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8]* @gbuf, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i64 63)
+  // CHECK:     = call i8* @__strcpy_chk(i8* getelementptr inbounds ([63 x i8], [63 x i8]* @gbuf, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i64 63)
   strcpy((++i, gbuf), "Hi there");
 }
 
@@ -66,14 +66,14 @@
 void test8() {
   char *buf[50];
   // CHECK-NOT:   __strcpy_chk
-  // CHECK:       = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0))
+  // CHECK:       = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0))
   strcpy(buf[++gi], "Hi there");
 }
 
 // CHECK-LABEL: define void @test9
 void test9() {
   // CHECK-NOT:   __strcpy_chk
-  // CHECK:       = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0))
+  // CHECK:       = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0))
   strcpy((char *)((++gi) + gj), "Hi there");
 }
 
@@ -81,49 +81,49 @@
 char **p;
 void test10() {
   // CHECK-NOT:   __strcpy_chk
-  // CHECK:       = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0))
+  // CHECK:       = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0))
   strcpy(*(++p), "Hi there");
 }
 
 // CHECK-LABEL: define void @test11
 void test11() {
   // CHECK-NOT:   __strcpy_chk
-  // CHECK:       = call i8* @__inline_strcpy_chk(i8* getelementptr inbounds ([63 x i8]* @gbuf, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0))
+  // CHECK:       = call i8* @__inline_strcpy_chk(i8* getelementptr inbounds ([63 x i8], [63 x i8]* @gbuf, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0))
   strcpy(gp = gbuf, "Hi there");
 }
 
 // CHECK-LABEL: define void @test12
 void test12() {
   // CHECK-NOT:   __strcpy_chk
-  // CHECK:       = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0))
+  // CHECK:       = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0))
   strcpy(++gp, "Hi there");
 }
 
 // CHECK-LABEL: define void @test13
 void test13() {
   // CHECK-NOT:   __strcpy_chk
-  // CHECK:       = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0))
+  // CHECK:       = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0))
   strcpy(gp++, "Hi there");
 }
 
 // CHECK-LABEL: define void @test14
 void test14() {
   // CHECK-NOT:   __strcpy_chk
-  // CHECK:       = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0))
+  // CHECK:       = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0))
   strcpy(--gp, "Hi there");
 }
 
 // CHECK-LABEL: define void @test15
 void test15() {
   // CHECK-NOT:   __strcpy_chk
-  // CHECK:       = call i8* @__inline_strcpy_chk(i8* %{{..*}}, i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0))
+  // CHECK:       = call i8* @__inline_strcpy_chk(i8* %{{..*}}, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0))
   strcpy(gp--, "Hi there");
 }
 
 // CHECK-LABEL: define void @test16
 void test16() {
   // CHECK-NOT:   __strcpy_chk
-  // CHECK:       = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0))
+  // CHECK:       = call i8* @__inline_strcpy_chk(i8* %{{.*}}, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0))
   strcpy(gp += 1, "Hi there");
 }
 
diff --git a/test/CodeGen/packed-arrays.c b/test/CodeGen/packed-arrays.c
index 8e748df..993d88e 100644
--- a/test/CodeGen/packed-arrays.c
+++ b/test/CodeGen/packed-arrays.c
@@ -52,10 +52,10 @@
 int align3_x0 = __alignof(((struct s3*) 0)->x[0]);
 
 // CHECK-LABEL: define i32 @f0_a
-// CHECK:   load i32* %{{.*}}, align 1
+// CHECK:   load i32, i32* %{{.*}}, align 1
 // CHECK: }
 // CHECK-LABEL: define i32 @f0_b
-// CHECK:   load i32* %{{.*}}, align 4
+// CHECK:   load i32, i32* %{{.*}}, align 4
 // CHECK: }
 int f0_a(struct s0 *a) {
   return a->x[1];
@@ -67,19 +67,19 @@
 // Note that we are incompatible with GCC on this example.
 // 
 // CHECK-LABEL: define i32 @f1_a
-// CHECK:   load i32* %{{.*}}, align 1
+// CHECK:   load i32, i32* %{{.*}}, align 1
 // CHECK: }
 // CHECK-LABEL: define i32 @f1_b
-// CHECK:   load i32* %{{.*}}, align 4
+// CHECK:   load i32, i32* %{{.*}}, align 4
 // CHECK: }
 
 // Note that we are incompatible with GCC on this example.
 //
 // CHECK-LABEL: define i32 @f1_c
-// CHECK:   load i32* %{{.*}}, align 4
+// CHECK:   load i32, i32* %{{.*}}, align 4
 // CHECK: }
 // CHECK-LABEL: define i32 @f1_d
-// CHECK:   load i32* %{{.*}}, align 1
+// CHECK:   load i32, i32* %{{.*}}, align 1
 // CHECK: }
 int f1_a(struct s1 *a) {
   return a->x[1];
@@ -95,16 +95,16 @@
 }
 
 // CHECK-LABEL: define i32 @f2_a
-// CHECK:   load i32* %{{.*}}, align 1
+// CHECK:   load i32, i32* %{{.*}}, align 1
 // CHECK: }
 // CHECK-LABEL: define i32 @f2_b
-// CHECK:   load i32* %{{.*}}, align 4
+// CHECK:   load i32, i32* %{{.*}}, align 4
 // CHECK: }
 // CHECK-LABEL: define i32 @f2_c
-// CHECK:   load i32* %{{.*}}, align 1
+// CHECK:   load i32, i32* %{{.*}}, align 1
 // CHECK: }
 // CHECK-LABEL: define i32 @f2_d
-// CHECK:   load i32* %{{.*}}, align 1
+// CHECK:   load i32, i32* %{{.*}}, align 1
 // CHECK: }
 int f2_a(struct s2 *a) {
   return a->x[1];
@@ -120,16 +120,16 @@
 }
 
 // CHECK-LABEL: define i32 @f3_a
-// CHECK:   load i32* %{{.*}}, align 1
+// CHECK:   load i32, i32* %{{.*}}, align 1
 // CHECK: }
 // CHECK-LABEL: define i32 @f3_b
-// CHECK:   load i32* %{{.*}}, align 4
+// CHECK:   load i32, i32* %{{.*}}, align 4
 // CHECK: }
 // CHECK-LABEL: define i32 @f3_c
-// CHECK:   load i32* %{{.*}}, align 1
+// CHECK:   load i32, i32* %{{.*}}, align 1
 // CHECK: }
 // CHECK-LABEL: define i32 @f3_d
-// CHECK:   load i32* %{{.*}}, align 1
+// CHECK:   load i32, i32* %{{.*}}, align 1
 // CHECK: }
 int f3_a(struct s3 *a) {
   return a->x[1];
@@ -147,7 +147,7 @@
 // Verify we don't claim things are overaligned.
 //
 // CHECK-LABEL: define double @f4
-// CHECK:   load double* {{.*}}, align 8
+// CHECK:   load double, double* {{.*}}, align 8
 // CHECK: }
 extern double g4[5] __attribute__((aligned(16)));
 double f4() {
diff --git a/test/CodeGen/packed-nest-unpacked.c b/test/CodeGen/packed-nest-unpacked.c
index ea45660..1dcd2ec 100644
--- a/test/CodeGen/packed-nest-unpacked.c
+++ b/test/CodeGen/packed-nest-unpacked.c
@@ -9,40 +9,40 @@
 // <rdar://problem/10463337>
 struct X test1() {
   // CHECK: @test1
-  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* bitcast (%struct.X* getelementptr inbounds (%struct.Y* @g, i32 0, i32 1) to i8*), i64 24, i32 1, i1 false)
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i64 24, i32 1, i1 false)
   return g.y;
 }
 struct X test2() {
   // CHECK: @test2
-  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* bitcast (%struct.X* getelementptr inbounds (%struct.Y* @g, i32 0, i32 1) to i8*), i64 24, i32 1, i1 false)
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i64 24, i32 1, i1 false)
   struct X a = g.y;
   return a;
 }
 
 void test3(struct X a) {
   // CHECK: @test3
-  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.X* getelementptr inbounds (%struct.Y* @g, i32 0, i32 1) to i8*), i8* {{.*}}, i64 24, i32 1, i1 false)
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i8* {{.*}}, i64 24, i32 1, i1 false)
   g.y = a;
 }
 
 // <rdar://problem/10530444>
 void test4() {
   // CHECK: @test4
-  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* bitcast (%struct.X* getelementptr inbounds (%struct.Y* @g, i32 0, i32 1) to i8*), i64 24, i32 1, i1 false)
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i64 24, i32 1, i1 false)
   f(g.y);
 }
 
 // PR12395
 int test5() {
   // CHECK: @test5
-  // CHECK: load i32* getelementptr inbounds (%struct.Y* @g, i32 0, i32 1, i32 0, i64 0), align 1
+  // CHECK: load i32, i32* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1, i32 0, i64 0), align 1
   return g.y.x[0];
 }
 
 // <rdar://problem/11220251>
 void test6() {
   // CHECK: @test6
-  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.X* getelementptr inbounds (%struct.Y* @g, i32 0, i32 1) to i8*), i8* %{{.*}}, i64 24, i32 1, i1 false)
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.X* getelementptr inbounds (%struct.Y, %struct.Y* @g, i32 0, i32 1) to i8*), i8* %{{.*}}, i64 24, i32 1, i1 false)
   g.y = foo();
 }
 
@@ -60,6 +60,6 @@
 
 unsigned test7() {
   // CHECK: @test7
-  // CHECK: load i32* getelementptr inbounds (%struct.YBitfield* @gbitfield, i32 0, i32 1, i32 0), align 4
+  // CHECK: load i32, i32* getelementptr inbounds (%struct.YBitfield, %struct.YBitfield* @gbitfield, i32 0, i32 1, i32 0), align 4
   return gbitfield.y.b2;
 }
diff --git a/test/CodeGen/packed-structure.c b/test/CodeGen/packed-structure.c
index a915a54..8de31d6 100644
--- a/test/CodeGen/packed-structure.c
+++ b/test/CodeGen/packed-structure.c
@@ -17,7 +17,7 @@
 int s0_align   = __alignof(struct s0);
 
 // CHECK-FUNCTIONS-LABEL: define i32 @s0_load_x
-// CHECK-FUNCTIONS: [[s0_load_x:%.*]] = load i32* {{.*}}, align 4
+// CHECK-FUNCTIONS: [[s0_load_x:%.*]] = load i32, i32* {{.*}}, align 4
 // CHECK-FUNCTIONS: ret i32 [[s0_load_x]]
 int s0_load_x(struct s0 *a) { return a->x; }
 // FIXME: This seems like it should be align 1. This is actually something which
@@ -25,7 +25,7 @@
 // with align 1 (in 2363.1 at least).
 //
 // CHECK-FUNCTIONS-LABEL: define i32 @s0_load_y
-// CHECK-FUNCTIONS: [[s0_load_y:%.*]] = load i32* {{.*}}, align 1
+// CHECK-FUNCTIONS: [[s0_load_y:%.*]] = load i32, i32* {{.*}}, align 1
 // CHECK-FUNCTIONS: ret i32 [[s0_load_y]]
 int s0_load_y(struct s0 *a) { return a->y; }
 // CHECK-FUNCTIONS-LABEL: define void @s0_copy
@@ -47,11 +47,11 @@
 int s1_align   = __alignof(struct s1);
 
 // CHECK-FUNCTIONS-LABEL: define i32 @s1_load_x
-// CHECK-FUNCTIONS: [[s1_load_x:%.*]] = load i32* {{.*}}, align 1
+// CHECK-FUNCTIONS: [[s1_load_x:%.*]] = load i32, i32* {{.*}}, align 1
 // CHECK-FUNCTIONS: ret i32 [[s1_load_x]]
 int s1_load_x(struct s1 *a) { return a->x; }
 // CHECK-FUNCTIONS-LABEL: define i32 @s1_load_y
-// CHECK-FUNCTIONS: [[s1_load_y:%.*]] = load i32* {{.*}}, align 1
+// CHECK-FUNCTIONS: [[s1_load_y:%.*]] = load i32, i32* {{.*}}, align 1
 // CHECK-FUNCTIONS: ret i32 [[s1_load_y]]
 int s1_load_y(struct s1 *a) { return a->y; }
 // CHECK-FUNCTIONS-LABEL: define void @s1_copy
@@ -75,11 +75,11 @@
 int s2_align   = __alignof(struct s2);
 
 // CHECK-FUNCTIONS-LABEL: define i32 @s2_load_x
-// CHECK-FUNCTIONS: [[s2_load_y:%.*]] = load i32* {{.*}}, align 2
+// CHECK-FUNCTIONS: [[s2_load_y:%.*]] = load i32, i32* {{.*}}, align 2
 // CHECK-FUNCTIONS: ret i32 [[s2_load_y]]
 int s2_load_x(struct s2 *a) { return a->x; }
 // CHECK-FUNCTIONS-LABEL: define i32 @s2_load_y
-// CHECK-FUNCTIONS: [[s2_load_y:%.*]] = load i32* {{.*}}, align 2
+// CHECK-FUNCTIONS: [[s2_load_y:%.*]] = load i32, i32* {{.*}}, align 2
 // CHECK-FUNCTIONS: ret i32 [[s2_load_y]]
 int s2_load_y(struct s2 *a) { return a->y; }
 // CHECK-FUNCTIONS-LABEL: define void @s2_copy
@@ -94,7 +94,7 @@
 int s3_1 = __alignof(((struct s3*) 0)->anInt);
 // CHECK-FUNCTIONS-LABEL: define i32 @test3(
 int test3(struct s3 *ptr) {
-  // CHECK-FUNCTIONS:      [[PTR:%.*]] = getelementptr inbounds {{%.*}}* {{%.*}}, i32 0, i32 1
-  // CHECK-FUNCTIONS-NEXT: load i32* [[PTR]], align 1
+  // CHECK-FUNCTIONS:      [[PTR:%.*]] = getelementptr inbounds {{%.*}}, {{%.*}}* {{%.*}}, i32 0, i32 1
+  // CHECK-FUNCTIONS-NEXT: load i32, i32* [[PTR]], align 1
   return ptr->anInt;
 }
diff --git a/test/CodeGen/pointer-signext.c b/test/CodeGen/pointer-signext.c
index ac51ffc..470411f 100644
--- a/test/CodeGen/pointer-signext.c
+++ b/test/CodeGen/pointer-signext.c
@@ -5,7 +5,7 @@
 
 // CHECK:      [[P:%.*]] = add i64 %param, -8
 // CHECK-NEXT: [[Q:%.*]] = inttoptr i64 [[P]] to [[R:%.*]]*
-// CHECK-NEXT: {{%.*}} = getelementptr inbounds [[R]]* [[Q]], i64 0, i32 0
+// CHECK-NEXT: {{%.*}} = getelementptr inbounds [[R]], [[R]]* [[Q]], i64 0, i32 0
 
 #define CR(Record, TYPE, Field) \
   ((TYPE *) ((unsigned char *) (Record) - (unsigned char *) &(((TYPE *) 0)->Field)))
diff --git a/test/CodeGen/ppc-varargs-struct.c b/test/CodeGen/ppc-varargs-struct.c
index f0e075b..f5b012d 100644
--- a/test/CodeGen/ppc-varargs-struct.c
+++ b/test/CodeGen/ppc-varargs-struct.c
@@ -19,7 +19,7 @@
 // CHECK: bitcast %struct.x* %t to i8*
 // CHECK: bitcast %struct.x* %{{[0-9]+}} to i8*
 // CHECK: call void @llvm.memcpy
-// CHECK-PPC:  [[ARRAYDECAY:%[a-z0-9]+]] = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+// CHECK-PPC:  [[ARRAYDECAY:%[a-z0-9]+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
 // CHECK-PPC-NEXT:  [[GPRPTR:%[a-z0-9]+]] = bitcast %struct.__va_list_tag* [[ARRAYDECAY]] to i8*
 // CHECK-PPC-NEXT:  [[ZERO:%[0-9]+]] = ptrtoint i8* [[GPRPTR]] to i32
 // CHECK-PPC-NEXT:  [[ONE:%[0-9]+]] = add i32 [[ZERO]], 1
@@ -28,11 +28,11 @@
 // CHECK-PPC-NEXT:  [[FOUR:%[0-9]+]] = inttoptr i32 [[THREE]] to i8**
 // CHECK-PPC-NEXT:  [[FIVE:%[0-9]+]] = add i32 [[THREE]], 4
 // CHECK-PPC-NEXT:  [[SIX:%[0-9]+]] = inttoptr i32 [[FIVE]] to i8**
-// CHECK-PPC-NEXT:  [[GPR:%[a-z0-9]+]] = load i8* [[GPRPTR]]
-// CHECK-PPC-NEXT:  [[FPR:%[a-z0-9]+]] = load i8* [[TWO]] 
-// CHECK-PPC-NEXT:  [[OVERFLOW_AREA:%[a-z_0-9]+]] = load i8** [[FOUR]]
+// CHECK-PPC-NEXT:  [[GPR:%[a-z0-9]+]] = load i8, i8* [[GPRPTR]]
+// CHECK-PPC-NEXT:  [[FPR:%[a-z0-9]+]] = load i8, i8* [[TWO]] 
+// CHECK-PPC-NEXT:  [[OVERFLOW_AREA:%[a-z_0-9]+]] = load i8*, i8** [[FOUR]]
 // CHECK-PPC-NEXT:  [[SEVEN:%[0-9]+]] = ptrtoint i8* [[OVERFLOW_AREA]] to i32
-// CHECK-PPC-NEXT:  [[REGSAVE_AREA:%[a-z_0-9]+]] = load i8** [[SIX]]
+// CHECK-PPC-NEXT:  [[REGSAVE_AREA:%[a-z_0-9]+]] = load i8*, i8** [[SIX]]
 // CHECK-PPC-NEXT:  [[EIGHT:%[0-9]+]] = ptrtoint i8* [[REGSAVE_AREA]] to i32
 // CHECK-PPC-NEXT:  [[COND:%[a-z0-9]+]] = icmp ult i8 [[GPR]], 8
 // CHECK-PPC-NEXT:  [[NINE:%[0-9]+]] = mul i8 [[GPR]], 4
@@ -56,7 +56,7 @@
 // CHECK-PPC1:[[CONT]]
 // CHECK-PPC:  [[VAARG_ADDR:%[a-z.0-9]+]] = phi %struct.x* [ [[TWELVE]], [[USING_REGS]] ], [ [[FOURTEEN]], [[USING_OVERFLOW]] ]
 // CHECK-PPC-NEXT:  [[AGGRPTR:%[a-z0-9]+]] = bitcast %struct.x* [[VAARG_ADDR]] to i8**
-// CHECK-PPC-NEXT:  [[AGGR:%[a-z0-9]+]] = load i8** [[AGGRPTR]]
+// CHECK-PPC-NEXT:  [[AGGR:%[a-z0-9]+]] = load i8*, i8** [[AGGRPTR]]
 // CHECK-PPC-NEXT:  [[SEVENTEEN:%[0-9]+]] = bitcast %struct.x* %t to i8*
 // CHECK-PPC-NEXT:  call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[SEVENTEEN]], i8* [[AGGR]], i32 16, i32 8, i1 false)
 
@@ -65,7 +65,7 @@
 // CHECK: add i64 %{{[0-9]+}}, 4
 // CHECK: inttoptr i64 %{{[0-9]+}} to i8*
 // CHECK: bitcast i8* %{{[0-9]+}} to i32*
-// CHECK-PPC:  [[ARRAYDECAY1:%[a-z0-9]+]] = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
+// CHECK-PPC:  [[ARRAYDECAY1:%[a-z0-9]+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0
 // CHECK-PPC-NEXT:  [[GPRPTR1:%[a-z0-9]+]] = bitcast %struct.__va_list_tag* [[ARRAYDECAY1]] to i8*
 // CHECK-PPC-NEXT:  [[EIGHTEEN:%[0-9]+]] = ptrtoint i8* [[GPRPTR1]] to i32
 // CHECK-PPC-NEXT:  [[NINETEEN:%[0-9]+]] = add i32 [[EIGHTEEN]], 1
@@ -74,11 +74,11 @@
 // CHECK-PPC-NEXT:  [[TWENTYTWO:%[0-9]+]] = inttoptr i32 [[TWENTYONE]] to i8**
 // CHECK-PPC-NEXT:  [[TWENTYTHREE:%[0-9]+]] = add i32 [[TWENTYONE]], 4
 // CHECK-PPC-NEXT:  [[TWENTYFOUR:%[0-9]+]] = inttoptr i32 [[TWENTYTHREE]] to i8**
-// CHECK-PPC-NEXT:  [[GPR1:%[a-z0-9]+]] = load i8* [[GPRPTR1]]
-// CHECK-PPC-NEXT:  [[FPR1:%[a-z0-9]+]] = load i8* [[TWENTY]]
-// CHECK-PPC-NEXT:  [[OVERFLOW_AREA1:%[a-z_0-9]+]] = load i8** [[TWENTYTWO]]
+// CHECK-PPC-NEXT:  [[GPR1:%[a-z0-9]+]] = load i8, i8* [[GPRPTR1]]
+// CHECK-PPC-NEXT:  [[FPR1:%[a-z0-9]+]] = load i8, i8* [[TWENTY]]
+// CHECK-PPC-NEXT:  [[OVERFLOW_AREA1:%[a-z_0-9]+]] = load i8*, i8** [[TWENTYTWO]]
 // CHECK-PPC-NEXT:  [[TWENTYFIVE:%[0-9]+]] = ptrtoint i8* [[OVERFLOW_AREA1]] to i32
-// CHECK-PPC-NEXT:  [[REGSAVE_AREA1:%[a-z_0-9]+]] = load i8** [[TWENTYFOUR]]
+// CHECK-PPC-NEXT:  [[REGSAVE_AREA1:%[a-z_0-9]+]] = load i8*, i8** [[TWENTYFOUR]]
 // CHECK-PPC-NEXT:  [[TWENTYSIX:%[0-9]+]] = ptrtoint i8* [[REGSAVE_AREA1]] to i32
 // CHECK-PPC-NEXT:  [[COND1:%[a-z0-9]+]] = icmp ult i8 [[GPR1]], 8
 // CHECK-PPC-NEXT:  [[TWENTYSEVEN:%[0-9]+]] = mul i8 [[GPR1]], 4
@@ -101,12 +101,12 @@
 //
 // CHECK-PPC1:[[CONT1]]:
 // CHECK-PPC:  [[VAARG_ADDR1:%[a-z.0-9]+]] = phi i32* [ [[THIRTY]], [[USING_REGS1]] ], [ [[THIRTYTWO]], [[USING_OVERFLOW1]] ]
-// CHECK-PPC-NEXT:  [[THIRTYFIVE:%[0-9]+]] = load i32* [[VAARG_ADDR1]]
+// CHECK-PPC-NEXT:  [[THIRTYFIVE:%[0-9]+]] = load i32, i32* [[VAARG_ADDR1]]
 // CHECK-PPC-NEXT:  store i32 [[THIRTYFIVE]], i32* %v, align 4
 
 #ifdef __powerpc64__
   __int128_t u = va_arg (ap, __int128_t);
 #endif
 // CHECK: bitcast i8* %{{[a-z.0-9]+}} to i128*
-// CHECK-NEXT: load i128* %{{[0-9]+}}
+// CHECK-NEXT: load i128, i128* %{{[0-9]+}}
 }
diff --git a/test/CodeGen/ppc64-align-long-double.c b/test/CodeGen/ppc64-align-long-double.c
index 6d07f70..2214e24 100644
--- a/test/CodeGen/ppc64-align-long-double.c
+++ b/test/CodeGen/ppc64-align-long-double.c
@@ -13,4 +13,4 @@
   return x.b;
 }
 
-// CHECK: %{{[0-9]}} = load ppc_fp128* %{{[a-zA-Z0-9]+}}, align 16
+// CHECK: %{{[0-9]}} = load ppc_fp128, ppc_fp128* %{{[a-zA-Z0-9]+}}, align 16
diff --git a/test/CodeGen/ppc64-align-struct.c b/test/CodeGen/ppc64-align-struct.c
index a50c849..8c4437a 100644
--- a/test/CodeGen/ppc64-align-struct.c
+++ b/test/CodeGen/ppc64-align-struct.c
@@ -49,8 +49,8 @@
 }
 
 // CHECK: define void @test1va(%struct.test1* noalias sret %agg.result, i32 signext %x, ...)
-// CHECK: %[[CUR:[^ ]+]] = load i8** %ap
-// CHECK: %[[NEXT:[^ ]+]] = getelementptr i8* %[[CUR]], i64 8
+// CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
+// CHECK: %[[NEXT:[^ ]+]] = getelementptr i8, i8* %[[CUR]], i64 8
 // CHECK: store i8* %[[NEXT]], i8** %ap
 // CHECK: bitcast i8* %[[CUR]] to %struct.test1*
 struct test1 test1va (int x, ...)
@@ -64,12 +64,12 @@
 }
 
 // CHECK: define void @test2va(%struct.test2* noalias sret %agg.result, i32 signext %x, ...)
-// CHECK: %[[CUR:[^ ]+]] = load i8** %ap
+// CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
 // CHECK: %[[TMP0:[^ ]+]] = ptrtoint i8* %[[CUR]] to i64
 // CHECK: %[[TMP1:[^ ]+]] = add i64 %[[TMP0]], 15
 // CHECK: %[[TMP2:[^ ]+]] = and i64 %[[TMP1]], -16
 // CHECK: %[[ALIGN:[^ ]+]] = inttoptr i64 %[[TMP2]] to i8*
-// CHECK: %[[NEXT:[^ ]+]] = getelementptr i8* %[[ALIGN]], i64 16
+// CHECK: %[[NEXT:[^ ]+]] = getelementptr i8, i8* %[[ALIGN]], i64 16
 // CHECK: store i8* %[[NEXT]], i8** %ap
 // CHECK: bitcast i8* %[[ALIGN]] to %struct.test2*
 struct test2 test2va (int x, ...)
@@ -83,12 +83,12 @@
 }
 
 // CHECK: define void @test3va(%struct.test3* noalias sret %agg.result, i32 signext %x, ...)
-// CHECK: %[[CUR:[^ ]+]] = load i8** %ap
+// CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
 // CHECK: %[[TMP0:[^ ]+]] = ptrtoint i8* %[[CUR]] to i64
 // CHECK: %[[TMP1:[^ ]+]] = add i64 %[[TMP0]], 15
 // CHECK: %[[TMP2:[^ ]+]] = and i64 %[[TMP1]], -16
 // CHECK: %[[ALIGN:[^ ]+]] = inttoptr i64 %[[TMP2]] to i8*
-// CHECK: %[[NEXT:[^ ]+]] = getelementptr i8* %[[ALIGN]], i64 32
+// CHECK: %[[NEXT:[^ ]+]] = getelementptr i8, i8* %[[ALIGN]], i64 32
 // CHECK: store i8* %[[NEXT]], i8** %ap
 // CHECK: bitcast i8* %[[ALIGN]] to %struct.test3*
 struct test3 test3va (int x, ...)
@@ -102,8 +102,8 @@
 }
 
 // CHECK: define void @test4va(%struct.test4* noalias sret %agg.result, i32 signext %x, ...)
-// CHECK: %[[CUR:[^ ]+]] = load i8** %ap
-// CHECK: %[[NEXT:[^ ]+]] = getelementptr i8* %[[CUR]], i64 16
+// CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
+// CHECK: %[[NEXT:[^ ]+]] = getelementptr i8, i8* %[[CUR]], i64 16
 // CHECK: store i8* %[[NEXT]], i8** %ap
 // CHECK: bitcast i8* %[[CUR]] to %struct.test4*
 struct test4 test4va (int x, ...)
@@ -117,8 +117,8 @@
 }
 
 // CHECK: define void @testva_longdouble(%struct.test_longdouble* noalias sret %agg.result, i32 signext %x, ...)
-// CHECK: %[[CUR:[^ ]+]] = load i8** %ap
-// CHECK: %[[NEXT:[^ ]+]] = getelementptr i8* %[[CUR]], i64 16
+// CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
+// CHECK: %[[NEXT:[^ ]+]] = getelementptr i8, i8* %[[CUR]], i64 16
 // CHECK: store i8* %[[NEXT]], i8** %ap
 // CHECK: bitcast i8* %[[CUR]] to %struct.test_longdouble*
 struct test_longdouble { long double x; };
@@ -133,12 +133,12 @@
 }
 
 // CHECK: define void @testva_vector(%struct.test_vector* noalias sret %agg.result, i32 signext %x, ...)
-// CHECK: %[[CUR:[^ ]+]] = load i8** %ap
+// CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
 // CHECK: %[[TMP0:[^ ]+]] = ptrtoint i8* %[[CUR]] to i64
 // CHECK: %[[TMP1:[^ ]+]] = add i64 %[[TMP0]], 15
 // CHECK: %[[TMP2:[^ ]+]] = and i64 %[[TMP1]], -16
 // CHECK: %[[ALIGN:[^ ]+]] = inttoptr i64 %[[TMP2]] to i8*
-// CHECK: %[[NEXT:[^ ]+]] = getelementptr i8* %[[ALIGN]], i64 16
+// CHECK: %[[NEXT:[^ ]+]] = getelementptr i8, i8* %[[ALIGN]], i64 16
 // CHECK: store i8* %[[NEXT]], i8** %ap
 // CHECK: bitcast i8* %[[ALIGN]] to %struct.test_vector*
 struct test_vector { vector int x; };
diff --git a/test/CodeGen/ppc64-complex-parms.c b/test/CodeGen/ppc64-complex-parms.c
index fe3025a..f5583a0 100644
--- a/test/CodeGen/ppc64-complex-parms.c
+++ b/test/CodeGen/ppc64-complex-parms.c
@@ -58,14 +58,14 @@
 
 // CHECK: define void @bar_float() [[NUW]] {
 // CHECK: %[[VAR1:[A-Za-z0-9.]+]] = alloca { float, float }, align 4
-// CHECK: %[[VAR2:[A-Za-z0-9.]+]] = getelementptr inbounds { float, float }* %[[VAR1]], i32 0, i32 0
-// CHECK: %[[VAR3:[A-Za-z0-9.]+]] = getelementptr inbounds { float, float }* %[[VAR1]], i32 0, i32 1
+// CHECK: %[[VAR2:[A-Za-z0-9.]+]] = getelementptr inbounds { float, float }, { float, float }* %[[VAR1]], i32 0, i32 0
+// CHECK: %[[VAR3:[A-Za-z0-9.]+]] = getelementptr inbounds { float, float }, { float, float }* %[[VAR1]], i32 0, i32 1
 // CHECK: store float 2.000000e+00, float* %[[VAR2]]
 // CHECK: store float -2.500000e+00, float* %[[VAR3]]
-// CHECK: %[[VAR4:[A-Za-z0-9.]+]] = getelementptr { float, float }* %[[VAR1]], i32 0, i32 0
-// CHECK: %[[VAR5:[A-Za-z0-9.]+]] = load float* %[[VAR4]], align 1
-// CHECK: %[[VAR6:[A-Za-z0-9.]+]] = getelementptr { float, float }* %[[VAR1]], i32 0, i32 1
-// CHECK: %[[VAR7:[A-Za-z0-9.]+]] = load float* %[[VAR6]], align 1
+// CHECK: %[[VAR4:[A-Za-z0-9.]+]] = getelementptr { float, float }, { float, float }* %[[VAR1]], i32 0, i32 0
+// CHECK: %[[VAR5:[A-Za-z0-9.]+]] = load float, float* %[[VAR4]], align 1
+// CHECK: %[[VAR6:[A-Za-z0-9.]+]] = getelementptr { float, float }, { float, float }* %[[VAR1]], i32 0, i32 1
+// CHECK: %[[VAR7:[A-Za-z0-9.]+]] = load float, float* %[[VAR6]], align 1
 // CHECK: %{{[A-Za-z0-9.]+}} = call float @foo_float(float %[[VAR5]], float %[[VAR7]])
 
 void bar_double(void) {
@@ -74,14 +74,14 @@
 
 // CHECK: define void @bar_double() [[NUW]] {
 // CHECK: %[[VAR11:[A-Za-z0-9.]+]] = alloca { double, double }, align 8
-// CHECK: %[[VAR12:[A-Za-z0-9.]+]] = getelementptr inbounds { double, double }* %[[VAR11]], i32 0, i32 0
-// CHECK: %[[VAR13:[A-Za-z0-9.]+]] = getelementptr inbounds { double, double }* %[[VAR11]], i32 0, i32 1
+// CHECK: %[[VAR12:[A-Za-z0-9.]+]] = getelementptr inbounds { double, double }, { double, double }* %[[VAR11]], i32 0, i32 0
+// CHECK: %[[VAR13:[A-Za-z0-9.]+]] = getelementptr inbounds { double, double }, { double, double }* %[[VAR11]], i32 0, i32 1
 // CHECK: store double 2.000000e+00, double* %[[VAR12]]
 // CHECK: store double -2.500000e+00, double* %[[VAR13]]
-// CHECK: %[[VAR14:[A-Za-z0-9.]+]] = getelementptr { double, double }* %[[VAR11]], i32 0, i32 0
-// CHECK: %[[VAR15:[A-Za-z0-9.]+]] = load double* %[[VAR14]], align 1
-// CHECK: %[[VAR16:[A-Za-z0-9.]+]] = getelementptr { double, double }* %[[VAR11]], i32 0, i32 1
-// CHECK: %[[VAR17:[A-Za-z0-9.]+]] = load double* %[[VAR16]], align 1
+// CHECK: %[[VAR14:[A-Za-z0-9.]+]] = getelementptr { double, double }, { double, double }* %[[VAR11]], i32 0, i32 0
+// CHECK: %[[VAR15:[A-Za-z0-9.]+]] = load double, double* %[[VAR14]], align 1
+// CHECK: %[[VAR16:[A-Za-z0-9.]+]] = getelementptr { double, double }, { double, double }* %[[VAR11]], i32 0, i32 1
+// CHECK: %[[VAR17:[A-Za-z0-9.]+]] = load double, double* %[[VAR16]], align 1
 // CHECK: %{{[A-Za-z0-9.]+}} = call double @foo_double(double %[[VAR15]], double %[[VAR17]])
 
 void bar_long_double(void) {
@@ -90,14 +90,14 @@
 
 // CHECK: define void @bar_long_double() [[NUW]] {
 // CHECK: %[[VAR21:[A-Za-z0-9.]+]] = alloca { ppc_fp128, ppc_fp128 }, align 16
-// CHECK: %[[VAR22:[A-Za-z0-9.]+]] = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %[[VAR21]], i32 0, i32 0
-// CHECK: %[[VAR23:[A-Za-z0-9.]+]] = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %[[VAR21]], i32 0, i32 1
+// CHECK: %[[VAR22:[A-Za-z0-9.]+]] = getelementptr inbounds { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %[[VAR21]], i32 0, i32 0
+// CHECK: %[[VAR23:[A-Za-z0-9.]+]] = getelementptr inbounds { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %[[VAR21]], i32 0, i32 1
 // CHECK: store ppc_fp128 0xM40000000000000000000000000000000, ppc_fp128* %[[VAR22]]
 // CHECK: store ppc_fp128 0xMC0040000000000000000000000000000, ppc_fp128* %[[VAR23]]
-// CHECK: %[[VAR24:[A-Za-z0-9.]+]] = getelementptr { ppc_fp128, ppc_fp128 }* %[[VAR21]], i32 0, i32 0
-// CHECK: %[[VAR25:[A-Za-z0-9.]+]] = load ppc_fp128* %[[VAR24]], align 1
-// CHECK: %[[VAR26:[A-Za-z0-9.]+]] = getelementptr { ppc_fp128, ppc_fp128 }* %[[VAR21]], i32 0, i32 1
-// CHECK: %[[VAR27:[A-Za-z0-9.]+]] = load ppc_fp128* %[[VAR26]], align 1
+// CHECK: %[[VAR24:[A-Za-z0-9.]+]] = getelementptr { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %[[VAR21]], i32 0, i32 0
+// CHECK: %[[VAR25:[A-Za-z0-9.]+]] = load ppc_fp128, ppc_fp128* %[[VAR24]], align 1
+// CHECK: %[[VAR26:[A-Za-z0-9.]+]] = getelementptr { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %[[VAR21]], i32 0, i32 1
+// CHECK: %[[VAR27:[A-Za-z0-9.]+]] = load ppc_fp128, ppc_fp128* %[[VAR26]], align 1
 // CHECK: %{{[A-Za-z0-9.]+}} = call ppc_fp128 @foo_long_double(ppc_fp128 %[[VAR25]], ppc_fp128 %[[VAR27]])
 
 void bar_int(void) {
@@ -106,14 +106,14 @@
 
 // CHECK: define void @bar_int() [[NUW]] {
 // CHECK: %[[VAR31:[A-Za-z0-9.]+]] = alloca { i32, i32 }, align 4
-// CHECK: %[[VAR32:[A-Za-z0-9.]+]] = getelementptr inbounds { i32, i32 }* %[[VAR31]], i32 0, i32 0
-// CHECK: %[[VAR33:[A-Za-z0-9.]+]] = getelementptr inbounds { i32, i32 }* %[[VAR31]], i32 0, i32 1
+// CHECK: %[[VAR32:[A-Za-z0-9.]+]] = getelementptr inbounds { i32, i32 }, { i32, i32 }* %[[VAR31]], i32 0, i32 0
+// CHECK: %[[VAR33:[A-Za-z0-9.]+]] = getelementptr inbounds { i32, i32 }, { i32, i32 }* %[[VAR31]], i32 0, i32 1
 // CHECK: store i32 2, i32* %[[VAR32]]
 // CHECK: store i32 -3, i32* %[[VAR33]]
-// CHECK: %[[VAR34:[A-Za-z0-9.]+]] = getelementptr { i32, i32 }* %[[VAR31]], i32 0, i32 0
-// CHECK: %[[VAR35:[A-Za-z0-9.]+]] = load i32* %[[VAR34]], align 1
-// CHECK: %[[VAR36:[A-Za-z0-9.]+]] = getelementptr { i32, i32 }* %[[VAR31]], i32 0, i32 1
-// CHECK: %[[VAR37:[A-Za-z0-9.]+]] = load i32* %[[VAR36]], align 1
+// CHECK: %[[VAR34:[A-Za-z0-9.]+]] = getelementptr { i32, i32 }, { i32, i32 }* %[[VAR31]], i32 0, i32 0
+// CHECK: %[[VAR35:[A-Za-z0-9.]+]] = load i32, i32* %[[VAR34]], align 1
+// CHECK: %[[VAR36:[A-Za-z0-9.]+]] = getelementptr { i32, i32 }, { i32, i32 }* %[[VAR31]], i32 0, i32 1
+// CHECK: %[[VAR37:[A-Za-z0-9.]+]] = load i32, i32* %[[VAR36]], align 1
 // CHECK: %{{[A-Za-z0-9.]+}} = call signext i32 @foo_int(i32 %[[VAR35]], i32 %[[VAR37]])
 
 void bar_short(void) {
@@ -122,14 +122,14 @@
 
 // CHECK: define void @bar_short() [[NUW]] {
 // CHECK: %[[VAR41:[A-Za-z0-9.]+]] = alloca { i16, i16 }, align 2
-// CHECK: %[[VAR42:[A-Za-z0-9.]+]] = getelementptr inbounds { i16, i16 }* %[[VAR41]], i32 0, i32 0
-// CHECK: %[[VAR43:[A-Za-z0-9.]+]] = getelementptr inbounds { i16, i16 }* %[[VAR41]], i32 0, i32 1
+// CHECK: %[[VAR42:[A-Za-z0-9.]+]] = getelementptr inbounds { i16, i16 }, { i16, i16 }* %[[VAR41]], i32 0, i32 0
+// CHECK: %[[VAR43:[A-Za-z0-9.]+]] = getelementptr inbounds { i16, i16 }, { i16, i16 }* %[[VAR41]], i32 0, i32 1
 // CHECK: store i16 2, i16* %[[VAR42]]
 // CHECK: store i16 -3, i16* %[[VAR43]]
-// CHECK: %[[VAR44:[A-Za-z0-9.]+]] = getelementptr { i16, i16 }* %[[VAR41]], i32 0, i32 0
-// CHECK: %[[VAR45:[A-Za-z0-9.]+]] = load i16* %[[VAR44]], align 1
-// CHECK: %[[VAR46:[A-Za-z0-9.]+]] = getelementptr { i16, i16 }* %[[VAR41]], i32 0, i32 1
-// CHECK: %[[VAR47:[A-Za-z0-9.]+]] = load i16* %[[VAR46]], align 1
+// CHECK: %[[VAR44:[A-Za-z0-9.]+]] = getelementptr { i16, i16 }, { i16, i16 }* %[[VAR41]], i32 0, i32 0
+// CHECK: %[[VAR45:[A-Za-z0-9.]+]] = load i16, i16* %[[VAR44]], align 1
+// CHECK: %[[VAR46:[A-Za-z0-9.]+]] = getelementptr { i16, i16 }, { i16, i16 }* %[[VAR41]], i32 0, i32 1
+// CHECK: %[[VAR47:[A-Za-z0-9.]+]] = load i16, i16* %[[VAR46]], align 1
 // CHECK: %{{[A-Za-z0-9.]+}} = call signext i16 @foo_short(i16 %[[VAR45]], i16 %[[VAR47]])
 
 void bar_char(void) {
@@ -138,14 +138,14 @@
 
 // CHECK: define void @bar_char() [[NUW]] {
 // CHECK: %[[VAR51:[A-Za-z0-9.]+]] = alloca { i8, i8 }, align 1
-// CHECK: %[[VAR52:[A-Za-z0-9.]+]] = getelementptr inbounds { i8, i8 }* %[[VAR51]], i32 0, i32 0
-// CHECK: %[[VAR53:[A-Za-z0-9.]+]] = getelementptr inbounds { i8, i8 }* %[[VAR51]], i32 0, i32 1
+// CHECK: %[[VAR52:[A-Za-z0-9.]+]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %[[VAR51]], i32 0, i32 0
+// CHECK: %[[VAR53:[A-Za-z0-9.]+]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %[[VAR51]], i32 0, i32 1
 // CHECK: store i8 2, i8* %[[VAR52]]
 // CHECK: store i8 -3, i8* %[[VAR53]]
-// CHECK: %[[VAR54:[A-Za-z0-9.]+]] = getelementptr { i8, i8 }* %[[VAR51]], i32 0, i32 0
-// CHECK: %[[VAR55:[A-Za-z0-9.]+]] = load i8* %[[VAR54]], align 1
-// CHECK: %[[VAR56:[A-Za-z0-9.]+]] = getelementptr { i8, i8 }* %[[VAR51]], i32 0, i32 1
-// CHECK: %[[VAR57:[A-Za-z0-9.]+]] = load i8* %[[VAR56]], align 1
+// CHECK: %[[VAR54:[A-Za-z0-9.]+]] = getelementptr { i8, i8 }, { i8, i8 }* %[[VAR51]], i32 0, i32 0
+// CHECK: %[[VAR55:[A-Za-z0-9.]+]] = load i8, i8* %[[VAR54]], align 1
+// CHECK: %[[VAR56:[A-Za-z0-9.]+]] = getelementptr { i8, i8 }, { i8, i8 }* %[[VAR51]], i32 0, i32 1
+// CHECK: %[[VAR57:[A-Za-z0-9.]+]] = load i8, i8* %[[VAR56]], align 1
 // CHECK: %{{[A-Za-z0-9.]+}} = call signext i8 @foo_char(i8 %[[VAR55]], i8 %[[VAR57]])
 
 void bar_long(void) {
@@ -154,14 +154,14 @@
 
 // CHECK: define void @bar_long() [[NUW]] {
 // CHECK: %[[VAR61:[A-Za-z0-9.]+]] = alloca { i64, i64 }, align 8
-// CHECK: %[[VAR62:[A-Za-z0-9.]+]] = getelementptr inbounds { i64, i64 }* %[[VAR61]], i32 0, i32 0
-// CHECK: %[[VAR63:[A-Za-z0-9.]+]] = getelementptr inbounds { i64, i64 }* %[[VAR61]], i32 0, i32 1
+// CHECK: %[[VAR62:[A-Za-z0-9.]+]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* %[[VAR61]], i32 0, i32 0
+// CHECK: %[[VAR63:[A-Za-z0-9.]+]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* %[[VAR61]], i32 0, i32 1
 // CHECK: store i64 2, i64* %[[VAR62]]
 // CHECK: store i64 -3, i64* %[[VAR63]]
-// CHECK: %[[VAR64:[A-Za-z0-9.]+]] = getelementptr { i64, i64 }* %[[VAR61]], i32 0, i32 0
-// CHECK: %[[VAR65:[A-Za-z0-9.]+]] = load i64* %[[VAR64]], align 1
-// CHECK: %[[VAR66:[A-Za-z0-9.]+]] = getelementptr { i64, i64 }* %[[VAR61]], i32 0, i32 1
-// CHECK: %[[VAR67:[A-Za-z0-9.]+]] = load i64* %[[VAR66]], align 1
+// CHECK: %[[VAR64:[A-Za-z0-9.]+]] = getelementptr { i64, i64 }, { i64, i64 }* %[[VAR61]], i32 0, i32 0
+// CHECK: %[[VAR65:[A-Za-z0-9.]+]] = load i64, i64* %[[VAR64]], align 1
+// CHECK: %[[VAR66:[A-Za-z0-9.]+]] = getelementptr { i64, i64 }, { i64, i64 }* %[[VAR61]], i32 0, i32 1
+// CHECK: %[[VAR67:[A-Za-z0-9.]+]] = load i64, i64* %[[VAR66]], align 1
 // CHECK: %{{[A-Za-z0-9.]+}} = call i64 @foo_long(i64 %[[VAR65]], i64 %[[VAR67]])
 
 void bar_long_long(void) {
@@ -170,14 +170,14 @@
 
 // CHECK: define void @bar_long_long() [[NUW]] {
 // CHECK: %[[VAR71:[A-Za-z0-9.]+]] = alloca { i64, i64 }, align 8
-// CHECK: %[[VAR72:[A-Za-z0-9.]+]] = getelementptr inbounds { i64, i64 }* %[[VAR71]], i32 0, i32 0
-// CHECK: %[[VAR73:[A-Za-z0-9.]+]] = getelementptr inbounds { i64, i64 }* %[[VAR71]], i32 0, i32 1
+// CHECK: %[[VAR72:[A-Za-z0-9.]+]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* %[[VAR71]], i32 0, i32 0
+// CHECK: %[[VAR73:[A-Za-z0-9.]+]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* %[[VAR71]], i32 0, i32 1
 // CHECK: store i64 2, i64* %[[VAR72]]
 // CHECK: store i64 -3, i64* %[[VAR73]]
-// CHECK: %[[VAR74:[A-Za-z0-9.]+]] = getelementptr { i64, i64 }* %[[VAR71]], i32 0, i32 0
-// CHECK: %[[VAR75:[A-Za-z0-9.]+]] = load i64* %[[VAR74]], align 1
-// CHECK: %[[VAR76:[A-Za-z0-9.]+]] = getelementptr { i64, i64 }* %[[VAR71]], i32 0, i32 1
-// CHECK: %[[VAR77:[A-Za-z0-9.]+]] = load i64* %[[VAR76]], align 1
+// CHECK: %[[VAR74:[A-Za-z0-9.]+]] = getelementptr { i64, i64 }, { i64, i64 }* %[[VAR71]], i32 0, i32 0
+// CHECK: %[[VAR75:[A-Za-z0-9.]+]] = load i64, i64* %[[VAR74]], align 1
+// CHECK: %[[VAR76:[A-Za-z0-9.]+]] = getelementptr { i64, i64 }, { i64, i64 }* %[[VAR71]], i32 0, i32 1
+// CHECK: %[[VAR77:[A-Za-z0-9.]+]] = load i64, i64* %[[VAR76]], align 1
 // CHECK: %{{[A-Za-z0-9.]+}} = call i64 @foo_long_long(i64 %[[VAR75]], i64 %[[VAR77]])
 
 // CHECK: attributes [[NUW]] = { nounwind{{.*}} }
diff --git a/test/CodeGen/ppc64-elf-abi.c b/test/CodeGen/ppc64-elf-abi.c
index 0dd183e..59112a0 100644
--- a/test/CodeGen/ppc64-elf-abi.c
+++ b/test/CodeGen/ppc64-elf-abi.c
@@ -7,6 +7,8 @@
 // RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu -emit-llvm -o - %s \
 // RUN:   -target-abi elfv1 | FileCheck %s --check-prefix=CHECK-ELFv1
 // RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu -emit-llvm -o - %s \
+// RUN:   -target-abi elfv1-qpx | FileCheck %s --check-prefix=CHECK-ELFv1
+// RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu -emit-llvm -o - %s \
 // RUN:   -target-abi elfv2 | FileCheck %s --check-prefix=CHECK-ELFv2
 // RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -emit-llvm -o - %s \
 // RUN:   | FileCheck %s --check-prefix=CHECK-ELFv2
@@ -19,22 +21,3 @@
 // CHECK-ELFv2: define [2 x float] @func_fab([2 x float] %x.coerce)
 struct fab { float a; float b; };
 struct fab func_fab(struct fab x) { return x; }
-
-// Verify ABI choice is passed on to the back end
-
-// RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu -S -o - %s \
-// RUN:   | FileCheck %s --check-prefix=CHECK-ASM-ELFv1
-// RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu -S -o - %s \
-// RUN:   -target-abi elfv1 | FileCheck %s --check-prefix=CHECK-ASM-ELFv1
-// RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu -S -o - %s \
-// RUN:   -target-abi elfv2 | FileCheck %s --check-prefix=CHECK-ASM-ELFv2
-// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -S -o - %s \
-// RUN:   | FileCheck %s --check-prefix=CHECK-ASM-ELFv2
-// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -S -o - %s \
-// RUN:   -target-abi elfv1 | FileCheck %s --check-prefix=CHECK-ASM-ELFv1
-// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -S -o - %s \
-// RUN:   -target-abi elfv2 | FileCheck %s --check-prefix=CHECK-ASM-ELFv2
-
-// CHECK-ASM-ELFv2: .abiversion 2
-// CHECK-ASM-ELFv1-NOT: .abiversion 2
-
diff --git a/test/CodeGen/ppc64-qpx-vector.c b/test/CodeGen/ppc64-qpx-vector.c
new file mode 100644
index 0000000..c42d4b3
--- /dev/null
+++ b/test/CodeGen/ppc64-qpx-vector.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s -check-prefix=ALL -check-prefix=NORMAL
+// RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu -emit-llvm -o - -target-abi elfv1-qpx %s | FileCheck %s -check-prefix=ALL -check-prefix=QPX
+
+typedef float v4sf __attribute__((vector_size(16)));
+typedef double v4df __attribute__((vector_size(32)));
+
+struct ssf { v4sf v; };
+struct sdf { v4df v; };
+
+struct ssf2 { v4sf v[2]; };
+struct sdf2 { v4df v[2]; };
+
+v4sf foo1(struct ssf a, v4sf b, struct ssf2 c) {
+  return a.v + b;
+}
+
+// ALL-LABEL: define <4 x float> @foo1(<4 x float> inreg %a.coerce, <4 x float> %b, [2 x i128] %c.coerce)
+// ALL: ret <4 x float>
+
+v4df foo2(struct sdf a, v4df b, struct sdf2 c) {
+  return a.v + b;
+}
+
+// QPX-LABEL: define <4 x double> @foo2(<4 x double> inreg %a.coerce, <4 x double> %b, [2 x i256] %c.coerce)
+// QPX: ret <4 x double>
+
+// NORMAL-LABEL: define void @foo2(<4 x double>* noalias sret %agg.result, [2 x i128] %a.coerce, <4 x double>*, [4 x i128] %c.coerce)
+// NORMAL: ret void
+
diff --git a/test/CodeGen/ppc64-struct-onefloat.c b/test/CodeGen/ppc64-struct-onefloat.c
index 11b16a4..d0ccfbe 100644
--- a/test/CodeGen/ppc64-struct-onefloat.c
+++ b/test/CodeGen/ppc64-struct-onefloat.c
@@ -13,15 +13,15 @@
 // CHECK:  %b = alloca %struct.s2, align 8
 // CHECK:  %d = alloca %struct.s4, align 4
 // CHECK:  %e = alloca %struct.s5, align 8
-// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s1* %a, i32 0, i32 0
+// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s1, %struct.s1* %a, i32 0, i32 0
 // CHECK:  store float %a.coerce, float* %{{[a-zA-Z0-9.]+}}, align 1
-// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s2* %b, i32 0, i32 0
+// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s2, %struct.s2* %b, i32 0, i32 0
 // CHECK:  store double %b.coerce, double* %{{[a-zA-Z0-9.]+}}, align 1
-// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s4* %d, i32 0, i32 0
-// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s1* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0
+// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s4, %struct.s4* %d, i32 0, i32 0
+// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s1, %struct.s1* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0
 // CHECK:  store float %d.coerce, float* %{{[a-zA-Z0-9.]+}}, align 1
-// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s5* %e, i32 0, i32 0
-// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s2* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0
+// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s5, %struct.s5* %e, i32 0, i32 0
+// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s2, %struct.s2* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0
 // CHECK:  store double %e.coerce, double* %{{[a-zA-Z0-9.]+}}, align 1
 // CHECK:  ret void
 
@@ -35,15 +35,15 @@
 }
 
 // CHECK-LABEL: define void @foo
-// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s1* %p1, i32 0, i32 0
-// CHECK:  %{{[0-9]+}} = load float* %{{[a-zA-Z0-9.]+}}, align 1
-// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s2* %p2, i32 0, i32 0
-// CHECK:  %{{[0-9]+}} = load double* %{{[a-zA-Z0-9.]+}}, align 1
-// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s4* %p4, i32 0, i32 0
-// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s1* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0
-// CHECK:  %{{[0-9]+}} = load float* %{{[a-zA-Z0-9.]+}}, align 1
-// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s5* %p5, i32 0, i32 0
-// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s2* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0
-// CHECK:  %{{[0-9]+}} = load double* %{{[a-zA-Z0-9.]+}}, align 1
+// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s1, %struct.s1* %p1, i32 0, i32 0
+// CHECK:  %{{[0-9]+}} = load float, float* %{{[a-zA-Z0-9.]+}}, align 1
+// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s2, %struct.s2* %p2, i32 0, i32 0
+// CHECK:  %{{[0-9]+}} = load double, double* %{{[a-zA-Z0-9.]+}}, align 1
+// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s4, %struct.s4* %p4, i32 0, i32 0
+// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s1, %struct.s1* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0
+// CHECK:  %{{[0-9]+}} = load float, float* %{{[a-zA-Z0-9.]+}}, align 1
+// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s5, %struct.s5* %p5, i32 0, i32 0
+// CHECK:  %{{[a-zA-Z0-9.]+}} = getelementptr %struct.s2, %struct.s2* %{{[a-zA-Z0-9.]+}}, i32 0, i32 0
+// CHECK:  %{{[0-9]+}} = load double, double* %{{[a-zA-Z0-9.]+}}, align 1
 // CHECK:  call void @bar(float inreg %{{[0-9]+}}, double inreg %{{[0-9]+}}, float inreg %{{[0-9]+}}, double inreg %{{[0-9]+}})
 // CHECK:  ret void
diff --git a/test/CodeGen/ppc64-varargs-complex.c b/test/CodeGen/ppc64-varargs-complex.c
index 8fc8839..f790629 100644
--- a/test/CodeGen/ppc64-varargs-complex.c
+++ b/test/CodeGen/ppc64-varargs-complex.c
@@ -8,66 +8,66 @@
   va_list ap;
 
   _Complex int i   = va_arg(ap, _Complex int);
-  // CHECK: %[[VAR40:[A-Za-z0-9.]+]] = load i8** %[[VAR100:[A-Za-z0-9.]+]]
-  // CHECK-NEXT: %[[VAR41:[A-Za-z0-9.]+]] = getelementptr i8* %[[VAR40]], i64 16
+  // CHECK: %[[VAR40:[A-Za-z0-9.]+]] = load i8*, i8** %[[VAR100:[A-Za-z0-9.]+]]
+  // CHECK-NEXT: %[[VAR41:[A-Za-z0-9.]+]] = getelementptr i8, i8* %[[VAR40]], i64 16
   // CHECK-NEXT: store i8* %[[VAR41]], i8** %[[VAR100]]
   // CHECK-NEXT: %[[VAR1:[A-Za-z0-9.]+]] = ptrtoint i8* %[[VAR40]] to i64
   // CHECK-NEXT: %[[VAR2:[A-Za-z0-9.]+]] = add i64 %[[VAR1]], 4
   // CHECK-NEXT: %[[VAR3:[A-Za-z0-9.]+]] = add i64 %[[VAR1]], 12
   // CHECK-NEXT: %[[VAR4:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR2]] to i32*
   // CHECK-NEXT: %[[VAR5:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR3]] to i32*
-  // CHECK-NEXT: %[[VAR6:[A-Za-z0-9.]+]] = load i32* %[[VAR4]]
-  // CHECK-NEXT: %[[VAR7:[A-Za-z0-9.]+]] = load i32* %[[VAR5]]
-  // CHECK-NEXT: %[[VAR8:[A-Za-z0-9.]+]] = getelementptr inbounds { i32, i32 }* %[[VAR0:[A-Za-z0-9.]+]], i32 0, i32 0
-  // CHECK-NEXT: %[[VAR9:[A-Za-z0-9.]+]] = getelementptr inbounds { i32, i32 }* %[[VAR0]], i32 0, i32 1
+  // CHECK-NEXT: %[[VAR6:[A-Za-z0-9.]+]] = load i32, i32* %[[VAR4]]
+  // CHECK-NEXT: %[[VAR7:[A-Za-z0-9.]+]] = load i32, i32* %[[VAR5]]
+  // CHECK-NEXT: %[[VAR8:[A-Za-z0-9.]+]] = getelementptr inbounds { i32, i32 }, { i32, i32 }* %[[VAR0:[A-Za-z0-9.]+]], i32 0, i32 0
+  // CHECK-NEXT: %[[VAR9:[A-Za-z0-9.]+]] = getelementptr inbounds { i32, i32 }, { i32, i32 }* %[[VAR0]], i32 0, i32 1
   // CHECK-NEXT: store i32 %[[VAR6]], i32* %[[VAR8]]
   // CHECK-NEXT: store i32 %[[VAR7]], i32* %[[VAR9]]
 
   _Complex short s = va_arg(ap, _Complex short);
-  // CHECK: %[[VAR50:[A-Za-z0-9.]+]] = load i8** %[[VAR100:[A-Za-z0-9.]+]]
-  // CHECK-NEXT: %[[VAR51:[A-Za-z0-9.]+]] = getelementptr i8* %[[VAR50]], i64 16
+  // CHECK: %[[VAR50:[A-Za-z0-9.]+]] = load i8*, i8** %[[VAR100:[A-Za-z0-9.]+]]
+  // CHECK-NEXT: %[[VAR51:[A-Za-z0-9.]+]] = getelementptr i8, i8* %[[VAR50]], i64 16
   // CHECK-NEXT: store i8* %[[VAR51]], i8** %[[VAR100]]
   // CHECK: %[[VAR11:[A-Za-z0-9.]+]] = ptrtoint i8* %{{[A-Za-z0-9.]+}} to i64
   // CHECK-NEXT: %[[VAR12:[A-Za-z0-9.]+]] = add i64 %[[VAR11]], 6
   // CHECK-NEXT: %[[VAR13:[A-Za-z0-9.]+]] = add i64 %[[VAR11]], 14
   // CHECK-NEXT: %[[VAR14:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR12]] to i16*
   // CHECK-NEXT: %[[VAR15:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR13]] to i16*
-  // CHECK-NEXT: %[[VAR16:[A-Za-z0-9.]+]] = load i16* %[[VAR14]]
-  // CHECK-NEXT: %[[VAR17:[A-Za-z0-9.]+]] = load i16* %[[VAR15]]
-  // CHECK-NEXT: %[[VAR18:[A-Za-z0-9.]+]] = getelementptr inbounds { i16, i16 }* %[[VAR10:[A-Za-z0-9.]+]], i32 0, i32 0
-  // CHECK-NEXT: %[[VAR19:[A-Za-z0-9.]+]] = getelementptr inbounds { i16, i16 }* %[[VAR10]], i32 0, i32 1
+  // CHECK-NEXT: %[[VAR16:[A-Za-z0-9.]+]] = load i16, i16* %[[VAR14]]
+  // CHECK-NEXT: %[[VAR17:[A-Za-z0-9.]+]] = load i16, i16* %[[VAR15]]
+  // CHECK-NEXT: %[[VAR18:[A-Za-z0-9.]+]] = getelementptr inbounds { i16, i16 }, { i16, i16 }* %[[VAR10:[A-Za-z0-9.]+]], i32 0, i32 0
+  // CHECK-NEXT: %[[VAR19:[A-Za-z0-9.]+]] = getelementptr inbounds { i16, i16 }, { i16, i16 }* %[[VAR10]], i32 0, i32 1
   // CHECK-NEXT: store i16 %[[VAR16]], i16* %[[VAR18]]
   // CHECK-NEXT: store i16 %[[VAR17]], i16* %[[VAR19]]
 
   _Complex char c  = va_arg(ap, _Complex char);
-  // CHECK: %[[VAR60:[A-Za-z0-9.]+]] = load i8** %[[VAR100:[A-Za-z0-9.]+]]
-  // CHECK-NEXT: %[[VAR61:[A-Za-z0-9.]+]] = getelementptr i8* %[[VAR60]], i64 16
+  // CHECK: %[[VAR60:[A-Za-z0-9.]+]] = load i8*, i8** %[[VAR100:[A-Za-z0-9.]+]]
+  // CHECK-NEXT: %[[VAR61:[A-Za-z0-9.]+]] = getelementptr i8, i8* %[[VAR60]], i64 16
   // CHECK-NEXT: store i8* %[[VAR61]], i8** %[[VAR100]]
   // CHECK: %[[VAR21:[A-Za-z0-9.]+]] = ptrtoint i8* %{{[A-Za-z0-9.]+}} to i64
   // CHECK-NEXT: %[[VAR22:[A-Za-z0-9.]+]] = add i64 %[[VAR21]], 7
   // CHECK-NEXT: %[[VAR23:[A-Za-z0-9.]+]] = add i64 %[[VAR21]], 15
   // CHECK-NEXT: %[[VAR24:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR22]] to i8*
   // CHECK-NEXT: %[[VAR25:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR23]] to i8*
-  // CHECK-NEXT: %[[VAR26:[A-Za-z0-9.]+]] = load i8* %[[VAR24]]
-  // CHECK-NEXT: %[[VAR27:[A-Za-z0-9.]+]] = load i8* %[[VAR25]]
-  // CHECK-NEXT: %[[VAR28:[A-Za-z0-9.]+]] = getelementptr inbounds { i8, i8 }* %[[VAR20:[A-Za-z0-9.]+]], i32 0, i32 0
-  // CHECK-NEXT: %[[VAR29:[A-Za-z0-9.]+]] = getelementptr inbounds { i8, i8 }* %[[VAR20]], i32 0, i32 1
+  // CHECK-NEXT: %[[VAR26:[A-Za-z0-9.]+]] = load i8, i8* %[[VAR24]]
+  // CHECK-NEXT: %[[VAR27:[A-Za-z0-9.]+]] = load i8, i8* %[[VAR25]]
+  // CHECK-NEXT: %[[VAR28:[A-Za-z0-9.]+]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %[[VAR20:[A-Za-z0-9.]+]], i32 0, i32 0
+  // CHECK-NEXT: %[[VAR29:[A-Za-z0-9.]+]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %[[VAR20]], i32 0, i32 1
   // CHECK-NEXT: store i8 %[[VAR26]], i8* %[[VAR28]]
   // CHECK-NEXT: store i8 %[[VAR27]], i8* %[[VAR29]]
 
   _Complex float f = va_arg(ap, _Complex float);
-  // CHECK: %[[VAR70:[A-Za-z0-9.]+]] = load i8** %[[VAR100:[A-Za-z0-9.]+]]
-  // CHECK-NEXT: %[[VAR71:[A-Za-z0-9.]+]] = getelementptr i8* %[[VAR70]], i64 16
+  // CHECK: %[[VAR70:[A-Za-z0-9.]+]] = load i8*, i8** %[[VAR100:[A-Za-z0-9.]+]]
+  // CHECK-NEXT: %[[VAR71:[A-Za-z0-9.]+]] = getelementptr i8, i8* %[[VAR70]], i64 16
   // CHECK-NEXT: store i8* %[[VAR71]], i8** %[[VAR100]]
   // CHECK: %[[VAR31:[A-Za-z0-9.]+]] = ptrtoint i8* %{{[A-Za-z0-9.]+}} to i64
   // CHECK-NEXT: %[[VAR32:[A-Za-z0-9.]+]] = add i64 %[[VAR31]], 4
   // CHECK-NEXT: %[[VAR33:[A-Za-z0-9.]+]] = add i64 %[[VAR31]], 12
   // CHECK-NEXT: %[[VAR34:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR32]] to float*
   // CHECK-NEXT: %[[VAR35:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR33]] to float*
-  // CHECK-NEXT: %[[VAR36:[A-Za-z0-9.]+]] = load float* %[[VAR34]]
-  // CHECK-NEXT: %[[VAR37:[A-Za-z0-9.]+]] = load float* %[[VAR35]]
-  // CHECK-NEXT: %[[VAR38:[A-Za-z0-9.]+]] = getelementptr inbounds { float, float }* %[[VAR30:[A-Za-z0-9.]+]], i32 0, i32 0
-  // CHECK-NEXT: %[[VAR39:[A-Za-z0-9.]+]] = getelementptr inbounds { float, float }* %[[VAR30]], i32 0, i32 1
+  // CHECK-NEXT: %[[VAR36:[A-Za-z0-9.]+]] = load float, float* %[[VAR34]]
+  // CHECK-NEXT: %[[VAR37:[A-Za-z0-9.]+]] = load float, float* %[[VAR35]]
+  // CHECK-NEXT: %[[VAR38:[A-Za-z0-9.]+]] = getelementptr inbounds { float, float }, { float, float }* %[[VAR30:[A-Za-z0-9.]+]], i32 0, i32 0
+  // CHECK-NEXT: %[[VAR39:[A-Za-z0-9.]+]] = getelementptr inbounds { float, float }, { float, float }* %[[VAR30]], i32 0, i32 1
   // CHECK-NEXT: store float %[[VAR36]], float* %[[VAR38]]
   // CHECK-NEXT: store float %[[VAR37]], float* %[[VAR39]]
 }
diff --git a/test/CodeGen/ppc64le-aggregates.c b/test/CodeGen/ppc64le-aggregates.c
index e193dcc..76798c1 100644
--- a/test/CodeGen/ppc64le-aggregates.c
+++ b/test/CodeGen/ppc64le-aggregates.c
@@ -54,49 +54,49 @@
 struct f2a2b func_f2a2b(struct f2a2b x) { return x; }
 
 // CHECK-LABEL: @call_f1
-// CHECK: %[[TMP:[^ ]+]] = load float* getelementptr inbounds (%struct.f1* @global_f1, i32 0, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load float, float* getelementptr inbounds (%struct.f1, %struct.f1* @global_f1, i32 0, i32 0, i32 0), align 1
 // CHECK: call [1 x float] @func_f1(float inreg %[[TMP]])
 struct f1 global_f1;
 void call_f1(void) { global_f1 = func_f1(global_f1); }
 
 // CHECK-LABEL: @call_f2
-// CHECK: %[[TMP:[^ ]+]] = load [2 x float]* getelementptr inbounds (%struct.f2* @global_f2, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [2 x float], [2 x float]* getelementptr inbounds (%struct.f2, %struct.f2* @global_f2, i32 0, i32 0), align 1
 // CHECK: call [2 x float] @func_f2([2 x float] %[[TMP]])
 struct f2 global_f2;
 void call_f2(void) { global_f2 = func_f2(global_f2); }
 
 // CHECK-LABEL: @call_f3
-// CHECK: %[[TMP:[^ ]+]] = load [3 x float]* getelementptr inbounds (%struct.f3* @global_f3, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [3 x float], [3 x float]* getelementptr inbounds (%struct.f3, %struct.f3* @global_f3, i32 0, i32 0), align 1
 // CHECK: call [3 x float] @func_f3([3 x float] %[[TMP]])
 struct f3 global_f3;
 void call_f3(void) { global_f3 = func_f3(global_f3); }
 
 // CHECK-LABEL: @call_f4
-// CHECK: %[[TMP:[^ ]+]] = load [4 x float]* getelementptr inbounds (%struct.f4* @global_f4, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [4 x float], [4 x float]* getelementptr inbounds (%struct.f4, %struct.f4* @global_f4, i32 0, i32 0), align 1
 // CHECK: call [4 x float] @func_f4([4 x float] %[[TMP]])
 struct f4 global_f4;
 void call_f4(void) { global_f4 = func_f4(global_f4); }
 
 // CHECK-LABEL: @call_f5
-// CHECK: %[[TMP:[^ ]+]] = load [5 x float]* getelementptr inbounds (%struct.f5* @global_f5, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [5 x float], [5 x float]* getelementptr inbounds (%struct.f5, %struct.f5* @global_f5, i32 0, i32 0), align 1
 // CHECK: call [5 x float] @func_f5([5 x float] %[[TMP]])
 struct f5 global_f5;
 void call_f5(void) { global_f5 = func_f5(global_f5); }
 
 // CHECK-LABEL: @call_f6
-// CHECK: %[[TMP:[^ ]+]] = load [6 x float]* getelementptr inbounds (%struct.f6* @global_f6, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [6 x float], [6 x float]* getelementptr inbounds (%struct.f6, %struct.f6* @global_f6, i32 0, i32 0), align 1
 // CHECK: call [6 x float] @func_f6([6 x float] %[[TMP]])
 struct f6 global_f6;
 void call_f6(void) { global_f6 = func_f6(global_f6); }
 
 // CHECK-LABEL: @call_f7
-// CHECK: %[[TMP:[^ ]+]] = load [7 x float]* getelementptr inbounds (%struct.f7* @global_f7, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [7 x float], [7 x float]* getelementptr inbounds (%struct.f7, %struct.f7* @global_f7, i32 0, i32 0), align 1
 // CHECK: call [7 x float] @func_f7([7 x float] %[[TMP]])
 struct f7 global_f7;
 void call_f7(void) { global_f7 = func_f7(global_f7); }
 
 // CHECK-LABEL: @call_f8
-// CHECK: %[[TMP:[^ ]+]] = load [8 x float]* getelementptr inbounds (%struct.f8* @global_f8, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [8 x float], [8 x float]* getelementptr inbounds (%struct.f8, %struct.f8* @global_f8, i32 0, i32 0), align 1
 // CHECK: call [8 x float] @func_f8([8 x float] %[[TMP]])
 struct f8 global_f8;
 void call_f8(void) { global_f8 = func_f8(global_f8); }
@@ -105,19 +105,19 @@
 // CHECK: %[[TMP1:[^ ]+]] = alloca [5 x i64]
 // CHECK: %[[TMP2:[^ ]+]] = bitcast [5 x i64]* %[[TMP1]] to i8*
 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[TMP2]], i8* bitcast (%struct.f9* @global_f9 to i8*), i64 36, i32 1, i1 false)
-// CHECK: %[[TMP3:[^ ]+]] = load [5 x i64]* %[[TMP1]]
+// CHECK: %[[TMP3:[^ ]+]] = load [5 x i64], [5 x i64]* %[[TMP1]]
 // CHECK: call void @func_f9(%struct.f9* sret %{{[^ ]+}}, [5 x i64] %[[TMP3]])
 struct f9 global_f9;
 void call_f9(void) { global_f9 = func_f9(global_f9); }
 
 // CHECK-LABEL: @call_fab
-// CHECK: %[[TMP:[^ ]+]] = load [2 x float]* bitcast (%struct.fab* @global_fab to [2 x float]*)
+// CHECK: %[[TMP:[^ ]+]] = load [2 x float], [2 x float]* bitcast (%struct.fab* @global_fab to [2 x float]*)
 // CHECK: call [2 x float] @func_fab([2 x float] %[[TMP]])
 struct fab global_fab;
 void call_fab(void) { global_fab = func_fab(global_fab); }
 
 // CHECK-LABEL: @call_fabc
-// CHECK: %[[TMP:[^ ]+]] = load [3 x float]* bitcast (%struct.fabc* @global_fabc to [3 x float]*)
+// CHECK: %[[TMP:[^ ]+]] = load [3 x float], [3 x float]* bitcast (%struct.fabc* @global_fabc to [3 x float]*)
 // CHECK: call [3 x float] @func_fabc([3 x float] %[[TMP]])
 struct fabc global_fabc;
 void call_fabc(void) { global_fabc = func_fabc(global_fabc); }
@@ -172,49 +172,49 @@
 struct vabc func_vabc(struct vabc x) { return x; }
 
 // CHECK-LABEL: @call_v1
-// CHECK: %[[TMP:[^ ]+]] = load <4 x i32>* getelementptr inbounds (%struct.v1* @global_v1, i32 0, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load <4 x i32>, <4 x i32>* getelementptr inbounds (%struct.v1, %struct.v1* @global_v1, i32 0, i32 0, i32 0), align 1
 // CHECK: call [1 x <4 x i32>] @func_v1(<4 x i32> inreg %[[TMP]])
 struct v1 global_v1;
 void call_v1(void) { global_v1 = func_v1(global_v1); }
 
 // CHECK-LABEL: @call_v2
-// CHECK: %[[TMP:[^ ]+]] = load [2 x <4 x i32>]* getelementptr inbounds (%struct.v2* @global_v2, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [2 x <4 x i32>], [2 x <4 x i32>]* getelementptr inbounds (%struct.v2, %struct.v2* @global_v2, i32 0, i32 0), align 1
 // CHECK: call [2 x <4 x i32>] @func_v2([2 x <4 x i32>] %[[TMP]])
 struct v2 global_v2;
 void call_v2(void) { global_v2 = func_v2(global_v2); }
 
 // CHECK-LABEL: @call_v3
-// CHECK: %[[TMP:[^ ]+]] = load [3 x <4 x i32>]* getelementptr inbounds (%struct.v3* @global_v3, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [3 x <4 x i32>], [3 x <4 x i32>]* getelementptr inbounds (%struct.v3, %struct.v3* @global_v3, i32 0, i32 0), align 1
 // CHECK: call [3 x <4 x i32>] @func_v3([3 x <4 x i32>] %[[TMP]])
 struct v3 global_v3;
 void call_v3(void) { global_v3 = func_v3(global_v3); }
 
 // CHECK-LABEL: @call_v4
-// CHECK: %[[TMP:[^ ]+]] = load [4 x <4 x i32>]* getelementptr inbounds (%struct.v4* @global_v4, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [4 x <4 x i32>], [4 x <4 x i32>]* getelementptr inbounds (%struct.v4, %struct.v4* @global_v4, i32 0, i32 0), align 1
 // CHECK: call [4 x <4 x i32>] @func_v4([4 x <4 x i32>] %[[TMP]])
 struct v4 global_v4;
 void call_v4(void) { global_v4 = func_v4(global_v4); }
 
 // CHECK-LABEL: @call_v5
-// CHECK: %[[TMP:[^ ]+]] = load [5 x <4 x i32>]* getelementptr inbounds (%struct.v5* @global_v5, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [5 x <4 x i32>], [5 x <4 x i32>]* getelementptr inbounds (%struct.v5, %struct.v5* @global_v5, i32 0, i32 0), align 1
 // CHECK: call [5 x <4 x i32>] @func_v5([5 x <4 x i32>] %[[TMP]])
 struct v5 global_v5;
 void call_v5(void) { global_v5 = func_v5(global_v5); }
 
 // CHECK-LABEL: @call_v6
-// CHECK: %[[TMP:[^ ]+]] = load [6 x <4 x i32>]* getelementptr inbounds (%struct.v6* @global_v6, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [6 x <4 x i32>], [6 x <4 x i32>]* getelementptr inbounds (%struct.v6, %struct.v6* @global_v6, i32 0, i32 0), align 1
 // CHECK: call [6 x <4 x i32>] @func_v6([6 x <4 x i32>] %[[TMP]])
 struct v6 global_v6;
 void call_v6(void) { global_v6 = func_v6(global_v6); }
 
 // CHECK-LABEL: @call_v7
-// CHECK: %[[TMP:[^ ]+]] = load [7 x <4 x i32>]* getelementptr inbounds (%struct.v7* @global_v7, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [7 x <4 x i32>], [7 x <4 x i32>]* getelementptr inbounds (%struct.v7, %struct.v7* @global_v7, i32 0, i32 0), align 1
 // CHECK: call [7 x <4 x i32>] @func_v7([7 x <4 x i32>] %[[TMP]])
 struct v7 global_v7;
 void call_v7(void) { global_v7 = func_v7(global_v7); }
 
 // CHECK-LABEL: @call_v8
-// CHECK: %[[TMP:[^ ]+]] = load [8 x <4 x i32>]* getelementptr inbounds (%struct.v8* @global_v8, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [8 x <4 x i32>], [8 x <4 x i32>]* getelementptr inbounds (%struct.v8, %struct.v8* @global_v8, i32 0, i32 0), align 1
 // CHECK: call [8 x <4 x i32>] @func_v8([8 x <4 x i32>] %[[TMP]])
 struct v8 global_v8;
 void call_v8(void) { global_v8 = func_v8(global_v8); }
@@ -225,13 +225,13 @@
 void call_v9(void) { global_v9 = func_v9(global_v9); }
 
 // CHECK-LABEL: @call_vab
-// CHECK: %[[TMP:[^ ]+]] = load [2 x <4 x i32>]* bitcast (%struct.vab* @global_vab to [2 x <4 x i32>]*)
+// CHECK: %[[TMP:[^ ]+]] = load [2 x <4 x i32>], [2 x <4 x i32>]* bitcast (%struct.vab* @global_vab to [2 x <4 x i32>]*)
 // CHECK: call [2 x <4 x i32>] @func_vab([2 x <4 x i32>] %[[TMP]])
 struct vab global_vab;
 void call_vab(void) { global_vab = func_vab(global_vab); }
 
 // CHECK-LABEL: @call_vabc
-// CHECK: %[[TMP:[^ ]+]] = load [3 x <4 x i32>]* bitcast (%struct.vabc* @global_vabc to [3 x <4 x i32>]*)
+// CHECK: %[[TMP:[^ ]+]] = load [3 x <4 x i32>], [3 x <4 x i32>]* bitcast (%struct.vabc* @global_vabc to [3 x <4 x i32>]*)
 // CHECK: call [3 x <4 x i32>] @func_vabc([3 x <4 x i32>] %[[TMP]])
 struct vabc global_vabc;
 void call_vabc(void) { global_vabc = func_vabc(global_vabc); }
@@ -289,49 +289,49 @@
 struct v3fabc func_v3fabc(struct v3fabc x) { return x; }
 
 // CHECK-LABEL: @call_v3f1
-// CHECK: %[[TMP:[^ ]+]] = load <3 x float>* getelementptr inbounds (%struct.v3f1* @global_v3f1, i32 0, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load <3 x float>, <3 x float>* getelementptr inbounds (%struct.v3f1, %struct.v3f1* @global_v3f1, i32 0, i32 0, i32 0), align 1
 // CHECK: call [1 x <3 x float>] @func_v3f1(<3 x float> inreg %[[TMP]])
 struct v3f1 global_v3f1;
 void call_v3f1(void) { global_v3f1 = func_v3f1(global_v3f1); }
 
 // CHECK-LABEL: @call_v3f2
-// CHECK: %[[TMP:[^ ]+]] = load [2 x <3 x float>]* getelementptr inbounds (%struct.v3f2* @global_v3f2, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [2 x <3 x float>], [2 x <3 x float>]* getelementptr inbounds (%struct.v3f2, %struct.v3f2* @global_v3f2, i32 0, i32 0), align 1
 // CHECK: call [2 x <3 x float>] @func_v3f2([2 x <3 x float>] %[[TMP]])
 struct v3f2 global_v3f2;
 void call_v3f2(void) { global_v3f2 = func_v3f2(global_v3f2); }
 
 // CHECK-LABEL: @call_v3f3
-// CHECK: %[[TMP:[^ ]+]] = load [3 x <3 x float>]* getelementptr inbounds (%struct.v3f3* @global_v3f3, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [3 x <3 x float>], [3 x <3 x float>]* getelementptr inbounds (%struct.v3f3, %struct.v3f3* @global_v3f3, i32 0, i32 0), align 1
 // CHECK: call [3 x <3 x float>] @func_v3f3([3 x <3 x float>] %[[TMP]])
 struct v3f3 global_v3f3;
 void call_v3f3(void) { global_v3f3 = func_v3f3(global_v3f3); }
 
 // CHECK-LABEL: @call_v3f4
-// CHECK: %[[TMP:[^ ]+]] = load [4 x <3 x float>]* getelementptr inbounds (%struct.v3f4* @global_v3f4, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [4 x <3 x float>], [4 x <3 x float>]* getelementptr inbounds (%struct.v3f4, %struct.v3f4* @global_v3f4, i32 0, i32 0), align 1
 // CHECK: call [4 x <3 x float>] @func_v3f4([4 x <3 x float>] %[[TMP]])
 struct v3f4 global_v3f4;
 void call_v3f4(void) { global_v3f4 = func_v3f4(global_v3f4); }
 
 // CHECK-LABEL: @call_v3f5
-// CHECK: %[[TMP:[^ ]+]] = load [5 x <3 x float>]* getelementptr inbounds (%struct.v3f5* @global_v3f5, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [5 x <3 x float>], [5 x <3 x float>]* getelementptr inbounds (%struct.v3f5, %struct.v3f5* @global_v3f5, i32 0, i32 0), align 1
 // CHECK: call [5 x <3 x float>] @func_v3f5([5 x <3 x float>] %[[TMP]])
 struct v3f5 global_v3f5;
 void call_v3f5(void) { global_v3f5 = func_v3f5(global_v3f5); }
 
 // CHECK-LABEL: @call_v3f6
-// CHECK: %[[TMP:[^ ]+]] = load [6 x <3 x float>]* getelementptr inbounds (%struct.v3f6* @global_v3f6, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [6 x <3 x float>], [6 x <3 x float>]* getelementptr inbounds (%struct.v3f6, %struct.v3f6* @global_v3f6, i32 0, i32 0), align 1
 // CHECK: call [6 x <3 x float>] @func_v3f6([6 x <3 x float>] %[[TMP]])
 struct v3f6 global_v3f6;
 void call_v3f6(void) { global_v3f6 = func_v3f6(global_v3f6); }
 
 // CHECK-LABEL: @call_v3f7
-// CHECK: %[[TMP:[^ ]+]] = load [7 x <3 x float>]* getelementptr inbounds (%struct.v3f7* @global_v3f7, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [7 x <3 x float>], [7 x <3 x float>]* getelementptr inbounds (%struct.v3f7, %struct.v3f7* @global_v3f7, i32 0, i32 0), align 1
 // CHECK: call [7 x <3 x float>] @func_v3f7([7 x <3 x float>] %[[TMP]])
 struct v3f7 global_v3f7;
 void call_v3f7(void) { global_v3f7 = func_v3f7(global_v3f7); }
 
 // CHECK-LABEL: @call_v3f8
-// CHECK: %[[TMP:[^ ]+]] = load [8 x <3 x float>]* getelementptr inbounds (%struct.v3f8* @global_v3f8, i32 0, i32 0), align 1
+// CHECK: %[[TMP:[^ ]+]] = load [8 x <3 x float>], [8 x <3 x float>]* getelementptr inbounds (%struct.v3f8, %struct.v3f8* @global_v3f8, i32 0, i32 0), align 1
 // CHECK: call [8 x <3 x float>] @func_v3f8([8 x <3 x float>] %[[TMP]])
 struct v3f8 global_v3f8;
 void call_v3f8(void) { global_v3f8 = func_v3f8(global_v3f8); }
@@ -342,13 +342,13 @@
 void call_v3f9(void) { global_v3f9 = func_v3f9(global_v3f9); }
 
 // CHECK-LABEL: @call_v3fab
-// CHECK: %[[TMP:[^ ]+]] = load [2 x <3 x float>]* bitcast (%struct.v3fab* @global_v3fab to [2 x <3 x float>]*)
+// CHECK: %[[TMP:[^ ]+]] = load [2 x <3 x float>], [2 x <3 x float>]* bitcast (%struct.v3fab* @global_v3fab to [2 x <3 x float>]*)
 // CHECK: call [2 x <3 x float>] @func_v3fab([2 x <3 x float>] %[[TMP]])
 struct v3fab global_v3fab;
 void call_v3fab(void) { global_v3fab = func_v3fab(global_v3fab); }
 
 // CHECK-LABEL: @call_v3fabc
-// CHECK: %[[TMP:[^ ]+]] = load [3 x <3 x float>]* bitcast (%struct.v3fabc* @global_v3fabc to [3 x <3 x float>]*)
+// CHECK: %[[TMP:[^ ]+]] = load [3 x <3 x float>], [3 x <3 x float>]* bitcast (%struct.v3fabc* @global_v3fabc to [3 x <3 x float>]*)
 // CHECK: call [3 x <3 x float>] @func_v3fabc([3 x <3 x float>] %[[TMP]])
 struct v3fabc global_v3fabc;
 void call_v3fabc(void) { global_v3fabc = func_v3fabc(global_v3fabc); }
diff --git a/test/CodeGen/ppc64le-varargs-complex.c b/test/CodeGen/ppc64le-varargs-complex.c
index b89f462..68dfa0b 100644
--- a/test/CodeGen/ppc64le-varargs-complex.c
+++ b/test/CodeGen/ppc64le-varargs-complex.c
@@ -8,62 +8,62 @@
   va_list ap;
 
   _Complex int i   = va_arg(ap, _Complex int);
-  // CHECK: %[[VAR40:[A-Za-z0-9.]+]] = load i8** %[[VAR100:[A-Za-z0-9.]+]]
-  // CHECK-NEXT: %[[VAR41:[A-Za-z0-9.]+]] = getelementptr i8* %[[VAR40]], i64 16
+  // CHECK: %[[VAR40:[A-Za-z0-9.]+]] = load i8*, i8** %[[VAR100:[A-Za-z0-9.]+]]
+  // CHECK-NEXT: %[[VAR41:[A-Za-z0-9.]+]] = getelementptr i8, i8* %[[VAR40]], i64 16
   // CHECK-NEXT: store i8* %[[VAR41]], i8** %[[VAR100]]
   // CHECK-NEXT: %[[VAR1:[A-Za-z0-9.]+]] = ptrtoint i8* %[[VAR40]] to i64
   // CHECK-NEXT: %[[VAR3:[A-Za-z0-9.]+]] = add i64 %[[VAR1]], 8
   // CHECK-NEXT: %[[VAR4:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR1]] to i32*
   // CHECK-NEXT: %[[VAR5:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR3]] to i32*
-  // CHECK-NEXT: %[[VAR6:[A-Za-z0-9.]+]] = load i32* %[[VAR4]]
-  // CHECK-NEXT: %[[VAR7:[A-Za-z0-9.]+]] = load i32* %[[VAR5]]
-  // CHECK-NEXT: %[[VAR8:[A-Za-z0-9.]+]] = getelementptr inbounds { i32, i32 }* %[[VAR0:[A-Za-z0-9.]+]], i32 0, i32 0
-  // CHECK-NEXT: %[[VAR9:[A-Za-z0-9.]+]] = getelementptr inbounds { i32, i32 }* %[[VAR0]], i32 0, i32 1
+  // CHECK-NEXT: %[[VAR6:[A-Za-z0-9.]+]] = load i32, i32* %[[VAR4]]
+  // CHECK-NEXT: %[[VAR7:[A-Za-z0-9.]+]] = load i32, i32* %[[VAR5]]
+  // CHECK-NEXT: %[[VAR8:[A-Za-z0-9.]+]] = getelementptr inbounds { i32, i32 }, { i32, i32 }* %[[VAR0:[A-Za-z0-9.]+]], i32 0, i32 0
+  // CHECK-NEXT: %[[VAR9:[A-Za-z0-9.]+]] = getelementptr inbounds { i32, i32 }, { i32, i32 }* %[[VAR0]], i32 0, i32 1
   // CHECK-NEXT: store i32 %[[VAR6]], i32* %[[VAR8]]
   // CHECK-NEXT: store i32 %[[VAR7]], i32* %[[VAR9]]
 
   _Complex short s = va_arg(ap, _Complex short);
-  // CHECK: %[[VAR50:[A-Za-z0-9.]+]] = load i8** %[[VAR100:[A-Za-z0-9.]+]]
-  // CHECK-NEXT: %[[VAR51:[A-Za-z0-9.]+]] = getelementptr i8* %[[VAR50]], i64 16
+  // CHECK: %[[VAR50:[A-Za-z0-9.]+]] = load i8*, i8** %[[VAR100:[A-Za-z0-9.]+]]
+  // CHECK-NEXT: %[[VAR51:[A-Za-z0-9.]+]] = getelementptr i8, i8* %[[VAR50]], i64 16
   // CHECK-NEXT: store i8* %[[VAR51]], i8** %[[VAR100]]
   // CHECK: %[[VAR11:[A-Za-z0-9.]+]] = ptrtoint i8* %{{[A-Za-z0-9.]+}} to i64
   // CHECK-NEXT: %[[VAR13:[A-Za-z0-9.]+]] = add i64 %[[VAR11]], 8
   // CHECK-NEXT: %[[VAR14:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR11]] to i16*
   // CHECK-NEXT: %[[VAR15:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR13]] to i16*
-  // CHECK-NEXT: %[[VAR16:[A-Za-z0-9.]+]] = load i16* %[[VAR14]]
-  // CHECK-NEXT: %[[VAR17:[A-Za-z0-9.]+]] = load i16* %[[VAR15]]
-  // CHECK-NEXT: %[[VAR18:[A-Za-z0-9.]+]] = getelementptr inbounds { i16, i16 }* %[[VAR10:[A-Za-z0-9.]+]], i32 0, i32 0
-  // CHECK-NEXT: %[[VAR19:[A-Za-z0-9.]+]] = getelementptr inbounds { i16, i16 }* %[[VAR10]], i32 0, i32 1
+  // CHECK-NEXT: %[[VAR16:[A-Za-z0-9.]+]] = load i16, i16* %[[VAR14]]
+  // CHECK-NEXT: %[[VAR17:[A-Za-z0-9.]+]] = load i16, i16* %[[VAR15]]
+  // CHECK-NEXT: %[[VAR18:[A-Za-z0-9.]+]] = getelementptr inbounds { i16, i16 }, { i16, i16 }* %[[VAR10:[A-Za-z0-9.]+]], i32 0, i32 0
+  // CHECK-NEXT: %[[VAR19:[A-Za-z0-9.]+]] = getelementptr inbounds { i16, i16 }, { i16, i16 }* %[[VAR10]], i32 0, i32 1
   // CHECK-NEXT: store i16 %[[VAR16]], i16* %[[VAR18]]
   // CHECK-NEXT: store i16 %[[VAR17]], i16* %[[VAR19]]
 
   _Complex char c  = va_arg(ap, _Complex char);
-  // CHECK: %[[VAR60:[A-Za-z0-9.]+]] = load i8** %[[VAR100:[A-Za-z0-9.]+]]
-  // CHECK-NEXT: %[[VAR61:[A-Za-z0-9.]+]] = getelementptr i8* %[[VAR60]], i64 16
+  // CHECK: %[[VAR60:[A-Za-z0-9.]+]] = load i8*, i8** %[[VAR100:[A-Za-z0-9.]+]]
+  // CHECK-NEXT: %[[VAR61:[A-Za-z0-9.]+]] = getelementptr i8, i8* %[[VAR60]], i64 16
   // CHECK-NEXT: store i8* %[[VAR61]], i8** %[[VAR100]]
   // CHECK: %[[VAR21:[A-Za-z0-9.]+]] = ptrtoint i8* %{{[A-Za-z0-9.]+}} to i64
   // CHECK-NEXT: %[[VAR23:[A-Za-z0-9.]+]] = add i64 %[[VAR21]], 8
   // CHECK-NEXT: %[[VAR24:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR21]] to i8*
   // CHECK-NEXT: %[[VAR25:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR23]] to i8*
-  // CHECK-NEXT: %[[VAR26:[A-Za-z0-9.]+]] = load i8* %[[VAR24]]
-  // CHECK-NEXT: %[[VAR27:[A-Za-z0-9.]+]] = load i8* %[[VAR25]]
-  // CHECK-NEXT: %[[VAR28:[A-Za-z0-9.]+]] = getelementptr inbounds { i8, i8 }* %[[VAR20:[A-Za-z0-9.]+]], i32 0, i32 0
-  // CHECK-NEXT: %[[VAR29:[A-Za-z0-9.]+]] = getelementptr inbounds { i8, i8 }* %[[VAR20]], i32 0, i32 1
+  // CHECK-NEXT: %[[VAR26:[A-Za-z0-9.]+]] = load i8, i8* %[[VAR24]]
+  // CHECK-NEXT: %[[VAR27:[A-Za-z0-9.]+]] = load i8, i8* %[[VAR25]]
+  // CHECK-NEXT: %[[VAR28:[A-Za-z0-9.]+]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %[[VAR20:[A-Za-z0-9.]+]], i32 0, i32 0
+  // CHECK-NEXT: %[[VAR29:[A-Za-z0-9.]+]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %[[VAR20]], i32 0, i32 1
   // CHECK-NEXT: store i8 %[[VAR26]], i8* %[[VAR28]]
   // CHECK-NEXT: store i8 %[[VAR27]], i8* %[[VAR29]]
 
   _Complex float f = va_arg(ap, _Complex float);
-  // CHECK: %[[VAR70:[A-Za-z0-9.]+]] = load i8** %[[VAR100:[A-Za-z0-9.]+]]
-  // CHECK-NEXT: %[[VAR71:[A-Za-z0-9.]+]] = getelementptr i8* %[[VAR70]], i64 16
+  // CHECK: %[[VAR70:[A-Za-z0-9.]+]] = load i8*, i8** %[[VAR100:[A-Za-z0-9.]+]]
+  // CHECK-NEXT: %[[VAR71:[A-Za-z0-9.]+]] = getelementptr i8, i8* %[[VAR70]], i64 16
   // CHECK-NEXT: store i8* %[[VAR71]], i8** %[[VAR100]]
   // CHECK: %[[VAR31:[A-Za-z0-9.]+]] = ptrtoint i8* %{{[A-Za-z0-9.]+}} to i64
   // CHECK-NEXT: %[[VAR33:[A-Za-z0-9.]+]] = add i64 %[[VAR31]], 8
   // CHECK-NEXT: %[[VAR34:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR31]] to float*
   // CHECK-NEXT: %[[VAR35:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR33]] to float*
-  // CHECK-NEXT: %[[VAR36:[A-Za-z0-9.]+]] = load float* %[[VAR34]]
-  // CHECK-NEXT: %[[VAR37:[A-Za-z0-9.]+]] = load float* %[[VAR35]]
-  // CHECK-NEXT: %[[VAR38:[A-Za-z0-9.]+]] = getelementptr inbounds { float, float }* %[[VAR30:[A-Za-z0-9.]+]], i32 0, i32 0
-  // CHECK-NEXT: %[[VAR39:[A-Za-z0-9.]+]] = getelementptr inbounds { float, float }* %[[VAR30]], i32 0, i32 1
+  // CHECK-NEXT: %[[VAR36:[A-Za-z0-9.]+]] = load float, float* %[[VAR34]]
+  // CHECK-NEXT: %[[VAR37:[A-Za-z0-9.]+]] = load float, float* %[[VAR35]]
+  // CHECK-NEXT: %[[VAR38:[A-Za-z0-9.]+]] = getelementptr inbounds { float, float }, { float, float }* %[[VAR30:[A-Za-z0-9.]+]], i32 0, i32 0
+  // CHECK-NEXT: %[[VAR39:[A-Za-z0-9.]+]] = getelementptr inbounds { float, float }, { float, float }* %[[VAR30]], i32 0, i32 1
   // CHECK-NEXT: store float %[[VAR36]], float* %[[VAR38]]
   // CHECK-NEXT: store float %[[VAR37]], float* %[[VAR39]]
 }
diff --git a/test/CodeGen/pr12251.c b/test/CodeGen/pr12251.c
index ea74cc6..dd5e4a1 100644
--- a/test/CodeGen/pr12251.c
+++ b/test/CodeGen/pr12251.c
@@ -6,6 +6,6 @@
 }
 
 // CHECK-LABEL: define i32 @g1
-// CHECK: load i32* %x, align 4
+// CHECK: load i32, i32* %x, align 4
 // CHECK-NOT: range
 // CHECK: ret
diff --git a/test/CodeGen/pr4349.c b/test/CodeGen/pr4349.c
index 0169958..e39dc2c 100644
--- a/test/CodeGen/pr4349.c
+++ b/test/CodeGen/pr4349.c
@@ -21,17 +21,17 @@
 {
     { &((cpu.pc).w[0]) }
 };
-// CHECK: @svars2 = global [1 x %struct.svar] [%struct.svar { i8* getelementptr (i8* bitcast (%struct.cpu* @cpu to i8*), i64 1) }]
+// CHECK: @svars2 = global [1 x %struct.svar] [%struct.svar { i8* getelementptr (i8, i8* bitcast (%struct.cpu* @cpu to i8*), i64 1) }]
 struct svar svars2[] =
 {
     { &((cpu.pc).b[0][1]) }
 };
-// CHECK: @svars3 = global [1 x %struct.svar] [%struct.svar { i8* getelementptr (i8* bitcast (%struct.cpu* @cpu to i8*), i64 2) }]
+// CHECK: @svars3 = global [1 x %struct.svar] [%struct.svar { i8* getelementptr (i8, i8* bitcast (%struct.cpu* @cpu to i8*), i64 2) }]
 struct svar svars3[] =
 {
     { &((cpu.pc).w[1]) }
 };
-// CHECK: @svars4 = global [1 x %struct.svar] [%struct.svar { i8* getelementptr (i8* bitcast (%struct.cpu* @cpu to i8*), i64 3) }]
+// CHECK: @svars4 = global [1 x %struct.svar] [%struct.svar { i8* getelementptr (i8, i8* bitcast (%struct.cpu* @cpu to i8*), i64 3) }]
 struct svar svars4[] =
 {
     { &((cpu.pc).b[1][1]) }
diff --git a/test/CodeGen/pragma-comment.c b/test/CodeGen/pragma-comment.c
index 0a7d3c7..fbae9d5 100644
--- a/test/CodeGen/pragma-comment.c
+++ b/test/CodeGen/pragma-comment.c
@@ -30,4 +30,3 @@
 // PS4: !{!"\01msvcrt.lib"}
 // PS4: !{!"\01kernel32"}
 // PS4: !{!"\01USER32.LIB"}
-// PS4: !{!" /bar=2"}
diff --git a/test/CodeGen/redefine_extname.c b/test/CodeGen/redefine_extname.c
index e73a3ad..a91e5b8 100644
--- a/test/CodeGen/redefine_extname.c
+++ b/test/CodeGen/redefine_extname.c
@@ -12,4 +12,4 @@
 // Check that the call to fake() is emitted as a call to real()
 // CHECK:   call i32 @real()
 // Check that this also works with variables names
-// CHECK:   load i32* @alias
+// CHECK:   load i32, i32* @alias
diff --git a/test/CodeGen/sparcv9-abi.c b/test/CodeGen/sparcv9-abi.c
index d4fff81..bf44719 100644
--- a/test/CodeGen/sparcv9-abi.c
+++ b/test/CodeGen/sparcv9-abi.c
@@ -131,29 +131,29 @@
   va_start(ap, f);
   while ((c = *f++)) switch (c) {
 
-// CHECK: %[[CUR:[^ ]+]] = load i8** %ap
-// CHECK-DAG: %[[NXT:[^ ]+]] = getelementptr i8* %[[CUR]], i32 8
+// CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
+// CHECK-DAG: %[[NXT:[^ ]+]] = getelementptr i8, i8* %[[CUR]], i32 8
 // CHECK-DAG: store i8* %[[NXT]], i8** %ap
-// CHECK-DAG: %[[EXT:[^ ]+]] = getelementptr i8* %[[CUR]], i32 4
+// CHECK-DAG: %[[EXT:[^ ]+]] = getelementptr i8, i8* %[[CUR]], i32 4
 // CHECK-DAG: %[[ADR:[^ ]+]] = bitcast i8* %[[EXT]] to i32*
-// CHECK-DAG: load i32* %[[ADR]]
+// CHECK-DAG: load i32, i32* %[[ADR]]
 // CHECK: br
   case 'i':
     s += va_arg(ap, int);
     break;
 
-// CHECK: %[[CUR:[^ ]+]] = load i8** %ap
-// CHECK-DAG: %[[NXT:[^ ]+]] = getelementptr i8* %[[CUR]], i32 8
+// CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
+// CHECK-DAG: %[[NXT:[^ ]+]] = getelementptr i8, i8* %[[CUR]], i32 8
 // CHECK-DAG: store i8* %[[NXT]], i8** %ap
 // CHECK-DAG: %[[ADR:[^ ]+]] = bitcast i8* %[[CUR]] to i64*
-// CHECK-DAG: load i64* %[[ADR]]
+// CHECK-DAG: load i64, i64* %[[ADR]]
 // CHECK: br
   case 'l':
     s += va_arg(ap, long);
     break;
 
-// CHECK: %[[CUR:[^ ]+]] = load i8** %ap
-// CHECK-DAG: %[[NXT:[^ ]+]] = getelementptr i8* %[[CUR]], i32 8
+// CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
+// CHECK-DAG: %[[NXT:[^ ]+]] = getelementptr i8, i8* %[[CUR]], i32 8
 // CHECK-DAG: store i8* %[[NXT]], i8** %ap
 // CHECK-DAG: %[[ADR:[^ ]+]] = bitcast i8* %[[CUR]] to %struct.tiny*
 // CHECK: br
@@ -161,8 +161,8 @@
     s += va_arg(ap, struct tiny).a;
     break;
 
-// CHECK: %[[CUR:[^ ]+]] = load i8** %ap
-// CHECK-DAG: %[[NXT:[^ ]+]] = getelementptr i8* %[[CUR]], i32 16
+// CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
+// CHECK-DAG: %[[NXT:[^ ]+]] = getelementptr i8, i8* %[[CUR]], i32 16
 // CHECK-DAG: store i8* %[[NXT]], i8** %ap
 // CHECK-DAG: %[[ADR:[^ ]+]] = bitcast i8* %[[CUR]] to %struct.small*
 // CHECK: br
@@ -170,11 +170,11 @@
     s += *va_arg(ap, struct small).a;
     break;
 
-// CHECK: %[[CUR:[^ ]+]] = load i8** %ap
-// CHECK-DAG: %[[NXT:[^ ]+]] = getelementptr i8* %[[CUR]], i32 8
+// CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
+// CHECK-DAG: %[[NXT:[^ ]+]] = getelementptr i8, i8* %[[CUR]], i32 8
 // CHECK-DAG: store i8* %[[NXT]], i8** %ap
 // CHECK-DAG: %[[IND:[^ ]+]] = bitcast i8* %[[CUR]] to %struct.medium**
-// CHECK-DAG: %[[ADR:[^ ]+]] = load %struct.medium** %[[IND]]
+// CHECK-DAG: %[[ADR:[^ ]+]] = load %struct.medium*, %struct.medium** %[[IND]]
 // CHECK: br
   case 'm':
     s += *va_arg(ap, struct medium).a;
diff --git a/test/CodeGen/sparcv9-dwarf.c b/test/CodeGen/sparcv9-dwarf.c
index 11ac28c..c75b09f 100644
--- a/test/CodeGen/sparcv9-dwarf.c
+++ b/test/CodeGen/sparcv9-dwarf.c
@@ -8,92 +8,92 @@
 }
 
 // CHECK-LABEL: define signext i32 @test()
-// CHECK:       store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 0)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 1)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 2)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 3)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 4)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 5)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 6)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 7)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 8)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 9)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 10)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 11)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 12)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 13)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 14)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 15)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 16)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 17)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 18)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 19)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 20)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 21)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 22)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 23)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 24)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 25)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 26)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 27)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 28)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 29)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 30)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 31)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 32)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 33)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 34)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 35)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 36)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 37)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 38)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 39)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 40)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 41)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 42)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 43)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 44)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 45)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 46)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 47)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 48)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 49)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 50)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 51)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 52)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 53)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 54)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 55)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 56)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 57)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 58)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 59)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 60)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 61)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 62)
-// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 63)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 64)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 65)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 66)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 67)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 68)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 69)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 70)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 71)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 72)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 73)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 74)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 75)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 76)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 77)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 78)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 79)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 80)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 81)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 82)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 83)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 84)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 85)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 86)
-// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8]* @dwarf_reg_size_table, i32 0, i32 87)
+// CHECK:       store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 0)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 1)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 2)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 3)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 4)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 5)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 6)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 7)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 8)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 9)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 10)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 11)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 12)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 13)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 14)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 15)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 16)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 17)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 18)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 19)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 20)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 21)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 22)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 23)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 24)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 25)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 26)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 27)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 28)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 29)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 30)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 31)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 32)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 33)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 34)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 35)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 36)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 37)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 38)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 39)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 40)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 41)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 42)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 43)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 44)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 45)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 46)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 47)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 48)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 49)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 50)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 51)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 52)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 53)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 54)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 55)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 56)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 57)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 58)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 59)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 60)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 61)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 62)
+// CHECK-NEXT:  store i8 4, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 63)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 64)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 65)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 66)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 67)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 68)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 69)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 70)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 71)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 72)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 73)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 74)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 75)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 76)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 77)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 78)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 79)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 80)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 81)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 82)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 83)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 84)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 85)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 86)
+// CHECK-NEXT:  store i8 8, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @dwarf_reg_size_table, i32 0, i32 87)
 // CHECK-NEXT:  ret i32 14
diff --git a/test/CodeGen/sse-builtins.c b/test/CodeGen/sse-builtins.c
index 3feca74..6d66cca 100644
--- a/test/CodeGen/sse-builtins.c
+++ b/test/CodeGen/sse-builtins.c
@@ -36,7 +36,7 @@
 
 __m128 test_loadl_pi(__m128 x, void* y) {
   // CHECK: define {{.*}} @test_loadl_pi
-  // CHECK: load <2 x float>* {{.*}}, align 1{{$}}
+  // CHECK: load <2 x float>, <2 x float>* {{.*}}, align 1{{$}}
   // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1
   // CHECK: shufflevector {{.*}} <4 x i32> <i32 4, i32 5, i32 2, i32 3>
   return _mm_loadl_pi(x,y);
@@ -44,7 +44,7 @@
 
 __m128 test_loadh_pi(__m128 x, void* y) {
   // CHECK: define {{.*}} @test_loadh_pi
-  // CHECK: load <2 x float>* {{.*}}, align 1{{$}}
+  // CHECK: load <2 x float>, <2 x float>* {{.*}}, align 1{{$}}
   // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1
   // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   return _mm_loadh_pi(x,y);
@@ -52,13 +52,13 @@
 
 __m128 test_load_ss(void* y) {
   // CHECK: define {{.*}} @test_load_ss
-  // CHECK: load float* {{.*}}, align 1{{$}}
+  // CHECK: load float, float* {{.*}}, align 1{{$}}
   return _mm_load_ss(y);
 }
 
 __m128 test_load1_ps(void* y) {
   // CHECK: define {{.*}} @test_load1_ps
-  // CHECK: load float* {{.*}}, align 1{{$}}
+  // CHECK: load float, float* {{.*}}, align 1{{$}}
   return _mm_load1_ps(y);
 }
 
@@ -70,31 +70,31 @@
 
 __m128d test_load1_pd(__m128 x, void* y) {
   // CHECK: define {{.*}} @test_load1_pd
-  // CHECK: load double* {{.*}}, align 1{{$}}
+  // CHECK: load double, double* {{.*}}, align 1{{$}}
   return _mm_load1_pd(y);
 }
 
 __m128d test_loadr_pd(__m128 x, void* y) {
   // CHECK: define {{.*}} @test_loadr_pd
-  // CHECK: load <2 x double>* {{.*}}, align 16{{$}}
+  // CHECK: load <2 x double>, <2 x double>* {{.*}}, align 16{{$}}
   return _mm_loadr_pd(y);
 }
 
 __m128d test_load_sd(void* y) {
   // CHECK: define {{.*}} @test_load_sd
-  // CHECK: load double* {{.*}}, align 1{{$}}
+  // CHECK: load double, double* {{.*}}, align 1{{$}}
   return _mm_load_sd(y);
 }
 
 __m128d test_loadh_pd(__m128d x, void* y) {
   // CHECK: define {{.*}} @test_loadh_pd
-  // CHECK: load double* {{.*}}, align 1{{$}}
+  // CHECK: load double, double* {{.*}}, align 1{{$}}
   return _mm_loadh_pd(x, y);
 }
 
 __m128d test_loadl_pd(__m128d x, void* y) {
   // CHECK: define {{.*}} @test_loadl_pd
-  // CHECK: load double* {{.*}}, align 1{{$}}
+  // CHECK: load double, double* {{.*}}, align 1{{$}}
   return _mm_loadl_pd(x, y);
 }
 
@@ -131,7 +131,7 @@
 
 __m128i test_loadl_epi64(void* y) {
   // CHECK: define {{.*}} @test_loadl_epi64
-  // CHECK: load i64* {{.*}}, align 1{{$}}
+  // CHECK: load i64, i64* {{.*}}, align 1{{$}}
   return _mm_loadl_epi64(y);
 }
 
diff --git a/test/CodeGen/systemz-inline-asm.c b/test/CodeGen/systemz-inline-asm.c
index c937233..92ed4bb 100644
--- a/test/CodeGen/systemz-inline-asm.c
+++ b/test/CodeGen/systemz-inline-asm.c
@@ -124,8 +124,8 @@
   asm("axbr %0, %2" : "=f" (f) : "0" (f), "f" (g));
   return f;
 // CHECK: define void @test_f128(fp128* noalias nocapture sret [[DEST:%.*]], fp128* nocapture readonly, fp128* nocapture readonly)
-// CHECK: %f = load fp128* %0
-// CHECK: %g = load fp128* %1
+// CHECK: %f = load fp128, fp128* %0
+// CHECK: %g = load fp128, fp128* %1
 // CHECK: [[RESULT:%.*]] = tail call fp128 asm "axbr $0, $2", "=f,0,f"(fp128 %f, fp128 %g)
 // CHECK: store fp128 [[RESULT]], fp128* [[DEST]]
 }
diff --git a/test/CodeGen/target-data.c b/test/CodeGen/target-data.c
index 2f1c971..4cb7176 100644
--- a/test/CodeGen/target-data.c
+++ b/test/CodeGen/target-data.c
@@ -8,11 +8,11 @@
 
 // RUN: %clang_cc1 -triple i686-unknown-win32 -emit-llvm -o - %s | \
 // RUN:     FileCheck --check-prefix=I686-WIN32 %s
-// I686-WIN32: target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
+// I686-WIN32: target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-S32"
 
 // RUN: %clang_cc1 -triple i686-unknown-cygwin -emit-llvm -o - %s | \
 // RUN:     FileCheck --check-prefix=I686-CYGWIN %s
-// I686-CYGWIN: target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
+// I686-CYGWIN: target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-S32"
 
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | \
 // RUN:     FileCheck --check-prefix=X86_64 %s
diff --git a/test/CodeGen/tbaa.cpp b/test/CodeGen/tbaa.cpp
index 4a723f1..2bff5d0 100644
--- a/test/CodeGen/tbaa.cpp
+++ b/test/CodeGen/tbaa.cpp
@@ -203,9 +203,9 @@
 char g13(struct five *a, struct five *b) {
   return a->b;
 // CHECK: define signext i8 @{{.*}}(
-// CHECK: load i8* %{{.*}}, align 1, !tbaa [[TAG_char:!.*]]
+// CHECK: load i8, i8* %{{.*}}, align 1, !tbaa [[TAG_char:!.*]]
 // PATH: define signext i8 @{{.*}}(
-// PATH: load i8* %{{.*}}, align 1, !tbaa [[TAG_five_b:!.*]]
+// PATH: load i8, i8* %{{.*}}, align 1, !tbaa [[TAG_five_b:!.*]]
 }
 
 struct six {
@@ -216,9 +216,9 @@
 };
 char g14(struct six *a, struct six *b) {
 // CHECK: define signext i8 @{{.*}}(
-// CHECK: load i8* %{{.*}}, align 1, !tbaa [[TAG_char]]
+// CHECK: load i8, i8* %{{.*}}, align 1, !tbaa [[TAG_char]]
 // PATH: define signext i8 @{{.*}}(
-// PATH: load i8* %{{.*}}, align 1, !tbaa [[TAG_six_b:!.*]]
+// PATH: load i8, i8* %{{.*}}, align 1, !tbaa [[TAG_six_b:!.*]]
   return a->b;
 }
 
diff --git a/test/CodeGen/trapv.c b/test/CodeGen/trapv.c
index 5103410..0484240 100644
--- a/test/CodeGen/trapv.c
+++ b/test/CodeGen/trapv.c
@@ -6,14 +6,14 @@
 // CHECK-LABEL: define void @test0()
 void test0() {
   // -ftrapv doesn't affect unsigned arithmetic.
-  // CHECK:      [[T1:%.*]] = load i32* @uj
-  // CHECK-NEXT: [[T2:%.*]] = load i32* @uk
+  // CHECK:      [[T1:%.*]] = load i32, i32* @uj
+  // CHECK-NEXT: [[T2:%.*]] = load i32, i32* @uk
   // CHECK-NEXT: [[T3:%.*]] = add i32 [[T1]], [[T2]]
   // CHECK-NEXT: store i32 [[T3]], i32* @ui
   ui = uj + uk;
 
-  // CHECK:      [[T1:%.*]] = load i32* @j
-  // CHECK-NEXT: [[T2:%.*]] = load i32* @k
+  // CHECK:      [[T1:%.*]] = load i32, i32* @j
+  // CHECK-NEXT: [[T2:%.*]] = load i32, i32* @k
   // CHECK-NEXT: [[T3:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[T1]], i32 [[T2]])
   // CHECK-NEXT: [[T4:%.*]] = extractvalue { i32, i1 } [[T3]], 0
   // CHECK-NEXT: [[T5:%.*]] = extractvalue { i32, i1 } [[T3]], 1
@@ -28,7 +28,7 @@
   extern void opaque(int);
   opaque(i++);
 
-  // CHECK:      [[T1:%.*]] = load i32* @i
+  // CHECK:      [[T1:%.*]] = load i32, i32* @i
   // CHECK-NEXT: [[T2:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[T1]], i32 1)
   // CHECK-NEXT: [[T3:%.*]] = extractvalue { i32, i1 } [[T2]], 0
   // CHECK-NEXT: [[T4:%.*]] = extractvalue { i32, i1 } [[T2]], 1
@@ -42,7 +42,7 @@
   extern void opaque(int);
   opaque(++i);
 
-  // CHECK:      [[T1:%.*]] = load i32* @i
+  // CHECK:      [[T1:%.*]] = load i32, i32* @i
   // CHECK-NEXT: [[T2:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[T1]], i32 1)
   // CHECK-NEXT: [[T3:%.*]] = extractvalue { i32, i1 } [[T2]], 0
   // CHECK-NEXT: [[T4:%.*]] = extractvalue { i32, i1 } [[T2]], 1
diff --git a/test/CodeGen/unsigned-overflow.c b/test/CodeGen/unsigned-overflow.c
index 01ed0bf..c91be33 100644
--- a/test/CodeGen/unsigned-overflow.c
+++ b/test/CodeGen/unsigned-overflow.c
@@ -11,8 +11,8 @@
 // CHECK-LABEL: define void @testlongadd()
 void testlongadd() {
 
-  // CHECK:      [[T1:%.*]] = load i64* @lj
-  // CHECK-NEXT: [[T2:%.*]] = load i64* @lk
+  // CHECK:      [[T1:%.*]] = load i64, i64* @lj
+  // CHECK-NEXT: [[T2:%.*]] = load i64, i64* @lk
   // CHECK-NEXT: [[T3:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[T1]], i64 [[T2]])
   // CHECK-NEXT: [[T4:%.*]] = extractvalue { i64, i1 } [[T3]], 0
   // CHECK-NEXT: [[T5:%.*]] = extractvalue { i64, i1 } [[T3]], 1
@@ -23,8 +23,8 @@
 // CHECK-LABEL: define void @testlongsub()
 void testlongsub() {
 
-  // CHECK:      [[T1:%.*]] = load i64* @lj
-  // CHECK-NEXT: [[T2:%.*]] = load i64* @lk
+  // CHECK:      [[T1:%.*]] = load i64, i64* @lj
+  // CHECK-NEXT: [[T2:%.*]] = load i64, i64* @lk
   // CHECK-NEXT: [[T3:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[T1]], i64 [[T2]])
   // CHECK-NEXT: [[T4:%.*]] = extractvalue { i64, i1 } [[T3]], 0
   // CHECK-NEXT: [[T5:%.*]] = extractvalue { i64, i1 } [[T3]], 1
@@ -35,8 +35,8 @@
 // CHECK-LABEL: define void @testlongmul()
 void testlongmul() {
 
-  // CHECK:      [[T1:%.*]] = load i64* @lj
-  // CHECK-NEXT: [[T2:%.*]] = load i64* @lk
+  // CHECK:      [[T1:%.*]] = load i64, i64* @lj
+  // CHECK-NEXT: [[T2:%.*]] = load i64, i64* @lk
   // CHECK-NEXT: [[T3:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[T1]], i64 [[T2]])
   // CHECK-NEXT: [[T4:%.*]] = extractvalue { i64, i1 } [[T3]], 0
   // CHECK-NEXT: [[T5:%.*]] = extractvalue { i64, i1 } [[T3]], 1
@@ -48,7 +48,7 @@
 void testlongpostinc() {
   opaquelong(li++);
 
-  // CHECK:      [[T1:%.*]] = load i64* @li
+  // CHECK:      [[T1:%.*]] = load i64, i64* @li
   // CHECK-NEXT: [[T2:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[T1]], i64 1)
   // CHECK-NEXT: [[T3:%.*]] = extractvalue { i64, i1 } [[T2]], 0
   // CHECK-NEXT: [[T4:%.*]] = extractvalue { i64, i1 } [[T2]], 1
@@ -59,7 +59,7 @@
 void testlongpreinc() {
   opaquelong(++li);
 
-  // CHECK:      [[T1:%.*]] = load i64* @li
+  // CHECK:      [[T1:%.*]] = load i64, i64* @li
   // CHECK-NEXT: [[T2:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[T1]], i64 1)
   // CHECK-NEXT: [[T3:%.*]] = extractvalue { i64, i1 } [[T2]], 0
   // CHECK-NEXT: [[T4:%.*]] = extractvalue { i64, i1 } [[T2]], 1
@@ -69,8 +69,8 @@
 // CHECK-LABEL: define void @testintadd()
 void testintadd() {
 
-  // CHECK:      [[T1:%.*]] = load i32* @ij
-  // CHECK-NEXT: [[T2:%.*]] = load i32* @ik
+  // CHECK:      [[T1:%.*]] = load i32, i32* @ij
+  // CHECK-NEXT: [[T2:%.*]] = load i32, i32* @ik
   // CHECK-NEXT: [[T3:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[T1]], i32 [[T2]])
   // CHECK-NEXT: [[T4:%.*]] = extractvalue { i32, i1 } [[T3]], 0
   // CHECK-NEXT: [[T5:%.*]] = extractvalue { i32, i1 } [[T3]], 1
@@ -81,8 +81,8 @@
 // CHECK-LABEL: define void @testintsub()
 void testintsub() {
 
-  // CHECK:      [[T1:%.*]] = load i32* @ij
-  // CHECK-NEXT: [[T2:%.*]] = load i32* @ik
+  // CHECK:      [[T1:%.*]] = load i32, i32* @ij
+  // CHECK-NEXT: [[T2:%.*]] = load i32, i32* @ik
   // CHECK-NEXT: [[T3:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[T1]], i32 [[T2]])
   // CHECK-NEXT: [[T4:%.*]] = extractvalue { i32, i1 } [[T3]], 0
   // CHECK-NEXT: [[T5:%.*]] = extractvalue { i32, i1 } [[T3]], 1
@@ -93,8 +93,8 @@
 // CHECK-LABEL: define void @testintmul()
 void testintmul() {
 
-  // CHECK:      [[T1:%.*]] = load i32* @ij
-  // CHECK-NEXT: [[T2:%.*]] = load i32* @ik
+  // CHECK:      [[T1:%.*]] = load i32, i32* @ij
+  // CHECK-NEXT: [[T2:%.*]] = load i32, i32* @ik
   // CHECK-NEXT: [[T3:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[T1]], i32 [[T2]])
   // CHECK-NEXT: [[T4:%.*]] = extractvalue { i32, i1 } [[T3]], 0
   // CHECK-NEXT: [[T5:%.*]] = extractvalue { i32, i1 } [[T3]], 1
@@ -106,7 +106,7 @@
 void testintpostinc() {
   opaqueint(ii++);
 
-  // CHECK:      [[T1:%.*]] = load i32* @ii
+  // CHECK:      [[T1:%.*]] = load i32, i32* @ii
   // CHECK-NEXT: [[T2:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[T1]], i32 1)
   // CHECK-NEXT: [[T3:%.*]] = extractvalue { i32, i1 } [[T2]], 0
   // CHECK-NEXT: [[T4:%.*]] = extractvalue { i32, i1 } [[T2]], 1
@@ -117,7 +117,7 @@
 void testintpreinc() {
   opaqueint(++ii);
 
-  // CHECK:      [[T1:%.*]] = load i32* @ii
+  // CHECK:      [[T1:%.*]] = load i32, i32* @ii
   // CHECK-NEXT: [[T2:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[T1]], i32 1)
   // CHECK-NEXT: [[T3:%.*]] = extractvalue { i32, i1 } [[T2]], 0
   // CHECK-NEXT: [[T4:%.*]] = extractvalue { i32, i1 } [[T2]], 1
diff --git a/test/CodeGen/unsigned-promotion.c b/test/CodeGen/unsigned-promotion.c
index 2c34152..4e7a442 100644
--- a/test/CodeGen/unsigned-promotion.c
+++ b/test/CodeGen/unsigned-promotion.c
@@ -15,16 +15,16 @@
 // CHECKS-LABEL:   define void @testshortadd()
 // CHECKU-LABEL: define void @testshortadd()
 void testshortadd() {
-  // CHECKS:        load i16* @sj
-  // CHECKS:        load i16* @sk
+  // CHECKS:        load i16, i16* @sj
+  // CHECKS:        load i16, i16* @sk
   // CHECKS:        [[T1:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[T2:%.*]], i32 [[T3:%.*]])
   // CHECKS-NEXT:   [[T4:%.*]] = extractvalue { i32, i1 } [[T1]], 0
   // CHECKS-NEXT:   [[T5:%.*]] = extractvalue { i32, i1 } [[T1]], 1
   // CHECKS:        call void @__ubsan_handle_add_overflow
   //
-  // CHECKU:      [[T1:%.*]] = load i16* @sj
+  // CHECKU:      [[T1:%.*]] = load i16, i16* @sj
   // CHECKU:      [[T2:%.*]] = zext i16 [[T1]]
-  // CHECKU:      [[T3:%.*]] = load i16* @sk
+  // CHECKU:      [[T3:%.*]] = load i16, i16* @sk
   // CHECKU:      [[T4:%.*]] = zext i16 [[T3]]
   // CHECKU-NOT:  llvm.sadd
   // CHECKU-NOT:  llvm.uadd
@@ -37,16 +37,16 @@
 // CHECKU-LABEL: define void @testshortsub()
 void testshortsub() {
 
-  // CHECKS:        load i16* @sj
-  // CHECKS:        load i16* @sk
+  // CHECKS:        load i16, i16* @sj
+  // CHECKS:        load i16, i16* @sk
   // CHECKS:        [[T1:%.*]] = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 [[T2:%.*]], i32 [[T3:%.*]])
   // CHECKS-NEXT:   [[T4:%.*]] = extractvalue { i32, i1 } [[T1]], 0
   // CHECKS-NEXT:   [[T5:%.*]] = extractvalue { i32, i1 } [[T1]], 1
   // CHECKS:        call void @__ubsan_handle_sub_overflow
   //
-  // CHECKU:      [[T1:%.*]] = load i16* @sj
+  // CHECKU:      [[T1:%.*]] = load i16, i16* @sj
   // CHECKU:      [[T2:%.*]] = zext i16 [[T1]]
-  // CHECKU:      [[T3:%.*]] = load i16* @sk
+  // CHECKU:      [[T3:%.*]] = load i16, i16* @sk
   // CHECKU:      [[T4:%.*]] = zext i16 [[T3]]
   // CHECKU-NOT:  llvm.ssub
   // CHECKU-NOT:  llvm.usub
@@ -59,16 +59,16 @@
 // CHECKU-LABEL: define void @testshortmul()
 void testshortmul() {
 
-  // CHECKS:        load i16* @sj
-  // CHECKS:        load i16* @sk
+  // CHECKS:        load i16, i16* @sj
+  // CHECKS:        load i16, i16* @sk
   // CHECKS:        [[T1:%.*]] = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 [[T2:%.*]], i32 [[T3:%.*]])
   // CHECKS-NEXT:   [[T4:%.*]] = extractvalue { i32, i1 } [[T1]], 0
   // CHECKS-NEXT:   [[T5:%.*]] = extractvalue { i32, i1 } [[T1]], 1
   // CHECKS:        call void @__ubsan_handle_mul_overflow
   //
-  // CHECKU:      [[T1:%.*]] = load i16* @sj
+  // CHECKU:      [[T1:%.*]] = load i16, i16* @sj
   // CHECKU:      [[T2:%.*]] = zext i16 [[T1]]
-  // CHECKU:      [[T3:%.*]] = load i16* @sk
+  // CHECKU:      [[T3:%.*]] = load i16, i16* @sk
   // CHECKU:      [[T4:%.*]] = zext i16 [[T3]]
   // CHECKU-NOT:  llvm.smul
   // CHECKU-NOT:  llvm.umul
@@ -80,16 +80,16 @@
 // CHECKU-LABEL: define void @testcharadd()
 void testcharadd() {
 
-  // CHECKS:        load i8* @cj
-  // CHECKS:        load i8* @ck
+  // CHECKS:        load i8, i8* @cj
+  // CHECKS:        load i8, i8* @ck
   // CHECKS:        [[T1:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[T2:%.*]], i32 [[T3:%.*]])
   // CHECKS-NEXT:   [[T4:%.*]] = extractvalue { i32, i1 } [[T1]], 0
   // CHECKS-NEXT:   [[T5:%.*]] = extractvalue { i32, i1 } [[T1]], 1
   // CHECKS:        call void @__ubsan_handle_add_overflow
   //
-  // CHECKU:      [[T1:%.*]] = load i8* @cj
+  // CHECKU:      [[T1:%.*]] = load i8, i8* @cj
   // CHECKU:      [[T2:%.*]] = zext i8 [[T1]]
-  // CHECKU:      [[T3:%.*]] = load i8* @ck
+  // CHECKU:      [[T3:%.*]] = load i8, i8* @ck
   // CHECKU:      [[T4:%.*]] = zext i8 [[T3]]
   // CHECKU-NOT:  llvm.sadd
   // CHECKU-NOT:  llvm.uadd
@@ -102,16 +102,16 @@
 // CHECKU-LABEL: define void @testcharsub()
 void testcharsub() {
 
-  // CHECKS:        load i8* @cj
-  // CHECKS:        load i8* @ck
+  // CHECKS:        load i8, i8* @cj
+  // CHECKS:        load i8, i8* @ck
   // CHECKS:        [[T1:%.*]] = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 [[T2:%.*]], i32 [[T3:%.*]])
   // CHECKS-NEXT:   [[T4:%.*]] = extractvalue { i32, i1 } [[T1]], 0
   // CHECKS-NEXT:   [[T5:%.*]] = extractvalue { i32, i1 } [[T1]], 1
   // CHECKS:        call void @__ubsan_handle_sub_overflow
   //
-  // CHECKU:      [[T1:%.*]] = load i8* @cj
+  // CHECKU:      [[T1:%.*]] = load i8, i8* @cj
   // CHECKU:      [[T2:%.*]] = zext i8 [[T1]]
-  // CHECKU:      [[T3:%.*]] = load i8* @ck
+  // CHECKU:      [[T3:%.*]] = load i8, i8* @ck
   // CHECKU:      [[T4:%.*]] = zext i8 [[T3]]
   // CHECKU-NOT:  llvm.ssub
   // CHECKU-NOT:  llvm.usub
@@ -124,16 +124,16 @@
 // CHECKU-LABEL: define void @testcharmul()
 void testcharmul() {
 
-  // CHECKS:        load i8* @cj
-  // CHECKS:        load i8* @ck
+  // CHECKS:        load i8, i8* @cj
+  // CHECKS:        load i8, i8* @ck
   // CHECKS:        [[T1:%.*]] = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 [[T2:%.*]], i32 [[T3:%.*]])
   // CHECKS-NEXT:   [[T4:%.*]] = extractvalue { i32, i1 } [[T1]], 0
   // CHECKS-NEXT:   [[T5:%.*]] = extractvalue { i32, i1 } [[T1]], 1
   // CHECKS:        call void @__ubsan_handle_mul_overflow
   //
-  // CHECKU:      [[T1:%.*]] = load i8* @cj
+  // CHECKU:      [[T1:%.*]] = load i8, i8* @cj
   // CHECKU:      [[T2:%.*]] = zext i8 [[T1]]
-  // CHECKU:      [[T3:%.*]] = load i8* @ck
+  // CHECKU:      [[T3:%.*]] = load i8, i8* @ck
   // CHECKU:      [[T4:%.*]] = zext i8 [[T3]]
   // CHECKU-NOT:  llvm.smul
   // CHECKU-NOT:  llvm.umul
diff --git a/test/CodeGen/variadic-gpfp-x86.c b/test/CodeGen/variadic-gpfp-x86.c
index 735c4be..854899b 100644
--- a/test/CodeGen/variadic-gpfp-x86.c
+++ b/test/CodeGen/variadic-gpfp-x86.c
@@ -8,8 +8,8 @@
 
 struct Bar foo(__builtin_va_list ap) {
   return __builtin_va_arg(ap, struct Bar);
-// CHECK: [[FPOP:%.*]] = getelementptr inbounds %struct.__va_list_tag* {{.*}}, i32 0, i32 1
-// CHECK: [[FPO:%.*]] = load i32* [[FPOP]]
-// CHECK: [[FPVEC:%.*]] = getelementptr i8* {{.*}}, i32 [[FPO]]
+// CHECK: [[FPOP:%.*]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* {{.*}}, i32 0, i32 1
+// CHECK: [[FPO:%.*]] = load i32, i32* [[FPOP]]
+// CHECK: [[FPVEC:%.*]] = getelementptr i8, i8* {{.*}}, i32 [[FPO]]
 // CHECK: bitcast i8* [[FPVEC]] to <2 x float>*
 }
diff --git a/test/CodeGen/vla.c b/test/CodeGen/vla.c
index e6cdd5d..0f2e2cd 100644
--- a/test/CodeGen/vla.c
+++ b/test/CodeGen/vla.c
@@ -79,7 +79,7 @@
 {
   GLOB = 0;
   char b[1][n+3];			/* Variable length array.  */
-  // CHECK:  [[tmp_1:%.*]] = load i32* @GLOB, align 4
+  // CHECK:  [[tmp_1:%.*]] = load i32, i32* @GLOB, align 4
   // CHECK-NEXT: add nsw i32 [[tmp_1]], 1
   __typeof__(b[GLOB++]) c;
   return GLOB;
@@ -92,13 +92,13 @@
   // CHECK-NEXT: [[PV:%.*]] = alloca [5 x double]*, align 4
   // CHECK-NEXT: store
   // CHECK-NEXT: store
-  // CHECK-NEXT: [[N:%.*]] = load i32* [[NV]], align 4
-  // CHECK-NEXT: [[P:%.*]] = load [5 x double]** [[PV]], align 4
+  // CHECK-NEXT: [[N:%.*]] = load i32, i32* [[NV]], align 4
+  // CHECK-NEXT: [[P:%.*]] = load [5 x double]*, [5 x double]** [[PV]], align 4
   // CHECK-NEXT: [[T0:%.*]] = mul nsw i32 1, [[N]]
-  // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [5 x double]* [[P]], i32 [[T0]]
-  // CHECK-NEXT: [[T2:%.*]] = getelementptr inbounds [5 x double]* [[T1]], i32 2
-  // CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds [5 x double]* [[T2]], i32 0, i32 3
-  // CHECK-NEXT: [[T4:%.*]] = load double* [[T3]]
+  // CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [5 x double], [5 x double]* [[P]], i32 [[T0]]
+  // CHECK-NEXT: [[T2:%.*]] = getelementptr inbounds [5 x double], [5 x double]* [[T1]], i32 2
+  // CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds [5 x double], [5 x double]* [[T2]], i32 0, i32 3
+  // CHECK-NEXT: [[T4:%.*]] = load double, double* [[T3]]
   // CHECK-NEXT: ret double [[T4]]
  return p[1][2][3];
 }
@@ -112,27 +112,27 @@
   // CHECK-NEXT: store [6 x i8]*
 
   // VLA captures.
-  // CHECK-NEXT: [[DIM0:%.*]] = load i32* [[N]], align 4
-  // CHECK-NEXT: [[T0:%.*]] = load i32* [[N]], align 4
+  // CHECK-NEXT: [[DIM0:%.*]] = load i32, i32* [[N]], align 4
+  // CHECK-NEXT: [[T0:%.*]] = load i32, i32* [[N]], align 4
   // CHECK-NEXT: [[DIM1:%.*]] = add i32 [[T0]], 1
 
-  // CHECK-NEXT: [[T0:%.*]] = load [6 x i8]** [[P]], align 4
-  // CHECK-NEXT: [[T1:%.*]] = load i32* [[N]], align 4
+  // CHECK-NEXT: [[T0:%.*]] = load [6 x i8]*, [6 x i8]** [[P]], align 4
+  // CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[N]], align 4
   // CHECK-NEXT: [[T2:%.*]] = udiv i32 [[T1]], 2
   // CHECK-NEXT: [[T3:%.*]] = mul nuw i32 [[DIM0]], [[DIM1]]
   // CHECK-NEXT: [[T4:%.*]] = mul nsw i32 [[T2]], [[T3]]
-  // CHECK-NEXT: [[T5:%.*]] = getelementptr inbounds [6 x i8]* [[T0]], i32 [[T4]]
-  // CHECK-NEXT: [[T6:%.*]] = load i32* [[N]], align 4
+  // CHECK-NEXT: [[T5:%.*]] = getelementptr inbounds [6 x i8], [6 x i8]* [[T0]], i32 [[T4]]
+  // CHECK-NEXT: [[T6:%.*]] = load i32, i32* [[N]], align 4
   // CHECK-NEXT: [[T7:%.*]] = udiv i32 [[T6]], 4
   // CHECK-NEXT: [[T8:%.*]] = sub i32 0, [[T7]]
   // CHECK-NEXT: [[T9:%.*]] = mul nuw i32 [[DIM0]], [[DIM1]]
   // CHECK-NEXT: [[T10:%.*]] = mul nsw i32 [[T8]], [[T9]]
-  // CHECK-NEXT: [[T11:%.*]] = getelementptr inbounds [6 x i8]* [[T5]], i32 [[T10]]
+  // CHECK-NEXT: [[T11:%.*]] = getelementptr inbounds [6 x i8], [6 x i8]* [[T5]], i32 [[T10]]
   // CHECK-NEXT: store [6 x i8]* [[T11]], [6 x i8]** [[P2]], align 4
   __typeof(p) p2 = (p + n/2) - n/4;
 
-  // CHECK-NEXT: [[T0:%.*]] = load [6 x i8]** [[P2]], align 4
-  // CHECK-NEXT: [[T1:%.*]] = load [6 x i8]** [[P]], align 4
+  // CHECK-NEXT: [[T0:%.*]] = load [6 x i8]*, [6 x i8]** [[P2]], align 4
+  // CHECK-NEXT: [[T1:%.*]] = load [6 x i8]*, [6 x i8]** [[P]], align 4
   // CHECK-NEXT: [[T2:%.*]] = ptrtoint [6 x i8]* [[T0]] to i32
   // CHECK-NEXT: [[T3:%.*]] = ptrtoint [6 x i8]* [[T1]] to i32
   // CHECK-NEXT: [[T4:%.*]] = sub i32 [[T2]], [[T3]]
@@ -154,16 +154,16 @@
   // CHECK-NEXT: store i32 0, i32* [[I]], align 4
 
   (typeof(++i, (int (*)[i])a)){&a} += 0;
-  // CHECK-NEXT: [[Z:%.*]] = load i32* [[I]], align 4
+  // CHECK-NEXT: [[Z:%.*]] = load i32, i32* [[I]], align 4
   // CHECK-NEXT: [[INC:%.*]]  = add nsw i32 [[Z]], 1
   // CHECK-NEXT: store i32 [[INC]], i32* [[I]], align 4
-  // CHECK-NEXT: [[O:%.*]] = load i32* [[I]], align 4
-  // CHECK-NEXT: [[AR:%.*]] = getelementptr inbounds [5 x i32]* [[A]], i32 0, i32 0
+  // CHECK-NEXT: [[O:%.*]] = load i32, i32* [[I]], align 4
+  // CHECK-NEXT: [[AR:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[A]], i32 0, i32 0
   // CHECK-NEXT: [[T:%.*]] = bitcast [5 x i32]* [[A]] to i32*
   // CHECK-NEXT: store i32* [[T]], i32** [[CL]]
-  // CHECK-NEXT: [[TH:%.*]] = load i32** [[CL]]
+  // CHECK-NEXT: [[TH:%.*]] = load i32*, i32** [[CL]]
   // CHECK-NEXT: [[VLAIX:%.*]] = mul nsw i32 0, [[O]]
-  // CHECK-NEXT: [[ADDPTR:%.*]] = getelementptr inbounds i32* [[TH]], i32 [[VLAIX]]
+  // CHECK-NEXT: [[ADDPTR:%.*]] = getelementptr inbounds i32, i32* [[TH]], i32 [[VLAIX]]
   // CHECK-NEXT: store i32* [[ADDPTR]], i32** [[CL]]
 }
 
@@ -178,15 +178,15 @@
   // CHECK-NEXT: [[CL:%.*]] = alloca i32**, align 4
   // CHECK-NEXT: store i32 20, i32* [[N]], align 4
   // CHECK-NEXT: store i32 0, i32* [[I]], align 4
-  // CHECK-NEXT: [[Z:%.*]] = load i32* [[I]], align 4
+  // CHECK-NEXT: [[Z:%.*]] = load i32, i32* [[I]], align 4
   // CHECK-NEXT: [[O:%.*]] = bitcast i32*** [[A]] to i32**
   // CHECK-NEXT: store i32** [[O]], i32*** [[CL]]
-  // CHECK-NEXT: [[T:%.*]] = load i32*** [[CL]]
-  // CHECK-NEXT: [[IX:%.*]] = getelementptr inbounds i32** [[T]], i32 0
-  // CHECK-NEXT: [[TH:%.*]] = load i32** [[IX]], align 4
+  // CHECK-NEXT: [[T:%.*]] = load i32**, i32*** [[CL]]
+  // CHECK-NEXT: [[IX:%.*]] = getelementptr inbounds i32*, i32** [[T]], i32 0
+  // CHECK-NEXT: [[TH:%.*]] = load i32*, i32** [[IX]], align 4
   // CHECK-NEXT: [[F:%.*]] = mul nsw i32 1, [[Z]]
-  // CHECK-NEXT: [[IX1:%.*]] = getelementptr inbounds i32* [[TH]], i32 [[F]]
-  // CHECK-NEXT: [[IX2:%.*]] = getelementptr inbounds i32* [[IX1]], i32 5
+  // CHECK-NEXT: [[IX1:%.*]] = getelementptr inbounds i32, i32* [[TH]], i32 [[F]]
+  // CHECK-NEXT: [[IX2:%.*]] = getelementptr inbounds i32, i32* [[IX1]], i32 5
   // CHECK-NEXT: store i32 0, i32* [[IX2]], align 4
 }
 
diff --git a/test/CodeGen/volatile-1.c b/test/CodeGen/volatile-1.c
index d1861d5..71cd5f8 100644
--- a/test/CodeGen/volatile-1.c
+++ b/test/CodeGen/volatile-1.c
@@ -24,47 +24,47 @@
 
 // CHECK-LABEL: define void @test()
 void test() {
-  // CHECK: load volatile [[INT]]* @i
+  // CHECK: load volatile [[INT]], [[INT]]* @i
   i;
-  // CHECK-NEXT: load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0), align 4
-  // CHECK-NEXT: load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1), align 4
+  // CHECK-NEXT: load volatile [[INT]], [[INT]]* getelementptr inbounds ([[CINT]], [[CINT]]* @ci, i32 0, i32 0), align 4
+  // CHECK-NEXT: load volatile [[INT]], [[INT]]* getelementptr inbounds ([[CINT]], [[CINT]]* @ci, i32 0, i32 1), align 4
   // CHECK-NEXT: sitofp [[INT]]
   (float)(ci);
-  // CHECK-NEXT: load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0), align 4
-  // CHECK-NEXT: load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1), align 4
+  // CHECK-NEXT: load volatile [[INT]], [[INT]]* getelementptr inbounds ([[CINT]], [[CINT]]* @ci, i32 0, i32 0), align 4
+  // CHECK-NEXT: load volatile [[INT]], [[INT]]* getelementptr inbounds ([[CINT]], [[CINT]]* @ci, i32 0, i32 1), align 4
   (void)ci;
   // CHECK-NEXT: bitcast
   // CHECK-NEXT: memcpy
   (void)a;
-  // CHECK-NEXT: [[R:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0), align 4
-  // CHECK-NEXT: [[I:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1), align 4
-  // CHECK-NEXT: store volatile [[INT]] [[R]], [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0), align 4
-  // CHECK-NEXT: store volatile [[INT]] [[I]], [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1), align 4
+  // CHECK-NEXT: [[R:%.*]] = load volatile [[INT]], [[INT]]* getelementptr inbounds ([[CINT]], [[CINT]]* @ci, i32 0, i32 0), align 4
+  // CHECK-NEXT: [[I:%.*]] = load volatile [[INT]], [[INT]]* getelementptr inbounds ([[CINT]], [[CINT]]* @ci, i32 0, i32 1), align 4
+  // CHECK-NEXT: store volatile [[INT]] [[R]], [[INT]]* getelementptr inbounds ([[CINT]], [[CINT]]* @ci, i32 0, i32 0), align 4
+  // CHECK-NEXT: store volatile [[INT]] [[I]], [[INT]]* getelementptr inbounds ([[CINT]], [[CINT]]* @ci, i32 0, i32 1), align 4
   (void)(ci=ci);
-  // CHECK-NEXT: [[T:%.*]] = load volatile [[INT]]* @j
+  // CHECK-NEXT: [[T:%.*]] = load volatile [[INT]], [[INT]]* @j
   // CHECK-NEXT: store volatile [[INT]] [[T]], [[INT]]* @i
   (void)(i=j);
-  // CHECK-NEXT: [[R1:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0), align 4
-  // CHECK-NEXT: [[I1:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1), align 4
-  // CHECK-NEXT: [[R2:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0), align 4
-  // CHECK-NEXT: [[I2:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1), align 4
+  // CHECK-NEXT: [[R1:%.*]] = load volatile [[INT]], [[INT]]* getelementptr inbounds ([[CINT]], [[CINT]]* @ci, i32 0, i32 0), align 4
+  // CHECK-NEXT: [[I1:%.*]] = load volatile [[INT]], [[INT]]* getelementptr inbounds ([[CINT]], [[CINT]]* @ci, i32 0, i32 1), align 4
+  // CHECK-NEXT: [[R2:%.*]] = load volatile [[INT]], [[INT]]* getelementptr inbounds ([[CINT]], [[CINT]]* @ci, i32 0, i32 0), align 4
+  // CHECK-NEXT: [[I2:%.*]] = load volatile [[INT]], [[INT]]* getelementptr inbounds ([[CINT]], [[CINT]]* @ci, i32 0, i32 1), align 4
   // Not sure why they're ordered this way.
   // CHECK-NEXT: [[R:%.*]] = add [[INT]] [[R2]], [[R1]]
   // CHECK-NEXT: [[I:%.*]] = add [[INT]] [[I2]], [[I1]]
-  // CHECK-NEXT: store volatile [[INT]] [[R]], [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0), align 4
-  // CHECK-NEXT: store volatile [[INT]] [[I]], [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1), align 4
+  // CHECK-NEXT: store volatile [[INT]] [[R]], [[INT]]* getelementptr inbounds ([[CINT]], [[CINT]]* @ci, i32 0, i32 0), align 4
+  // CHECK-NEXT: store volatile [[INT]] [[I]], [[INT]]* getelementptr inbounds ([[CINT]], [[CINT]]* @ci, i32 0, i32 1), align 4
   ci+=ci;
 
-  // CHECK-NEXT: [[R1:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0), align 4
-  // CHECK-NEXT: [[I1:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1), align 4
-  // CHECK-NEXT: [[R2:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0), align 4
-  // CHECK-NEXT: [[I2:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1), align 4
+  // CHECK-NEXT: [[R1:%.*]] = load volatile [[INT]], [[INT]]* getelementptr inbounds ([[CINT]], [[CINT]]* @ci, i32 0, i32 0), align 4
+  // CHECK-NEXT: [[I1:%.*]] = load volatile [[INT]], [[INT]]* getelementptr inbounds ([[CINT]], [[CINT]]* @ci, i32 0, i32 1), align 4
+  // CHECK-NEXT: [[R2:%.*]] = load volatile [[INT]], [[INT]]* getelementptr inbounds ([[CINT]], [[CINT]]* @ci, i32 0, i32 0), align 4
+  // CHECK-NEXT: [[I2:%.*]] = load volatile [[INT]], [[INT]]* getelementptr inbounds ([[CINT]], [[CINT]]* @ci, i32 0, i32 1), align 4
   // CHECK-NEXT: [[R:%.*]] = add [[INT]] [[R2]], [[R1]]
   // CHECK-NEXT: [[I:%.*]] = add [[INT]] [[I2]], [[I1]]
-  // CHECK-NEXT: store volatile [[INT]] [[R]], [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0), align 4
-  // CHECK-NEXT: store volatile [[INT]] [[I]], [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1), align 4
-  // CHECK-NEXT: [[R2:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 0), align 4
-  // CHECK-NEXT: [[I2:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1), align 4
+  // CHECK-NEXT: store volatile [[INT]] [[R]], [[INT]]* getelementptr inbounds ([[CINT]], [[CINT]]* @ci, i32 0, i32 0), align 4
+  // CHECK-NEXT: store volatile [[INT]] [[I]], [[INT]]* getelementptr inbounds ([[CINT]], [[CINT]]* @ci, i32 0, i32 1), align 4
+  // CHECK-NEXT: [[R2:%.*]] = load volatile [[INT]], [[INT]]* getelementptr inbounds ([[CINT]], [[CINT]]* @ci, i32 0, i32 0), align 4
+  // CHECK-NEXT: [[I2:%.*]] = load volatile [[INT]], [[INT]]* getelementptr inbounds ([[CINT]], [[CINT]]* @ci, i32 0, i32 1), align 4
   // These additions can be elided
   // CHECK-NEXT: add [[INT]] [[R]], [[R2]]
   // CHECK-NEXT: add [[INT]] [[I]], [[I2]]
@@ -192,9 +192,9 @@
   // CHECK-NEXT: store volatile
   // CHECK-NEXT: store volatile
   ci=ci=ci;
-  // CHECK-NEXT: [[T:%.*]] = load volatile [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1)
-  // CHECK-NEXT: store volatile [[INT]] [[T]], [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1)
-  // CHECK-NEXT: store volatile [[INT]] [[T]], [[INT]]* getelementptr inbounds ([[CINT]]* @ci, i32 0, i32 1)
+  // CHECK-NEXT: [[T:%.*]] = load volatile [[INT]], [[INT]]* getelementptr inbounds ([[CINT]], [[CINT]]* @ci, i32 0, i32 1)
+  // CHECK-NEXT: store volatile [[INT]] [[T]], [[INT]]* getelementptr inbounds ([[CINT]], [[CINT]]* @ci, i32 0, i32 1)
+  // CHECK-NEXT: store volatile [[INT]] [[T]], [[INT]]* getelementptr inbounds ([[CINT]], [[CINT]]* @ci, i32 0, i32 1)
   __imag ci = __imag ci = __imag ci;
   // CHECK-NEXT: load volatile
   // CHECK-NEXT: store volatile
@@ -316,9 +316,9 @@
 
 // CHECK: define {{.*}} @test2()
 int test2() {
-  // CHECK: load volatile i32*
-  // CHECK-NEXT: load volatile i32*
-  // CHECK-NEXT: load volatile i32*
+  // CHECK: load volatile i32, i32*
+  // CHECK-NEXT: load volatile i32, i32*
+  // CHECK-NEXT: load volatile i32, i32*
   // CHECK-NEXT: add i32
   // CHECK-NEXT: add i32
   // CHECK-NEXT: store volatile i32
diff --git a/test/CodeGen/volatile-2.c b/test/CodeGen/volatile-2.c
index 18d0d31..84cbc1e 100644
--- a/test/CodeGen/volatile-2.c
+++ b/test/CodeGen/volatile-2.c
@@ -3,8 +3,8 @@
 void test0() {
   // CHECK-LABEL: define void @test0()
   // CHECK:      [[F:%.*]] = alloca float
-  // CHECK-NEXT: [[REAL:%.*]] = load volatile float* getelementptr inbounds ({ float, float }* @test0_v, i32 0, i32 0), align 4
-  // CHECK-NEXT: load volatile float* getelementptr inbounds ({{.*}} @test0_v, i32 0, i32 1), align 4
+  // CHECK-NEXT: [[REAL:%.*]] = load volatile float, float* getelementptr inbounds ({ float, float }, { float, float }* @test0_v, i32 0, i32 0), align 4
+  // CHECK-NEXT: load volatile float, float* getelementptr inbounds ({{.*}} @test0_v, i32 0, i32 1), align 4
   // CHECK-NEXT: store float [[REAL]], float* [[F]], align 4
   // CHECK-NEXT: ret void
   extern volatile _Complex float test0_v;
@@ -13,8 +13,8 @@
 
 void test1() {
   // CHECK-LABEL: define void @test1()
-  // CHECK:      [[REAL:%.*]] = load volatile float* getelementptr inbounds ({{.*}} @test1_v, i32 0, i32 0), align 4
-  // CHECK-NEXT: [[IMAG:%.*]] = load volatile float* getelementptr inbounds ({{.*}} @test1_v, i32 0, i32 1), align 4
+  // CHECK:      [[REAL:%.*]] = load volatile float, float* getelementptr inbounds ({{.*}} @test1_v, i32 0, i32 0), align 4
+  // CHECK-NEXT: [[IMAG:%.*]] = load volatile float, float* getelementptr inbounds ({{.*}} @test1_v, i32 0, i32 1), align 4
   // CHECK-NEXT: store volatile float [[REAL]], float* getelementptr inbounds ({{.*}} @test1_v, i32 0, i32 0), align 4
   // CHECK-NEXT: store volatile float [[IMAG]], float* getelementptr inbounds ({{.*}} @test1_v, i32 0, i32 1), align 4
   // CHECK-NEXT: ret void
diff --git a/test/CodeGen/volatile-complex.c b/test/CodeGen/volatile-complex.c
index fd5e52b..cd8f72d 100644
--- a/test/CodeGen/volatile-complex.c
+++ b/test/CodeGen/volatile-complex.c
@@ -14,52 +14,52 @@
 
 // CHECK-LABEL: define void @test_cf()
 void test_cf() {
-  // CHECK:      load volatile float* getelementptr inbounds ({ float, float }* @cf, i32 0, i32 0), align 4
-  // CHECK-NEXT: load volatile float* getelementptr inbounds ({ float, float }* @cf, i32 0, i32 1), align 4
+  // CHECK:      load volatile float, float* getelementptr inbounds ({ float, float }, { float, float }* @cf, i32 0, i32 0), align 4
+  // CHECK-NEXT: load volatile float, float* getelementptr inbounds ({ float, float }, { float, float }* @cf, i32 0, i32 1), align 4
   (void)(cf);
-  // CHECK-NEXT: [[R:%.*]] = load volatile float* getelementptr inbounds ({ float, float }* @cf, i32 0, i32 0), align 4
-  // CHECK-NEXT: [[I:%.*]] = load volatile float* getelementptr inbounds ({ float, float }* @cf, i32 0, i32 1), align 4
-  // CHECK-NEXT: store volatile float [[R]], float* getelementptr inbounds ({ float, float }* @cf, i32 0, i32 0), align 4
-  // CHECK-NEXT: store volatile float [[I]], float* getelementptr inbounds ({ float, float }* @cf, i32 0, i32 1), align 4
+  // CHECK-NEXT: [[R:%.*]] = load volatile float, float* getelementptr inbounds ({ float, float }, { float, float }* @cf, i32 0, i32 0), align 4
+  // CHECK-NEXT: [[I:%.*]] = load volatile float, float* getelementptr inbounds ({ float, float }, { float, float }* @cf, i32 0, i32 1), align 4
+  // CHECK-NEXT: store volatile float [[R]], float* getelementptr inbounds ({ float, float }, { float, float }* @cf, i32 0, i32 0), align 4
+  // CHECK-NEXT: store volatile float [[I]], float* getelementptr inbounds ({ float, float }, { float, float }* @cf, i32 0, i32 1), align 4
   (void)(cf=cf);
   // CHECK-NEXT: ret void
 }
 
 // CHECK-LABEL: define void @test_cd()
 void test_cd() {
-  // CHECK:      load volatile double* getelementptr inbounds ({ double, double }* @cd, i32 0, i32 0), align 8
-  // CHECK-NEXT: load volatile double* getelementptr inbounds ({ double, double }* @cd, i32 0, i32 1), align 8
+  // CHECK:      load volatile double, double* getelementptr inbounds ({ double, double }, { double, double }* @cd, i32 0, i32 0), align 8
+  // CHECK-NEXT: load volatile double, double* getelementptr inbounds ({ double, double }, { double, double }* @cd, i32 0, i32 1), align 8
   (void)(cd);
-  // CHECK-NEXT: [[R:%.*]] = load volatile double* getelementptr inbounds ({ double, double }* @cd, i32 0, i32 0), align 8
-  // CHECK-NEXT: [[I:%.*]] = load volatile double* getelementptr inbounds ({ double, double }* @cd, i32 0, i32 1), align 8
-  // CHECK-NEXT: store volatile double [[R]], double* getelementptr inbounds ({ double, double }* @cd, i32 0, i32 0), align 8
-  // CHECK-NEXT: store volatile double [[I]], double* getelementptr inbounds ({ double, double }* @cd, i32 0, i32 1), align 8
+  // CHECK-NEXT: [[R:%.*]] = load volatile double, double* getelementptr inbounds ({ double, double }, { double, double }* @cd, i32 0, i32 0), align 8
+  // CHECK-NEXT: [[I:%.*]] = load volatile double, double* getelementptr inbounds ({ double, double }, { double, double }* @cd, i32 0, i32 1), align 8
+  // CHECK-NEXT: store volatile double [[R]], double* getelementptr inbounds ({ double, double }, { double, double }* @cd, i32 0, i32 0), align 8
+  // CHECK-NEXT: store volatile double [[I]], double* getelementptr inbounds ({ double, double }, { double, double }* @cd, i32 0, i32 1), align 8
   (void)(cd=cd);
   // CHECK-NEXT: ret void
 }
 
 // CHECK-LABEL: define void @test_cf32()
 void test_cf32() {
-  // CHECK:      load volatile float* getelementptr inbounds ({ float, float }* @cf32, i32 0, i32 0), align 32
-  // CHECK-NEXT: load volatile float* getelementptr inbounds ({ float, float }* @cf32, i32 0, i32 1), align 4
+  // CHECK:      load volatile float, float* getelementptr inbounds ({ float, float }, { float, float }* @cf32, i32 0, i32 0), align 32
+  // CHECK-NEXT: load volatile float, float* getelementptr inbounds ({ float, float }, { float, float }* @cf32, i32 0, i32 1), align 4
   (void)(cf32);
-  // CHECK-NEXT: [[R:%.*]] = load volatile float* getelementptr inbounds ({ float, float }* @cf32, i32 0, i32 0), align 32
-  // CHECK-NEXT: [[I:%.*]] = load volatile float* getelementptr inbounds ({ float, float }* @cf32, i32 0, i32 1), align 4
-  // CHECK-NEXT: store volatile float [[R]], float* getelementptr inbounds ({ float, float }* @cf32, i32 0, i32 0), align 32
-  // CHECK-NEXT: store volatile float [[I]], float* getelementptr inbounds ({ float, float }* @cf32, i32 0, i32 1), align 4
+  // CHECK-NEXT: [[R:%.*]] = load volatile float, float* getelementptr inbounds ({ float, float }, { float, float }* @cf32, i32 0, i32 0), align 32
+  // CHECK-NEXT: [[I:%.*]] = load volatile float, float* getelementptr inbounds ({ float, float }, { float, float }* @cf32, i32 0, i32 1), align 4
+  // CHECK-NEXT: store volatile float [[R]], float* getelementptr inbounds ({ float, float }, { float, float }* @cf32, i32 0, i32 0), align 32
+  // CHECK-NEXT: store volatile float [[I]], float* getelementptr inbounds ({ float, float }, { float, float }* @cf32, i32 0, i32 1), align 4
   (void)(cf32=cf32);
   // CHECK-NEXT: ret void
 }
 
 // CHECK-LABEL: define void @test_cd32()
 void test_cd32() {
-  // CHECK:      load volatile double* getelementptr inbounds ({ double, double }* @cd32, i32 0, i32 0), align 32
-  // CHECK-NEXT: load volatile double* getelementptr inbounds ({ double, double }* @cd32, i32 0, i32 1), align 8
+  // CHECK:      load volatile double, double* getelementptr inbounds ({ double, double }, { double, double }* @cd32, i32 0, i32 0), align 32
+  // CHECK-NEXT: load volatile double, double* getelementptr inbounds ({ double, double }, { double, double }* @cd32, i32 0, i32 1), align 8
   (void)(cd32);
-  // CHECK-NEXT: [[R:%.*]] = load volatile double* getelementptr inbounds ({ double, double }* @cd32, i32 0, i32 0), align 32
-  // CHECK-NEXT: [[I:%.*]] = load volatile double* getelementptr inbounds ({ double, double }* @cd32, i32 0, i32 1), align 8
-  // CHECK-NEXT: store volatile double [[R]], double* getelementptr inbounds ({ double, double }* @cd32, i32 0, i32 0), align 32
-  // CHECK-NEXT: store volatile double [[I]], double* getelementptr inbounds ({ double, double }* @cd32, i32 0, i32 1), align 8
+  // CHECK-NEXT: [[R:%.*]] = load volatile double, double* getelementptr inbounds ({ double, double }, { double, double }* @cd32, i32 0, i32 0), align 32
+  // CHECK-NEXT: [[I:%.*]] = load volatile double, double* getelementptr inbounds ({ double, double }, { double, double }* @cd32, i32 0, i32 1), align 8
+  // CHECK-NEXT: store volatile double [[R]], double* getelementptr inbounds ({ double, double }, { double, double }* @cd32, i32 0, i32 0), align 32
+  // CHECK-NEXT: store volatile double [[I]], double* getelementptr inbounds ({ double, double }, { double, double }* @cd32, i32 0, i32 1), align 8
   (void)(cd32=cd32);
   // CHECK-NEXT: ret void
 }
diff --git a/test/CodeGen/volatile.c b/test/CodeGen/volatile.c
index 3e891aa..52915f6 100644
--- a/test/CodeGen/volatile.c
+++ b/test/CodeGen/volatile.c
@@ -41,67 +41,67 @@
 // CHECK: [[I:%[a-zA-Z0-9_.]+]] = alloca i32
   // load
   i=S;
-// CHECK: load i32* @S
+// CHECK: load i32, i32* @S
 // CHECK: store i32 {{.*}}, i32* [[I]]
   i=vS;
-// CHECK: load volatile i32* @vS
+// CHECK: load volatile i32, i32* @vS
 // CHECK: store i32 {{.*}}, i32* [[I]]
   i=*pS;
-// CHECK: [[PS_VAL:%[a-zA-Z0-9_.]+]] = load i32** @pS
-// CHECK: load i32* [[PS_VAL]]
+// CHECK: [[PS_VAL:%[a-zA-Z0-9_.]+]] = load i32*, i32** @pS
+// CHECK: load i32, i32* [[PS_VAL]]
 // CHECK: store i32 {{.*}}, i32* [[I]]
   i=*pvS;
-// CHECK: [[PVS_VAL:%[a-zA-Z0-9_.]+]] = load i32** @pvS
-// CHECK: load volatile i32* [[PVS_VAL]]
+// CHECK: [[PVS_VAL:%[a-zA-Z0-9_.]+]] = load i32*, i32** @pvS
+// CHECK: load volatile i32, i32* [[PVS_VAL]]
 // CHECK: store i32 {{.*}}, i32* [[I]]
   i=A[2];
-// CHECK: load i32* getelementptr {{.*}} @A
+// CHECK: load i32, i32* getelementptr {{.*}} @A
 // CHECK: store i32 {{.*}}, i32* [[I]]
   i=vA[2];
-// CHECK: load volatile i32* getelementptr {{.*}} @vA
+// CHECK: load volatile i32, i32* getelementptr {{.*}} @vA
 // CHECK: store i32 {{.*}}, i32* [[I]]
   i=F.x;
-// CHECK: load i32* getelementptr {{.*}} @F
+// CHECK: load i32, i32* getelementptr {{.*}} @F
 // CHECK: store i32 {{.*}}, i32* [[I]]
   i=vF.x;
-// CHECK: load volatile i32* getelementptr {{.*}} @vF
+// CHECK: load volatile i32, i32* getelementptr {{.*}} @vF
 // CHECK: store i32 {{.*}}, i32* [[I]]
   i=F2.x;
-// CHECK: load i32* getelementptr {{.*}} @F2
+// CHECK: load i32, i32* getelementptr {{.*}} @F2
 // CHECK: store i32 {{.*}}, i32* [[I]]
   i=vF2.x;
-// CHECK: load volatile i32* getelementptr {{.*}} @vF2
+// CHECK: load volatile i32, i32* getelementptr {{.*}} @vF2
 // CHECK: store i32 {{.*}}, i32* [[I]]
   i=vpF2->x;
-// CHECK: [[VPF2_VAL:%[a-zA-Z0-9_.]+]] = load {{%[a-zA-Z0-9_.]+}}** @vpF2
+// CHECK: [[VPF2_VAL:%[a-zA-Z0-9_.]+]] = load {{%[a-zA-Z0-9_.]+}}*, {{%[a-zA-Z0-9_.]+}}** @vpF2
 // CHECK: [[ELT:%[a-zA-Z0-9_.]+]] = getelementptr {{.*}} [[VPF2_VAL]]
-// CHECK: load volatile i32* [[ELT]]
+// CHECK: load volatile i32, i32* [[ELT]]
 // CHECK: store i32 {{.*}}, i32* [[I]]
   i=F3.x.y;
-// CHECK: load i32* getelementptr {{.*}} @F3
+// CHECK: load i32, i32* getelementptr {{.*}} @F3
 // CHECK: store i32 {{.*}}, i32* [[I]]
   i=vF3.x.y;
-// CHECK: load volatile i32* getelementptr {{.*}} @vF3
+// CHECK: load volatile i32, i32* getelementptr {{.*}} @vF3
 // CHECK: store i32 {{.*}}, i32* [[I]]
   i=BF.x;
-// CHECK-IT: load i8* getelementptr {{.*}} @BF
-// CHECK-MS: load i32* getelementptr {{.*}} @BF
+// CHECK-IT: load i8, i8* getelementptr {{.*}} @BF
+// CHECK-MS: load i32, i32* getelementptr {{.*}} @BF
 // CHECK: store i32 {{.*}}, i32* [[I]]
   i=vBF.x;
-// CHECK-IT: load volatile i8* getelementptr {{.*}} @vBF
-// CHECK-MS: load volatile i32* getelementptr {{.*}} @vBF
+// CHECK-IT: load volatile i8, i8* getelementptr {{.*}} @vBF
+// CHECK-MS: load volatile i32, i32* getelementptr {{.*}} @vBF
 // CHECK: store i32 {{.*}}, i32* [[I]]
   i=V[3];
-// CHECK: load <4 x i32>* @V
+// CHECK: load <4 x i32>, <4 x i32>* @V
 // CHECK: store i32 {{.*}}, i32* [[I]]
   i=vV[3];
-// CHECK: load volatile <4 x i32>* @vV
+// CHECK: load volatile <4 x i32>, <4 x i32>* @vV
 // CHECK: store i32 {{.*}}, i32* [[I]]
   i=VE.yx[1];
-// CHECK: load <4 x i32>* @VE
+// CHECK: load <4 x i32>, <4 x i32>* @VE
 // CHECK: store i32 {{.*}}, i32* [[I]]
   i=vVE.zy[1];
-// CHECK: load volatile <4 x i32>* @vVE
+// CHECK: load volatile <4 x i32>, <4 x i32>* @vVE
 // CHECK: store i32 {{.*}}, i32* [[I]]
   i = aggFct().x; // Note: not volatile
   // N.b. Aggregate return is extremely target specific, all we can
@@ -110,92 +110,92 @@
 // CHECK-NOT: load volatile
 // CHECK: store i32 {{.*}}, i32* [[I]]
   i=vtS;
-// CHECK: load volatile i32* @vtS
+// CHECK: load volatile i32, i32* @vtS
 // CHECK: store i32 {{.*}}, i32* [[I]]
 
 
   // store
   S=i;
-// CHECK: load i32* [[I]]
+// CHECK: load i32, i32* [[I]]
 // CHECK: store i32 {{.*}}, i32* @S
   vS=i;
-// CHECK: load i32* [[I]]
+// CHECK: load i32, i32* [[I]]
 // CHECK: store volatile i32 {{.*}}, i32* @vS
   *pS=i;
-// CHECK: load i32* [[I]]
-// CHECK: [[PS_VAL:%[a-zA-Z0-9_.]+]] = load i32** @pS
+// CHECK: load i32, i32* [[I]]
+// CHECK: [[PS_VAL:%[a-zA-Z0-9_.]+]] = load i32*, i32** @pS
 // CHECK: store i32 {{.*}}, i32* [[PS_VAL]]
   *pvS=i;
-// CHECK: load i32* [[I]]
-// CHECK: [[PVS_VAL:%[a-zA-Z0-9_.]+]] = load i32** @pvS
+// CHECK: load i32, i32* [[I]]
+// CHECK: [[PVS_VAL:%[a-zA-Z0-9_.]+]] = load i32*, i32** @pvS
 // CHECK: store volatile i32 {{.*}}, i32* [[PVS_VAL]]
   A[2]=i;
-// CHECK: load i32* [[I]]
+// CHECK: load i32, i32* [[I]]
 // CHECK: store i32 {{.*}}, i32* getelementptr {{.*}} @A
   vA[2]=i;
-// CHECK: load i32* [[I]]
+// CHECK: load i32, i32* [[I]]
 // CHECK: store volatile i32 {{.*}}, i32* getelementptr {{.*}} @vA
   F.x=i;
-// CHECK: load i32* [[I]]
+// CHECK: load i32, i32* [[I]]
 // CHECK: store i32 {{.*}}, i32* getelementptr {{.*}} @F
   vF.x=i;
-// CHECK: load i32* [[I]]
+// CHECK: load i32, i32* [[I]]
 // CHECK: store volatile i32 {{.*}}, i32* getelementptr {{.*}} @vF
   F2.x=i;
-// CHECK: load i32* [[I]]
+// CHECK: load i32, i32* [[I]]
 // CHECK: store i32 {{.*}}, i32* getelementptr {{.*}} @F2
   vF2.x=i;
-// CHECK: load i32* [[I]]
+// CHECK: load i32, i32* [[I]]
 // CHECK: store volatile i32 {{.*}}, i32* getelementptr {{.*}} @vF2
   vpF2->x=i;
-// CHECK: load i32* [[I]]
-// CHECK: [[VPF2_VAL:%[a-zA-Z0-9_.]+]] = load {{%[a-zA-Z0-9._]+}}** @vpF2
+// CHECK: load i32, i32* [[I]]
+// CHECK: [[VPF2_VAL:%[a-zA-Z0-9_.]+]] = load {{%[a-zA-Z0-9._]+}}*, {{%[a-zA-Z0-9._]+}}** @vpF2
 // CHECK: [[ELT:%[a-zA-Z0-9_.]+]] = getelementptr {{.*}} [[VPF2_VAL]]
 // CHECK: store volatile i32 {{.*}}, i32* [[ELT]]
   vF3.x.y=i;
-// CHECK: load i32* [[I]]
+// CHECK: load i32, i32* [[I]]
 // CHECK: store volatile i32 {{.*}}, i32* getelementptr {{.*}} @vF3
   BF.x=i;
-// CHECK: load i32* [[I]]
-// CHECK-IT: load i8* getelementptr {{.*}} @BF
-// CHECK-MS: load i32* getelementptr {{.*}} @BF
+// CHECK: load i32, i32* [[I]]
+// CHECK-IT: load i8, i8* getelementptr {{.*}} @BF
+// CHECK-MS: load i32, i32* getelementptr {{.*}} @BF
 // CHECK-IT: store i8 {{.*}}, i8* getelementptr {{.*}} @BF
 // CHECK-MS: store i32 {{.*}}, i32* getelementptr {{.*}} @BF
   vBF.x=i;
-// CHECK: load i32* [[I]]
-// CHECK-IT: load volatile i8* getelementptr {{.*}} @vBF
-// CHECK-MS: load volatile i32* getelementptr {{.*}} @vBF
+// CHECK: load i32, i32* [[I]]
+// CHECK-IT: load volatile i8, i8* getelementptr {{.*}} @vBF
+// CHECK-MS: load volatile i32, i32* getelementptr {{.*}} @vBF
 // CHECK-IT: store volatile i8 {{.*}}, i8* getelementptr {{.*}} @vBF
 // CHECK-MS: store volatile i32 {{.*}}, i32* getelementptr {{.*}} @vBF
   V[3]=i;
-// CHECK: load i32* [[I]]
-// CHECK: load <4 x i32>* @V
+// CHECK: load i32, i32* [[I]]
+// CHECK: load <4 x i32>, <4 x i32>* @V
 // CHECK: store <4 x i32> {{.*}}, <4 x i32>* @V
   vV[3]=i;
-// CHECK: load i32* [[I]]
-// CHECK: load volatile <4 x i32>* @vV
+// CHECK: load i32, i32* [[I]]
+// CHECK: load volatile <4 x i32>, <4 x i32>* @vV
 // CHECK: store volatile <4 x i32> {{.*}}, <4 x i32>* @vV
   vtS=i;
-// CHECK: load i32* [[I]]
+// CHECK: load i32, i32* [[I]]
 // CHECK: store volatile i32 {{.*}}, i32* @vtS
 
   // other ops:
   ++S;
-// CHECK: load i32* @S
+// CHECK: load i32, i32* @S
 // CHECK: store i32 {{.*}}, i32* @S
   ++vS;
-// CHECK: load volatile i32* @vS
+// CHECK: load volatile i32, i32* @vS
 // CHECK: store volatile i32 {{.*}}, i32* @vS
   i+=S;
-// CHECK: load i32* @S
-// CHECK: load i32* [[I]]
+// CHECK: load i32, i32* @S
+// CHECK: load i32, i32* [[I]]
 // CHECK: store i32 {{.*}}, i32* [[I]]
   i+=vS;
-// CHECK: load volatile i32* @vS
-// CHECK: load i32* [[I]]
+// CHECK: load volatile i32, i32* @vS
+// CHECK: load i32, i32* [[I]]
 // CHECK: store i32 {{.*}}, i32* [[I]]
   ++vtS;
-// CHECK: load volatile i32* @vtS
+// CHECK: load volatile i32, i32* @vtS
 // CHECK: store volatile i32 {{.*}}, i32* @vtS
   (void)vF2;
   // From vF2 to a temporary
diff --git a/test/CodeGen/x86-atomic-long_double.c b/test/CodeGen/x86-atomic-long_double.c
index 22c7bd4..9857c67 100644
--- a/test/CodeGen/x86-atomic-long_double.c
+++ b/test/CodeGen/x86-atomic-long_double.c
@@ -4,12 +4,12 @@
 long double testinc(_Atomic long double *addr) {
   // CHECK-LABEL: @testinc
   // CHECK: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 8
-  // CHECK: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 8
+  // CHECK: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** [[ADDR_ADDR]], align 8
   // CHECK: [[INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128*
-  // CHECK: [[INT_VALUE:%.+]] = load atomic i128* [[INT_ADDR]] seq_cst, align 16
+  // CHECK: [[INT_VALUE:%.+]] = load atomic i128, i128* [[INT_ADDR]] seq_cst, align 16
   // CHECK: [[INT_LOAD_ADDR:%.+]] = bitcast x86_fp80* [[LD_ADDR:%.+]] to i128*
   // CHECK: store i128 [[INT_VALUE]], i128* [[INT_LOAD_ADDR]], align 16
-  // CHECK: [[LD_VALUE:%.+]] = load x86_fp80* [[LD_ADDR]], align 16
+  // CHECK: [[LD_VALUE:%.+]] = load x86_fp80, x86_fp80* [[LD_ADDR]], align 16
   // CHECK: br label %[[ATOMIC_OP:.+]]
   // CHECK: [[ATOMIC_OP]]
   // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
@@ -18,29 +18,29 @@
   // CHECK: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false)
   // CHECK: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 16
   // CHECK: [[OLD_INT_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i128*
-  // CHECK: [[OLD_INT:%.+]] = load i128* [[OLD_INT_ADDR]], align 16
+  // CHECK: [[OLD_INT:%.+]] = load i128, i128* [[OLD_INT_ADDR]], align 16
   // CHECK: [[NEW_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR:%.+]] to i8*
   // CHECK: call void @llvm.memset.p0i8.i64(i8* [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false)
   // CHECK: store x86_fp80 [[INC_VALUE]], x86_fp80* [[NEW_VALUE_ADDR]], align 16
   // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128*
-  // CHECK: [[NEW_INT:%.+]] = load i128* [[NEW_INT_ADDR]], align 16
+  // CHECK: [[NEW_INT:%.+]] = load i128, i128* [[NEW_INT_ADDR]], align 16
   // CHECK: [[OBJ_INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128*
   // CHECK: [[RES:%.+]] = cmpxchg i128* [[OBJ_INT_ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst
   // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0
   // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1
   // CHECK: [[OLD_VALUE_RES_INT_PTR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_RES_PTR:%.+]] to i128*
   // CHECK: store i128 [[OLD_VALUE]], i128* [[OLD_VALUE_RES_INT_PTR]], align 16
-  // CHECK: [[LD_VALUE]] = load x86_fp80* [[OLD_VALUE_RES_PTR]], align 16
+  // CHECK: [[LD_VALUE]] = load x86_fp80, x86_fp80* [[OLD_VALUE_RES_PTR]], align 16
   // CHECK: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
   // CHECK: [[ATOMIC_CONT]]
   // CHECK: ret x86_fp80 [[INC_VALUE]]
   // CHECK32-LABEL: @testinc
   // CHECK32: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 4
-  // CHECK32: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 4
+  // CHECK32: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** [[ADDR_ADDR]], align 4
   // CHECK32: [[VOID_PTR:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
   // CHECK32: [[TEMP_LD_PTR:%.+]] = bitcast x86_fp80* [[TEMP_LD_ADDR:%.+]] to i8*
   // CHECK32: call void @__atomic_load(i32 12, i8* [[VOID_PTR]], i8* [[TEMP_LD_PTR]], i32 5)
-  // CHECK32: [[LD_VALUE:%.+]] = load x86_fp80* [[TEMP_LD_ADDR]], align 4
+  // CHECK32: [[LD_VALUE:%.+]] = load x86_fp80, x86_fp80* [[TEMP_LD_ADDR]], align 4
   // CHECK32: br label %[[ATOMIC_OP:.+]]
   // CHECK32: [[ATOMIC_OP]]
   // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
@@ -54,8 +54,8 @@
   // CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
   // CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8*
   // CHECK32: [[DESIRED:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR]] to i8*
-  // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 7, i32 7)
-  // CHECK32: [[LD_VALUE:%.+]] = load x86_fp80* [[OLD_VALUE_ADDR]], align 4
+  // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 5, i32 5)
+  // CHECK32: [[LD_VALUE:%.+]] = load x86_fp80, x86_fp80* [[OLD_VALUE_ADDR]], align 4
   // CHECK32: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
   // CHECK32: [[ATOMIC_CONT]]
   // CHECK32: ret x86_fp80 [[INC_VALUE]]
@@ -66,12 +66,12 @@
 long double testdec(_Atomic long double *addr) {
   // CHECK-LABEL: @testdec
   // CHECK: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 8
-  // CHECK: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 8
+  // CHECK: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** [[ADDR_ADDR]], align 8
   // CHECK: [[INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128*
-  // CHECK: [[INT_VALUE:%.+]] = load atomic i128* [[INT_ADDR]] seq_cst, align 16
+  // CHECK: [[INT_VALUE:%.+]] = load atomic i128, i128* [[INT_ADDR]] seq_cst, align 16
   // CHECK: [[INT_LOAD_ADDR:%.+]] = bitcast x86_fp80* [[LD_ADDR:%.+]] to i128*
   // CHECK: store i128 [[INT_VALUE]], i128* [[INT_LOAD_ADDR]], align 16
-  // CHECK: [[ORIG_LD_VALUE:%.+]] = load x86_fp80* [[LD_ADDR]], align 16
+  // CHECK: [[ORIG_LD_VALUE:%.+]] = load x86_fp80, x86_fp80* [[LD_ADDR]], align 16
   // CHECK: br label %[[ATOMIC_OP:.+]]
   // CHECK: [[ATOMIC_OP]]
   // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[ORIG_LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
@@ -80,29 +80,29 @@
   // CHECK: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false)
   // CHECK: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 16
   // CHECK: [[OLD_INT_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i128*
-  // CHECK: [[OLD_INT:%.+]] = load i128* [[OLD_INT_ADDR]], align 16
+  // CHECK: [[OLD_INT:%.+]] = load i128, i128* [[OLD_INT_ADDR]], align 16
   // CHECK: [[NEW_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR:%.+]] to i8*
   // CHECK: call void @llvm.memset.p0i8.i64(i8* [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false)
   // CHECK: store x86_fp80 [[DEC_VALUE]], x86_fp80* [[NEW_VALUE_ADDR]], align 16
   // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128*
-  // CHECK: [[NEW_INT:%.+]] = load i128* [[NEW_INT_ADDR]], align 16
+  // CHECK: [[NEW_INT:%.+]] = load i128, i128* [[NEW_INT_ADDR]], align 16
   // CHECK: [[OBJ_INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128*
   // CHECK: [[RES:%.+]] = cmpxchg i128* [[OBJ_INT_ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst
   // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0
   // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1
   // CHECK: [[OLD_VALUE_RES_INT_PTR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_RES_PTR:%.+]] to i128*
   // CHECK: store i128 [[OLD_VALUE]], i128* [[OLD_VALUE_RES_INT_PTR]], align 16
-  // CHECK: [[LD_VALUE]] = load x86_fp80* [[OLD_VALUE_RES_PTR]], align 16
+  // CHECK: [[LD_VALUE]] = load x86_fp80, x86_fp80* [[OLD_VALUE_RES_PTR]], align 16
   // CHECK: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
   // CHECK: [[ATOMIC_CONT]]
   // CHECK: ret x86_fp80 [[ORIG_LD_VALUE]]
   // CHECK32-LABEL: @testdec
   // CHECK32: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 4
-  // CHECK32: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 4
+  // CHECK32: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** [[ADDR_ADDR]], align 4
   // CHECK32: [[VOID_PTR:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
   // CHECK32: [[TEMP_LD_PTR:%.+]] = bitcast x86_fp80* [[TEMP_LD_ADDR:%.+]] to i8*
   // CHECK32: call void @__atomic_load(i32 12, i8* [[VOID_PTR]], i8* [[TEMP_LD_PTR]], i32 5)
-  // CHECK32: [[ORIG_LD_VALUE:%.+]] = load x86_fp80* [[TEMP_LD_ADDR]], align 4
+  // CHECK32: [[ORIG_LD_VALUE:%.+]] = load x86_fp80, x86_fp80* [[TEMP_LD_ADDR]], align 4
   // CHECK32: br label %[[ATOMIC_OP:.+]]
   // CHECK32: [[ATOMIC_OP]]
   // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[ORIG_LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
@@ -116,8 +116,8 @@
   // CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
   // CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8*
   // CHECK32: [[DESIRED:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR]] to i8*
-  // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 7, i32 7)
-  // CHECK32: [[LD_VALUE]] = load x86_fp80* [[OLD_VALUE_ADDR]], align 4
+  // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 5, i32 5)
+  // CHECK32: [[LD_VALUE]] = load x86_fp80, x86_fp80* [[OLD_VALUE_ADDR]], align 4
   // CHECK32: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
   // CHECK32: [[ATOMIC_CONT]]
   // CHECK32: ret x86_fp80 [[ORIG_LD_VALUE]]
@@ -129,12 +129,12 @@
   *addr -= 25;
   // CHECK-LABEL: @testcompassign
   // CHECK: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 8
-  // CHECK: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 8
+  // CHECK: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** [[ADDR_ADDR]], align 8
   // CHECK: [[INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128*
-  // CHECK: [[INT_VALUE:%.+]] = load atomic i128* [[INT_ADDR]] seq_cst, align 16
+  // CHECK: [[INT_VALUE:%.+]] = load atomic i128, i128* [[INT_ADDR]] seq_cst, align 16
   // CHECK: [[INT_LOAD_ADDR:%.+]] = bitcast x86_fp80* [[LD_ADDR:%.+]] to i128*
   // CHECK: store i128 [[INT_VALUE]], i128* [[INT_LOAD_ADDR]], align 16
-  // CHECK: [[LD_VALUE:%.+]] = load x86_fp80* [[LD_ADDR]], align 16
+  // CHECK: [[LD_VALUE:%.+]] = load x86_fp80, x86_fp80* [[LD_ADDR]], align 16
   // CHECK: br label %[[ATOMIC_OP:.+]]
   // CHECK: [[ATOMIC_OP]]
   // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
@@ -143,35 +143,35 @@
   // CHECK: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false)
   // CHECK: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 16
   // CHECK: [[OLD_INT_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i128*
-  // CHECK: [[OLD_INT:%.+]] = load i128* [[OLD_INT_ADDR]], align 16
+  // CHECK: [[OLD_INT:%.+]] = load i128, i128* [[OLD_INT_ADDR]], align 16
   // CHECK: [[NEW_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR:%.+]] to i8*
   // CHECK: call void @llvm.memset.p0i8.i64(i8* [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false)
   // CHECK: store x86_fp80 [[SUB_VALUE]], x86_fp80* [[NEW_VALUE_ADDR]], align 16
   // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128*
-  // CHECK: [[NEW_INT:%.+]] = load i128* [[NEW_INT_ADDR]], align 16
+  // CHECK: [[NEW_INT:%.+]] = load i128, i128* [[NEW_INT_ADDR]], align 16
   // CHECK: [[OBJ_INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128*
   // CHECK: [[RES:%.+]] = cmpxchg i128* [[OBJ_INT_ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst
   // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0
   // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1
   // CHECK: [[OLD_VALUE_RES_INT_PTR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_RES_PTR:%.+]] to i128*
   // CHECK: store i128 [[OLD_VALUE]], i128* [[OLD_VALUE_RES_INT_PTR]], align 16
-  // CHECK: [[LD_VALUE]] = load x86_fp80* [[OLD_VALUE_RES_PTR]], align 16
+  // CHECK: [[LD_VALUE]] = load x86_fp80, x86_fp80* [[OLD_VALUE_RES_PTR]], align 16
   // CHECK: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
   // CHECK: [[ATOMIC_CONT]]
-  // CHECK: [[ADDR:%.+]] = load x86_fp80** %{{.+}}, align 8
+  // CHECK: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** %{{.+}}, align 8
   // CHECK: [[ADDR_INT:%.+]] = bitcast x86_fp80* [[ADDR]] to i128*
-  // CHECK: [[INT_VAL:%.+]] = load atomic i128* [[ADDR_INT]] seq_cst, align 16
+  // CHECK: [[INT_VAL:%.+]] = load atomic i128, i128* [[ADDR_INT]] seq_cst, align 16
   // CHECK: [[INT_LD_TEMP:%.+]] = bitcast x86_fp80* [[LD_TEMP:%.+]] to i128*
   // CHECK: store i128 [[INT_VAL]], i128* [[INT_LD_TEMP:%.+]], align 16
-  // CHECK: [[RET_VAL:%.+]] = load x86_fp80* [[LD_TEMP]], align 16
+  // CHECK: [[RET_VAL:%.+]] = load x86_fp80, x86_fp80* [[LD_TEMP]], align 16
   // CHECK: ret x86_fp80 [[RET_VAL]]
   // CHECK32-LABEL: @testcompassign
   // CHECK32: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 4
-  // CHECK32: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 4
+  // CHECK32: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** [[ADDR_ADDR]], align 4
   // CHECK32: [[VOID_PTR:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
   // CHECK32: [[TEMP_LD_PTR:%.+]] = bitcast x86_fp80* [[TEMP_LD_ADDR:%.+]] to i8*
   // CHECK32: call void @__atomic_load(i32 12, i8* [[VOID_PTR]], i8* [[TEMP_LD_PTR]], i32 5)
-  // CHECK32: [[LD_VALUE:%.+]] = load x86_fp80* [[TEMP_LD_ADDR]], align 4
+  // CHECK32: [[LD_VALUE:%.+]] = load x86_fp80, x86_fp80* [[TEMP_LD_ADDR]], align 4
   // CHECK32: br label %[[ATOMIC_OP:.+]]
   // CHECK32: [[ATOMIC_OP]]
   // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
@@ -185,15 +185,15 @@
   // CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
   // CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8*
   // CHECK32: [[DESIRED:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR]] to i8*
-  // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 7, i32 7)
-  // CHECK32: [[LD_VALUE]] = load x86_fp80* [[OLD_VALUE_ADDR]], align 4
+  // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 5, i32 5)
+  // CHECK32: [[LD_VALUE]] = load x86_fp80, x86_fp80* [[OLD_VALUE_ADDR]], align 4
   // CHECK32: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
   // CHECK32: [[ATOMIC_CONT]]
-  // CHECK32: [[ADDR:%.+]] = load x86_fp80** %{{.+}}, align 4
+  // CHECK32: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** %{{.+}}, align 4
   // CHECK32: [[VOID_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
   // CHECK32: [[VOID_GET_ADDR:%.+]] = bitcast x86_fp80* [[GET_ADDR:%.+]] to i8*
   // CHECK32: call void @__atomic_load(i32 12, i8* [[VOID_ADDR]], i8* [[VOID_GET_ADDR]], i32 5)
-  // CHECK32: [[RET_VAL:%.+]] = load x86_fp80* [[GET_ADDR]], align 4
+  // CHECK32: [[RET_VAL:%.+]] = load x86_fp80, x86_fp80* [[GET_ADDR]], align 4
   // CHECK32: ret x86_fp80 [[RET_VAL]]
   return *addr;
 }
@@ -201,17 +201,17 @@
 long double testassign(_Atomic long double *addr) {
   // CHECK-LABEL: @testassign
   // CHECK: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 8
-  // CHECK: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 8
+  // CHECK: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** [[ADDR_ADDR]], align 8
   // CHECK: [[STORE_TEMP_VOID_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR:%.+]] to i8*
   // CHECK: call void @llvm.memset.p0i8.i64(i8* [[STORE_TEMP_VOID_PTR]], i8 0, i64 16, i32 16, i1 false)
   // CHECK: store x86_fp80 {{.+}}, x86_fp80* [[STORE_TEMP_PTR]], align 16
   // CHECK: [[STORE_TEMP_INT_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR]] to i128*
-  // CHECK: [[STORE_TEMP_INT:%.+]] = load i128* [[STORE_TEMP_INT_PTR]], align 16
+  // CHECK: [[STORE_TEMP_INT:%.+]] = load i128, i128* [[STORE_TEMP_INT_PTR]], align 16
   // CHECK: [[ADDR_INT:%.+]] = bitcast x86_fp80* [[ADDR]] to i128*
   // CHECK: store atomic i128 [[STORE_TEMP_INT]], i128* [[ADDR_INT]] seq_cst, align 16
   // CHECK32-LABEL: @testassign
   // CHECK32: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 4
-  // CHECK32: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 4
+  // CHECK32: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** [[ADDR_ADDR]], align 4
   // CHECK32: [[STORE_TEMP_VOID_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR:%.+]] to i8*
   // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[STORE_TEMP_VOID_PTR]], i8 0, i64 12, i32 4, i1 false)
   // CHECK32: store x86_fp80 {{.+}}, x86_fp80* [[STORE_TEMP_PTR]], align 4
@@ -219,18 +219,18 @@
   // CHECK32: [[STORE_TEMP_VOID_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR]] to i8*
   // CHECK32: call void @__atomic_store(i32 12, i8* [[ADDR_VOID]], i8* [[STORE_TEMP_VOID_PTR]], i32 5)
   *addr = 115;
-  // CHECK: [[ADDR:%.+]] = load x86_fp80** %{{.+}}, align 8
+  // CHECK: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** %{{.+}}, align 8
   // CHECK: [[ADDR_INT:%.+]] = bitcast x86_fp80* [[ADDR]] to i128*
-  // CHECK: [[INT_VAL:%.+]] = load atomic i128* [[ADDR_INT]] seq_cst, align 16
+  // CHECK: [[INT_VAL:%.+]] = load atomic i128, i128* [[ADDR_INT]] seq_cst, align 16
   // CHECK: [[INT_LD_TEMP:%.+]] = bitcast x86_fp80* [[LD_TEMP:%.+]] to i128*
   // CHECK: store i128 [[INT_VAL]], i128* [[INT_LD_TEMP:%.+]], align 16
-  // CHECK: [[RET_VAL:%.+]] = load x86_fp80* [[LD_TEMP]], align 16
+  // CHECK: [[RET_VAL:%.+]] = load x86_fp80, x86_fp80* [[LD_TEMP]], align 16
   // CHECK: ret x86_fp80 [[RET_VAL]]
-  // CHECK32: [[ADDR:%.+]] = load x86_fp80** %{{.+}}, align 4
+  // CHECK32: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** %{{.+}}, align 4
   // CHECK32: [[VOID_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
   // CHECK32: [[VOID_LD_TEMP:%.+]] = bitcast x86_fp80* [[LD_TEMP:%.+]] to i8*
   // CHECK32: call void @__atomic_load(i32 12, i8* [[VOID_ADDR]], i8* [[VOID_LD_TEMP]], i32 5)
-  // CHECK32: [[RET_VAL:%.+]] = load x86_fp80* [[LD_TEMP]], align 4
+  // CHECK32: [[RET_VAL:%.+]] = load x86_fp80, x86_fp80* [[LD_TEMP]], align 4
   // CHECK32: ret x86_fp80 [[RET_VAL]]
 
   return *addr;
@@ -239,12 +239,12 @@
 long double test_volatile_inc(volatile _Atomic long double *addr) {
   // CHECK-LABEL: @test_volatile_inc
   // CHECK: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 8
-  // CHECK: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 8
+  // CHECK: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** [[ADDR_ADDR]], align 8
   // CHECK: [[INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128*
-  // CHECK: [[INT_VALUE:%.+]] = load atomic volatile i128* [[INT_ADDR]] seq_cst, align 16
+  // CHECK: [[INT_VALUE:%.+]] = load atomic volatile i128, i128* [[INT_ADDR]] seq_cst, align 16
   // CHECK: [[INT_LOAD_ADDR:%.+]] = bitcast x86_fp80* [[LD_ADDR:%.+]] to i128*
   // CHECK: store i128 [[INT_VALUE]], i128* [[INT_LOAD_ADDR]], align 16
-  // CHECK: [[LD_VALUE:%.+]] = load x86_fp80* [[LD_ADDR]], align 16
+  // CHECK: [[LD_VALUE:%.+]] = load x86_fp80, x86_fp80* [[LD_ADDR]], align 16
   // CHECK: br label %[[ATOMIC_OP:.+]]
   // CHECK: [[ATOMIC_OP]]
   // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
@@ -253,29 +253,29 @@
   // CHECK: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false)
   // CHECK: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 16
   // CHECK: [[OLD_INT_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i128*
-  // CHECK: [[OLD_INT:%.+]] = load i128* [[OLD_INT_ADDR]], align 16
+  // CHECK: [[OLD_INT:%.+]] = load i128, i128* [[OLD_INT_ADDR]], align 16
   // CHECK: [[NEW_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR:%.+]] to i8*
   // CHECK: call void @llvm.memset.p0i8.i64(i8* [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false)
   // CHECK: store x86_fp80 [[INC_VALUE]], x86_fp80* [[NEW_VALUE_ADDR]], align 16
   // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128*
-  // CHECK: [[NEW_INT:%.+]] = load i128* [[NEW_INT_ADDR]], align 16
+  // CHECK: [[NEW_INT:%.+]] = load i128, i128* [[NEW_INT_ADDR]], align 16
   // CHECK: [[OBJ_INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128*
   // CHECK: [[RES:%.+]] = cmpxchg volatile i128* [[OBJ_INT_ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst
   // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0
   // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1
   // CHECK: [[OLD_VALUE_RES_INT_PTR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_RES_PTR:%.+]] to i128*
   // CHECK: store i128 [[OLD_VALUE]], i128* [[OLD_VALUE_RES_INT_PTR]], align 16
-  // CHECK: [[LD_VALUE]] = load x86_fp80* [[OLD_VALUE_RES_PTR]], align 16
+  // CHECK: [[LD_VALUE]] = load x86_fp80, x86_fp80* [[OLD_VALUE_RES_PTR]], align 16
   // CHECK: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
   // CHECK: [[ATOMIC_CONT]]
   // CHECK: ret x86_fp80 [[INC_VALUE]]
   // CHECK32-LABEL: @test_volatile_inc
   // CHECK32: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 4
-  // CHECK32: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 4
+  // CHECK32: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** [[ADDR_ADDR]], align 4
   // CHECK32: [[VOID_PTR:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
   // CHECK32: [[TEMP_LD_PTR:%.+]] = bitcast x86_fp80* [[TEMP_LD_ADDR:%.+]] to i8*
   // CHECK32: call void @__atomic_load(i32 12, i8* [[VOID_PTR]], i8* [[TEMP_LD_PTR]], i32 5)
-  // CHECK32: [[LD_VALUE:%.+]] = load x86_fp80* [[TEMP_LD_ADDR]], align 4
+  // CHECK32: [[LD_VALUE:%.+]] = load x86_fp80, x86_fp80* [[TEMP_LD_ADDR]], align 4
   // CHECK32: br label %[[ATOMIC_OP:.+]]
   // CHECK32: [[ATOMIC_OP]]
   // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
@@ -289,8 +289,8 @@
   // CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
   // CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8*
   // CHECK32: [[DESIRED:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR]] to i8*
-  // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 7, i32 7)
-  // CHECK32: [[LD_VALUE]] = load x86_fp80* [[OLD_VALUE_ADDR]], align 4
+  // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 5, i32 5)
+  // CHECK32: [[LD_VALUE]] = load x86_fp80, x86_fp80* [[OLD_VALUE_ADDR]], align 4
   // CHECK32: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
   // CHECK32: [[ATOMIC_CONT]]
   // CHECK32: ret x86_fp80 [[INC_VALUE]]
@@ -300,12 +300,12 @@
 long double test_volatile_dec(volatile _Atomic long double *addr) {
   // CHECK-LABEL: @test_volatile_dec
   // CHECK: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 8
-  // CHECK: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 8
+  // CHECK: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** [[ADDR_ADDR]], align 8
   // CHECK: [[INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128*
-  // CHECK: [[INT_VALUE:%.+]] = load atomic volatile i128* [[INT_ADDR]] seq_cst, align 16
+  // CHECK: [[INT_VALUE:%.+]] = load atomic volatile i128, i128* [[INT_ADDR]] seq_cst, align 16
   // CHECK: [[INT_LOAD_ADDR:%.+]] = bitcast x86_fp80* [[LD_ADDR:%.+]] to i128*
   // CHECK: store i128 [[INT_VALUE]], i128* [[INT_LOAD_ADDR]], align 16
-  // CHECK: [[ORIG_LD_VALUE:%.+]] = load x86_fp80* [[LD_ADDR]], align 16
+  // CHECK: [[ORIG_LD_VALUE:%.+]] = load x86_fp80, x86_fp80* [[LD_ADDR]], align 16
   // CHECK: br label %[[ATOMIC_OP:.+]]
   // CHECK: [[ATOMIC_OP]]
   // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[ORIG_LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
@@ -314,29 +314,29 @@
   // CHECK: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false)
   // CHECK: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 16
   // CHECK: [[OLD_INT_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i128*
-  // CHECK: [[OLD_INT:%.+]] = load i128* [[OLD_INT_ADDR]], align 16
+  // CHECK: [[OLD_INT:%.+]] = load i128, i128* [[OLD_INT_ADDR]], align 16
   // CHECK: [[NEW_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR:%.+]] to i8*
   // CHECK: call void @llvm.memset.p0i8.i64(i8* [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false)
   // CHECK: store x86_fp80 [[DEC_VALUE]], x86_fp80* [[NEW_VALUE_ADDR]], align 16
   // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128*
-  // CHECK: [[NEW_INT:%.+]] = load i128* [[NEW_INT_ADDR]], align 16
+  // CHECK: [[NEW_INT:%.+]] = load i128, i128* [[NEW_INT_ADDR]], align 16
   // CHECK: [[OBJ_INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128*
   // CHECK: [[RES:%.+]] = cmpxchg volatile i128* [[OBJ_INT_ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst
   // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0
   // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1
   // CHECK: [[OLD_VALUE_RES_INT_PTR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_RES_PTR:%.+]] to i128*
   // CHECK: store i128 [[OLD_VALUE]], i128* [[OLD_VALUE_RES_INT_PTR]], align 16
-  // CHECK: [[LD_VALUE]] = load x86_fp80* [[OLD_VALUE_RES_PTR]], align 16
+  // CHECK: [[LD_VALUE]] = load x86_fp80, x86_fp80* [[OLD_VALUE_RES_PTR]], align 16
   // CHECK: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
   // CHECK: [[ATOMIC_CONT]]
   // CHECK: ret x86_fp80 [[ORIG_LD_VALUE]]
   // CHECK32-LABEL: @test_volatile_dec
   // CHECK32: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 4
-  // CHECK32: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 4
+  // CHECK32: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** [[ADDR_ADDR]], align 4
   // CHECK32: [[VOID_PTR:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
   // CHECK32: [[TEMP_LD_PTR:%.+]] = bitcast x86_fp80* [[TEMP_LD_ADDR:%.+]] to i8*
   // CHECK32: call void @__atomic_load(i32 12, i8* [[VOID_PTR]], i8* [[TEMP_LD_PTR]], i32 5)
-  // CHECK32: [[ORIG_LD_VALUE:%.+]] = load x86_fp80* [[TEMP_LD_ADDR]], align 4
+  // CHECK32: [[ORIG_LD_VALUE:%.+]] = load x86_fp80, x86_fp80* [[TEMP_LD_ADDR]], align 4
   // CHECK32: br label %[[ATOMIC_OP:.+]]
   // CHECK32: [[ATOMIC_OP]]
   // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[ORIG_LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
@@ -350,8 +350,8 @@
   // CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
   // CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8*
   // CHECK32: [[DESIRED:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR]] to i8*
-  // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 7, i32 7)
-  // CHECK32: [[LD_VALUE]] = load x86_fp80* [[OLD_VALUE_ADDR]], align 4
+  // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 5, i32 5)
+  // CHECK32: [[LD_VALUE]] = load x86_fp80, x86_fp80* [[OLD_VALUE_ADDR]], align 4
   // CHECK32: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
   // CHECK32: [[ATOMIC_CONT]]
   // CHECK32: ret x86_fp80 [[ORIG_LD_VALUE]]
@@ -362,12 +362,12 @@
   *addr -= 25;
   // CHECK-LABEL: @test_volatile_compassign
   // CHECK: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 8
-  // CHECK: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 8
+  // CHECK: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** [[ADDR_ADDR]], align 8
   // CHECK: [[INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128*
-  // CHECK: [[INT_VALUE:%.+]] = load atomic volatile i128* [[INT_ADDR]] seq_cst, align 16
+  // CHECK: [[INT_VALUE:%.+]] = load atomic volatile i128, i128* [[INT_ADDR]] seq_cst, align 16
   // CHECK: [[INT_LOAD_ADDR:%.+]] = bitcast x86_fp80* [[LD_ADDR:%.+]] to i128*
   // CHECK: store i128 [[INT_VALUE]], i128* [[INT_LOAD_ADDR]], align 16
-  // CHECK: [[LD_VALUE:%.+]] = load x86_fp80* [[LD_ADDR]], align 16
+  // CHECK: [[LD_VALUE:%.+]] = load x86_fp80, x86_fp80* [[LD_ADDR]], align 16
   // CHECK: br label %[[ATOMIC_OP:.+]]
   // CHECK: [[ATOMIC_OP]]
   // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
@@ -376,34 +376,34 @@
   // CHECK: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false)
   // CHECK: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 16
   // CHECK: [[OLD_INT_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i128*
-  // CHECK: [[OLD_INT:%.+]] = load i128* [[OLD_INT_ADDR]], align 16
+  // CHECK: [[OLD_INT:%.+]] = load i128, i128* [[OLD_INT_ADDR]], align 16
   // CHECK: [[NEW_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR:%.+]] to i8*
   // CHECK: call void @llvm.memset.p0i8.i64(i8* [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false)
   // CHECK: store x86_fp80 [[SUB_VALUE]], x86_fp80* [[NEW_VALUE_ADDR]], align 16
   // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128*
-  // CHECK: [[NEW_INT:%.+]] = load i128* [[NEW_INT_ADDR]], align 16
+  // CHECK: [[NEW_INT:%.+]] = load i128, i128* [[NEW_INT_ADDR]], align 16
   // CHECK: [[OBJ_INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128*
   // CHECK: [[RES:%.+]] = cmpxchg volatile i128* [[OBJ_INT_ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst
   // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0
   // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1
   // CHECK: [[OLD_VALUE_RES_INT_PTR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_RES_PTR:%.+]] to i128*
   // CHECK: store i128 [[OLD_VALUE]], i128* [[OLD_VALUE_RES_INT_PTR]], align 16
-  // CHECK: [[LD_VALUE]] = load x86_fp80* [[OLD_VALUE_RES_PTR]], align 16
+  // CHECK: [[LD_VALUE]] = load x86_fp80, x86_fp80* [[OLD_VALUE_RES_PTR]], align 16
   // CHECK: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
   // CHECK: [[ATOMIC_CONT]]
-  // CHECK: [[ADDR:%.+]] = load x86_fp80** %{{.+}}, align 8
+  // CHECK: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** %{{.+}}, align 8
   // CHECK: [[ADDR_INT:%.+]] = bitcast x86_fp80* [[ADDR]] to i128*
-  // CHECK: [[INT_VAL:%.+]] = load atomic volatile i128* [[ADDR_INT]] seq_cst, align 16
+  // CHECK: [[INT_VAL:%.+]] = load atomic volatile i128, i128* [[ADDR_INT]] seq_cst, align 16
   // CHECK: [[INT_LD_TEMP:%.+]] = bitcast x86_fp80* [[LD_TEMP:%.+]] to i128*
   // CHECK: store i128 [[INT_VAL]], i128* [[INT_LD_TEMP:%.+]], align 16
-  // CHECK: [[RET_VAL:%.+]] = load x86_fp80* [[LD_TEMP]], align 16
+  // CHECK: [[RET_VAL:%.+]] = load x86_fp80, x86_fp80* [[LD_TEMP]], align 16
   // CHECK32-LABEL: @test_volatile_compassign
   // CHECK32: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 4
-  // CHECK32: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 4
+  // CHECK32: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** [[ADDR_ADDR]], align 4
   // CHECK32: [[VOID_PTR:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
   // CHECK32: [[TEMP_LD_PTR:%.+]] = bitcast x86_fp80* [[TEMP_LD_ADDR:%.+]] to i8*
   // CHECK32: call void @__atomic_load(i32 12, i8* [[VOID_PTR]], i8* [[TEMP_LD_PTR]], i32 5)
-  // CHECK32: [[LD_VALUE:%.+]] = load x86_fp80* [[TEMP_LD_ADDR]], align 4
+  // CHECK32: [[LD_VALUE:%.+]] = load x86_fp80, x86_fp80* [[TEMP_LD_ADDR]], align 4
   // CHECK32: br label %[[ATOMIC_OP:.+]]
   // CHECK32: [[ATOMIC_OP]]
   // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
@@ -417,15 +417,15 @@
   // CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
   // CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8*
   // CHECK32: [[DESIRED:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR]] to i8*
-  // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 7, i32 7)
-  // CHECK32: [[LD_VALUE]] = load x86_fp80* [[OLD_VALUE_ADDR]], align 4
+  // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 5, i32 5)
+  // CHECK32: [[LD_VALUE]] = load x86_fp80, x86_fp80* [[OLD_VALUE_ADDR]], align 4
   // CHECK32: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
   // CHECK32: [[ATOMIC_CONT]]
-  // CHECK32: [[ADDR:%.+]] = load x86_fp80** %{{.+}}, align 4
+  // CHECK32: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** %{{.+}}, align 4
   // CHECK32: [[VOID_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
   // CHECK32: [[VOID_GET_ADDR:%.+]] = bitcast x86_fp80* [[GET_ADDR:%.+]] to i8*
   // CHECK32: call void @__atomic_load(i32 12, i8* [[VOID_ADDR]], i8* [[VOID_GET_ADDR]], i32 5)
-  // CHECK32: [[RET_VAL:%.+]] = load x86_fp80* [[GET_ADDR]], align 4
+  // CHECK32: [[RET_VAL:%.+]] = load x86_fp80, x86_fp80* [[GET_ADDR]], align 4
   // CHECK32: ret x86_fp80 [[RET_VAL]]
   return *addr;
 }
@@ -433,17 +433,17 @@
 long double test_volatile_assign(volatile _Atomic long double *addr) {
   // CHECK-LABEL: @test_volatile_assign
   // CHECK: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 8
-  // CHECK: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 8
+  // CHECK: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** [[ADDR_ADDR]], align 8
   // CHECK: [[STORE_TEMP_VOID_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR:%.+]] to i8*
   // CHECK: call void @llvm.memset.p0i8.i64(i8* [[STORE_TEMP_VOID_PTR]], i8 0, i64 16, i32 16, i1 false)
   // CHECK: store x86_fp80 {{.+}}, x86_fp80* [[STORE_TEMP_PTR]], align 16
   // CHECK: [[STORE_TEMP_INT_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR]] to i128*
-  // CHECK: [[STORE_TEMP_INT:%.+]] = load i128* [[STORE_TEMP_INT_PTR]], align 16
+  // CHECK: [[STORE_TEMP_INT:%.+]] = load i128, i128* [[STORE_TEMP_INT_PTR]], align 16
   // CHECK: [[ADDR_INT:%.+]] = bitcast x86_fp80* [[ADDR]] to i128*
   // CHECK: store atomic volatile i128 [[STORE_TEMP_INT]], i128* [[ADDR_INT]] seq_cst, align 16
   // CHECK32-LABEL: @test_volatile_assign
   // CHECK32: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 4
-  // CHECK32: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 4
+  // CHECK32: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** [[ADDR_ADDR]], align 4
   // CHECK32: [[STORE_TEMP_VOID_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR:%.+]] to i8*
   // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[STORE_TEMP_VOID_PTR]], i8 0, i64 12, i32 4, i1 false)
   // CHECK32: store x86_fp80 {{.+}}, x86_fp80* [[STORE_TEMP_PTR]], align 4
@@ -451,18 +451,18 @@
   // CHECK32: [[STORE_TEMP_VOID_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR]] to i8*
   // CHECK32: call void @__atomic_store(i32 12, i8* [[ADDR_VOID]], i8* [[STORE_TEMP_VOID_PTR]], i32 5)
   *addr = 115;
-  // CHECK: [[ADDR:%.+]] = load x86_fp80** %{{.+}}, align 8
+  // CHECK: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** %{{.+}}, align 8
   // CHECK: [[ADDR_INT:%.+]] = bitcast x86_fp80* [[ADDR]] to i128*
-  // CHECK: [[INT_VAL:%.+]] = load atomic volatile i128* [[ADDR_INT]] seq_cst, align 16
+  // CHECK: [[INT_VAL:%.+]] = load atomic volatile i128, i128* [[ADDR_INT]] seq_cst, align 16
   // CHECK: [[INT_LD_TEMP:%.+]] = bitcast x86_fp80* [[LD_TEMP:%.+]] to i128*
   // CHECK: store i128 [[INT_VAL]], i128* [[INT_LD_TEMP:%.+]], align 16
-  // CHECK: [[RET_VAL:%.+]] = load x86_fp80* [[LD_TEMP]], align 16
+  // CHECK: [[RET_VAL:%.+]] = load x86_fp80, x86_fp80* [[LD_TEMP]], align 16
   // CHECK: ret x86_fp80 [[RET_VAL]]
-  // CHECK32: [[ADDR:%.+]] = load x86_fp80** %{{.+}}, align 4
+  // CHECK32: [[ADDR:%.+]] = load x86_fp80*, x86_fp80** %{{.+}}, align 4
   // CHECK32: [[VOID_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i8*
   // CHECK32: [[VOID_LD_TEMP:%.+]] = bitcast x86_fp80* [[LD_TEMP:%.+]] to i8*
   // CHECK32: call void @__atomic_load(i32 12, i8* [[VOID_ADDR]], i8* [[VOID_LD_TEMP]], i32 5)
-  // CHECK32: [[RET_VAL:%.+]] = load x86_fp80* [[LD_TEMP]], align 4
+  // CHECK32: [[RET_VAL:%.+]] = load x86_fp80, x86_fp80* [[LD_TEMP]], align 4
   // CHECK32: ret x86_fp80 [[RET_VAL]]
 
   return *addr;
diff --git a/test/CodeGen/x86_64-arguments.c b/test/CodeGen/x86_64-arguments.c
index e82e7b0..a0e30c6 100644
--- a/test/CodeGen/x86_64-arguments.c
+++ b/test/CodeGen/x86_64-arguments.c
@@ -402,8 +402,8 @@
   test49_helper(d, e);
 }
 // CHECK-LABEL:    define void @test49(
-// CHECK:      [[T0:%.*]] = load double*
-// CHECK-NEXT: [[T1:%.*]] = load double*
+// CHECK:      [[T0:%.*]] = load double, double*
+// CHECK-NEXT: [[T1:%.*]] = load double, double*
 // CHECK-NEXT: call void (double, ...)* @test49_helper(double [[T0]], double [[T1]])
 
 void test50_helper();
@@ -411,8 +411,8 @@
   test50_helper(d, e);
 }
 // CHECK-LABEL:    define void @test50(
-// CHECK:      [[T0:%.*]] = load double*
-// CHECK-NEXT: [[T1:%.*]] = load double*
+// CHECK:      [[T0:%.*]] = load double, double*
+// CHECK-NEXT: [[T1:%.*]] = load double, double*
 // CHECK-NEXT: call void (double, double, ...)* bitcast (void (...)* @test50_helper to void (double, double, ...)*)(double [[T0]], double [[T1]])
 
 struct test51_s { __uint128_t intval; };
@@ -424,8 +424,8 @@
 // CHECK: [[TMP_ADDR:%.*]] = alloca [[STRUCT_TEST51:%.*]], align 16
 // CHECK: br i1
 // CHECK: [[REG_SAVE_AREA_PTR:%.*]] = getelementptr inbounds {{.*}}, i32 0, i32 3
-// CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load i8** [[REG_SAVE_AREA_PTR]]
-// CHECK-NEXT: [[VALUE_ADDR:%.*]] = getelementptr i8* [[REG_SAVE_AREA]], i32 {{.*}}
+// CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load i8*, i8** [[REG_SAVE_AREA_PTR]]
+// CHECK-NEXT: [[VALUE_ADDR:%.*]] = getelementptr i8, i8* [[REG_SAVE_AREA]], i32 {{.*}}
 // CHECK-NEXT: [[CASTED_VALUE_ADDR:%.*]] = bitcast i8* [[VALUE_ADDR]] to [[STRUCT_TEST51]]
 // CHECK-NEXT: [[CASTED_TMP_ADDR:%.*]] = bitcast [[STRUCT_TEST51]]* [[TMP_ADDR]] to i8*
 // CHECK-NEXT: [[RECASTED_VALUE_ADDR:%.*]] = bitcast [[STRUCT_TEST51]]* [[CASTED_VALUE_ADDR]] to i8*
diff --git a/test/CodeGen/xcore-abi.c b/test/CodeGen/xcore-abi.c
index 6dbc44b..23fb441 100644
--- a/test/CodeGen/xcore-abi.c
+++ b/test/CodeGen/xcore-abi.c
@@ -31,52 +31,52 @@
 
   char* v1 = va_arg (ap, char*);
   f(v1);
-  // CHECK: [[I:%[a-z0-9]+]] = load i8** [[AP]]
+  // CHECK: [[I:%[a-z0-9]+]] = load i8*, i8** [[AP]]
   // CHECK: [[P:%[a-z0-9]+]] = bitcast i8* [[I]] to i8**
-  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8* [[I]], i32 4
+  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8, i8* [[I]], i32 4
   // CHECK: store i8* [[IN]], i8** [[AP]]
-  // CHECK: [[V1:%[a-z0-9]+]] = load i8** [[P]]
+  // CHECK: [[V1:%[a-z0-9]+]] = load i8*, i8** [[P]]
   // CHECK: store i8* [[V1]], i8** [[V:%[a-z0-9]+]], align 4
-  // CHECK: [[V2:%[a-z0-9]+]] = load i8** [[V]], align 4
+  // CHECK: [[V2:%[a-z0-9]+]] = load i8*, i8** [[V]], align 4
   // CHECK: call void @f(i8* [[V2]])
 
   char v2 = va_arg (ap, char); // expected-warning{{second argument to 'va_arg' is of promotable type 'char'}}
   f(&v2);
-  // CHECK: [[I:%[a-z0-9]+]] = load i8** [[AP]]
-  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8* [[I]], i32 4
+  // CHECK: [[I:%[a-z0-9]+]] = load i8*, i8** [[AP]]
+  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8, i8* [[I]], i32 4
   // CHECK: store i8* [[IN]], i8** [[AP]]
-  // CHECK: [[V1:%[a-z0-9]+]] = load i8* [[I]]
+  // CHECK: [[V1:%[a-z0-9]+]] = load i8, i8* [[I]]
   // CHECK: store i8 [[V1]], i8* [[V:%[a-z0-9]+]], align 1
   // CHECK: call void @f(i8* [[V]])
 
   int v3 = va_arg (ap, int);
   f(&v3);
-  // CHECK: [[I:%[a-z0-9]+]] = load i8** [[AP]]
+  // CHECK: [[I:%[a-z0-9]+]] = load i8*, i8** [[AP]]
   // CHECK: [[P:%[a-z0-9]+]] = bitcast i8* [[I]] to i32*
-  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8* [[I]], i32 4
+  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8, i8* [[I]], i32 4
   // CHECK: store i8* [[IN]], i8** [[AP]]
-  // CHECK: [[V1:%[a-z0-9]+]] = load i32* [[P]]
+  // CHECK: [[V1:%[a-z0-9]+]] = load i32, i32* [[P]]
   // CHECK: store i32 [[V1]], i32* [[V:%[a-z0-9]+]], align 4
   // CHECK: [[V2:%[a-z0-9]+]] = bitcast i32* [[V]] to i8*
   // CHECK: call void @f(i8* [[V2]])
 
   long long int v4 = va_arg (ap, long long int);
   f(&v4);
-  // CHECK: [[I:%[a-z0-9]+]] = load i8** [[AP]]
+  // CHECK: [[I:%[a-z0-9]+]] = load i8*, i8** [[AP]]
   // CHECK: [[P:%[a-z0-9]+]] = bitcast i8* [[I]] to i64*
-  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8* [[I]], i32 8
+  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8, i8* [[I]], i32 8
   // CHECK: store i8* [[IN]], i8** [[AP]]
-  // CHECK: [[V1:%[a-z0-9]+]] = load i64* [[P]]
+  // CHECK: [[V1:%[a-z0-9]+]] = load i64, i64* [[P]]
   // CHECK: store i64 [[V1]], i64* [[V:%[a-z0-9]+]], align 4
   // CHECK:[[V2:%[a-z0-9]+]] = bitcast i64* [[V]] to i8*
   // CHECK: call void @f(i8* [[V2]])
 
   struct x v5 = va_arg (ap, struct x);  // typical aggregate type
   f(&v5);
-  // CHECK: [[I:%[a-z0-9]+]] = load i8** [[AP]]
+  // CHECK: [[I:%[a-z0-9]+]] = load i8*, i8** [[AP]]
   // CHECK: [[I2:%[a-z0-9]+]] = bitcast i8* [[I]] to %struct.x**
-  // CHECK: [[P:%[a-z0-9]+]] = load %struct.x** [[I2]]
-  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8* [[I]], i32 4
+  // CHECK: [[P:%[a-z0-9]+]] = load %struct.x*, %struct.x** [[I2]]
+  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8, i8* [[I]], i32 4
   // CHECK: store i8* [[IN]], i8** [[AP]]
   // CHECK: [[V1:%[a-z0-9]+]] = bitcast %struct.x* [[V:%[a-z0-9]+]] to i8*
   // CHECK: [[P1:%[a-z0-9]+]] = bitcast %struct.x* [[P]] to i8*
@@ -86,27 +86,27 @@
 
   int* v6 = va_arg (ap, int[4]);  // an unusual aggregate type
   f(v6);
-  // CHECK: [[I:%[a-z0-9]+]] = load i8** [[AP]]
+  // CHECK: [[I:%[a-z0-9]+]] = load i8*, i8** [[AP]]
   // CHECK: [[I2:%[a-z0-9]+]] = bitcast i8* [[I]] to [4 x i32]**
-  // CHECK: [[P:%[a-z0-9]+]] = load [4 x i32]** [[I2]]
-  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8* [[I]], i32 4
+  // CHECK: [[P:%[a-z0-9]+]] = load [4 x i32]*, [4 x i32]** [[I2]]
+  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8, i8* [[I]], i32 4
   // CHECK: store i8* [[IN]], i8** [[AP]]
   // CHECK: [[V1:%[a-z0-9]+]] = bitcast [4 x i32]* [[V0:%[a-z0-9]+]] to i8*
   // CHECK: [[P1:%[a-z0-9]+]] = bitcast [4 x i32]* [[P]] to i8*
   // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[V1]], i8* [[P1]], i32 16, i32 4, i1 false)
-  // CHECK: [[V2:%[a-z0-9]+]] = getelementptr inbounds [4 x i32]* [[V0]], i32 0, i32 0
+  // CHECK: [[V2:%[a-z0-9]+]] = getelementptr inbounds [4 x i32], [4 x i32]* [[V0]], i32 0, i32 0
   // CHECK: store i32* [[V2]], i32** [[V:%[a-z0-9]+]], align 4
-  // CHECK: [[V3:%[a-z0-9]+]] = load i32** [[V]], align 4
+  // CHECK: [[V3:%[a-z0-9]+]] = load i32*, i32** [[V]], align 4
   // CHECK: [[V4:%[a-z0-9]+]] = bitcast i32* [[V3]] to i8*
   // CHECK: call void @f(i8* [[V4]])
 
   double v7 = va_arg (ap, double);
   f(&v7);
-  // CHECK: [[I:%[a-z0-9]+]] = load i8** [[AP]]
+  // CHECK: [[I:%[a-z0-9]+]] = load i8*, i8** [[AP]]
   // CHECK: [[P:%[a-z0-9]+]] = bitcast i8* [[I]] to double*
-  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8* [[I]], i32 8
+  // CHECK: [[IN:%[a-z0-9]+]] = getelementptr i8, i8* [[I]], i32 8
   // CHECK: store i8* [[IN]], i8** [[AP]]
-  // CHECK: [[V1:%[a-z0-9]+]] = load double* [[P]]
+  // CHECK: [[V1:%[a-z0-9]+]] = load double, double* [[P]]
   // CHECK: store double [[V1]], double* [[V:%[a-z0-9]+]], align 4
   // CHECK: [[V2:%[a-z0-9]+]] = bitcast double* [[V]] to i8*
   // CHECK: call void @f(i8* [[V2]])