Revert "Change memcpy/memset/memmove to have dest and source alignments."

This reverts commit r253511.

This likely broke the bots in
http://lab.llvm.org:8011/builders/clang-ppc64-elf-linux2/builds/20202
http://bb.pgr.jp/builders/clang-3stage-i686-linux/builds/3787

llvm-svn: 253543
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll b/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll
index b3884ff..851e6dc 100644
--- a/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll
@@ -180,11 +180,11 @@
   %maskcond = icmp eq i64 %maskedptr, 0
   tail call void @llvm.assume(i1 %maskcond)
   %0 = bitcast i32* %a to i8*
-  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i32 4, i1 false)
   ret i32 undef
 
 ; CHECK-LABEL: @moo
-; CHECK: @llvm.memset.p0i8.i64(i8* align 32 %0, i8 0, i64 64, i1 false)
+; CHECK: @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i32 32, i1 false)
 ; CHECK: ret i32 undef
 }
 
@@ -200,16 +200,16 @@
   tail call void @llvm.assume(i1 %maskcond4)
   %0 = bitcast i32* %a to i8*
   %1 = bitcast i32* %b to i8*
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i32 4, i1 false)
   ret i32 undef
 
 ; CHECK-LABEL: @moo2
-; CHECK: @llvm.memcpy.p0i8.p0i8.i64(i8* align 32 %0, i8* align 32 %1, i64 64, i1 false)
+; CHECK: @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i32 32, i1 false)
 ; CHECK: ret i32 undef
 }
 
 declare void @llvm.assume(i1) nounwind
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll b/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll
index 01e71e5..2edc2e9 100644
--- a/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll
@@ -180,11 +180,11 @@
   %maskcond = icmp eq i64 %maskedptr, 0
   tail call void @llvm.assume(i1 %maskcond)
   %0 = bitcast i32* %a to i8*
-  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i32 4, i1 false)
   ret i32 undef
 
 ; CHECK-LABEL: @moo
-; CHECK: @llvm.memset.p0i8.i64(i8* align 32 %0, i8 0, i64 64, i1 false)
+; CHECK: @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i32 32, i1 false)
 ; CHECK: ret i32 undef
 }
 
@@ -200,16 +200,16 @@
   tail call void @llvm.assume(i1 %maskcond4)
   %0 = bitcast i32* %a to i8*
   %1 = bitcast i32* %b to i8*
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i32 4, i1 false)
   ret i32 undef
 
 ; CHECK-LABEL: @moo2
-; CHECK: @llvm.memcpy.p0i8.p0i8.i64(i8* align 32 %0, i8* align 32 %1, i64 64, i1 false)
+; CHECK: @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i32 32, i1 false)
 ; CHECK: ret i32 undef
 }
 
 declare void @llvm.assume(i1) nounwind
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
diff --git a/llvm/test/Transforms/BBVectorize/X86/wr-aliases.ll b/llvm/test/Transforms/BBVectorize/X86/wr-aliases.ll
index cace3da..a6ea27f 100644
--- a/llvm/test/Transforms/BBVectorize/X86/wr-aliases.ll
+++ b/llvm/test/Transforms/BBVectorize/X86/wr-aliases.ll
@@ -5,7 +5,7 @@
 %class.QBezier.15 = type { double, double, double, double, double, double, double, double }
 
 ; Function Attrs: nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #0
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0
 
 ; Function Attrs: uwtable
 declare fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval nocapture readonly align 8) #1
@@ -55,7 +55,7 @@
   %v2 = bitcast %class.QBezier.15* %agg.tmp56.i to i8*
   call void @llvm.lifetime.start(i64 64, i8* %v2)
   %v3 = bitcast [10 x %class.QBezier.15]* %beziers to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v0, i8* %v3, i64 64, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v0, i8* %v3, i64 64, i32 8, i1 false)
   call fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval align 8 %agg.tmp.i)
   %x2.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 2
   %v4 = load double, double* %x2.i, align 16
@@ -130,9 +130,9 @@
   %y454.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 7
   store double %mul52.i, double* %y454.i, align 8
   %v22 = bitcast %class.QBezier.15* %add.ptr to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v1, i8* %v22, i64 64, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v1, i8* %v22, i64 64, i32 8, i1 false)
   call fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval align 8 %agg.tmp55.i)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v2, i8* %v3, i64 64, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v2, i8* %v3, i64 64, i32 8, i1 false)
   call fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval align 8 %agg.tmp56.i)
   call void @llvm.lifetime.end(i64 64, i8* %v0)
   call void @llvm.lifetime.end(i64 64, i8* %v1)
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/memset_chk-simplify-nobuiltin.ll b/llvm/test/Transforms/CodeGenPrepare/X86/memset_chk-simplify-nobuiltin.ll
index f4c1af5..1e12c01 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/memset_chk-simplify-nobuiltin.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/memset_chk-simplify-nobuiltin.ll
@@ -9,7 +9,7 @@
 ; - TLI::has (always returns false thanks to -disable-simplify-libcalls)
 
 ; CHECK-NOT: _chk
-; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 %len, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %len, i32 1, i1 false)
 define void @test_nobuiltin(i8* %dst, i64 %len) {
   call i8* @__memset_chk(i8* %dst, i32 0, i64 %len, i64 -1) nobuiltin
   ret void
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/non-null.ll b/llvm/test/Transforms/CorrelatedValuePropagation/non-null.ll
index e5882f12..6fb4cb6 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/non-null.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/non-null.ll
@@ -30,10 +30,10 @@
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
 define void @test4(i8* %dest, i8* %src) {
 ; CHECK: test4
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i32 1, i1 false)
   br label %bb
 bb:
   icmp ne i8* %dest, null
@@ -42,10 +42,10 @@
   ret void
 }
 
-declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i1)
+declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
 define void @test5(i8* %dest, i8* %src) {
 ; CHECK: test5
-  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i32 1, i1 false)
   br label %bb
 bb:
   icmp ne i8* %dest, null
@@ -54,10 +54,10 @@
   ret void
 }
 
-declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1)
+declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1)
 define void @test6(i8* %dest) {
 ; CHECK: test6
-  call void @llvm.memset.p0i8.i32(i8* %dest, i8 255, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %dest, i8 255, i32 1, i32 1, i1 false)
   br label %bb
 bb:
   icmp ne i8* %dest, null
@@ -67,7 +67,7 @@
 
 define void @test7(i8* %dest, i8* %src, i32 %len) {
 ; CHECK: test7
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 1, i1 false)
   br label %bb
 bb:
   %KEEP1 = icmp ne i8* %dest, null
@@ -77,10 +77,10 @@
   ret void
 }
 
-declare void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1) *, i8 addrspace(1) *, i32, i1)
+declare void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1) *, i8 addrspace(1) *, i32, i32, i1)
 define void @test8(i8 addrspace(1) * %dest, i8 addrspace(1) * %src) {
 ; CHECK: test8
-  call void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1) * %dest, i8 addrspace(1) * %src, i32 1, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1) * %dest, i8 addrspace(1) * %src, i32 1, i32 1, i1 false)
   br label %bb
 bb:
   %KEEP1 = icmp ne i8 addrspace(1) * %dest, null
@@ -92,7 +92,7 @@
 
 define void @test9(i8* %dest, i8* %src) {
 ; CHECK: test9
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i1 true)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i32 1, i1 true)
   br label %bb
 bb:
   %KEEP1 = icmp ne i8* %dest, null
diff --git a/llvm/test/Transforms/DeadStoreElimination/2011-09-06-MemCpy.ll b/llvm/test/Transforms/DeadStoreElimination/2011-09-06-MemCpy.ll
index 665d772..d30e9a2 100644
--- a/llvm/test/Transforms/DeadStoreElimination/2011-09-06-MemCpy.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/2011-09-06-MemCpy.ll
@@ -60,7 +60,7 @@
 %struct.AttrListPtr = type { %struct.AttributeListImpl* }
 %struct.AttributeListImpl = type opaque
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
 ; CHECK: _ZSt9iter_swapIPSt4pairIPN4llvm10BasicBlockEjES5_EvT_T0_
 ; CHECK: store
@@ -78,8 +78,8 @@
   store i32 %5, i32* %3, align 8
   %6 = bitcast %struct.pair.162* %__a to i8*
   %7 = bitcast %struct.pair.162* %__b to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %6, i8* %7, i64 12, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %6, i8* %7, i64 12, i32 1, i1 false)
   %8 = bitcast %struct.pair.162* %memtmp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* %8, i64 12, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* %8, i64 12, i32 1, i1 false)
   ret void
 }
diff --git a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll b/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
index 863fc9e..de7a4cc 100644
--- a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
@@ -12,8 +12,8 @@
 entry:
   %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
   %p3 = bitcast i32* %arrayidx0 to i8*
-; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 24, i1 false)
-  call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 24, i32 4, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
   %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 7
   store i32 1, i32* %arrayidx1, align 4
   ret void
@@ -23,8 +23,8 @@
 ; CHECK-LABEL: @write28to32(
 entry:
   %p3 = bitcast i32* %p to i8*
-; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i1 false)
-  call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 4, i1 false)
   %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 7
   store i32 1, i32* %arrayidx1, align 4
   ret void
@@ -34,8 +34,8 @@
 ; CHECK-LABEL: @dontwrite28to32memset(
 entry:
   %p3 = bitcast i32* %p to i8*
-; CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 %p3, i8 0, i64 32, i1 false)
-  call void @llvm.memset.p0i8.i64(i8* align 16 %p3, i8 0, i64 32, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 16, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 16, i1 false)
   %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 7
   store i32 1, i32* %arrayidx1, align 4
   ret void
@@ -45,8 +45,8 @@
 ; CHECK-LABEL: @write32to36(
 entry:
   %0 = bitcast %struct.vec2plusi* %p to i8*
-; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 32, i1 false)
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 36, i1 false)
+; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 32, i32 16, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 36, i32 16, i1 false)
   %c = getelementptr inbounds %struct.vec2plusi, %struct.vec2plusi* %p, i64 0, i32 2
   store i32 1, i32* %c, align 4
   ret void
@@ -56,8 +56,8 @@
 ; CHECK-LABEL: @write16to32(
 entry:
   %0 = bitcast %struct.vec2* %p to i8*
-; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 16, i1 false)
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i1 false)
+; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 16, i32 16, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
   %c = getelementptr inbounds %struct.vec2, %struct.vec2* %p, i64 0, i32 1
   store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* %c, align 4
   ret void
@@ -67,15 +67,15 @@
 ; CHECK-LABEL: @dontwrite28to32memcpy(
 entry:
   %0 = bitcast %struct.vec2* %p to i8*
-; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i1 false)
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i1 false)
+; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
   %arrayidx1 = getelementptr inbounds %struct.vec2, %struct.vec2* %p, i64 0, i32 0, i64 7
   store i32 1, i32* %arrayidx1, align 4
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
 
 %struct.trapframe = type { i64, i64, i64 }
 
@@ -87,8 +87,8 @@
   %add.ptr = getelementptr inbounds %struct.trapframe, %struct.trapframe* %0, i64 -1
   %1 = bitcast %struct.trapframe* %add.ptr to i8*
   %2 = bitcast %struct.trapframe* %md_regs to i8*
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 24, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 24, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 24, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 24, i32 1, i1 false)
   %tf_trapno = getelementptr inbounds %struct.trapframe, %struct.trapframe* %0, i64 -1, i32 1
   store i64 3, i64* %tf_trapno, align 8
   ret void
diff --git a/llvm/test/Transforms/DeadStoreElimination/crash.ll b/llvm/test/Transforms/DeadStoreElimination/crash.ll
index 9276569..78cb842 100644
--- a/llvm/test/Transforms/DeadStoreElimination/crash.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/crash.ll
@@ -36,11 +36,11 @@
   %6 = getelementptr inbounds i16, i16* %2, i64 undef  ; <i16*> [#uses=1]
   store i16 undef, i16* %6, align 2
   %7 = getelementptr inbounds i8, i8* %5, i64 undef   ; <i8*> [#uses=1]
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* undef, i64 undef, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* undef, i64 undef, i32 1, i1 false)
   unreachable
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
 
 ; rdar://7635088
diff --git a/llvm/test/Transforms/DeadStoreElimination/cs-cs-aliasing.ll b/llvm/test/Transforms/DeadStoreElimination/cs-cs-aliasing.ll
index 2086bc9..30bb96f 100644
--- a/llvm/test/Transforms/DeadStoreElimination/cs-cs-aliasing.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/cs-cs-aliasing.ll
@@ -9,7 +9,7 @@
 %union.anon = type { i64, [8 x i8] }
 
 ; Function Attrs: nounwind
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #0
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #0
 
 ; Function Attrs: noinline nounwind readonly uwtable
 declare zeroext i1 @callee_takes_string(%class.basic_string* nonnull) #1 align 2
@@ -61,10 +61,10 @@
 ; CHECK: store i8 0, i8* %tmp14, align 1
 
   %tmp17 = call zeroext i1 @callee_takes_string(%class.basic_string* nonnull %tmp1)
-  call void @llvm.memset.p0i8.i64(i8* %tmp11, i8 -51, i64 16, i1 false) #0
-  call void @llvm.memset.p0i8.i64(i8* %tmp15, i8 -51, i64 32, i1 false) #0
-  call void @llvm.memset.p0i8.i64(i8* %tmp4, i8 -51, i64 16, i1 false) #0
-  call void @llvm.memset.p0i8.i64(i8* %tmp8, i8 -51, i64 32, i1 false) #0
+  call void @llvm.memset.p0i8.i64(i8* %tmp11, i8 -51, i64 16, i32 8, i1 false) #0
+  call void @llvm.memset.p0i8.i64(i8* %tmp15, i8 -51, i64 32, i32 8, i1 false) #0
+  call void @llvm.memset.p0i8.i64(i8* %tmp4, i8 -51, i64 16, i32 8, i1 false) #0
+  call void @llvm.memset.p0i8.i64(i8* %tmp8, i8 -51, i64 32, i32 8, i1 false) #0
   ret i1 %tmp17
 }
 
diff --git a/llvm/test/Transforms/DeadStoreElimination/lifetime.ll b/llvm/test/Transforms/DeadStoreElimination/lifetime.ll
index 87c3104..305c916 100644
--- a/llvm/test/Transforms/DeadStoreElimination/lifetime.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/lifetime.ll
@@ -4,7 +4,7 @@
 
 declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
 declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
-declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i1) nounwind
+declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i32, i1) nounwind
 
 define void @test1() {
 ; CHECK-LABEL: @test1(
@@ -14,7 +14,7 @@
   call void @llvm.lifetime.end(i64 1, i8* %A)
 ; CHECK: lifetime.end
 
-  call void @llvm.memset.p0i8.i8(i8* %A, i8 0, i8 -1, i1 false)
+  call void @llvm.memset.p0i8.i8(i8* %A, i8 0, i8 -1, i32 0, i1 false)
 ; CHECK-NOT: memset
 
   ret void
diff --git a/llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll
index aa9f1a1..5bbb8e0 100644
--- a/llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll
@@ -1,8 +1,8 @@
 ; RUN: opt -S -dse < %s | FileCheck %s
 
-declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i1) nounwind
-declare void @llvm.memmove.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i1) nounwind
-declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i32, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i32, i1) nounwind
 
 define void @test1() {
 ; CHECK-LABEL: @test1(
@@ -12,7 +12,7 @@
   store i8 0, i8* %A  ;; Written to by memcpy
 ; CHECK-NOT: store
 
-  call void @llvm.memcpy.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i32 0, i1 false)
 
   ret void
 ; CHECK: ret void
@@ -26,7 +26,7 @@
   store i8 0, i8* %A  ;; Written to by memmove
 ; CHECK-NOT: store
 
-  call void @llvm.memmove.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i32 0, i1 false)
 
   ret void
 ; CHECK: ret void
@@ -40,7 +40,7 @@
   store i8 0, i8* %A  ;; Written to by memset
 ; CHECK-NOT: store
 
-  call void @llvm.memset.p0i8.i8(i8* %A, i8 0, i8 -1, i1 false)
+  call void @llvm.memset.p0i8.i8(i8* %A, i8 0, i8 -1, i32 0, i1 false)
 
   ret void
 ; CHECK: ret void
diff --git a/llvm/test/Transforms/DeadStoreElimination/no-targetdata.ll b/llvm/test/Transforms/DeadStoreElimination/no-targetdata.ll
index b66b75e..f9262ed 100644
--- a/llvm/test/Transforms/DeadStoreElimination/no-targetdata.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/no-targetdata.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -basicaa -dse -S < %s | FileCheck %s
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
 define void @fn(i8* nocapture %buf) #0 {
 entry:
@@ -13,9 +13,9 @@
 ; CHECK: ret void
 
   %arrayidx = getelementptr i8, i8* %buf, i64 18
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arrayidx, i8* %buf, i64 18, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arrayidx, i8* %buf, i64 18, i32 1, i1 false)
   store i8 1, i8* %arrayidx, align 1
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %buf, i8* %arrayidx, i64 18, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %buf, i8* %arrayidx, i64 18, i32 1, i1 false)
   ret void
 }
 
diff --git a/llvm/test/Transforms/DeadStoreElimination/pr11390.ll b/llvm/test/Transforms/DeadStoreElimination/pr11390.ll
index 6105a2e..faf3b8b 100644
--- a/llvm/test/Transforms/DeadStoreElimination/pr11390.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/pr11390.ll
@@ -17,12 +17,12 @@
   br i1 %tobool, label %return, label %if.end
 
 if.end:                                           ; preds = %entry
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %call4, i8* %name, i64 %call, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %call4, i8* %name, i64 %call, i32 1, i1 false)
   %arrayidx = getelementptr inbounds i8, i8* %call4, i64 %call
   store i8 46, i8* %arrayidx, align 1
 ; CHECK: store i8 46
   %add.ptr5 = getelementptr inbounds i8, i8* %call4, i64 %add
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %add.ptr5, i8* %domain, i64 %call1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %add.ptr5, i8* %domain, i64 %call1, i32 1, i1 false)
   %arrayidx8 = getelementptr inbounds i8, i8* %call4, i64 %add2
   store i8 0, i8* %arrayidx8, align 1
   br label %return
@@ -35,4 +35,4 @@
 
 declare noalias i8* @malloc(i64) nounwind
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/llvm/test/Transforms/DeadStoreElimination/simple.ll b/llvm/test/Transforms/DeadStoreElimination/simple.ll
index 349713e..4f6221d 100644
--- a/llvm/test/Transforms/DeadStoreElimination/simple.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/simple.ll
@@ -1,8 +1,8 @@
 ; RUN: opt < %s -basicaa -dse -S | FileCheck %s
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 declare void @llvm.init.trampoline(i8*, i8*, i8*)
 
 define void @test1(i32* %Q, i32* %P) {
@@ -63,7 +63,7 @@
 ; alias).
 define void @test6(i32 *%p, i8 *%q) {
   store i32 10, i32* %p, align 4       ;; dead.
-  call void @llvm.memset.p0i8.i64(i8* %q, i8 42, i64 900, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %q, i8 42, i64 900, i32 1, i1 false)
   store i32 30, i32* %p, align 4
   ret void
 ; CHECK-LABEL: @test6(
@@ -74,7 +74,7 @@
 ; alias).
 define void @test7(i32 *%p, i8 *%q, i8* noalias %r) {
   store i32 10, i32* %p, align 4       ;; dead.
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %r, i64 900, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %r, i64 900, i32 1, i1 false)
   store i32 30, i32* %p, align 4
   ret void
 ; CHECK-LABEL: @test7(
@@ -208,8 +208,8 @@
 
 ;; Fully dead overwrite of memcpy.
 define void @test15(i8* %P, i8* %Q) nounwind ssp {
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
   ret void
 ; CHECK-LABEL: @test15(
 ; CHECK-NEXT: call void @llvm.memcpy
@@ -218,8 +218,8 @@
 
 ;; Full overwrite of smaller memcpy.
 define void @test16(i8* %P, i8* %Q) nounwind ssp {
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false)
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
   ret void
 ; CHECK-LABEL: @test16(
 ; CHECK-NEXT: call void @llvm.memcpy
@@ -228,8 +228,8 @@
 
 ;; Overwrite of memset by memcpy.
 define void @test17(i8* %P, i8* noalias %Q) nounwind ssp {
-  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false)
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
   ret void
 ; CHECK-LABEL: @test17(
 ; CHECK-NEXT: call void @llvm.memcpy
@@ -238,8 +238,8 @@
 
 ; Should not delete the volatile memset.
 define void @test17v(i8* %P, i8* %Q) nounwind ssp {
-  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 true)
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 true)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
   ret void
 ; CHECK-LABEL: @test17v(
 ; CHECK-NEXT: call void @llvm.memset
@@ -252,8 +252,8 @@
 ; A = B
 ; A = A
 define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp {
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
   ret void
 ; CHECK-LABEL: @test18(
 ; CHECK-NEXT: call void @llvm.memcpy
diff --git a/llvm/test/Transforms/GVN/nonescaping-malloc.ll b/llvm/test/Transforms/GVN/nonescaping-malloc.ll
index ea85924..f83b317 100644
--- a/llvm/test/Transforms/GVN/nonescaping-malloc.ll
+++ b/llvm/test/Transforms/GVN/nonescaping-malloc.ll
@@ -80,7 +80,7 @@
 _ZN4llvm14StringMapEntryIPvE6CreateINS_15MallocAllocatorES1_EEPS2_PKcS7_RT_T0_.exit.i: ; preds = %bb.i.i, %bb4.i
   %tmp.i18.i.i = getelementptr inbounds i8, i8* %tmp.i20.i.i, i64 16
   %tmp15.i.i = zext i32 %tmp4.i.i to i64
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp.i18.i.i, i8* %tmp41.i, i64 %tmp15.i.i, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp.i18.i.i, i8* %tmp41.i, i64 %tmp15.i.i, i32 1, i1 false)
   %tmp.i18.sum.i.i = add i64 %tmp15.i.i, 16
   %tmp17.i.i = getelementptr inbounds i8, i8* %tmp.i20.i.i, i64 %tmp.i18.sum.i.i
   store i8 0, i8* %tmp17.i.i, align 1
@@ -106,4 +106,4 @@
   ret %"struct.llvm::StringMapEntry<void*>"* %tmp10.i.i
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/llvm/test/Transforms/GVN/pr17732.ll b/llvm/test/Transforms/GVN/pr17732.ll
index 1410683..6c40ccf 100644
--- a/llvm/test/Transforms/GVN/pr17732.ll
+++ b/llvm/test/Transforms/GVN/pr17732.ll
@@ -14,10 +14,10 @@
 
 define i32 @main() {
 entry:
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds (%struct.with_array, %struct.with_array* @array_with_zeroinit, i64 0, i32 0, i64 0), i8* getelementptr inbounds ({ [2 x i8], i32, i8, [3 x i8] }, { [2 x i8], i32, i8, [3 x i8] }* @main.obj_with_array, i64 0, i32 0, i64 0), i64 12, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds (%struct.with_array, %struct.with_array* @array_with_zeroinit, i64 0, i32 0, i64 0), i8* getelementptr inbounds ({ [2 x i8], i32, i8, [3 x i8] }, { [2 x i8], i32, i8, [3 x i8] }* @main.obj_with_array, i64 0, i32 0, i64 0), i64 12, i32 4, i1 false)
   %0 = load i8, i8* getelementptr inbounds (%struct.with_array, %struct.with_array* @array_with_zeroinit, i64 0, i32 2), align 4
 
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds (%struct.with_vector, %struct.with_vector* @vector_with_zeroinit, i64 0, i32 0, i64 0), i8* getelementptr inbounds ({ <2 x i8>, i32, i8, [3 x i8] }, { <2 x i8>, i32, i8, [3 x i8] }* @main.obj_with_vector, i64 0, i32 0, i64 0), i64 12, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds (%struct.with_vector, %struct.with_vector* @vector_with_zeroinit, i64 0, i32 0, i64 0), i8* getelementptr inbounds ({ <2 x i8>, i32, i8, [3 x i8] }, { <2 x i8>, i32, i8, [3 x i8] }* @main.obj_with_vector, i64 0, i32 0, i64 0), i64 12, i32 4, i1 false)
   %1 = load i8, i8* getelementptr inbounds (%struct.with_vector, %struct.with_vector* @vector_with_zeroinit, i64 0, i32 2), align 4
   %conv0 = sext i8 %0 to i32
   %conv1 = sext i8 %1 to i32
@@ -27,4 +27,4 @@
 ; CHECK: ret i32 1
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
diff --git a/llvm/test/Transforms/GVN/rle.ll b/llvm/test/Transforms/GVN/rle.ll
index bc514d6..3f42135 100644
--- a/llvm/test/Transforms/GVN/rle.ll
+++ b/llvm/test/Transforms/GVN/rle.ll
@@ -27,7 +27,7 @@
 ;; No PR filed, crashed in CaptureTracker.
 declare void @helper()
 define void @crash1() {
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* undef, i64 undef, i1 false) nounwind
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* undef, i64 undef, i32 1, i1 false) nounwind
   %tmp = load i8, i8* bitcast (void ()* @helper to i8*)
   %x = icmp eq i8 %tmp, 15
   ret void
@@ -142,7 +142,7 @@
 define signext i16 @memset_to_i16_local(i16* %A) nounwind ssp {
 entry:
   %conv = bitcast i16* %A to i8* 
-  tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 1, i64 200, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 1, i64 200, i32 1, i1 false)
   %arrayidx = getelementptr inbounds i16, i16* %A, i64 42
   %tmp2 = load i16, i16* %arrayidx
   ret i16 %tmp2
@@ -155,7 +155,7 @@
 define float @memset_to_float_local(float* %A, i8 %Val) nounwind ssp {
 entry:
   %conv = bitcast float* %A to i8*                ; <i8*> [#uses=1]
-  tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 %Val, i64 400, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 %Val, i64 400, i32 1, i1 false)
   %arrayidx = getelementptr inbounds float, float* %A, i64 42 ; <float*> [#uses=1]
   %tmp2 = load float, float* %arrayidx                   ; <float> [#uses=1]
   ret float %tmp2
@@ -175,11 +175,11 @@
   %P3 = bitcast i16* %P to i8*
   br i1 %cond, label %T, label %F
 T:
-  tail call void @llvm.memset.p0i8.i64(i8* %P3, i8 1, i64 400, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %P3, i8 1, i64 400, i32 1, i1 false)
   br label %Cont
   
 F:
-  tail call void @llvm.memset.p0i8.i64(i8* %P3, i8 2, i64 400, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %P3, i8 2, i64 400, i32 1, i1 false)
   br label %Cont
 
 Cont:
@@ -201,7 +201,7 @@
 define float @memcpy_to_float_local(float* %A) nounwind ssp {
 entry:
   %conv = bitcast float* %A to i8*                ; <i8*> [#uses=1]
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %conv, i8* bitcast ({i32, float, i32 }* @GCst to i8*), i64 12, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %conv, i8* bitcast ({i32, float, i32 }* @GCst to i8*), i64 12, i32 1, i1 false)
   %arrayidx = getelementptr inbounds float, float* %A, i64 1 ; <float*> [#uses=1]
   %tmp2 = load float, float* %arrayidx                   ; <float> [#uses=1]
   ret float %tmp2
@@ -214,7 +214,7 @@
 define float @memcpy_to_float_local_as1(float* %A) nounwind ssp {
 entry:
   %conv = bitcast float* %A to i8*                ; <i8*> [#uses=1]
-  tail call void @llvm.memcpy.p0i8.p1i8.i64(i8* %conv, i8 addrspace(1)* bitcast ({i32, float, i32 } addrspace(1)* @GCst_as1 to i8 addrspace(1)*), i64 12, i1 false)
+  tail call void @llvm.memcpy.p0i8.p1i8.i64(i8* %conv, i8 addrspace(1)* bitcast ({i32, float, i32 } addrspace(1)* @GCst_as1 to i8 addrspace(1)*), i64 12, i32 1, i1 false)
   %arrayidx = getelementptr inbounds float, float* %A, i64 1 ; <float*> [#uses=1]
   %tmp2 = load float, float* %arrayidx                   ; <float> [#uses=1]
   ret float %tmp2
@@ -565,7 +565,7 @@
 entry:
   %x = alloca [256 x i32], align 4                ; <[256 x i32]*> [#uses=2]
   %tmp = bitcast [256 x i32]* %x to i8*           ; <i8*> [#uses=1]
-  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 1024, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 1024, i32 4, i1 false)
   %arraydecay = getelementptr inbounds [256 x i32], [256 x i32]* %x, i32 0, i32 0 ; <i32*>
   %tmp1 = load i32, i32* %arraydecay                   ; <i32> [#uses=1]
   ret i32 %tmp1
@@ -669,10 +669,10 @@
 ; CHECK: ret i32
 }
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
-declare void @llvm.memcpy.p0i8.p1i8.i64(i8* nocapture, i8 addrspace(1)* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p1i8.i64(i8* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind
 
 
 ;;===----------------------------------------------------------------------===;;
diff --git a/llvm/test/Transforms/GlobalOpt/crash.ll b/llvm/test/Transforms/GlobalOpt/crash.ll
index 883b72e..8e39931 100644
--- a/llvm/test/Transforms/GlobalOpt/crash.ll
+++ b/llvm/test/Transforms/GlobalOpt/crash.ll
@@ -60,10 +60,10 @@
 
 @data8 = internal global [8000 x i8] zeroinitializer, align 16
 define void @memset_with_strange_user() ssp {
-  call void @llvm.memset.p0i8.i64(i8* getelementptr inbounds ([8000 x i8], [8000 x i8]* @data8, i64 0, i64 0), i8 undef, i64 ptrtoint (i8* getelementptr ([8000 x i8], [8000 x i8]* @data8, i64 1, i64 sub (i64 0, i64 ptrtoint ([8000 x i8]* @data8 to i64))) to i64), i1 false)
+  call void @llvm.memset.p0i8.i64(i8* getelementptr inbounds ([8000 x i8], [8000 x i8]* @data8, i64 0, i64 0), i8 undef, i64 ptrtoint (i8* getelementptr ([8000 x i8], [8000 x i8]* @data8, i64 1, i64 sub (i64 0, i64 ptrtoint ([8000 x i8]* @data8 to i64))) to i64), i32 16, i1 false)
   ret void
 }
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
 
 
 ; PR9856
diff --git a/llvm/test/Transforms/GlobalOpt/memcpy.ll b/llvm/test/Transforms/GlobalOpt/memcpy.ll
index b2b192b..437142e 100644
--- a/llvm/test/Transforms/GlobalOpt/memcpy.ll
+++ b/llvm/test/Transforms/GlobalOpt/memcpy.ll
@@ -6,8 +6,8 @@
 define void @foo() {
   %Blah = alloca [58 x i8]
   %tmp.0 = getelementptr [58 x i8], [58 x i8]* %Blah, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp.0, i8* getelementptr inbounds ([58 x i8], [58 x i8]* @G1, i32 0, i32 0), i32 58, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp.0, i8* getelementptr inbounds ([58 x i8], [58 x i8]* @G1, i32 0, i32 0), i32 58, i32 1, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/llvm/test/Transforms/GlobalOpt/memset-null.ll b/llvm/test/Transforms/GlobalOpt/memset-null.ll
index 1a1f550..838ac09 100644
--- a/llvm/test/Transforms/GlobalOpt/memset-null.ll
+++ b/llvm/test/Transforms/GlobalOpt/memset-null.ll
@@ -8,12 +8,12 @@
 @a = global %struct.A zeroinitializer, align 4
 @llvm.global_ctors = appending global [2 x %0] [%0 { i32 65535, void ()* @_GLOBAL__I_a }, %0 { i32 65535, void ()* @_GLOBAL__I_b }]
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
 
 ; CHECK-NOT: GLOBAL__I_a
 define internal void @_GLOBAL__I_a() nounwind {
 entry:
-  tail call void @llvm.memset.p0i8.i64(i8* bitcast (%struct.A* @a to i8*), i8 0, i64 400, i1 false) nounwind
+  tail call void @llvm.memset.p0i8.i64(i8* bitcast (%struct.A* @a to i8*), i8 0, i64 400, i32 4, i1 false) nounwind
   ret void
 }
 
@@ -24,6 +24,6 @@
 define internal void @_GLOBAL__I_b() nounwind {
 entry:
   %tmp.i.i.i = load i8*, i8** @y, align 8
-  tail call void @llvm.memset.p0i8.i64(i8* %tmp.i.i.i, i8 0, i64 10, i1 false) nounwind
+  tail call void @llvm.memset.p0i8.i64(i8* %tmp.i.i.i, i8 0, i64 10, i32 1, i1 false) nounwind
   ret void
 }
diff --git a/llvm/test/Transforms/GlobalOpt/memset.ll b/llvm/test/Transforms/GlobalOpt/memset.ll
index 02bef2c..1dfdd64 100644
--- a/llvm/test/Transforms/GlobalOpt/memset.ll
+++ b/llvm/test/Transforms/GlobalOpt/memset.ll
@@ -10,8 +10,8 @@
 define void @foo() {
   %Blah = alloca [58 x i8]
   %tmp3 = bitcast [58 x i8]* %Blah to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([4 x i32]* @G1 to i8*), i8* %tmp3, i32 16, i1 false)
-  call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([58 x i8], [58 x i8]* @G0, i32 0, i32 0), i8 17, i32 58, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([4 x i32]* @G1 to i8*), i8* %tmp3, i32 16, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([58 x i8], [58 x i8]* @G0, i32 0, i32 0), i8 17, i32 58, i32 1, i1 false)
   ret void
 }
 
@@ -21,11 +21,11 @@
 define void @foo_as1() {
   %Blah = alloca [58 x i8]
   %tmp3 = bitcast [58 x i8]* %Blah to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* addrspacecast ([4 x i32] addrspace(1)* @G1_as1 to i8*), i8* %tmp3, i32 16, i1 false)
-  call void @llvm.memset.p1i8.i32(i8 addrspace(1)* getelementptr inbounds ([58 x i8], [58 x i8] addrspace(1)* @G0_as1, i32 0, i32 0), i8 17, i32 58, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* addrspacecast ([4 x i32] addrspace(1)* @G1_as1 to i8*), i8* %tmp3, i32 16, i32 1, i1 false)
+  call void @llvm.memset.p1i8.i32(i8 addrspace(1)* getelementptr inbounds ([58 x i8], [58 x i8] addrspace(1)* @G0_as1, i32 0, i32 0), i8 17, i32 58, i32 1, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
-declare void @llvm.memset.p1i8.i32(i8 addrspace(1)* nocapture, i8, i32, i1) nounwind
\ No newline at end of file
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+declare void @llvm.memset.p1i8.i32(i8 addrspace(1)* nocapture, i8, i32, i32, i1) nounwind
\ No newline at end of file
diff --git a/llvm/test/Transforms/Inline/alloca-dbgdeclare.ll b/llvm/test/Transforms/Inline/alloca-dbgdeclare.ll
index 67b5789..3957531 100644
--- a/llvm/test/Transforms/Inline/alloca-dbgdeclare.ll
+++ b/llvm/test/Transforms/Inline/alloca-dbgdeclare.ll
@@ -31,7 +31,7 @@
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 ; Function Attrs: nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #2
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #2
 
 ; Function Attrs: nounwind
 define void @_Z3fn4v() #0 !dbg !21 {
@@ -46,14 +46,14 @@
   %agg.tmp.sroa.0.0.copyload = load i32, i32* getelementptr inbounds (%struct.A, %struct.A* @b, i64 0, i32 0), align 8, !dbg !50
   tail call void @llvm.dbg.value(metadata i32 %agg.tmp.sroa.0.0.copyload, i64 0, metadata !46, metadata !51), !dbg !49
   %agg.tmp.sroa.3.0..sroa_idx = getelementptr inbounds [20 x i8], [20 x i8]* %agg.tmp.sroa.3, i64 0, i64 0, !dbg !50
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %agg.tmp.sroa.3.0..sroa_idx, i8* getelementptr (i8, i8* bitcast (%struct.A* @b to i8*), i64 4), i64 20, i1 false), !dbg !50
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %agg.tmp.sroa.3.0..sroa_idx, i8* getelementptr (i8, i8* bitcast (%struct.A* @b to i8*), i64 4), i64 20, i32 4, i1 false), !dbg !50
   tail call void @llvm.dbg.declare(metadata %struct.A* undef, metadata !46, metadata !31) #2, !dbg !49
   %tobool.i = icmp eq i32 %agg.tmp.sroa.0.0.copyload, 0, !dbg !52
   br i1 %tobool.i, label %_Z3fn31A.exit, label %if.then.i, !dbg !53
 
 if.then.i:                                        ; preds = %entry
   store i32 %agg.tmp.sroa.0.0.copyload, i32* getelementptr inbounds (%struct.A, %struct.A* @a, i64 0, i32 0), align 8, !dbg !54
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr (i8, i8* bitcast (%struct.A* @a to i8*), i64 4), i8* %agg.tmp.sroa.3.0..sroa_idx, i64 20, i1 false), !dbg !54
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr (i8, i8* bitcast (%struct.A* @a to i8*), i64 4), i8* %agg.tmp.sroa.3.0..sroa_idx, i64 20, i32 4, i1 false), !dbg !54
   br label %_Z3fn31A.exit, !dbg !54
 
 _Z3fn31A.exit:                                    ; preds = %entry, %if.then.i
diff --git a/llvm/test/Transforms/Inline/inline-invoke-tail.ll b/llvm/test/Transforms/Inline/inline-invoke-tail.ll
index 2fa1fe1..f4b8065 100644
--- a/llvm/test/Transforms/Inline/inline-invoke-tail.ll
+++ b/llvm/test/Transforms/Inline/inline-invoke-tail.ll
@@ -4,7 +4,7 @@
 define internal void @foo(i32* %p, i32* %q) {
 	%pp = bitcast i32* %p to i8*
 	%qq = bitcast i32* %q to i8*
-	tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %pp, i8* %qq, i32 4, i1 false)
+	tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %pp, i8* %qq, i32 4, i32 1, i1 false)
 	ret void
 }
 
@@ -30,4 +30,4 @@
 
 declare i32 @__gxx_personality_v0(...)
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/llvm/test/Transforms/Inline/inline-vla.ll b/llvm/test/Transforms/Inline/inline-vla.ll
index 5d82067..df21b3f 100644
--- a/llvm/test/Transforms/Inline/inline-vla.ll
+++ b/llvm/test/Transforms/Inline/inline-vla.ll
@@ -21,13 +21,13 @@
 entry:
   %vla = alloca i64, i64 %size, align 16
   %0 = bitcast i64* %vla to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %src, i64 %size, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %0, i64 %size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %src, i64 %size, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %0, i64 %size, i32 1, i1 false)
   ret void
 }
 
 ; Function Attrs: nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #2
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #2
 
 attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { inlinehint nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/llvm/test/Transforms/Inline/noalias-calls.ll b/llvm/test/Transforms/Inline/noalias-calls.ll
index 23d545d..56d5c6d 100644
--- a/llvm/test/Transforms/Inline/noalias-calls.ll
+++ b/llvm/test/Transforms/Inline/noalias-calls.ll
@@ -2,17 +2,17 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #0
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0
 declare void @hey() #0
 
 define void @hello(i8* noalias nocapture %a, i8* noalias nocapture readonly %c, i8* nocapture %b) #1 {
 entry:
   %l = alloca i8, i32 512, align 1
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 16, i1 0)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %c, i64 16, i1 0)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %c, i64 16, i1 0)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 16, i32 16, i1 0)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %c, i64 16, i32 16, i1 0)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %c, i64 16, i32 16, i1 0)
   call void @hey()
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l, i8* %c, i64 16, i1 0)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l, i8* %c, i64 16, i32 16, i1 0)
   ret void
 }
 
@@ -24,11 +24,11 @@
 
 ; CHECK: define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #2 {
 ; CHECK: entry:
-; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 16, i1 false) #1, !noalias !0
-; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %c, i64 16, i1 false) #1, !noalias !3
-; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %c, i64 16, i1 false) #1, !alias.scope !5
+; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 16, i32 16, i1 false) #1, !noalias !0
+; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %c, i64 16, i32 16, i1 false) #1, !noalias !3
+; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %c, i64 16, i32 16, i1 false) #1, !alias.scope !5
 ; CHECK:   call void @hey() #1, !noalias !5
-; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* %c, i64 16, i1 false) #1, !noalias !3
+; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* %c, i64 16, i32 16, i1 false) #1, !noalias !3
 ; CHECK:   ret void
 ; CHECK: }
 
diff --git a/llvm/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll b/llvm/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
index 427d0e3..67a94e5 100644
--- a/llvm/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
+++ b/llvm/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
@@ -10,11 +10,11 @@
   store i8* %P, i8** %P_addr
   %tmp = load i8*, i8** %P_addr, align 4
   %tmp1 = getelementptr [4 x i8], [4 x i8]* @.str, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* %tmp1, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* %tmp1, i32 4, i32 1, i1 false)
   br label %return
 
 return:                                           ; preds = %entry
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/llvm/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll b/llvm/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
index c976ce0..0f8b38c 100644
--- a/llvm/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
+++ b/llvm/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
@@ -64,7 +64,7 @@
   %val.i = load i32*, i32** %elt.i
   %tmp.i = bitcast %"struct.std::bidirectional_iterator_tag"* %unnamed_arg.i to i8*
   %tmp9.i = bitcast %"struct.std::bidirectional_iterator_tag"* %2 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp.i, i8* %tmp9.i, i64 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp.i, i8* %tmp9.i, i64 1, i32 1, i1 false)
   %26 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
   store i32* %val.i, i32** %26
   %27 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0
@@ -276,4 +276,4 @@
   ret i32* %tmp14
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/llvm/test/Transforms/InstCombine/addrspacecast.ll b/llvm/test/Transforms/InstCombine/addrspacecast.ll
index e3423fd..27f6b72 100644
--- a/llvm/test/Transforms/InstCombine/addrspacecast.ll
+++ b/llvm/test/Transforms/InstCombine/addrspacecast.ll
@@ -3,9 +3,9 @@
 target datalayout = "e-p:64:64:64-p1:32:32:32-p2:16:16:16-n8:16:32:64"
 
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) nounwind
-declare void @llvm.memcpy.p0i8.p1i8.i32(i8*, i8 addrspace(1)*, i32, i1) nounwind
-declare void @llvm.memcpy.p0i8.p2i8.i32(i8*, i8 addrspace(2)*, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p1i8.i32(i8*, i8 addrspace(1)*, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p2i8.i32(i8*, i8 addrspace(2)*, i32, i32, i1) nounwind
 
 
 define i32* @combine_redundant_addrspacecast(i32 addrspace(1)* %x) nounwind {
@@ -127,7 +127,7 @@
 define i32 @memcpy_addrspacecast() nounwind {
 entry:
   %alloca = alloca i8, i32 48
-  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %alloca, i8 addrspace(1)* addrspacecast (i8 addrspace(2)* getelementptr inbounds ([60 x i8], [60 x i8] addrspace(2)* @const_array, i16 0, i16 4) to i8 addrspace(1)*), i32 48, i1 false) nounwind
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %alloca, i8 addrspace(1)* addrspacecast (i8 addrspace(2)* getelementptr inbounds ([60 x i8], [60 x i8] addrspace(2)* @const_array, i16 0, i16 4) to i8 addrspace(1)*), i32 48, i32 4, i1 false) nounwind
   br label %loop.body
 
 loop.body:
diff --git a/llvm/test/Transforms/InstCombine/align-addr.ll b/llvm/test/Transforms/InstCombine/align-addr.ll
index d92dadd..ab0ae39 100644
--- a/llvm/test/Transforms/InstCombine/align-addr.ll
+++ b/llvm/test/Transforms/InstCombine/align-addr.ll
@@ -80,7 +80,7 @@
   ret double %t
 }
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
 
 declare void @use(i8*)
 
@@ -90,8 +90,8 @@
 ; Check that the alignment is bumped up the alignment of the sret type.
 ; CHECK-LABEL: @test3(
   %a4.cast = bitcast %struct.s* %a4 to i8*
-  call void @llvm.memset.p0i8.i64(i8* %a4.cast, i8 0, i64 16, i1 false)
-; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %a4.cast, i8 0, i64 16, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %a4.cast, i8 0, i64 16, i32 1, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %a4.cast, i8 0, i64 16, i32 4, i1 false)
   call void @use(i8* %a4.cast)
   ret void
 }
diff --git a/llvm/test/Transforms/InstCombine/alloca.ll b/llvm/test/Transforms/InstCombine/alloca.ll
index 9975d1d..b61b75e 100644
--- a/llvm/test/Transforms/InstCombine/alloca.ll
+++ b/llvm/test/Transforms/InstCombine/alloca.ll
@@ -105,11 +105,11 @@
 entry:
   %0 = alloca %real_type, align 4
   %1 = bitcast %real_type* %0 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* bitcast (%opaque_type* @opaque_global to i8*), i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* bitcast (%opaque_type* @opaque_global to i8*), i32 8, i32 1, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
 
 ; Check that the GEP indices use the pointer size, or 64 if unknown
@@ -145,7 +145,7 @@
   %0 = getelementptr inbounds <{ %struct_type }>, <{ %struct_type }>* %argmem, i32 0, i32 0
   %1 = bitcast %struct_type* %0 to i8*
   %2 = bitcast %struct_type* %a to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %2, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %2, i32 8, i32 4, i1 false)
   call void @test9_aux(<{ %struct_type }>* inalloca %argmem)
   call void @llvm.stackrestore(i8* %inalloca.save)
   ret void
diff --git a/llvm/test/Transforms/InstCombine/call-intrinsics.ll b/llvm/test/Transforms/InstCombine/call-intrinsics.ll
index b9bbcb7..3e37a71 100644
--- a/llvm/test/Transforms/InstCombine/call-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/call-intrinsics.ll
@@ -3,17 +3,17 @@
 @X = global i8 0                ; <i8*> [#uses=3]
 @Y = global i8 12               ; <i8*> [#uses=2]
 
-declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i1)
+declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
 
-declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1)
+declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1)
 
 define void @zero_byte_test() {
         ; These process zero bytes, so they are a noop.
-        call void @llvm.memmove.p0i8.p0i8.i32( i8* @X, i8* @Y, i32 0, i1 false )
-        call void @llvm.memcpy.p0i8.p0i8.i32( i8* @X, i8* @Y, i32 0, i1 false )
-        call void @llvm.memset.p0i8.i32( i8* @X, i8 123, i32 0, i1 false )
+        call void @llvm.memmove.p0i8.p0i8.i32( i8* @X, i8* @Y, i32 0, i32 128, i1 false )
+        call void @llvm.memcpy.p0i8.p0i8.i32( i8* @X, i8* @Y, i32 0, i32 128, i1 false )
+        call void @llvm.memset.p0i8.i32( i8* @X, i8 123, i32 0, i32 128, i1 false )
         ret void
 }
 
diff --git a/llvm/test/Transforms/InstCombine/malloc-free-delete.ll b/llvm/test/Transforms/InstCombine/malloc-free-delete.ll
index 4720fe1..138001a 100644
--- a/llvm/test/Transforms/InstCombine/malloc-free-delete.ll
+++ b/llvm/test/Transforms/InstCombine/malloc-free-delete.ll
@@ -27,9 +27,9 @@
 declare void @llvm.lifetime.start(i64, i8*)
 declare void @llvm.lifetime.end(i64, i8*)
 declare i64 @llvm.objectsize.i64(i8*, i1)
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
-declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
-declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1) nounwind
 
 define void @test3(i8* %src) {
 ; CHECK-LABEL: @test3(
@@ -39,9 +39,9 @@
   call void @llvm.lifetime.end(i64 10, i8* %a)
   %size = call i64 @llvm.objectsize.i64(i8* %a, i1 true)
   store i8 42, i8* %a
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %src, i32 32, i1 false)
-  call void @llvm.memmove.p0i8.p0i8.i32(i8* %a, i8* %src, i32 32, i1 false)
-  call void @llvm.memset.p0i8.i32(i8* %a, i8 5, i32 32, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %src, i32 32, i32 1, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %a, i8* %src, i32 32, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %a, i8 5, i32 32, i32 1, i1 false)
   %alloc2 = call noalias i8* @calloc(i32 5, i32 7) nounwind
   %z = icmp ne i8* %alloc2, null
   ret void
@@ -82,12 +82,12 @@
   %e = call i8* @malloc(i32 700)
   %f = call i8* @malloc(i32 700)
   %g = call i8* @malloc(i32 700)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %a, i32 32, i1 false)
-  call void @llvm.memmove.p0i8.p0i8.i32(i8* %ptr, i8* %b, i32 32, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %a, i32 32, i32 1, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %ptr, i8* %b, i32 32, i32 1, i1 false)
   store i8* %c, i8** %esc
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %ptr, i32 32, i1 true)
-  call void @llvm.memmove.p0i8.p0i8.i32(i8* %e, i8* %ptr, i32 32, i1 true)
-  call void @llvm.memset.p0i8.i32(i8* %f, i8 5, i32 32, i1 true)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %ptr, i32 32, i32 1, i1 true)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %e, i8* %ptr, i32 32, i32 1, i1 true)
+  call void @llvm.memset.p0i8.i32(i8* %f, i8 5, i32 32, i32 1, i1 true)
   store volatile i8 4, i8* %g
   ret void
 }
diff --git a/llvm/test/Transforms/InstCombine/memcpy-from-global.ll b/llvm/test/Transforms/InstCombine/memcpy-from-global.ll
index edf95b7..da38087 100644
--- a/llvm/test/Transforms/InstCombine/memcpy-from-global.ll
+++ b/llvm/test/Transforms/InstCombine/memcpy-from-global.ll
@@ -6,7 +6,7 @@
 entry:
 	%lookupTable = alloca [128 x float], align 16		; <[128 x float]*> [#uses=5]
 	%lookupTable1 = bitcast [128 x float]* %lookupTable to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.p0i8.p0i8.i64(i8* %lookupTable1, i8* bitcast ([128 x float]* @C.0.1248 to i8*), i64 512, i1 false)
+	call void @llvm.memcpy.p0i8.p0i8.i64(i8* %lookupTable1, i8* bitcast ([128 x float]* @C.0.1248 to i8*), i64 512, i32 16, i1 false)
 
 ; CHECK-LABEL: @test1(
 ; CHECK-NOT: alloca
@@ -36,10 +36,10 @@
 	ret float %tmp43
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
-declare void @llvm.memcpy.p1i8.p0i8.i64(i8 addrspace(1)* nocapture, i8* nocapture, i64, i1) nounwind
-declare void @llvm.memcpy.p0i8.p1i8.i64(i8* nocapture, i8 addrspace(1)* nocapture, i64, i1) nounwind
-declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p1i8.p0i8.i64(i8 addrspace(1)* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p1i8.i64(i8* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind
 
 %T = type { i8, [123 x i8] }
 %U = type { i32, i32, i32, i32, i32 }
@@ -60,9 +60,9 @@
 ; CHECK-NEXT: getelementptr inbounds [124 x i8], [124 x i8]*
 
 ; use @G instead of %A
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %{{.*}}, i8* align 16 getelementptr inbounds (%T, %T* @G, i64 0, i32 0)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 124, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* getelementptr inbounds (%T, %T* @G, i64 0, i32 0)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 124, i32 4, i1 false)
   call void @bar(i8* %b)
   ret void
 }
@@ -83,9 +83,9 @@
 ; CHECK-NEXT: addrspacecast
 
 ; use @G instead of %A
-; CHECK-NEXT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 8 %{{.*}},
-  call void @llvm.memcpy.p1i8.p0i8.i64(i8 addrspace(1)* %a, i8* bitcast (%T* @G to i8*), i64 124, i1 false)
-  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %b, i8 addrspace(1)* %a, i64 124, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %{{.*}},
+  call void @llvm.memcpy.p1i8.p0i8.i64(i8 addrspace(1)* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %b, i8 addrspace(1)* %a, i64 124, i32 4, i1 false)
   call void @bar_as1(i8 addrspace(1)* %b)
   ret void
 }
@@ -98,7 +98,7 @@
 define void @test3() {
   %A = alloca %T
   %a = bitcast %T* %A to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
   call void @bar(i8* %a) readonly
 ; CHECK-LABEL: @test3(
 ; CHECK-NEXT: call void @bar(i8* getelementptr inbounds (%T, %T* @G, i64 0, i32 0))
@@ -108,7 +108,7 @@
 define void @test3_addrspacecast() {
   %A = alloca %T
   %a = bitcast %T* %A to i8*
-  call void @llvm.memcpy.p0i8.p1i8.i64(i8* %a, i8 addrspace(1)* addrspacecast (%T* @G to i8 addrspace(1)*), i64 124, i1 false)
+  call void @llvm.memcpy.p0i8.p1i8.i64(i8* %a, i8 addrspace(1)* addrspacecast (%T* @G to i8 addrspace(1)*), i64 124, i32 4, i1 false)
   call void @bar(i8* %a) readonly
 ; CHECK-LABEL: @test3_addrspacecast(
 ; CHECK-NEXT: call void @bar(i8* getelementptr inbounds (%T, %T* @G, i64 0, i32 0))
@@ -119,7 +119,7 @@
 define void @test4() {
   %A = alloca %T
   %a = bitcast %T* %A to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
   call void @baz(i8* byval %a)
 ; CHECK-LABEL: @test4(
 ; CHECK-NEXT: call void @baz(i8* byval getelementptr inbounds (%T, %T* @G, i64 0, i32 0))
@@ -131,7 +131,7 @@
   %A = alloca %T
   %a = bitcast %T* %A to i8*
   call void @llvm.lifetime.start(i64 -1, i8* %a)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
   call void @baz(i8* byval %a)
 ; CHECK-LABEL: @test5(
 ; CHECK-NEXT: call void @baz(i8* byval getelementptr inbounds (%T, %T* @G, i64 0, i32 0))
@@ -145,7 +145,7 @@
 define void @test6() {
   %A = alloca %U, align 16
   %a = bitcast %U* %A to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast ([2 x %U]* @H to i8*), i64 20, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast ([2 x %U]* @H to i8*), i64 20, i32 16, i1 false)
   call void @bar(i8* %a) readonly
 ; CHECK-LABEL: @test6(
 ; CHECK-NEXT: call void @bar(i8* bitcast ([2 x %U]* @H to i8*))
@@ -155,7 +155,7 @@
 define void @test7() {
   %A = alloca %U, align 16
   %a = bitcast %U* %A to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U], [2 x %U]* @H, i64 0, i32 0) to i8*), i64 20, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U], [2 x %U]* @H, i64 0, i32 0) to i8*), i64 20, i32 4, i1 false)
   call void @bar(i8* %a) readonly
 ; CHECK-LABEL: @test7(
 ; CHECK-NEXT: call void @bar(i8* bitcast ([2 x %U]* @H to i8*))
@@ -165,7 +165,7 @@
 define void @test8() {
   %A = alloca %U, align 16
   %a = bitcast %U* %A to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U], [2 x %U]* @H, i64 0, i32 1) to i8*), i64 20, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U], [2 x %U]* @H, i64 0, i32 1) to i8*), i64 20, i32 4, i1 false)
   call void @bar(i8* %a) readonly
 ; CHECK-LABEL: @test8(
 ; CHECK: llvm.memcpy
@@ -177,7 +177,7 @@
 define void @test8_addrspacecast() {
   %A = alloca %U, align 16
   %a = bitcast %U* %A to i8*
-  call void @llvm.memcpy.p0i8.p1i8.i64(i8* %a, i8 addrspace(1)* addrspacecast (%U* getelementptr ([2 x %U], [2 x %U]* @H, i64 0, i32 1) to i8 addrspace(1)*), i64 20, i1 false)
+  call void @llvm.memcpy.p0i8.p1i8.i64(i8* %a, i8 addrspace(1)* addrspacecast (%U* getelementptr ([2 x %U], [2 x %U]* @H, i64 0, i32 1) to i8 addrspace(1)*), i64 20, i32 4, i1 false)
   call void @bar(i8* %a) readonly
 ; CHECK-LABEL: @test8_addrspacecast(
 ; CHECK: llvm.memcpy
@@ -188,7 +188,7 @@
 define void @test9() {
   %A = alloca %U, align 4
   %a = bitcast %U* %A to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U], [2 x %U]* @H, i64 0, i32 1) to i8*), i64 20, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U], [2 x %U]* @H, i64 0, i32 1) to i8*), i64 20, i32 4, i1 false)
   call void @bar(i8* %a) readonly
 ; CHECK-LABEL: @test9(
 ; CHECK-NEXT: call void @bar(i8* bitcast (%U* getelementptr inbounds ([2 x %U], [2 x %U]* @H, i64 0, i64 1) to i8*))
@@ -198,7 +198,7 @@
 define void @test9_addrspacecast() {
   %A = alloca %U, align 4
   %a = bitcast %U* %A to i8*
-  call void @llvm.memcpy.p0i8.p1i8.i64(i8* %a, i8 addrspace(1)* addrspacecast (%U* getelementptr ([2 x %U], [2 x %U]* @H, i64 0, i32 1) to i8 addrspace(1)*), i64 20, i1 false)
+  call void @llvm.memcpy.p0i8.p1i8.i64(i8* %a, i8 addrspace(1)* addrspacecast (%U* getelementptr ([2 x %U], [2 x %U]* @H, i64 0, i32 1) to i8 addrspace(1)*), i64 20, i32 4, i1 false)
   call void @bar(i8* %a) readonly
 ; CHECK-LABEL: @test9_addrspacecast(
 ; CHECK-NEXT: call void @bar(i8* bitcast (%U* getelementptr inbounds ([2 x %U], [2 x %U]* @H, i64 0, i64 1) to i8*))
diff --git a/llvm/test/Transforms/InstCombine/memcpy-to-load.ll b/llvm/test/Transforms/InstCombine/memcpy-to-load.ll
index 353f362..bcc9e18 100644
--- a/llvm/test/Transforms/InstCombine/memcpy-to-load.ll
+++ b/llvm/test/Transforms/InstCombine/memcpy-to-load.ll
@@ -6,8 +6,8 @@
 entry:
   %tmp2 = bitcast double* %X to i8*
   %tmp13 = bitcast double* %Y to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp13, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp13, i32 8, i32 1, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/llvm/test/Transforms/InstCombine/memcpy.ll b/llvm/test/Transforms/InstCombine/memcpy.ll
index 9d502e2..f66e14c 100644
--- a/llvm/test/Transforms/InstCombine/memcpy.ll
+++ b/llvm/test/Transforms/InstCombine/memcpy.ll
@@ -1,10 +1,10 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
 define void @test1(i8* %a) {
-        tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 100, i1 false)
+        tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 100, i32 1, i1 false)
         ret void
 ; CHECK-LABEL: define void @test1(
 ; CHECK-NEXT: ret void
@@ -13,14 +13,14 @@
 
 ; PR8267
 define void @test2(i8* %a) {
-        tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 100, i1 true)
+        tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 100, i32 1, i1 true)
         ret void
 ; CHECK-LABEL: define void @test2(
 ; CHECK-NEXT: call void @llvm.memcpy
 }
 
 define void @test3(i8* %d, i8* %s) {
-        tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 17179869184, i1 false)
+        tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 17179869184, i32 4, i1 false)
         ret void
 ; CHECK-LABEL: define void @test3(
 ; CHECK-NEXT: call void @llvm.memcpy
diff --git a/llvm/test/Transforms/InstCombine/memcpy_chk-1.ll b/llvm/test/Transforms/InstCombine/memcpy_chk-1.ll
index a372ef2..ddaaf82 100644
--- a/llvm/test/Transforms/InstCombine/memcpy_chk-1.ll
+++ b/llvm/test/Transforms/InstCombine/memcpy_chk-1.ll
@@ -20,7 +20,7 @@
   %dst = bitcast %struct.T1* @t1 to i8*
   %src = bitcast %struct.T2* @t2 to i8*
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 bitcast (%struct.T1* @t1 to i8*), i8* align 4 bitcast (%struct.T2* @t2 to i8*), i64 1824, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.T1* @t1 to i8*), i8* bitcast (%struct.T2* @t2 to i8*), i64 1824, i32 4, i1 false)
 ; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*)
   %ret = call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 1824)
   ret i8* %ret
@@ -31,7 +31,7 @@
   %dst = bitcast %struct.T1* @t1 to i8*
   %src = bitcast %struct.T3* @t3 to i8*
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 bitcast (%struct.T1* @t1 to i8*), i8* align 4 bitcast (%struct.T3* @t3 to i8*), i64 1824, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.T1* @t1 to i8*), i8* bitcast (%struct.T3* @t3 to i8*), i64 1824, i32 4, i1 false)
 ; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*)
   %ret = call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 2848)
   ret i8* %ret
diff --git a/llvm/test/Transforms/InstCombine/memmove.ll b/llvm/test/Transforms/InstCombine/memmove.ll
index 7f43405..96f230e 100644
--- a/llvm/test/Transforms/InstCombine/memmove.ll
+++ b/llvm/test/Transforms/InstCombine/memmove.ll
@@ -8,13 +8,13 @@
 @hello_u = constant [8 x i8] c"hello_u\00"		; <[8 x i8]*> [#uses=1]
 
 define void @test1(i8* %A, i8* %B, i32 %N) {
-	call void @llvm.memmove.p0i8.p0i8.i32(i8* %A, i8* %B, i32 0, i1 false)
+	call void @llvm.memmove.p0i8.p0i8.i32(i8* %A, i8* %B, i32 0, i32 1, i1 false)
 	ret void
 }
 
 define void @test2(i8* %A, i32 %N) {
         ;; dest can't alias source since we can't write to source!
-	call void @llvm.memmove.p0i8.p0i8.i32(i8* %A, i8* getelementptr inbounds ([33 x i8], [33 x i8]* @S, i32 0, i32 0), i32 %N, i1 false)
+	call void @llvm.memmove.p0i8.p0i8.i32(i8* %A, i8* getelementptr inbounds ([33 x i8], [33 x i8]* @S, i32 0, i32 0), i32 %N, i32 1, i1 false)
 	ret void
 }
 
@@ -24,16 +24,16 @@
 	%hello_u_p = getelementptr [8 x i8], [8 x i8]* @hello_u, i32 0, i32 0		; <i8*> [#uses=1]
 	%target = alloca [1024 x i8]		; <[1024 x i8]*> [#uses=1]
 	%target_p = getelementptr [1024 x i8], [1024 x i8]* %target, i32 0, i32 0		; <i8*> [#uses=3]
-        call void @llvm.memmove.p0i8.p0i8.i32(i8* %target_p, i8* %h_p, i32 2, i1 false)
-        call void @llvm.memmove.p0i8.p0i8.i32(i8* %target_p, i8* %hel_p, i32 4, i1 false)
-        call void @llvm.memmove.p0i8.p0i8.i32(i8* %target_p, i8* %hello_u_p, i32 8, i1 false)
+        call void @llvm.memmove.p0i8.p0i8.i32(i8* %target_p, i8* %h_p, i32 2, i32 2, i1 false)
+        call void @llvm.memmove.p0i8.p0i8.i32(i8* %target_p, i8* %hel_p, i32 4, i32 4, i1 false)
+        call void @llvm.memmove.p0i8.p0i8.i32(i8* %target_p, i8* %hello_u_p, i32 8, i32 8, i1 false)
 	ret i32 0
 }
 
 ; PR2370
 define void @test4(i8* %a) {
-  tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %a, i8* %a, i32 100, i1 false)
+  tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %a, i8* %a, i32 100, i32 1, i1 false)
   ret void
 }
 
-declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/llvm/test/Transforms/InstCombine/memmove_chk-1.ll b/llvm/test/Transforms/InstCombine/memmove_chk-1.ll
index f006985..e4e1f6e 100644
--- a/llvm/test/Transforms/InstCombine/memmove_chk-1.ll
+++ b/llvm/test/Transforms/InstCombine/memmove_chk-1.ll
@@ -20,7 +20,7 @@
   %dst = bitcast %struct.T1* @t1 to i8*
   %src = bitcast %struct.T2* @t2 to i8*
 
-; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 4 bitcast (%struct.T1* @t1 to i8*), i8* align 4 bitcast (%struct.T2* @t2 to i8*), i64 1824, i1 false)
+; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* bitcast (%struct.T1* @t1 to i8*), i8* bitcast (%struct.T2* @t2 to i8*), i64 1824, i32 4, i1 false)
 ; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*)
   %ret = call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 1824)
   ret i8* %ret
@@ -31,7 +31,7 @@
   %dst = bitcast %struct.T1* @t1 to i8*
   %src = bitcast %struct.T3* @t3 to i8*
 
-; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 4 bitcast (%struct.T1* @t1 to i8*), i8* align 4 bitcast (%struct.T3* @t3 to i8*), i64 1824, i1 false)
+; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* bitcast (%struct.T1* @t1 to i8*), i8* bitcast (%struct.T3* @t3 to i8*), i64 1824, i32 4, i1 false)
 ; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*)
   %ret = call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 2848)
   ret i8* %ret
diff --git a/llvm/test/Transforms/InstCombine/memset.ll b/llvm/test/Transforms/InstCombine/memset.ll
index 3f34465..dfafcf9 100644
--- a/llvm/test/Transforms/InstCombine/memset.ll
+++ b/llvm/test/Transforms/InstCombine/memset.ll
@@ -3,12 +3,12 @@
 define i32 @main() {
   %target = alloca [1024 x i8]
   %target_p = getelementptr [1024 x i8], [1024 x i8]* %target, i32 0, i32 0
-  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 0, i1 false)
-  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 1, i1 false)
-  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 2, i1 false)
-  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 4, i1 false)
-  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 0, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 1, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 2, i32 2, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 4, i32 4, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 8, i32 8, i1 false)
   ret i32 0
 }
 
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/llvm/test/Transforms/InstCombine/memset2.ll b/llvm/test/Transforms/InstCombine/memset2.ll
index 5999c5a5..ad4c225 100644
--- a/llvm/test/Transforms/InstCombine/memset2.ll
+++ b/llvm/test/Transforms/InstCombine/memset2.ll
@@ -8,8 +8,8 @@
 entry:
 ; CHECK: bitcast i8 addrspace(1)* %gep to i64 addrspace(1)*
 	%gep = getelementptr inbounds %struct.Moves, %struct.Moves addrspace(1)* %moves, i32 1, i32 0, i32 9
-	 call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %gep, i8 0, i64 8, i1 false)                                                                     
+	 call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %gep, i8 0, i64 8, i32 1, i1 false)                                                                     
 	ret i32 0
 }
 
-declare void @llvm.memset.p1i8.i64(i8addrspace(1)* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memset.p1i8.i64(i8addrspace(1)* nocapture, i8, i64, i32, i1) nounwind
diff --git a/llvm/test/Transforms/InstCombine/memset_chk-1.ll b/llvm/test/Transforms/InstCombine/memset_chk-1.ll
index e0d7376..56ea14c 100644
--- a/llvm/test/Transforms/InstCombine/memset_chk-1.ll
+++ b/llvm/test/Transforms/InstCombine/memset_chk-1.ll
@@ -15,7 +15,7 @@
 ; CHECK-LABEL: @test_simplify1(
   %dst = bitcast %struct.T* @t to i8*
 
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i32 4, i1 false)
 ; CHECK-NEXT: ret i8* bitcast (%struct.T* @t to i8*)
   %ret = call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 1824)
   ret i8* %ret
@@ -25,7 +25,7 @@
 ; CHECK-LABEL: @test_simplify2(
   %dst = bitcast %struct.T* @t to i8*
 
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i32 4, i1 false)
 ; CHECK-NEXT: ret i8* bitcast (%struct.T* @t to i8*)
   %ret = call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 3648)
   ret i8* %ret
@@ -35,7 +35,7 @@
 ; CHECK-LABEL: @test_simplify3(
   %dst = bitcast %struct.T* @t to i8*
 
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i32 4, i1 false)
 ; CHECK-NEXT: ret i8* bitcast (%struct.T* @t to i8*)
   %ret = call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 -1)
   ret i8* %ret
@@ -79,7 +79,7 @@
   %sub183 = ptrtoint i8* %b to i64
   %sub184 = sub i64 %sub182, %sub183
   %add52.i.i = add nsw i64 %sub184, 1
-; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %strchr2
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %strchr2
   %call185 = call i8* @__memset_chk(i8* %call51i, i32 0, i64 %add52.i.i, i64 -1)
   ret i32 4
 }
diff --git a/llvm/test/Transforms/InstCombine/objsize.ll b/llvm/test/Transforms/InstCombine/objsize.ll
index 14ea2ed..2af391f 100644
--- a/llvm/test/Transforms/InstCombine/objsize.ll
+++ b/llvm/test/Transforms/InstCombine/objsize.ll
@@ -112,7 +112,7 @@
   %1 = bitcast %struct.data* %0 to i8*
   %2 = call i32 @llvm.objectsize.i32.p0i8(i8* %1, i1 false) nounwind
 ; CHECK-NOT: @llvm.objectsize
-; CHECK: @llvm.memset.p0i8.i32(i8* align 8 %1, i8 0, i32 1824, i1 false)
+; CHECK: @llvm.memset.p0i8.i32(i8* %1, i8 0, i32 1824, i32 8, i1 false)
   %3 = call i8* @__memset_chk(i8* %1, i32 0, i32 1824, i32 %2) nounwind
   store i8* %1, i8** %esc
   ret i32 0
@@ -128,7 +128,7 @@
   %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %0, i1 false)
   %2 = load i8*, i8** @s, align 8
 ; CHECK-NOT: @llvm.objectsize
-; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %0, i8* align 1 %1, i32 10, i1 false)
+; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 10, i32 1, i1 false)
   %3 = tail call i8* @__memcpy_chk(i8* %0, i8* %2, i32 10, i32 %1) nounwind
   ret i8* %0
 }
diff --git a/llvm/test/Transforms/InstCombine/simplify-libcalls.ll b/llvm/test/Transforms/InstCombine/simplify-libcalls.ll
index 9d0d8e2..7cc6710 100644
--- a/llvm/test/Transforms/InstCombine/simplify-libcalls.ll
+++ b/llvm/test/Transforms/InstCombine/simplify-libcalls.ll
@@ -120,9 +120,9 @@
   %hello_u_p = getelementptr [8 x i8], [8 x i8]* @hello_u, i32 0, i32 0
   %target = alloca [1024 x i8]
   %target_p = getelementptr [1024 x i8], [1024 x i8]* %target, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %target_p, i8* %h_p, i32 2, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %target_p, i8* %hel_p, i32 4, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %target_p, i8* %hello_u_p, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %target_p, i8* %h_p, i32 2, i32 2, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %target_p, i8* %hel_p, i32 4, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %target_p, i8* %hello_u_p, i32 8, i32 8, i1 false)
   ret i32 0
 
 ; CHECK-LABEL: @MemCpy(
@@ -130,7 +130,7 @@
 ; CHECK: ret i32 0
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
 declare i32 @strcmp(i8*, i8*) #0
 
diff --git a/llvm/test/Transforms/InstCombine/sprintf-1.ll b/llvm/test/Transforms/InstCombine/sprintf-1.ll
index 1fbdc43..ddf2f2f 100644
--- a/llvm/test/Transforms/InstCombine/sprintf-1.ll
+++ b/llvm/test/Transforms/InstCombine/sprintf-1.ll
@@ -22,7 +22,7 @@
 ; CHECK-LABEL: @test_simplify1(
   %fmt = getelementptr [13 x i8], [13 x i8]* @hello_world, i32 0, i32 0
   call i32 (i8*, i8*, ...) @sprintf(i8* %dst, i8* %fmt)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 getelementptr inbounds ([13 x i8], [13 x i8]* @hello_world, i32 0, i32 0), i32 13, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @hello_world, i32 0, i32 0), i32 13, i32 1, i1 false)
   ret void
 ; CHECK-NEXT: ret void
 }
@@ -66,7 +66,7 @@
   call i32 (i8*, i8*, ...) @sprintf(i8* %dst, i8* %fmt, i8* %str)
 ; CHECK-NEXT: [[STRLEN:%[a-z0-9]+]] = call i32 @strlen(i8* %str)
 ; CHECK-NEXT: [[LENINC:%[a-z0-9]+]] = add i32 [[STRLEN]], 1
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 %str, i32 [[LENINC]], i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %str, i32 [[LENINC]], i32 1, i1 false)
   ret void
 ; CHECK-NEXT: ret void
 }
diff --git a/llvm/test/Transforms/InstCombine/stack-overalign.ll b/llvm/test/Transforms/InstCombine/stack-overalign.ll
index c7d9d51..a8f086e 100644
--- a/llvm/test/Transforms/InstCombine/stack-overalign.ll
+++ b/llvm/test/Transforms/InstCombine/stack-overalign.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -instcombine -S | grep "align 32" | count 1
 
 ; It's tempting to have an instcombine in which the src pointer of a
 ; memcpy is aligned up to the alignment of the destination, however
@@ -15,20 +15,15 @@
 
 @dst = global [1024 x i8] zeroinitializer, align 32
 
-; Make sure the dest gets the align 32 from the global
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 32
-; But that the source continues to not have an alignment set.
-; CHECK: i8* %src1
-
 define void @foo() nounwind {
 entry:
   %src = alloca [1024 x i8], align 1
   %src1 = getelementptr [1024 x i8], [1024 x i8]* %src, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dst, i32 0, i32 0), i8* %src1, i32 1024, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dst, i32 0, i32 0), i8* %src1, i32 1024, i32 1, i1 false)
   call void @frob(i8* %src1) nounwind
   ret void
 }
 
 declare void @frob(i8*)
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/llvm/test/Transforms/InstCombine/stpcpy_chk-1.ll b/llvm/test/Transforms/InstCombine/stpcpy_chk-1.ll
index c8b6647..2fcc34b 100644
--- a/llvm/test/Transforms/InstCombine/stpcpy_chk-1.ll
+++ b/llvm/test/Transforms/InstCombine/stpcpy_chk-1.ll
@@ -16,7 +16,7 @@
   %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false)
 ; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 11)
   %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 60)
   ret i8* %ret
@@ -27,7 +27,7 @@
   %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false)
 ; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 11)
   %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 12)
   ret i8* %ret
@@ -38,7 +38,7 @@
   %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false)
 ; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 11)
   %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 -1)
   ret i8* %ret
diff --git a/llvm/test/Transforms/InstCombine/strcpy_chk-1.ll b/llvm/test/Transforms/InstCombine/strcpy_chk-1.ll
index 23c6998..7a21a49 100644
--- a/llvm/test/Transforms/InstCombine/strcpy_chk-1.ll
+++ b/llvm/test/Transforms/InstCombine/strcpy_chk-1.ll
@@ -16,7 +16,7 @@
   %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false)
 ; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0)
   %ret = call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 60)
   ret i8* %ret
@@ -27,7 +27,7 @@
   %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false)
 ; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0)
   %ret = call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 12)
   ret i8* %ret
@@ -38,7 +38,7 @@
   %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false)
 ; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0)
   %ret = call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1)
   ret i8* %ret
diff --git a/llvm/test/Transforms/InstCombine/strncpy_chk-1.ll b/llvm/test/Transforms/InstCombine/strncpy_chk-1.ll
index bc3ff35..03690b9 100644
--- a/llvm/test/Transforms/InstCombine/strncpy_chk-1.ll
+++ b/llvm/test/Transforms/InstCombine/strncpy_chk-1.ll
@@ -16,7 +16,7 @@
   %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false)
 ; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0)
   %ret = call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 12, i32 60)
   ret i8* %ret
@@ -27,7 +27,7 @@
   %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false)
 ; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0)
   %ret = call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 12, i32 12)
   ret i8* %ret
diff --git a/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll b/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll
index c237881..c75a839 100644
--- a/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll
+++ b/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll
@@ -2,7 +2,7 @@
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
 ; Verify that instcombine preserves TBAA tags when converting a memcpy into
 ; a scalar load and store.
@@ -17,7 +17,7 @@
 entry:
   %0 = bitcast %struct.test1* %a to i8*
   %1 = bitcast %struct.test1* %b to i8*
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 4, i1 false), !tbaa.struct !3
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 4, i32 4, i1 false), !tbaa.struct !3
   ret void
 }
 
@@ -29,7 +29,7 @@
 ; CHECK: ret
   %tmp = alloca %struct.test2, align 8
   %tmp1 = bitcast %struct.test2* %tmp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* undef, i64 8, i1 false), !tbaa.struct !4
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* undef, i64 8, i32 8, i1 false), !tbaa.struct !4
   %tmp2 = getelementptr %struct.test2, %struct.test2* %tmp, i32 0, i32 0
   %tmp3 = load i32 (i8*, i32*, double*)**, i32 (i8*, i32*, double*)*** %tmp2
   ret i32 (i8*, i32*, double*)*** %tmp2
diff --git a/llvm/test/Transforms/LoopIdiom/basic-address-space.ll b/llvm/test/Transforms/LoopIdiom/basic-address-space.ll
index c709b9a..67cabf3 100644
--- a/llvm/test/Transforms/LoopIdiom/basic-address-space.ll
+++ b/llvm/test/Transforms/LoopIdiom/basic-address-space.ll
@@ -7,7 +7,7 @@
 define void @test10(i8 addrspace(2)* %X) nounwind ssp {
 ; CHECK-LABEL: @test10(
 ; CHECK: entry:
-; CHECK-NEXT: call void @llvm.memset.p2i8.i16(i8 addrspace(2)* align 1 %X, i8 0, i16 10000, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p2i8.i16(i8 addrspace(2)* %X, i8 0, i16 10000, i32 1, i1 false)
 ; CHECK-NOT: store
 ; CHECK: ret void
 
diff --git a/llvm/test/Transforms/LoopIdiom/basic.ll b/llvm/test/Transforms/LoopIdiom/basic.ll
index 529a8f9..27a9551 100644
--- a/llvm/test/Transforms/LoopIdiom/basic.ll
+++ b/llvm/test/Transforms/LoopIdiom/basic.ll
@@ -24,7 +24,7 @@
 for.end:                                          ; preds = %for.body, %entry
   ret void
 ; CHECK-LABEL: @test1(
-; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %Base, i8 0, i64 %Size, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
 ; CHECK-NOT: store
 }
 
@@ -47,7 +47,7 @@
 for.end:                                          ; preds = %for.body, %entry
   ret void
 ; CHECK-LABEL: @test1a(
-; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %Base, i8 0, i64 %Size, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
 ; CHECK-NOT: store
 }
 
@@ -70,7 +70,7 @@
 ; CHECK-LABEL: @test2(
 ; CHECK: br i1 %cmp10,
 ; CHECK: %0 = shl i64 %Size, 2
-; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %Base1, i8 1, i64 %0, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base1, i8 1, i64 %0, i32 4, i1 false)
 ; CHECK-NOT: store
 }
 
@@ -119,7 +119,7 @@
 for.end:                                          ; preds = %for.body, %entry
   ret void
 ; CHECK-TODO-LABEL: @test4(
-; CHECK-TODO: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 100, i1 false)
+; CHECK-TODO: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 100, i32 1, i1 false)
 ; CHECK-TODO-NOT: store
 }
 
@@ -166,7 +166,7 @@
 for.end:                                          ; preds = %for.body, %entry
   ret void
 ; CHECK-LABEL: @test6(
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %Dest, i8* %Base, i64 %Size, i32 1, i1 false)
 ; CHECK-NOT: store
 ; CHECK: ret void
 }
@@ -191,7 +191,7 @@
 for.end:                                          ; preds = %for.body, %entry
   ret void
 ; CHECK-LABEL: @test7(
-; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %Base, i8 0, i64 %Size, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
 ; CHECK-NOT: store
 }
 
@@ -276,7 +276,7 @@
   ret void
 ; CHECK-LABEL: @test10(
 ; CHECK: entry:
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 %X, i8 0, i64 10000, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %X, i8 0, i64 10000, i32 1, i1 false)
 ; CHECK-NOT: store
 ; CHECK: ret void
 }
@@ -324,7 +324,7 @@
 ; CHECK-LABEL: @test12(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: bitcast
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 %P1, i8 0, i64 80000, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %P1, i8 0, i64 80000, i32 4, i1 false)
 ; CHECK-NOT: store
 ; CHECK: ret void
 }
@@ -441,7 +441,7 @@
 for.cond.cleanup:
   ret void
 ; CHECK-LABEL: @test15(
-; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %f1, i8 0, i64 262148, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %f1, i8 0, i64 262148, i32 4, i1 false)
 ; CHECK-NOT: store
 ; CHECK: ret void
 }
diff --git a/llvm/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll b/llvm/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
index d784d35..f73addd 100644
--- a/llvm/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
+++ b/llvm/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
@@ -25,12 +25,12 @@
   call fastcc void @initialize(%0* noalias sret %memtmp)
   %tmp1 = bitcast %0* %tmp to i8*
   %memtmp2 = bitcast %0* %memtmp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp1, i8* %memtmp2, i32 24, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp1, i8* %memtmp2, i32 24, i32 8, i1 false)
   %z3 = bitcast %0* %z to i8*
   %tmp4 = bitcast %0* %tmp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %z3, i8* %tmp4, i32 24, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %z3, i8* %tmp4, i32 24, i32 8, i1 false)
   %tmp5 = call fastcc x86_fp80 @passed_uninitialized(%0* %z)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/llvm/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll b/llvm/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
index 26c221d..ffbb299 100644
--- a/llvm/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
+++ b/llvm/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
@@ -13,10 +13,10 @@
   call void @g(%a* %a_var)
   %a_i8 = bitcast %a* %a_var to i8*
   %b_i8 = bitcast %b* %b_var to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b_i8, i8* %a_i8, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b_i8, i8* %a_i8, i32 4, i32 1, i1 false)
   %tmp1 = getelementptr %b, %b* %b_var, i32 0, i32 0
   %tmp2 = load float, float* %tmp1
   ret float %tmp2
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/llvm/test/Transforms/MemCpyOpt/align.ll b/llvm/test/Transforms/MemCpyOpt/align.ll
index 550bc5d..9074684 100644
--- a/llvm/test/Transforms/MemCpyOpt/align.ll
+++ b/llvm/test/Transforms/MemCpyOpt/align.ll
@@ -1,15 +1,15 @@
 ; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
 
 ; The resulting memset is only 4-byte aligned, despite containing
 ; a 16-byte aligned store in the middle.
 
 define void @foo(i32* %p) {
 ; CHECK-LABEL: @foo(
-; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 16, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 16, i32 4, i1 false)
   %a0 = getelementptr i32, i32* %p, i64 0
   store i32 0, i32* %a0, align 4
   %a1 = getelementptr i32, i32* %p, i64 1
@@ -31,7 +31,7 @@
   %a8 = alloca i32, align 8
   %a8.cast = bitcast i32* %a8 to i8*
   %a4.cast = bitcast i32* %a4 to i8*
-  call void @llvm.memset.p0i8.i64(i8* %a8.cast, i8 0, i64 4, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a4.cast, i8* %a8.cast, i64 4, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %a8.cast, i8 0, i64 4, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a4.cast, i8* %a8.cast, i64 4, i32 4, i1 false)
   ret void
 }
diff --git a/llvm/test/Transforms/MemCpyOpt/atomic.ll b/llvm/test/Transforms/MemCpyOpt/atomic.ll
index 38a0e12..5be6b15 100644
--- a/llvm/test/Transforms/MemCpyOpt/atomic.ll
+++ b/llvm/test/Transforms/MemCpyOpt/atomic.ll
@@ -7,7 +7,7 @@
 
 declare void @otherf(i32*)
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
 
 ; memcpyopt should not touch atomic ops
 define void @test1() nounwind uwtable ssp {
@@ -15,7 +15,7 @@
 ; CHECK: store atomic
   %x = alloca [101 x i32], align 16
   %bc = bitcast [101 x i32]* %x to i8*
-  call void @llvm.memset.p0i8.i64(i8* %bc, i8 0, i64 400, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %bc, i8 0, i64 400, i32 16, i1 false)
   %gep1 = getelementptr inbounds [101 x i32], [101 x i32]* %x, i32 0, i32 100
   store atomic i32 0, i32* %gep1 unordered, align 4
   %gep2 = getelementptr inbounds [101 x i32], [101 x i32]* %x, i32 0, i32 0
diff --git a/llvm/test/Transforms/MemCpyOpt/callslot_aa.ll b/llvm/test/Transforms/MemCpyOpt/callslot_aa.ll
index 9a376d6..b6ea129 100644
--- a/llvm/test/Transforms/MemCpyOpt/callslot_aa.ll
+++ b/llvm/test/Transforms/MemCpyOpt/callslot_aa.ll
@@ -6,14 +6,14 @@
 define void @test(i8* %src) {
   %tmp = alloca i8
   %dst = alloca i8
-; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %src, i64 1, i1 false), !noalias !2
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %tmp, i64 1, i1 false)
+; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 1, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %src, i64 1, i32 8, i1 false), !noalias !2
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %tmp, i64 1, i32 8, i1 false)
 
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
 
 ; Check that the noalias for "dst" was removed by checking that the metadata is gone
 ; CHECK-NOT: "dst"
diff --git a/llvm/test/Transforms/MemCpyOpt/callslot_deref.ll b/llvm/test/Transforms/MemCpyOpt/callslot_deref.ll
index a1ba2ba..4c3e3e8 100644
--- a/llvm/test/Transforms/MemCpyOpt/callslot_deref.ll
+++ b/llvm/test/Transforms/MemCpyOpt/callslot_deref.ll
@@ -1,8 +1,8 @@
 ; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s
 target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) unnamed_addr nounwind
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) unnamed_addr nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
 
 ; all bytes of %dst that are touch by the memset are dereferenceable
 define void @must_remove_memcpy(i8* noalias nocapture dereferenceable(4096) %dst) {
@@ -11,8 +11,8 @@
 ; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64
   %src = alloca [4096 x i8], align 1
   %p = getelementptr inbounds [4096 x i8], [4096 x i8]* %src, i64 0, i64 0
-  call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 4096, i1 false) #2
+  call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 4096, i32 1, i1 false) #2
   ret void
 }
 
@@ -24,7 +24,7 @@
 ; CHECK: call void @llvm.memset.p0i8.i64
   %src = alloca [4096 x i8], align 1
   %p = getelementptr inbounds [4096 x i8], [4096 x i8]* %src, i64 0, i64 0
-  call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 4096, i1 false) #2
+  call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 4096, i32 1, i1 false) #2
   ret void
 }
diff --git a/llvm/test/Transforms/MemCpyOpt/capturing-func.ll b/llvm/test/Transforms/MemCpyOpt/capturing-func.ll
index 2671a9a..17614fd 100644
--- a/llvm/test/Transforms/MemCpyOpt/capturing-func.ll
+++ b/llvm/test/Transforms/MemCpyOpt/capturing-func.ll
@@ -3,13 +3,13 @@
 target datalayout = "e"
 
 declare void @foo(i8*)
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
 define void @test() {
   %ptr1 = alloca i8
   %ptr2 = alloca i8
   call void @foo(i8* %ptr2)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr1, i8* %ptr2, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr1, i8* %ptr2, i32 1, i32 1, i1 false)
   call void @foo(i8* %ptr1)
   ret void
 
diff --git a/llvm/test/Transforms/MemCpyOpt/form-memset.ll b/llvm/test/Transforms/MemCpyOpt/form-memset.ll
index 836a610..7d7f3a6 100644
--- a/llvm/test/Transforms/MemCpyOpt/form-memset.ll
+++ b/llvm/test/Transforms/MemCpyOpt/form-memset.ll
@@ -152,11 +152,11 @@
         
 ; CHECK-LABEL: @test2(
 ; CHECK-NOT: store
-; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %tmp41, i8 -1, i64 8, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %tmp41, i8 -1, i64 8, i32 1, i1 false)
 ; CHECK-NOT: store
-; CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 32, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 32, i32 8, i1 false)
 ; CHECK-NOT: store
-; CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 %1, i8 0, i64 32, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 32, i32 8, i1 false)
 ; CHECK-NOT: store
 ; CHECK: ret
 }
@@ -171,11 +171,11 @@
   store i32 0, i32* %arrayidx, align 4
   %add.ptr = getelementptr inbounds i32, i32* %P, i64 2
   %0 = bitcast i32* %add.ptr to i8*
-  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false)
   ret void
 ; CHECK-LABEL: @test3(
 ; CHECK-NOT: store
-; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 0, i64 15, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
 }
 
 ; store followed by memset, different offset scenario
@@ -184,40 +184,40 @@
   store i32 0, i32* %P, align 4
   %add.ptr = getelementptr inbounds i32, i32* %P, i64 1
   %0 = bitcast i32* %add.ptr to i8*
-  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false)
   ret void
 ; CHECK-LABEL: @test4(
 ; CHECK-NOT: store
-; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 0, i64 15, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
 }
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
 
 ; Memset followed by store.
 define void @test5(i32* nocapture %P) nounwind ssp {
 entry:
   %add.ptr = getelementptr inbounds i32, i32* %P, i64 2
   %0 = bitcast i32* %add.ptr to i8*
-  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false)
   %arrayidx = getelementptr inbounds i32, i32* %P, i64 1
   store i32 0, i32* %arrayidx, align 4
   ret void
 ; CHECK-LABEL: @test5(
 ; CHECK-NOT: store
-; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 0, i64 15, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
 }
 
 ;; Memset followed by memset.
 define void @test6(i32* nocapture %P) nounwind ssp {
 entry:
   %0 = bitcast i32* %P to i8*
-  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 12, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 12, i32 1, i1 false)
   %add.ptr = getelementptr inbounds i32, i32* %P, i64 3
   %1 = bitcast i32* %add.ptr to i8*
-  tail call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 12, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 12, i32 1, i1 false)
   ret void
 ; CHECK-LABEL: @test6(
-; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %2, i8 0, i64 24, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %2, i8 0, i64 24, i32 1, i1 false)
 }
 
 ; More aggressive heuristic
@@ -233,7 +233,7 @@
   %4 = getelementptr inbounds i32, i32* %c, i32 4
   store i32 -1, i32* %4, align 4
 ; CHECK-LABEL: @test7(
-; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %5, i8 -1, i64 20, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %5, i8 -1, i64 20, i32 4, i1 false)
   ret void
 }
 
@@ -270,17 +270,17 @@
   store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 15), align 1
   ret void
 ; CHECK-LABEL: @test9(
-; CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 bitcast ([16 x i64]* @test9buf to i8*), i8 -1, i64 16, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* bitcast ([16 x i64]* @test9buf to i8*), i8 -1, i64 16, i32 16, i1 false)
 }
 
 ; PR19092
 define void @test10(i8* nocapture %P) nounwind {
-  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 42, i1 false)
-  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 23, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 42, i32 1, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 23, i32 1, i1 false)
   ret void
 ; CHECK-LABEL: @test10(
 ; CHECK-NOT: memset
-; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %P, i8 0, i64 42, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 42, i32 1, i1 false)
 ; CHECK-NOT: memset
 ; CHECK: ret void
 }
@@ -290,12 +290,12 @@
 entry:
   %add.ptr = getelementptr inbounds i32, i32* %P, i64 3
   %0 = bitcast i32* %add.ptr to i8*
-  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 1, i64 11, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 1, i64 11, i32 1, i1 false)
   %arrayidx = getelementptr inbounds i32, i32* %P, i64 0
   %arrayidx.cast = bitcast i32* %arrayidx to i96*
   store i96 310698676526526814092329217, i96* %arrayidx.cast, align 4
   ret void
 ; CHECK-LABEL: @test11(
 ; CHECK-NOT: store
-; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 1, i64 23, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 1, i64 23, i32 4, i1 false)
 }
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll
index fee7409..e3e57f0 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll
@@ -8,15 +8,15 @@
   %a = alloca [8 x i64], align 8
   %a.cast = bitcast [8 x i64]* %a to i8*
   call void @llvm.lifetime.start(i64 64, i8* %a.cast)
-  call void @llvm.memset.p0i8.i64(i8* %a.cast, i8 0, i64 64, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %a.cast, i8 0, i64 64, i32 8, i1 false)
   %sret.cast = bitcast [8 x i64]* %sret to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %sret.cast, i8* %a.cast, i64 64, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %sret.cast, i8* %a.cast, i64 64, i32 8, i1 false)
   call void @llvm.lifetime.end(i64 64, i8* %a.cast)
   ret void
 
 ; CHECK-LABEL: @foo(
 ; CHECK:         %[[sret_cast:[^=]+]] = bitcast [8 x i64]* %sret to i8*
-; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 %[[sret_cast]], i8 0, i64 64
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* %[[sret_cast]], i8 0, i64 64
 ; CHECK-NOT: call void @llvm.memcpy
 ; CHECK: ret void
 }
@@ -26,24 +26,24 @@
   %a = alloca [8 x i64], align 8
   %a.cast = bitcast [8 x i64]* %a to i8*
   call void @llvm.lifetime.start(i64 64, i8* %a.cast)
-  call void @llvm.memset.p0i8.i64(i8* align 8 %a.cast, i8 0, i64 64, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %a.cast, i8 0, i64 64, i32 8, i1 false)
   %sret.cast = bitcast [8 x i64]* %sret to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %sret.cast, i8* align 8 %a.cast, i64 64, i1 false)
-  call void @llvm.memset.p0i8.i64(i8* align 8 %a.cast, i8 42, i64 32, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %sret.cast, i8* %a.cast, i64 64, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %a.cast, i8 42, i64 32, i32 8, i1 false)
   %out.cast = bitcast [8 x i64]* %out to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %out.cast, i8* align 8 %a.cast, i64 64, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out.cast, i8* %a.cast, i64 64, i32 8, i1 false)
   call void @llvm.lifetime.end(i64 64, i8* %a.cast)
   ret void
 
 ; CHECK-LABEL: @bar(
 ; CHECK:         %[[a:[^=]+]] = alloca [8 x i64]
 ; CHECK:         %[[a_cast:[^=]+]] = bitcast [8 x i64]* %[[a]] to i8*
-; CHECK:         call void @llvm.memset.p0i8.i64(i8* align 8 %[[a_cast]], i8 0, i64 64
+; CHECK:         call void @llvm.memset.p0i8.i64(i8* %[[a_cast]], i8 0, i64 64
 ; CHECK:         %[[sret_cast:[^=]+]] = bitcast [8 x i64]* %sret to i8*
-; CHECK:         call void @llvm.memset.p0i8.i64(i8* align 8 %[[sret_cast]], i8 0, i64 64
-; CHECK:         call void @llvm.memset.p0i8.i64(i8* align 8 %[[a_cast]], i8 42, i64 32
+; CHECK:         call void @llvm.memset.p0i8.i64(i8* %[[sret_cast]], i8 0, i64 64
+; CHECK:         call void @llvm.memset.p0i8.i64(i8* %[[a_cast]], i8 42, i64 32
 ; CHECK:         %[[out_cast:[^=]+]] = bitcast [8 x i64]* %out to i8*
-; CHECK:         call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[out_cast]], i8* align 8 %[[a_cast]], i64 64
+; CHECK:         call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[out_cast]], i8* %[[a_cast]], i64 64
 ; CHECK-NOT: call void @llvm.memcpy
 ; CHECK: ret void
 }
@@ -51,5 +51,5 @@
 declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
 declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) nounwind
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset.ll
index 8f27fa9..fd8b93c 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset.ll
@@ -2,13 +2,13 @@
 
 @cst = internal constant [3 x i32] [i32 -1, i32 -1, i32 -1], align 4
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 declare void @foo(i32*) nounwind
 
 define void @test1() nounwind {
   %arr = alloca [3 x i32], align 4
   %arr_i8 = bitcast [3 x i32]* %arr to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arr_i8, i8* bitcast ([3 x i32]* @cst to i8*), i64 12, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arr_i8, i8* bitcast ([3 x i32]* @cst to i8*), i64 12, i32 4, i1 false)
   %arraydecay = getelementptr inbounds [3 x i32], [3 x i32]* %arr, i64 0, i64 0
   call void @foo(i32* %arraydecay) nounwind
   ret void
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-undef.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-undef.ll
index 9d8db7e..c75d020 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy-undef.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy-undef.ll
@@ -11,7 +11,7 @@
   store i8 98, i8* %1, align 4
   %2 = getelementptr inbounds %struct.foo, %struct.foo* %foobie, i64 0, i32 1, i64 0
   %3 = getelementptr inbounds [7 x i8], [7 x i8]* %bletch.sroa.1, i64 0, i64 0
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 7, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 7, i32 1, i1 false)
   %4 = getelementptr inbounds %struct.foo, %struct.foo* %foobie, i64 0, i32 2
   store i32 20, i32* %4, align 4
   ret i32 undef
@@ -23,7 +23,7 @@
 
 define void @test2(i8* sret noalias nocapture %out, i8* %in) nounwind noinline ssp uwtable {
   call void @llvm.lifetime.start(i64 8, i8* %in)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 8, i32 1, i1 false)
   ret void
 
 ; Check that the memcpy is removed.
@@ -33,7 +33,7 @@
 
 define void @test3(i8* sret noalias nocapture %out, i8* %in) nounwind noinline ssp uwtable {
   call void @llvm.lifetime.start(i64 4, i8* %in)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 8, i32 1, i1 false)
   ret void
 
 ; Check that the memcpy is not removed.
@@ -41,6 +41,6 @@
 ; CHECK: call void @llvm.memcpy
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
 declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
index e5a8b62..6181543 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
@@ -14,9 +14,9 @@
   call void @ccoshl(%0* sret %memtmp, x86_fp80 %tmp5, x86_fp80 %z.0) nounwind
   %tmp219 = bitcast %0* %tmp2 to i8*
   %memtmp20 = bitcast %0* %memtmp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %tmp219, i8* align 16 %memtmp20, i32 32, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp219, i8* %memtmp20, i32 32, i32 16, i1 false)
   %agg.result21 = bitcast %0* %agg.result to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %agg.result21, i8* align 16 %tmp219, i32 32, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result21, i8* %tmp219, i32 32, i32 16, i1 false)
   ret void
 
 ; Check that one of the memcpy's are removed.
@@ -37,12 +37,12 @@
 define void @test2(i8* %P, i8* %Q) nounwind  {
   %memtmp = alloca %0, align 16
   %R = bitcast %0* %memtmp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %R, i8* align 16 %P, i32 32, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %Q, i8* align 16 %R, i32 32, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %R, i8* %P, i32 32, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %Q, i8* %R, i32 32, i32 16, i1 false)
   ret void
         
 ; CHECK-LABEL: @test2(
-; CHECK-NEXT: call void @llvm.memmove{{.*}}(i8* align 16 %Q, i8* align 16 %P
+; CHECK-NEXT: call void @llvm.memmove{{.*}}(i8* %Q, i8* %P
 ; CHECK-NEXT: ret void
 }
 
@@ -54,9 +54,9 @@
 define void @test3(%0* noalias sret %agg.result) nounwind  {
   %x.0 = alloca %0
   %x.01 = bitcast %0* %x.0 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %x.01, i8* align 16 bitcast (%0* @x to i8*), i32 32, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x.01, i8* bitcast (%0* @x to i8*), i32 32, i32 16, i1 false)
   %agg.result2 = bitcast %0* %agg.result to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %agg.result2, i8* align 16 %x.01, i32 32, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result2, i8* %x.01, i32 32, i32 16, i1 false)
   ret void
 ; CHECK-LABEL: @test3(
 ; CHECK-NEXT: %agg.result1 = bitcast 
@@ -69,7 +69,7 @@
 define void @test4(i8 *%P) {
   %A = alloca %1
   %a = bitcast %1* %A to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a, i8* align 4 %P, i64 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %P, i64 8, i32 4, i1 false)
   call void @test4a(i8* align 1 byval %a)
   ret void
 ; CHECK-LABEL: @test4(
@@ -77,8 +77,8 @@
 }
 
 declare void @test4a(i8* align 1 byval)
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
-declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind
 
 %struct.S = type { i128, [4 x i8]}
 
@@ -92,7 +92,7 @@
 entry:
   %y = alloca %struct.S, align 16
   %tmp = bitcast %struct.S* %y to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* bitcast (%struct.S* @sS to i8*), i64 32, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* bitcast (%struct.S* @sS to i8*), i64 32, i32 16, i1 false)
   %a = getelementptr %struct.S, %struct.S* %y, i64 0, i32 1, i64 0
   store i8 4, i8* %a
   call void @test5a(%struct.S* align 16 byval %y)
@@ -104,7 +104,7 @@
 
 ;; Noop memcpy should be zapped.
 define void @test6(i8 *%P) {
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %P, i64 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %P, i64 8, i32 4, i1 false)
   ret void
 ; CHECK-LABEL: @test6(
 ; CHECK-NEXT: ret void
@@ -120,7 +120,7 @@
   %agg.tmp = alloca %struct.p, align 4
   %tmp = bitcast %struct.p* %agg.tmp to i8*
   %tmp1 = bitcast %struct.p* %q to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 48, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 48, i32 4, i1 false)
   %call = call i32 @g(%struct.p* align 8 byval %agg.tmp) nounwind
   ret i32 %call
 ; CHECK-LABEL: @test7(
@@ -129,7 +129,7 @@
 
 declare i32 @g(%struct.p* align 8 byval)
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
 ; PR11142 - When looking for a memcpy-memcpy dependency, don't get stuck on
 ; instructions between the memcpy's that only affect the destination pointer.
@@ -140,10 +140,10 @@
 ; CHECK-NOT: memcpy
   %A = tail call i8* @malloc(i32 10)
   %B = getelementptr inbounds i8, i8* %A, i64 2
-  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %B, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @test8.str, i64 0, i64 0), i32 7, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %B, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @test8.str, i64 0, i64 0), i32 7, i32 1, i1 false)
   %C = tail call i8* @malloc(i32 10)
   %D = getelementptr inbounds i8, i8* %C, i64 2
-  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %D, i8* %B, i32 7, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %D, i8* %B, i32 7, i32 1, i1 false)
   ret void
 ; CHECK: ret void
 }
@@ -164,7 +164,7 @@
   call void @f1(%struct.big* sret %tmp)
   %0 = addrspacecast %struct.big* %b to i8 addrspace(1)*
   %1 = addrspacecast %struct.big* %tmp to i8 addrspace(1)*
-  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %0, i8 addrspace(1)* %1, i64 200, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %0, i8 addrspace(1)* %1, i64 200, i32 4, i1 false)
   call void @f2(%struct.big* %b)
   ret void
 }
@@ -180,7 +180,7 @@
   call void @f1(%struct.big* sret %tmp)
   %0 = bitcast %struct.big* %b to i8*
   %1 = bitcast %struct.big* %tmp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 200, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 200, i32 4, i1 false)
   call void @f2(%struct.big* %b)
   ret void
 }
diff --git a/llvm/test/Transforms/MemCpyOpt/memmove.ll b/llvm/test/Transforms/MemCpyOpt/memmove.ll
index 91f2851..1af85a1 100644
--- a/llvm/test/Transforms/MemCpyOpt/memmove.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memmove.ll
@@ -4,7 +4,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin9.0"
 
-declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
 define i8* @test1(i8* nocapture %src) nounwind {
 entry:
@@ -14,7 +14,7 @@
   %malloccall = tail call i8* @malloc(i32 trunc (i64 mul nuw (i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64), i64 13) to i32))
   %call3 = bitcast i8* %malloccall to [13 x i8]*
   %call3.sub = getelementptr inbounds [13 x i8], [13 x i8]* %call3, i64 0, i64 0
-  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %call3.sub, i8* %src, i64 13, i1 false)
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %call3.sub, i8* %src, i64 13, i32 1, i1 false)
   ret i8* %call3.sub
 }
 declare noalias i8* @malloc(i32)
@@ -25,7 +25,7 @@
 ; CHECK-LABEL: @test2(
 ; CHECK: call void @llvm.memcpy
   %add.ptr = getelementptr i8, i8* %P, i64 16
-  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 16, i1 false)
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 16, i32 1, i1 false)
   ret void
 }
 
@@ -35,6 +35,6 @@
 ; CHECK-LABEL: @test3(
 ; CHECK: call void @llvm.memmove
   %add.ptr = getelementptr i8, i8* %P, i64 16
-  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 17, i1 false)
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 17, i32 1, i1 false)
   ret void
 }
diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll
index 4f50010..993c8a1 100644
--- a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll
@@ -7,12 +7,12 @@
 ; CHECK-DAG: [[ULE:%[0-9]+]] = icmp ule i64 %dst_size, %src_size
 ; CHECK-DAG: [[SIZEDIFF:%[0-9]+]] = sub i64 %dst_size, %src_size
 ; CHECK-DAG: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]]
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[DST]], i8 %c, i64 [[SIZE]], i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %src_size, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST]], i8 %c, i64 [[SIZE]], i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false)
 ; CHECK-NEXT: ret void
 define void @test(i8* %src, i64 %src_size, i8* %dst, i64 %dst_size, i8 %c) {
-  call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %c, i64 %dst_size, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %src_size, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %dst, i8 %c, i64 %dst_size, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false)
   ret void
 }
 
@@ -22,12 +22,12 @@
 ; CHECK-DAG: [[ULE:%[0-9]+]] = icmp ule i64 [[DSTSIZE]], %src_size
 ; CHECK-DAG: [[SIZEDIFF:%[0-9]+]] = sub i64 [[DSTSIZE]], %src_size
 ; CHECK-DAG: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]]
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[DST]], i8 %c, i64 [[SIZE]], i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %src_size, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST]], i8 %c, i64 [[SIZE]], i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_different_types_i32_i64(i8* %dst, i8* %src, i32 %dst_size, i64 %src_size, i8 %c) {
-  call void @llvm.memset.p0i8.i32(i8* align 1 %dst, i8 %c, i32 %dst_size, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %src_size, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %dst, i8 %c, i32 %dst_size, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false)
   ret void
 }
 
@@ -37,12 +37,12 @@
 ; CHECK-DAG: [[ULE:%[0-9]+]] = icmp ule i128 %dst_size, [[SRCSIZE]]
 ; CHECK-DAG: [[SIZEDIFF:%[0-9]+]] = sub i128 %dst_size, [[SRCSIZE]]
 ; CHECK-DAG: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i128 0, i128 [[SIZEDIFF]]
-; CHECK-NEXT: call void @llvm.memset.p0i8.i128(i8* align 1 [[DST]], i8 %c, i128 [[SIZE]], i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 %src, i32 %src_size, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i128(i8* [[DST]], i8 %c, i128 [[SIZE]], i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i32 1, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_different_types_i128_i32(i8* %dst, i8* %src, i128 %dst_size, i32 %src_size, i8 %c) {
-  call void @llvm.memset.p0i8.i128(i8* align 1 %dst, i8 %c, i128 %dst_size, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 %src, i32 %src_size, i1 false)
+  call void @llvm.memset.p0i8.i128(i8* %dst, i8 %c, i128 %dst_size, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i32 1, i1 false)
   ret void
 }
 
@@ -52,12 +52,12 @@
 ; CHECK-DAG: [[ULE:%[0-9]+]] = icmp ule i128 [[DSTSIZE]], %src_size
 ; CHECK-DAG: [[SIZEDIFF:%[0-9]+]] = sub i128 [[DSTSIZE]], %src_size
 ; CHECK-DAG: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i128 0, i128 [[SIZEDIFF]]
-; CHECK-NEXT: call void @llvm.memset.p0i8.i128(i8* align 1 [[DST]], i8 %c, i128 [[SIZE]], i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i128(i8* align 1 %dst, i8* align 1 %src, i128 %src_size, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i128(i8* [[DST]], i8 %c, i128 [[SIZE]], i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i128(i8* %dst, i8* %src, i128 %src_size, i32 1, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_different_types_i32_i128(i8* %dst, i8* %src, i32 %dst_size, i128 %src_size, i8 %c) {
-  call void @llvm.memset.p0i8.i32(i8* align 1 %dst, i8 %c, i32 %dst_size, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i128(i8* align 1 %dst, i8* align 1 %src, i128 %src_size, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %dst, i8 %c, i32 %dst_size, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i128(i8* %dst, i8* %src, i128 %src_size, i32 1, i1 false)
   ret void
 }
 
@@ -67,36 +67,36 @@
 ; CHECK-DAG: [[ULE:%[0-9]+]] = icmp ule i64 %dst_size, [[SRCSIZE]]
 ; CHECK-DAG: [[SIZEDIFF:%[0-9]+]] = sub i64 %dst_size, [[SRCSIZE]]
 ; CHECK-DAG: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]]
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[DST]], i8 %c, i64 [[SIZE]], i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 %src, i32 %src_size, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST]], i8 %c, i64 [[SIZE]], i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i32 1, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_different_types_i64_i32(i8* %dst, i8* %src, i64 %dst_size, i32 %src_size, i8 %c) {
-  call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %c, i64 %dst_size, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 %src, i32 %src_size, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %dst, i8 %c, i64 %dst_size, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i32 1, i1 false)
   ret void
 }
 
 ; CHECK-LABEL: define void @test_align_same
-; CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 {{.*}}, i8 0, i64 {{.*}}, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 {{.*}}, i32 8, i1 false)
 define void @test_align_same(i8* %src, i8* %dst, i64 %dst_size) {
-  call void @llvm.memset.p0i8.i64(i8* align 8 %dst, i8 0, i64 %dst_size, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 80, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 80, i32 1, i1 false)
   ret void
 }
 
 ; CHECK-LABEL: define void @test_align_min
-; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 0, i64 {{.*}}, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 {{.*}}, i32 4, i1 false)
 define void @test_align_min(i8* %src, i8* %dst, i64 %dst_size) {
-  call void @llvm.memset.p0i8.i64(i8* align 8 %dst, i8 0, i64 %dst_size, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 36, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 36, i32 1, i1 false)
   ret void
 }
 
 ; CHECK-LABEL: define void @test_align_memcpy
-; CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 {{.*}}, i8 0, i64 {{.*}}, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 {{.*}}, i32 8, i1 false)
 define void @test_align_memcpy(i8* %src, i8* %dst, i64 %dst_size) {
-  call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 %dst_size, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 80, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 80, i32 8, i1 false)
   ret void
 }
 
@@ -106,37 +106,37 @@
 ; CHECK-DAG: [[ULE:%[0-9]+]] = icmp ule i64 %dst_size, %src_size
 ; CHECK-DAG: [[SIZEDIFF:%[0-9]+]] = sub i64 %dst_size, %src_size
 ; CHECK-DAG: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]]
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[DST]], i8 %c, i64 [[SIZE]], i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %src_size, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST]], i8 %c, i64 [[SIZE]], i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_non_i8_dst_type(i8* %src, i64 %src_size, i64* %dst_pi64, i64 %dst_size, i8 %c) {
   %dst = bitcast i64* %dst_pi64 to i8*
-  call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %c, i64 %dst_size, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %src_size, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %dst, i8 %c, i64 %dst_size, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false)
   ret void
 }
 
 ; CHECK-LABEL: define void @test_different_dst
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 %dst_size, i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst2, i8* align 1 %src, i64 %src_size, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src, i64 %src_size, i32 1, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_different_dst(i8* %dst2, i8* %src, i64 %src_size, i8* %dst, i64 %dst_size) {
-  call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 %dst_size, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst2, i8* align 1 %src, i64 %src_size, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src, i64 %src_size, i32 1, i1 false)
   ret void
 }
 
 ; Make sure we also take into account dependencies on the destination.
 
 ; CHECK-LABEL: define i8 @test_intermediate_read
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 %a, i8 0, i64 64, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 64, i32 1, i1 false)
 ; CHECK-NEXT: %r = load i8, i8* %a
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %a, i8* align 1 %b, i64 24, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 24, i32 1, i1 false)
 ; CHECK-NEXT: ret i8 %r
 define i8 @test_intermediate_read(i8* %a, i8* %b) #0 {
-  call void @llvm.memset.p0i8.i64(i8* align 1 %a, i8 0, i64 64, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 64, i32 1, i1 false)
   %r = load i8, i8* %a
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %a, i8* align 1 %b, i64 24, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 24, i32 1, i1 false)
   ret i8 %r
 }
 
@@ -146,23 +146,23 @@
 ; CHECK-NEXT: %a = alloca %struct
 ; CHECK-NEXT: %a0 = getelementptr %struct, %struct* %a, i32 0, i32 0, i32 0
 ; CHECK-NEXT: %a1 = getelementptr %struct, %struct* %a, i32 0, i32 1, i32 0
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 %a0, i8 0, i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %a0, i8 0, i64 16, i32 1, i1 false)
 ; CHECK-NEXT: store i8 1, i8* %a1
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %a0, i8* align 1 %b, i64 8, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a0, i8* %b, i64 8, i32 1, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_intermediate_write(i8* %b) #0 {
   %a = alloca %struct
   %a0 = getelementptr %struct, %struct* %a, i32 0, i32 0, i32 0
   %a1 = getelementptr %struct, %struct* %a, i32 0, i32 1, i32 0
-  call void @llvm.memset.p0i8.i64(i8* align 1 %a0, i8 0, i64 16, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %a0, i8 0, i64 16, i32 1, i1 false)
   store i8 1, i8* %a1
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %a0, i8* align 1 %b, i64 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a0, i8* %b, i64 8, i32 1, i1 false)
   ret void
 }
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1)
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1)
-declare void @llvm.memset.p0i8.i128(i8* nocapture, i8, i128, i1)
-declare void @llvm.memcpy.p0i8.p0i8.i128(i8* nocapture, i8* nocapture readonly, i128, i1)
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
+declare void @llvm.memset.p0i8.i128(i8* nocapture, i8, i128, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i128(i8* nocapture, i8* nocapture readonly, i128, i32, i1)
diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll
index b0abab8..1c56704 100644
--- a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll
@@ -3,99 +3,99 @@
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
 ; CHECK-LABEL: define void @test(
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 128, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 128, i32 8, i1 false)
 ; CHECK-NEXT: ret void
 define void @test(i8* %dst1, i8* %dst2, i8 %c) {
-  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 8, i1 false)
   ret void
 }
 
 ; CHECK-LABEL: define void @test_smaller_memcpy(
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 100, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 100, i32 1, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_smaller_memcpy(i8* %dst1, i8* %dst2, i8 %c) {
-  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 100, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 100, i32 1, i1 false)
   ret void
 }
 
 ; CHECK-LABEL: define void @test_smaller_memset(
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 100, i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 100, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_smaller_memset(i8* %dst1, i8* %dst2, i8 %c) {
-  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 100, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 100, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false)
   ret void
 }
 
 ; CHECK-LABEL: define void @test_align_memset(
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 128, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 8, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 128, i32 1, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_align_memset(i8* %dst1, i8* %dst2, i8 %c) {
-  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false)
   ret void
 }
 
 ; CHECK-LABEL: define void @test_different_types(
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
-; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dst2, i8 %c, i32 100, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 8, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dst2, i8 %c, i32 100, i32 1, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_different_types(i8* %dst1, i8* %dst2, i8 %c) {
-  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst2, i8* %dst1, i32 100, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst2, i8* %dst1, i32 100, i32 1, i1 false)
   ret void
 }
 
 ; CHECK-LABEL: define void @test_different_types_2(
-; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dst1, i8 %c, i32 128, i1 false)
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 100, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dst1, i8 %c, i32 128, i32 8, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 100, i32 1, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_different_types_2(i8* %dst1, i8* %dst2, i8 %c) {
-  call void @llvm.memset.p0i8.i32(i8* %dst1, i8 %c, i32 128, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 100, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %dst1, i8 %c, i32 128, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 100, i32 1, i1 false)
   ret void
 }
 
 ; CHECK-LABEL: define void @test_different_source_gep(
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false)
 ; CHECK-NEXT: %p = getelementptr i8, i8* %dst1, i64 64
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %p, i64 64, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %p, i64 64, i32 1, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_different_source_gep(i8* %dst1, i8* %dst2, i8 %c) {
-  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false)
   ; FIXME: We could optimize this as well.
   %p = getelementptr i8, i8* %dst1, i64 64
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %p, i64 64, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %p, i64 64, i32 1, i1 false)
   ret void
 }
 
 ; CHECK-LABEL: define void @test_variable_size_1(
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 %dst1_size, i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 %dst1_size, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_variable_size_1(i8* %dst1, i64 %dst1_size, i8* %dst2, i8 %c) {
-  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 %dst1_size, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 %dst1_size, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false)
   ret void
 }
 
 ; CHECK-LABEL: define void @test_variable_size_2(
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 %dst2_size, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 %dst2_size, i32 1, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_variable_size_2(i8* %dst1, i8* %dst2, i64 %dst2_size, i8 %c) {
-  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 %dst2_size, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 %dst2_size, i32 1, i1 false)
   ret void
 }
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1)
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1)
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
diff --git a/llvm/test/Transforms/MemCpyOpt/smaller.ll b/llvm/test/Transforms/MemCpyOpt/smaller.ll
index 17aca87..8f6cafa6 100644
--- a/llvm/test/Transforms/MemCpyOpt/smaller.ll
+++ b/llvm/test/Transforms/MemCpyOpt/smaller.ll
@@ -4,7 +4,7 @@
 ; Memcpyopt shouldn't optimize the second memcpy using the first
 ; because the first has a smaller size.
 
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 0, i32 0), i32 16, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 0, i32 0), i32 16, i32 4, i1 false)
 
 target datalayout = "e-p:32:32:32"
 
@@ -14,15 +14,15 @@
 @cell = external global %struct.s
 
 declare void @check(%struct.s* byval %p) nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
 define void @foo() nounwind {
 entry:
   %agg.tmp = alloca %struct.s, align 4
   store i32 99, i32* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 1), align 4
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 0, i32 0), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 11, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 0, i32 0), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 11, i32 1, i1 false)
   %tmp = getelementptr inbounds %struct.s, %struct.s* %agg.tmp, i32 0, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 0, i32 0), i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 0, i32 0), i32 16, i32 4, i1 false)
   call void @check(%struct.s* byval %agg.tmp)
   ret void
 }
diff --git a/llvm/test/Transforms/MemCpyOpt/sret.ll b/llvm/test/Transforms/MemCpyOpt/sret.ll
index a99b52d..34ba4c4 100644
--- a/llvm/test/Transforms/MemCpyOpt/sret.ll
+++ b/llvm/test/Transforms/MemCpyOpt/sret.ll
@@ -21,10 +21,10 @@
   call void @ccoshl(%0* noalias sret %memtmp, %0* byval align 8 %iz) nounwind
   %memtmp14 = bitcast %0* %memtmp to i8*
   %agg.result15 = bitcast %0* %agg.result to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %agg.result15, i8* align 16 %memtmp14, i32 32, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result15, i8* %memtmp14, i32 32, i32 16, i1 false)
   ret void
 }
 
 declare void @ccoshl(%0* noalias nocapture sret, %0* byval) nounwind
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/llvm/test/Transforms/MergeFunc/vector.ll b/llvm/test/Transforms/MergeFunc/vector.ll
index db95ec7..ef13753 100644
--- a/llvm/test/Transforms/MergeFunc/vector.ll
+++ b/llvm/test/Transforms/MergeFunc/vector.ll
@@ -59,7 +59,7 @@
 
 declare void @_ZdlPv(i8*) nounwind
 
-declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
 declare void @_ZSt17__throw_bad_allocv() noreturn
 
diff --git a/llvm/test/Transforms/MetaRenamer/metarenamer.ll b/llvm/test/Transforms/MetaRenamer/metarenamer.ll
index 90d861d..213fbe3 100644
--- a/llvm/test/Transforms/MetaRenamer/metarenamer.ll
+++ b/llvm/test/Transforms/MetaRenamer/metarenamer.ll
@@ -35,11 +35,11 @@
   store double 4.000000e+00, double* %6, align 8
   %7 = bitcast %struct.foo_xxx* %agg.result to i8*
   %8 = bitcast %struct.foo_xxx* %1 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* %8, i64 24, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* %8, i64 24, i32 8, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
 define i32 @func_5_xxx(i32 %arg_1_xxx, i32 %arg_2_xxx, i32 %arg_3_xxx, i32 %arg_4_xxx) nounwind uwtable ssp {
   %1 = alloca i32, align 4
diff --git a/llvm/test/Transforms/ObjCARC/nested.ll b/llvm/test/Transforms/ObjCARC/nested.ll
index 589300c..cf14a1f9 100644
--- a/llvm/test/Transforms/ObjCARC/nested.ll
+++ b/llvm/test/Transforms/ObjCARC/nested.ll
@@ -12,7 +12,7 @@
 declare i8* @objc_retainAutoreleasedReturnValue(i8*)
 declare i8* @objc_retain(i8*)
 declare void @objc_enumerationMutation(i8*)
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
 declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
 declare void @use(i8*)
 declare void @objc_release(i8*)
@@ -35,7 +35,7 @@
   %items.ptr = alloca [16 x i8*], align 8
   %0 = call i8* @objc_retain(i8* %a) nounwind
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
-  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
   %1 = call i8* @objc_retain(i8* %0) nounwind
   %tmp2 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp2, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
@@ -100,7 +100,7 @@
   %call = call i8* @returner()
   %0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
-  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
   %1 = call i8* @objc_retain(i8* %0) nounwind
   %tmp2 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call3 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp2, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
@@ -165,7 +165,7 @@
   %tmp = load i8*, i8** @g, align 8
   %0 = call i8* @objc_retain(i8* %tmp) nounwind
   %tmp2 = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
-  call void @llvm.memset.p0i8.i64(i8* %tmp2, i8 0, i64 64, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %tmp2, i8 0, i64 64, i32 8, i1 false)
   %1 = call i8* @objc_retain(i8* %0) nounwind
   %tmp4 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp4, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
@@ -230,7 +230,7 @@
   %call = call i8* @returner()
   %0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
-  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
   %1 = call i8* @objc_retain(i8* %0) nounwind
   %tmp2 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call3 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp2, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
@@ -296,7 +296,7 @@
   %call = call i8* @returner()
   %0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
-  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
   %1 = call i8* @objc_retain(i8* %0) nounwind
   %tmp2 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call3 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp2, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
@@ -365,7 +365,7 @@
   %0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
   call void @callee()
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
-  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
   %1 = call i8* @objc_retain(i8* %0) nounwind
   %tmp2 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call3 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp2, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
@@ -431,7 +431,7 @@
   %call = call i8* @returner()
   %0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
-  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
   %1 = call i8* @objc_retain(i8* %0) nounwind
   %tmp2 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call3 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp2, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
@@ -508,7 +508,7 @@
   %call1 = call i8* @returner()
   %1 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call1) nounwind
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
-  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
   %2 = call i8* @objc_retain(i8* %0) nounwind
   %tmp3 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call4 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp3, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
@@ -575,7 +575,7 @@
   %call1 = call i8* @returner()
   %1 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call1) nounwind
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
-  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
   %2 = call i8* @objc_retain(i8* %0) nounwind
   %tmp3 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call4 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp3, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
@@ -642,7 +642,7 @@
   %1 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call1) nounwind
   call void @callee()
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
-  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
   %2 = call i8* @objc_retain(i8* %0) nounwind
   %tmp3 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call4 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp3, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
@@ -710,7 +710,7 @@
   %1 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call1) nounwind
   call void @callee()
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
-  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
   %2 = call i8* @objc_retain(i8* %0) nounwind
   %tmp3 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call4 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp3, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
diff --git a/llvm/test/Transforms/PlaceSafepoints/memset.ll b/llvm/test/Transforms/PlaceSafepoints/memset.ll
index e630fd0..534b2f1 100644
--- a/llvm/test/Transforms/PlaceSafepoints/memset.ll
+++ b/llvm/test/Transforms/PlaceSafepoints/memset.ll
@@ -5,13 +5,13 @@
 ; CHECK-NEXT: llvm.memset
 ; CHECK: do_safepoint
 ; CHECK: @foo
-  call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %ptr, i8 0, i64 24, i1 false)
+  call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %ptr, i8 0, i64 24, i32 8, i1 false)
   call void @foo()
   ret void
 }
 
 declare void @foo()
-declare void @llvm.memset.p1i8.i64(i8 addrspace(1)*, i8, i64, i1)
+declare void @llvm.memset.p1i8.i64(i8 addrspace(1)*, i8, i64, i32, i1)
 
 declare void @do_safepoint()
 define void @gc.safepoint_poll() {
diff --git a/llvm/test/Transforms/SROA/address-spaces.ll b/llvm/test/Transforms/SROA/address-spaces.ll
index f76bd46..004695d 100644
--- a/llvm/test/Transforms/SROA/address-spaces.ll
+++ b/llvm/test/Transforms/SROA/address-spaces.ll
@@ -1,10 +1,10 @@
 ; RUN: opt < %s -sroa -S | FileCheck %s
 target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1)
-declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture readonly, i32, i1)
-declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture readonly, i32, i1)
-declare void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
+declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture readonly, i32, i32, i1)
+declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture readonly, i32, i32, i1)
+declare void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i32, i1)
 
 
 ; Make sure an illegal bitcast isn't introduced
@@ -16,9 +16,9 @@
   %aa = alloca <2 x i64>, align 16
   %aptr = bitcast <2 x i64> addrspace(1)* %a to i8 addrspace(1)*
   %aaptr = bitcast <2 x i64>* %aa to i8*
-  call void @llvm.memcpy.p0i8.p1i8.i32(i8* align 2 %aaptr, i8 addrspace(1)* align 2 %aptr, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %aaptr, i8 addrspace(1)* %aptr, i32 16, i32 2, i1 false)
   %bptr = bitcast i16 addrspace(1)* %b to i8 addrspace(1)*
-  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 2 %bptr, i8* align 2 %aaptr, i32 16, i1 false)
+  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %bptr, i8* %aaptr, i32 16, i32 2, i1 false)
   ret void
 }
 
@@ -30,9 +30,9 @@
   %aa = alloca <2 x i64>, align 16
   %aptr = bitcast <2 x i64> addrspace(1)* %a to i8 addrspace(1)*
   %aaptr = bitcast <2 x i64>* %aa to i8*
-  call void @llvm.memcpy.p0i8.p1i8.i32(i8* align 2 %aaptr, i8 addrspace(1)* align 2 %aptr, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %aaptr, i8 addrspace(1)* %aptr, i32 16, i32 2, i1 false)
   %bptr = bitcast i16* %b to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %bptr, i8* align 2 %aaptr, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %bptr, i8* %aaptr, i32 16, i32 2, i1 false)
   ret void
 }
 
@@ -44,9 +44,9 @@
   %aa = alloca <2 x i64>, align 16
   %aptr = bitcast <2 x i64>* %a to i8*
   %aaptr = bitcast <2 x i64>* %aa to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %aaptr, i8* align 2 %aptr, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %aaptr, i8* %aptr, i32 16, i32 2, i1 false)
   %bptr = bitcast i16 addrspace(1)* %b to i8 addrspace(1)*
-  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 2 %bptr, i8* align 2 %aaptr, i32 16, i1 false)
+  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %bptr, i8* %aaptr, i32 16, i32 2, i1 false)
   ret void
 }
 
@@ -62,7 +62,7 @@
   store [5 x i64] %in.coerce, [5 x i64]* %0, align 8
   %scevgep9 = getelementptr %struct.struct_test_27.0.13, %struct.struct_test_27.0.13* %in, i32 0, i32 4, i32 0
   %scevgep910 = bitcast i32* %scevgep9 to i8*
-  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 4 undef, i8* align 4 %scevgep910, i32 16, i1 false)
+  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* undef, i8* %scevgep910, i32 16, i32 4, i1 false)
   ret void
 }
  
diff --git a/llvm/test/Transforms/SROA/alignment.ll b/llvm/test/Transforms/SROA/alignment.ll
index dd06613..455d142 100644
--- a/llvm/test/Transforms/SROA/alignment.ll
+++ b/llvm/test/Transforms/SROA/alignment.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -sroa -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
 
 define void @test1({ i8, i8 }* %a, { i8, i8 }* %b) {
 ; CHECK-LABEL: @test1(
@@ -23,8 +23,8 @@
 
   store i8 420, i8* %gep_alloca, align 16
 
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %gep_alloca, i8* align 16 %gep_a, i32 2, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %gep_b, i8* align 16 %gep_alloca, i32 2, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %gep_alloca, i8* %gep_a, i32 2, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %gep_b, i8* %gep_alloca, i32 2, i32 16, i1 false)
   ret void
 }
 
@@ -57,9 +57,9 @@
   %aa = alloca <2 x i64>, align 16
   %aptr = bitcast <2 x i64>* %a to i8*
   %aaptr = bitcast <2 x i64>* %aa to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %aaptr, i8* align 2 %aptr, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %aaptr, i8* %aptr, i32 16, i32 2, i1 false)
   %bptr = bitcast i16* %b to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %bptr, i8* align 2 %aaptr, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %bptr, i8* %aaptr, i32 16, i32 2, i1 false)
   ret void
 }
 
@@ -77,10 +77,10 @@
   %a = alloca { i8*, i8*, i8* }
   %b = alloca { i8*, i8*, i8* }
   %a_raw = bitcast { i8*, i8*, i8* }* %a to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %a_raw, i8* align 8 %x, i32 22, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a_raw, i8* %x, i32 22, i32 8, i1 false)
   %b_raw = bitcast { i8*, i8*, i8* }* %b to i8*
   %b_gep = getelementptr i8, i8* %b_raw, i32 6
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %b_gep, i8* align 2 %x, i32 18, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b_gep, i8* %x, i32 18, i32 2, i1 false)
   ret void
 }
 
@@ -155,7 +155,7 @@
   %raw2 = getelementptr inbounds [16 x i8], [16 x i8]* %a, i32 0, i32 8
   %ptr2 = bitcast i8* %raw2 to double*
 
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %raw1, i8* %out, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %raw1, i8* %out, i32 16, i32 0, i1 false)
 ; CHECK: %[[val2:.*]] = load double, double* %{{.*}}, align 1
 ; CHECK: %[[val1:.*]] = load double, double* %{{.*}}, align 1
 
@@ -165,7 +165,7 @@
   store double %val1, double* %ptr1, align 1
   store double %val2, double* %ptr2, align 1
 
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %raw1, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %raw1, i32 16, i32 0, i1 false)
 ; CHECK: store double %[[val1]], double* %{{.*}}, align 1
 ; CHECK: store double %[[val2]], double* %{{.*}}, align 1
 
diff --git a/llvm/test/Transforms/SROA/basictest.ll b/llvm/test/Transforms/SROA/basictest.ll
index d9d87a3..7b5daa9 100644
--- a/llvm/test/Transforms/SROA/basictest.ll
+++ b/llvm/test/Transforms/SROA/basictest.ll
@@ -80,31 +80,31 @@
 ; CHECK-NEXT: %[[test3_a7:.*]] = alloca [85 x i8]
 
   %b = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b, i8* %src, i32 300, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b, i8* %src, i32 300, i32 1, i1 false)
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a1]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %src, i32 42
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %src, i32 42
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 42
 ; CHECK-NEXT: %[[test3_r1:.*]] = load i8, i8* %[[gep]]
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 43
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [99 x i8], [99 x i8]* %[[test3_a2]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 99
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 99
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 142
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 16
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 16
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 158
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a4]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 42
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 42
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 200
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 207
 ; CHECK-NEXT: %[[test3_r2:.*]] = load i8, i8* %[[gep]]
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 208
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 215
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [85 x i8], [85 x i8]* %[[test3_a7]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 85
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 85
 
   ; Clobber a single element of the array, this should be promotable.
   %c = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 42
@@ -244,59 +244,59 @@
 ; CHECK-NEXT: store i32 4, i32* %[[bitcast]]
 
   %overlap2.prefix = getelementptr i8, i8* %overlap2.1.1.i8, i64 -4
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.prefix, i8* %src, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.prefix, i8* %src, i32 8, i32 1, i1 false)
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a4]], i64 0, i64 39
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %src, i32 3
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %src, i32 3
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 3
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 5
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 5
 
   ; Bridge between the overlapping areas
-  call void @llvm.memset.p0i8.i32(i8* %overlap2.1.2.i8, i8 42, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %overlap2.1.2.i8, i8 42, i32 8, i32 1, i1 false)
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 2
-; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 %[[gep]], i8 42, i32 5
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %[[gep]], i8 42, i32 5
 ; ...promoted i8 store...
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 %[[gep]], i8 42, i32 2
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %[[gep]], i8 42, i32 2
 
   ; Entirely within the second overlap.
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.2.1.i8, i8* %src, i32 5, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.2.1.i8, i8* %src, i32 5, i32 1, i1 false)
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 1
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep]], i8* align 1 %src, i32 5
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep]], i8* %src, i32 5
 
   ; Trailing past the second overlap.
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.2.2.i8, i8* %src, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.2.2.i8, i8* %src, i32 8, i32 1, i1 false)
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 2
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep]], i8* align 1 %src, i32 5
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep]], i8* %src, i32 5
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 5
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [85 x i8], [85 x i8]* %[[test3_a7]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 3
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 3
 
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %b, i32 300, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %b, i32 300, i32 1, i1 false)
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a1]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 %[[gep]], i32 42
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[gep]], i32 42
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 42
 ; CHECK-NEXT: store i8 0, i8* %[[gep]]
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 43
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [99 x i8], [99 x i8]* %[[test3_a2]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 99
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 99
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 142
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 16
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 16
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 158
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a4]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 42
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 42
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 200
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 207
 ; CHECK-NEXT: store i8 42, i8* %[[gep]]
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 208
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 215
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [85 x i8], [85 x i8]* %[[test3_a7]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 85
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 85
 
   ret void
 }
@@ -315,9 +315,9 @@
 ; CHECK-NEXT: %[[test4_a6:.*]] = alloca [40 x i8]
 
   %b = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b, i8* %src, i32 100, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b, i8* %src, i32 100, i32 1, i1 false)
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [20 x i8], [20 x i8]* %[[test4_a1]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep]], i8* align 1 %src, i32 20
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep]], i8* %src, i32 20
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 20
 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
 ; CHECK-NEXT: %[[test4_r1:.*]] = load i16, i16* %[[bitcast]]
@@ -325,10 +325,10 @@
 ; CHECK-NEXT: %[[test4_r2:.*]] = load i8, i8* %[[gep]]
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 23
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a2]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 30
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [10 x i8], [10 x i8]* %[[test4_a3]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 10
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 10
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 40
 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
 ; CHECK-NEXT: %[[test4_r3:.*]] = load i16, i16* %[[bitcast]]
@@ -336,7 +336,7 @@
 ; CHECK-NEXT: %[[test4_r4:.*]] = load i8, i8* %[[gep]]
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 43
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a4]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 50
 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
 ; CHECK-NEXT: %[[test4_r5:.*]] = load i16, i16* %[[bitcast]]
@@ -344,31 +344,31 @@
 ; CHECK-NEXT: %[[test4_r6:.*]] = load i8, i8* %[[gep]]
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 53
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a5]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 60
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [40 x i8], [40 x i8]* %[[test4_a6]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 40
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 40
 
   %a.src.1 = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 20
   %a.dst.1 = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 40
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.1, i32 10, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.1, i32 10, i32 1, i1 false)
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a4]], i64 0, i64 0
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a2]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
 
   ; Clobber a single element of the array, this should be promotable, and be deleted.
   %c = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 42
   store i8 0, i8* %c
 
   %a.src.2 = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 50
-  call void @llvm.memmove.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.2, i32 10, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.2, i32 10, i32 1, i1 false)
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a4]], i64 0, i64 0
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a5]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
 
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %b, i32 100, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %b, i32 100, i32 1, i1 false)
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [20 x i8], [20 x i8]* %[[test4_a1]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 %[[gep]], i32 20
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[gep]], i32 20
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 20
 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
 ; CHECK-NEXT: store i16 %[[test4_r1]], i16* %[[bitcast]]
@@ -376,10 +376,10 @@
 ; CHECK-NEXT: store i8 %[[test4_r2]], i8* %[[gep]]
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 23
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a2]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 30
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [10 x i8], [10 x i8]* %[[test4_a3]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 10
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 10
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 40
 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
 ; CHECK-NEXT: store i16 %[[test4_r5]], i16* %[[bitcast]]
@@ -387,7 +387,7 @@
 ; CHECK-NEXT: store i8 %[[test4_r6]], i8* %[[gep]]
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 43
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a4]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 50
 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
 ; CHECK-NEXT: store i16 %[[test4_r5]], i16* %[[bitcast]]
@@ -395,18 +395,18 @@
 ; CHECK-NEXT: store i8 %[[test4_r6]], i8* %[[gep]]
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 53
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a5]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 60
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [40 x i8], [40 x i8]* %[[test4_a6]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 40
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 40
 
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
-declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i1) nounwind
-declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
 
 define i16 @test5() {
 ; CHECK-LABEL: @test5(
@@ -436,7 +436,7 @@
 entry:
   %a = alloca [4 x i8]
   %ptr = getelementptr [4 x i8], [4 x i8]* %a, i32 0, i32 0
-  call void @llvm.memset.p0i8.i32(i8* %ptr, i8 42, i32 4, i1 true)
+  call void @llvm.memset.p0i8.i32(i8* %ptr, i8 42, i32 4, i32 1, i1 true)
   %iptr = bitcast i8* %ptr to i32*
   %val = load i32, i32* %iptr
   ret i32 %val
@@ -456,8 +456,8 @@
 entry:
   %a = alloca [4 x i8]
   %ptr = getelementptr [4 x i8], [4 x i8]* %a, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i1 true)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i1 true)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i32 1, i1 true)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i32 1, i1 true)
   ret void
 }
 
@@ -543,7 +543,7 @@
 entry:
   %a = alloca [8 x i8]
   %ptr = getelementptr [8 x i8], [8 x i8]* %a, i32 0, i32 0
-  call void @llvm.memset.p0i8.i32(i8* %ptr, i8 0, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %ptr, i8 0, i32 8, i32 1, i1 false)
   %s2ptrptr = bitcast i8* %ptr to %S2**
   %s2ptr = load %S2*, %S2** %s2ptrptr
   ret %S2* %s2ptr
@@ -743,10 +743,10 @@
 entry:
   %a = alloca [3 x i8]
   %ptr = getelementptr [3 x i8], [3 x i8]* %a, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i32 1, i1 false)
   %cast = bitcast i8* %ptr to i24*
   store i24 0, i24* %cast
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i32 1, i1 false)
   ret void
 }
 
@@ -763,8 +763,8 @@
 entry:
   %a = alloca [3 x i8]
   %ptr = getelementptr [3 x i8], [3 x i8]* %a, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i1 true)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i1 true)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i32 1, i1 true)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i32 1, i1 true)
   ret void
 }
 
@@ -780,7 +780,7 @@
 ; CHECK-NEXT: %[[agep1:.*]] = getelementptr inbounds [34 x i8], [34 x i8]* %[[a]], i64 0, i64 0
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[agep1]], i8* %src, i32 %size,
 ; CHECK-NEXT: %[[agep2:.*]] = getelementptr inbounds [34 x i8], [34 x i8]* %[[a]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 %[[agep2]], i8 42, i32 %size,
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %[[agep2]], i8 42, i32 %size,
 ; CHECK-NEXT: %[[dstcast1:.*]] = bitcast i8* %dst to i32*
 ; CHECK-NEXT: store i32 42, i32* %[[dstcast1]]
 ; CHECK-NEXT: %[[dstgep1:.*]] = getelementptr inbounds i8, i8* %dst, i64 4
@@ -793,14 +793,14 @@
 entry:
   %a = alloca [42 x i8]
   %ptr = getelementptr [42 x i8], [42 x i8]* %a, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 8, i32 1, i1 false)
   %ptr2 = getelementptr [42 x i8], [42 x i8]* %a, i32 0, i32 8
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr2, i8* %src, i32 %size, i1 false)
-  call void @llvm.memset.p0i8.i32(i8* %ptr2, i8 42, i32 %size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr2, i8* %src, i32 %size, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %ptr2, i8 42, i32 %size, i32 1, i1 false)
   %cast = bitcast i8* %ptr to i32*
   store i32 42, i32* %cast
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 8, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr2, i32 %size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 8, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr2, i32 %size, i32 1, i1 false)
   ret void
 }
 
@@ -819,7 +819,7 @@
   %a = alloca { i64, i8* }
   %cast1 = bitcast %opaque* %x to i8*
   %cast2 = bitcast { i64, i8* }* %a to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast2, i8* %cast1, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast2, i8* %cast1, i32 16, i32 1, i1 false)
   %gep = getelementptr inbounds { i64, i8* }, { i64, i8* }* %a, i32 0, i32 0
   %val = load i64, i64* %gep
   ret i32 undef
@@ -853,7 +853,7 @@
   ret i32 %sum2
 }
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
 
 define i8 @test21() {
 ; Test allocations and offsets which border on overflow of the int64_t used
@@ -869,7 +869,7 @@
   store i8 255, i8* %gep0
   %gep1 = getelementptr [2305843009213693951 x i8], [2305843009213693951 x i8]* %a, i64 0, i64 -9223372036854775807
   %gep2 = getelementptr i8, i8* %gep1, i64 -1
-  call void @llvm.memset.p0i8.i64(i8* %gep2, i8 0, i64 18446744073709551615, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %gep2, i8 0, i64 18446744073709551615, i32 1, i1 false)
   %gep3 = getelementptr i8, i8* %gep1, i64 9223372036854775807
   %gep4 = getelementptr i8, i8* %gep3, i64 9223372036854775807
   %gep5 = getelementptr i8, i8* %gep4, i64 -6917529027641081857
@@ -894,7 +894,7 @@
 
 entry:
   %a = alloca i8
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 1, i32 1, i1 false)
   %tmp2 = load i8, i8* %a
   ret void
 }
@@ -914,7 +914,7 @@
 if.then:
   %tmp0 = bitcast %PR13916.struct* %a to i8*
   %tmp1 = bitcast %PR13916.struct* %a to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp0, i8* %tmp1, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp0, i8* %tmp1, i32 1, i32 1, i1 false)
   br label %if.end
 
 if.end:
@@ -992,7 +992,7 @@
   store %PR14034.list* undef, %PR14034.list** %prev
   %cast0 = bitcast %PR14034.struct* undef to i8*
   %cast1 = bitcast %PR14034.struct* %a to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast0, i8* %cast1, i32 12, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast0, i8* %cast1, i32 12, i32 0, i1 false)
   ret void
 }
 
@@ -1065,15 +1065,15 @@
 
   ; Also use a memset to the middle 32-bits for fun.
   %X.sroa.0.2.raw_idx2.i = getelementptr inbounds i8, i8* %0, i32 2
-  call void @llvm.memset.p0i8.i64(i8* %X.sroa.0.2.raw_idx2.i, i8 0, i64 4, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %X.sroa.0.2.raw_idx2.i, i8 0, i64 4, i32 1, i1 false)
 
   ; Or a memset of the whole thing.
-  call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 8, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 8, i32 1, i1 false)
 
   ; Write to the high 32-bits with a memcpy.
   %X.sroa.0.4.raw_idx4.i = getelementptr inbounds i8, i8* %0, i32 4
   %d.raw = bitcast double* %d to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %X.sroa.0.4.raw_idx4.i, i8* %d.raw, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %X.sroa.0.4.raw_idx4.i, i8* %d.raw, i32 4, i32 1, i1 false)
 
   ; Store to the high 32-bits...
   %X.sroa.0.4.cast5.i = bitcast i8* %X.sroa.0.4.raw_idx4.i to i32*
@@ -1146,7 +1146,7 @@
 
   %cast1 = bitcast { [16 x i8 ] }* %gep to i8*
   %cast2 = bitcast { [16 x i8 ] }* %a to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast1, i8* %cast2, i32 16, i1 true)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast1, i8* %cast2, i32 16, i32 8, i1 true)
   ret void
 ; CHECK: ret
 }
@@ -1164,7 +1164,7 @@
 
   %cast1 = bitcast { [16 x i8 ] } addrspace(1)* %gep to i8 addrspace(1)*
   %cast2 = bitcast { [16 x i8 ] }* %a to i8*
-  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %cast1, i8* %cast2, i32 16, i1 true)
+  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %cast1, i8* %cast2, i32 16, i32 8, i1 true)
   ret void
 ; CHECK: ret
 }
@@ -1177,7 +1177,7 @@
   %stack = alloca [1048576 x i32], align 16
 ; CHECK: alloca [1048576 x i32]
   %cast = bitcast [1048576 x i32]* %stack to i8*
-  call void @llvm.memset.p0i8.i64(i8* %cast, i8 -2, i64 4194304, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %cast, i8 -2, i64 4194304, i32 16, i1 false)
   ret void
 ; CHECK: ret
 }
@@ -1206,7 +1206,7 @@
 ; CHECK-NEXT: {{.*}} = load i8, i8* %[[b]], align 8
 
   %a.i8 = bitcast <{ i1 }>* %a to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.i8, i8* %b.i8, i32 1, i1 false) nounwind
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.i8, i8* %b.i8, i32 1, i32 1, i1 false) nounwind
   %bar = load i8, i8* %a.i8, align 1
   %a.i1 = getelementptr inbounds <{ i1 }>, <{ i1 }>* %a, i32 0, i32 0
   %baz = load i1, i1* %a.i1, align 1
@@ -1261,7 +1261,7 @@
 ; CHECK: alloca
 
   %a.i8 = bitcast i32* %a to i8*
-  call void @llvm.memset.p0i8.i32(i8* %a.i8, i8 0, i32 %x, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %a.i8, i8 0, i32 %x, i32 1, i1 false)
   %v = load i32, i32* %a
   ret i32 %v
 }
@@ -1323,7 +1323,7 @@
 
 end:
   %tmp.raw = bitcast [4 x i8]* %tmp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %data, i8* %tmp.raw, i32 %size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %data, i8* %tmp.raw, i32 %size, i32 1, i1 false)
   ret void
 ; CHECK: ret void
 }
@@ -1376,7 +1376,7 @@
 entry:
   %b = alloca i32, align 4
   %b.cast = bitcast i32* %b to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b.cast, i8* %a, i32 4, i1 true)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b.cast, i8* %a, i32 4, i32 4, i1 true)
   %b.gep = getelementptr inbounds i8, i8* %b.cast, i32 2
   load i8, i8* %b.gep, align 2
   unreachable
@@ -1413,7 +1413,7 @@
   %gep0 = getelementptr inbounds i32, i32* %a, i32 0
   %cast1 = bitcast i32* %gep1 to i8*
   %cast0 = bitcast i32* %gep0 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast1, i8* %cast0, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast1, i8* %cast0, i32 4, i32 1, i1 false)
   ret void
 }
 
@@ -1424,7 +1424,7 @@
 entry:
   %f = alloca i8
   %gep = getelementptr i8, i8* %f, i64 -1
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* %gep, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* %gep, i32 1, i32 1, i1 false)
   ret void
 }
 
@@ -1439,8 +1439,8 @@
 entry:
   %a = alloca i64, align 16
   %ptr = bitcast i64* %a to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 8, i1 true)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 8, i1 true)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 8, i32 1, i1 true)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 8, i32 1, i1 true)
   ret void
 }
 
diff --git a/llvm/test/Transforms/SROA/big-endian.ll b/llvm/test/Transforms/SROA/big-endian.ll
index 1b724f3..ea41a20 100644
--- a/llvm/test/Transforms/SROA/big-endian.ll
+++ b/llvm/test/Transforms/SROA/big-endian.ll
@@ -210,7 +210,7 @@
   store i64 34494054408, i64* %a2
   %tmp0 = bitcast { i32, i24 }* %a to i8*
   %tmp1 = bitcast i64* %a2 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp0, i8* %tmp1, i64 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp0, i8* %tmp1, i64 8, i32 4, i1 false)
 ; CHECK: %[[LO_SHR:.*]] = lshr i64 34494054408, 32
 ; CHECK: %[[LO_START:.*]] = trunc i64 %[[LO_SHR]] to i32
 ; CHECK: %[[HI_START:.*]] = trunc i64 34494054408 to i32
@@ -233,4 +233,4 @@
 ; CHECK: ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
diff --git a/llvm/test/Transforms/SROA/slice-order-independence.ll b/llvm/test/Transforms/SROA/slice-order-independence.ll
index faf9750..7d57be6d 100644
--- a/llvm/test/Transforms/SROA/slice-order-independence.ll
+++ b/llvm/test/Transforms/SROA/slice-order-independence.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -sroa -S | FileCheck %s
 target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
 ; Check that the chosen type for a split is independent from the order of
 ; slices even in case of types that are skipped because their width is not a
@@ -12,7 +12,7 @@
   %arg = alloca { i16*, i32 }, align 8
   %2 = bitcast { i16*, i32 }* %0 to i8*
   %3 = bitcast { i16*, i32 }* %arg to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* %2, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* %2, i32 16, i32 8, i1 false)
   %b = getelementptr inbounds { i16*, i32 }, { i16*, i32 }* %arg, i64 0, i32 0
   %pb0 = bitcast i16** %b to i63*
   %b0 = load i63, i63* %pb0
@@ -27,7 +27,7 @@
   %arg = alloca { i16*, i32 }, align 8
   %2 = bitcast { i16*, i32 }* %0 to i8*
   %3 = bitcast { i16*, i32 }* %arg to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* %2, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* %2, i32 16, i32 8, i1 false)
   %b = getelementptr inbounds { i16*, i32 }, { i16*, i32 }* %arg, i64 0, i32 0
   %pb1 = bitcast i16** %b to i8**
   %b1 = load i8*, i8** %pb1
diff --git a/llvm/test/Transforms/SROA/slice-width.ll b/llvm/test/Transforms/SROA/slice-width.ll
index 68f891f..6b6ab93 100644
--- a/llvm/test/Transforms/SROA/slice-width.ll
+++ b/llvm/test/Transforms/SROA/slice-width.ll
@@ -1,8 +1,8 @@
 ; RUN: opt < %s -sroa -S | FileCheck %s
 target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-f80:128-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
 
 define void @no_split_on_non_byte_width(i32) {
 ; This tests that allocas are not split into slices that are not byte width multiple
@@ -39,7 +39,7 @@
 
   ; Copy from a global.
   %x_i8 = bitcast %union.Foo* %x to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x_i8, i8* bitcast (%union.Foo* @foo_copy_source to i8*), i32 32, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x_i8, i8* bitcast (%union.Foo* @foo_copy_source to i8*), i32 32, i32 16, i1 false)
 
   ; Access a slice of the alloca to trigger SROA.
   %mid_p = getelementptr %union.Foo, %union.Foo* %x, i32 0, i32 1
@@ -58,7 +58,7 @@
 
   ; Set to all ones.
   %x_i8 = bitcast %union.Foo* %x to i8*
-  call void @llvm.memset.p0i8.i32(i8* %x_i8, i8 -1, i32 32, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %x_i8, i8 -1, i32 32, i32 16, i1 false)
 
   ; Access a slice of the alloca to trigger SROA.
   %mid_p = getelementptr %union.Foo, %union.Foo* %x, i32 0, i32 1
@@ -68,7 +68,7 @@
 }
 ; CHECK-LABEL: define void @memset_fp80_padding
 ; CHECK: alloca x86_fp80
-; CHECK: call void @llvm.memset.p0i8.i32(i8* align 16 %{{.*}}, i8 -1, i32 16, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i32(i8* %{{.*}}, i8 -1, i32 16, i32 16, i1 false)
 ; CHECK: store i64 -1, i64* @i64_sink
 
 %S.vec3float = type { float, float, float }
@@ -85,7 +85,7 @@
   %tmp1 = alloca %S.vec3float, align 4
   %0 = bitcast %S.vec3float* %tmp1 to i8*
   %1 = bitcast %S.vec3float* %x to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 12, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 12, i32 4, i1 false)
 
   ; The following block does nothing; but appears to confuse SROA
   %unused1 = bitcast %S.vec3float* %tmp1 to %U.vec3float*
@@ -98,7 +98,7 @@
   %3 = bitcast %S.vec3float* %tmp1 to i8*
 ; CHECK: alloca
 ; CHECK-NOT: store <4 x float>
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %2, i8* %3, i32 12, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %2, i8* %3, i32 12, i32 4, i1 false)
 
   %result = call i32 @memcpy_vec3float_helper(%S.vec3float* %tmp2)
   ret i32 %result
diff --git a/llvm/test/Transforms/SROA/vector-promotion.ll b/llvm/test/Transforms/SROA/vector-promotion.ll
index 0bf2d23..2d9b26b 100644
--- a/llvm/test/Transforms/SROA/vector-promotion.ll
+++ b/llvm/test/Transforms/SROA/vector-promotion.ll
@@ -81,12 +81,12 @@
 ; CHECK-NOT: store
 
   %a.y.cast = bitcast <4 x i32>* %a.y to i8*
-  call void @llvm.memset.p0i8.i32(i8* %a.y.cast, i8 0, i32 16, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %a.y.cast, i8 0, i32 16, i32 1, i1 false)
 ; CHECK-NOT: memset
 
   %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
   %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
-  call void @llvm.memset.p0i8.i32(i8* %a.tmp1.cast, i8 -1, i32 4, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %a.tmp1.cast, i8 -1, i32 4, i32 1, i1 false)
   %tmp1 = load i32, i32* %a.tmp1
   %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
   %tmp2 = load i32, i32* %a.tmp2
@@ -120,14 +120,14 @@
 
   %a.y.cast = bitcast <4 x i32>* %a.y to i8*
   %z.cast = bitcast <4 x i32>* %z to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.y.cast, i8* %z.cast, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.y.cast, i8* %z.cast, i32 16, i32 1, i1 false)
 ; CHECK-NOT: memcpy
 
   %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
   %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
   %z.tmp1 = getelementptr inbounds <4 x i32>, <4 x i32>* %z, i64 0, i64 2
   %z.tmp1.cast = bitcast i32* %z.tmp1 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.tmp1.cast, i8* %z.tmp1.cast, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.tmp1.cast, i8* %z.tmp1.cast, i32 4, i32 1, i1 false)
   %tmp1 = load i32, i32* %a.tmp1
   %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
   %tmp2 = load i32, i32* %a.tmp2
@@ -150,7 +150,7 @@
 ; CHECK-NEXT: ret
 }
 
-declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i32, i1) nounwind
 
 ; Same as test4 with a different sized address  space pointer source.
 define i32 @test4_as1(<4 x i32> %x, <4 x i32> %y, <4 x i32> addrspace(1)* %z) {
@@ -167,14 +167,14 @@
 
   %a.y.cast = bitcast <4 x i32>* %a.y to i8*
   %z.cast = bitcast <4 x i32> addrspace(1)* %z to i8 addrspace(1)*
-  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %a.y.cast, i8 addrspace(1)* %z.cast, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %a.y.cast, i8 addrspace(1)* %z.cast, i32 16, i32 1, i1 false)
 ; CHECK-NOT: memcpy
 
   %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
   %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
   %z.tmp1 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %z, i16 0, i16 2
   %z.tmp1.cast = bitcast i32 addrspace(1)* %z.tmp1 to i8 addrspace(1)*
-  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %a.tmp1.cast, i8 addrspace(1)* %z.tmp1.cast, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %a.tmp1.cast, i8 addrspace(1)* %z.tmp1.cast, i32 4, i32 1, i1 false)
   %tmp1 = load i32, i32* %a.tmp1
   %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
   %tmp2 = load i32, i32* %a.tmp2
@@ -213,14 +213,14 @@
 
   %a.y.cast = bitcast <4 x i32>* %a.y to i8*
   %a.x.cast = bitcast <4 x i32>* %a.x to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.x.cast, i8* %a.y.cast, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.x.cast, i8* %a.y.cast, i32 16, i32 1, i1 false)
 ; CHECK-NOT: memcpy
 
   %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
   %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
   %z.tmp1 = getelementptr inbounds <4 x i32>, <4 x i32>* %z, i64 0, i64 2
   %z.tmp1.cast = bitcast i32* %z.tmp1 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %z.tmp1.cast, i8* %a.tmp1.cast, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %z.tmp1.cast, i8* %a.tmp1.cast, i32 4, i32 1, i1 false)
   %tmp1 = load i32, i32* %a.tmp1
   %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
   %tmp2 = load i32, i32* %a.tmp2
@@ -242,8 +242,8 @@
 ; CHECK-NEXT: ret
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
 
 define i64 @test6(<4 x i64> %x, <4 x i64> %y, i64 %n) {
 ; CHECK-LABEL: @test6(
@@ -326,7 +326,7 @@
 ; CHECK-NEXT: ret <4 x i32> %[[ret]]
 }
 
-declare void @llvm.memset.p0i32.i32(i32* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i32.i32(i32* nocapture, i32, i32, i32, i1) nounwind
 
 define <4 x float> @test_subvec_memset() {
 ; CHECK-LABEL: @test_subvec_memset(
@@ -336,23 +336,23 @@
 
   %a.gep0 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 0
   %a.cast0 = bitcast float* %a.gep0 to i8*
-  call void @llvm.memset.p0i8.i32(i8* %a.cast0, i8 0, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %a.cast0, i8 0, i32 8, i32 0, i1 false)
 ; CHECK-NOT: store
 ; CHECK: select <4 x i1> <i1 true, i1 true, i1 false, i1 false>
 
   %a.gep1 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 1
   %a.cast1 = bitcast float* %a.gep1 to i8*
-  call void @llvm.memset.p0i8.i32(i8* %a.cast1, i8 1, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %a.cast1, i8 1, i32 8, i32 0, i1 false)
 ; CHECK-NEXT: select <4 x i1> <i1 false, i1 true, i1 true, i1 false>
 
   %a.gep2 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 2
   %a.cast2 = bitcast float* %a.gep2 to i8*
-  call void @llvm.memset.p0i8.i32(i8* %a.cast2, i8 3, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %a.cast2, i8 3, i32 8, i32 0, i1 false)
 ; CHECK-NEXT: select <4 x i1> <i1 false, i1 false, i1 true, i1 true>
 
   %a.gep3 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 3
   %a.cast3 = bitcast float* %a.gep3 to i8*
-  call void @llvm.memset.p0i8.i32(i8* %a.cast3, i8 7, i32 4, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %a.cast3, i8 7, i32 4, i32 0, i1 false)
 ; CHECK-NEXT: insertelement <4 x float> 
 
   %ret = load <4 x float>, <4 x float>* %a
@@ -369,7 +369,7 @@
 
   %a.gep0 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 0
   %a.cast0 = bitcast float* %a.gep0 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast0, i8* %x, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast0, i8* %x, i32 8, i32 0, i1 false)
 ; CHECK:      %[[xptr:.*]] = bitcast i8* %x to <2 x float>*
 ; CHECK-NEXT: %[[x:.*]] = load <2 x float>, <2 x float>* %[[xptr]]
 ; CHECK-NEXT: %[[expand_x:.*]] = shufflevector <2 x float> %[[x]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -377,7 +377,7 @@
 
   %a.gep1 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 1
   %a.cast1 = bitcast float* %a.gep1 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast1, i8* %y, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast1, i8* %y, i32 8, i32 0, i1 false)
 ; CHECK-NEXT: %[[yptr:.*]] = bitcast i8* %y to <2 x float>*
 ; CHECK-NEXT: %[[y:.*]] = load <2 x float>, <2 x float>* %[[yptr]]
 ; CHECK-NEXT: %[[expand_y:.*]] = shufflevector <2 x float> %[[y]], <2 x float> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 undef>
@@ -385,7 +385,7 @@
 
   %a.gep2 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 2
   %a.cast2 = bitcast float* %a.gep2 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast2, i8* %z, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast2, i8* %z, i32 8, i32 0, i1 false)
 ; CHECK-NEXT: %[[zptr:.*]] = bitcast i8* %z to <2 x float>*
 ; CHECK-NEXT: %[[z:.*]] = load <2 x float>, <2 x float>* %[[zptr]]
 ; CHECK-NEXT: %[[expand_z:.*]] = shufflevector <2 x float> %[[z]], <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
@@ -393,12 +393,12 @@
 
   %a.gep3 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 3
   %a.cast3 = bitcast float* %a.gep3 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast3, i8* %f, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast3, i8* %f, i32 4, i32 0, i1 false)
 ; CHECK-NEXT: %[[fptr:.*]] = bitcast i8* %f to float*
 ; CHECK-NEXT: %[[f:.*]] = load float, float* %[[fptr]]
 ; CHECK-NEXT: %[[insert_f:.*]] = insertelement <4 x float> 
 
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %a.cast2, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %a.cast2, i32 8, i32 0, i1 false)
 ; CHECK-NEXT: %[[outptr:.*]] = bitcast i8* %out to <2 x float>*
 ; CHECK-NEXT: %[[extract_out:.*]] = shufflevector <4 x float> %[[insert_f]], <4 x float> undef, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT: store <2 x float> %[[extract_out]], <2 x float>* %[[outptr]]
diff --git a/llvm/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll b/llvm/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll
index a4b469f..966b179 100644
--- a/llvm/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll
+++ b/llvm/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll
@@ -13,12 +13,12 @@
   %tmp = alloca %struct.UnionType, align 8
   %tmp2 = getelementptr %struct.UnionType, %struct.UnionType* %tmp, i32 0, i32 0, i32 0
   %tmp13 = getelementptr %struct.UnionType, %struct.UnionType* %p, i32 0, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp13, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp13, i32 8, i32 0, i1 false)
   %tmp5 = load %struct.UnionType*, %struct.UnionType** %pointerToUnion
   %tmp56 = getelementptr %struct.UnionType, %struct.UnionType* %tmp5, i32 0, i32 0, i32 0
   %tmp7 = getelementptr %struct.UnionType, %struct.UnionType* %tmp, i32 0, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp56, i8* %tmp7, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp56, i8* %tmp7, i32 8, i32 0, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll b/llvm/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
index 2117ce3..f597613 100644
--- a/llvm/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
+++ b/llvm/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
@@ -8,10 +8,10 @@
 entry:
   %temp = alloca [200 x i8]
   %temp1 = bitcast [200 x i8]* %temp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %temp1, i8* %src, i32 200, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %temp1, i8* %src, i32 200, i32 1, i1 false)
   %temp3 = bitcast [200 x i8]* %temp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %temp3, i32 200, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %temp3, i32 200, i32 1, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll b/llvm/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
index e245524..c0ff25f 100644
--- a/llvm/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
+++ b/llvm/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
@@ -12,7 +12,7 @@
   %0 = call i32 @a(%struct.x* %s) nounwind
   %r1 = bitcast %struct.x* %r to i8*
   %s2 = bitcast %struct.x* %s to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %r1, i8* %s2, i32 12, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %r1, i8* %s2, i32 12, i32 8, i1 false)
   %1 = getelementptr %struct.x, %struct.x* %r, i32 0, i32 0, i32 1
   %2 = load i32, i32* %1, align 4
   ret i32 %2
@@ -20,4 +20,4 @@
 
 declare i32 @a(%struct.x*)
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll b/llvm/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
index f713be4..16d9108 100644
--- a/llvm/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
+++ b/llvm/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
@@ -18,8 +18,8 @@
         ; because the type of the first element in %struct.two is i8.
 	%tmpS = getelementptr %struct.two, %struct.two* %S, i32 0, i32 0, i32 0 
 	%tmpD = bitcast %struct.two* %D to i8*
-        call void @llvm.memmove.p0i8.p0i8.i32(i8* %tmpD, i8* %tmpS, i32 4, i1 false)
+        call void @llvm.memmove.p0i8.p0i8.i32(i8* %tmpD, i8* %tmpS, i32 4, i32 1, i1 false)
         ret void
 }
 
-declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll b/llvm/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
index 0fff595..025578c 100644
--- a/llvm/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
+++ b/llvm/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
@@ -12,8 +12,8 @@
         %0 = getelementptr %struct.st, %struct.st* %s, i32 0, i32 0  ; <i16*> [#uses=1]
         store i16 1, i16* %0, align 4
         %s1 = bitcast %struct.st* %s to i8*  ; <i8*> [#uses=1]
-        call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %p, i8* align 1 %s1, i32 2, i1 false)
+        call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %s1, i32 2, i32 1, i1 false)
         ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll b/llvm/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
index f8f48a1..d1cc424 100644
--- a/llvm/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
+++ b/llvm/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
@@ -51,14 +51,14 @@
   %18 = getelementptr inbounds %union..0anon, %union..0anon* %__rv, i32 0, i32 0
   %19 = bitcast %struct.int16x8x2_t* %0 to i8*
   %20 = bitcast %struct.int16x8x2_t* %18 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %19, i8* %20, i32 32, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %19, i8* %20, i32 32, i32 16, i1 false)
   %tmp21 = bitcast %struct.int16x8x2_t* %tmp2 to i8*
   %21 = bitcast %struct.int16x8x2_t* %0 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp21, i8* %21, i32 32, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp21, i8* %21, i32 32, i32 16, i1 false)
   %22 = load %struct.int16x8x2_t*, %struct.int16x8x2_t** %dst_addr, align 4
   %23 = bitcast %struct.int16x8x2_t* %22 to i8*
   %tmp22 = bitcast %struct.int16x8x2_t* %tmp2 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %23, i8* %tmp22, i32 32, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %23, i8* %tmp22, i32 32, i32 16, i1 false)
   br label %return
 
 return:                                           ; preds = %entry
@@ -76,7 +76,7 @@
 cond.true:                                        ; preds = %entry
   %tmp3 = bitcast %struct._NSRange* %range to i8*
   %tmp4 = bitcast %struct._NSRange* %range to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp3, i8* %tmp4, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp3, i8* %tmp4, i32 8, i32 8, i1 false)
   ret void
 
 cond.false:                                       ; preds = %entry
@@ -87,4 +87,4 @@
 ; CHECK: br i1
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll b/llvm/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll
index 8472d44..b926b02 100644
--- a/llvm/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll
+++ b/llvm/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll
@@ -11,8 +11,8 @@
 ; CHECK: ret void
   %1 = alloca %struct.test
   %2 = bitcast %struct.test* %1 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %2, i8* %2, i32 24, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %2, i8* %2, i32 24, i32 4, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/2011-05-06-CapturedAlloca.ll b/llvm/test/Transforms/ScalarRepl/2011-05-06-CapturedAlloca.ll
index 4401307..997d03b 100644
--- a/llvm/test/Transforms/ScalarRepl/2011-05-06-CapturedAlloca.ll
+++ b/llvm/test/Transforms/ScalarRepl/2011-05-06-CapturedAlloca.ll
@@ -15,7 +15,7 @@
 entry:
   %l_10 = alloca [4 x i32], align 16
   %tmp = bitcast [4 x i32]* %l_10 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* bitcast ([4 x i32]* @func_1.l_10 to i8*), i64 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* bitcast ([4 x i32]* @func_1.l_10 to i8*), i64 16, i32 16, i1 false)
 ; CHECK: call void @llvm.memcpy
   %arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* %l_10, i64 0, i64 0
   %call = call i32* @noop(i32* %arrayidx)
@@ -23,4 +23,4 @@
   ret i32 0
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll b/llvm/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll
index f641f46..af6d1f3 100644
--- a/llvm/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll
+++ b/llvm/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll
@@ -13,7 +13,7 @@
 entry:
   %a = alloca <4 x float>, align 16
   %p = bitcast <4 x float>* %a to i8*
-  call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 3, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 3, i32 16, i1 false)
   %vec = load <4 x float>, <4 x float>* %a, align 8
   %val = extractelement <4 x float> %vec, i32 0
   ret float %val
@@ -27,11 +27,11 @@
 entry:
   %a = alloca { <4 x float> }, align 16
   %p = bitcast { <4 x float> }* %a to i8*
-  call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 16, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 16, i32 16, i1 false)
   %q = bitcast { <4 x float> }* %a to [2 x <2 x float>]*
   %arrayidx = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* %q, i32 0, i32 0
   store <2 x float> undef, <2 x float>* %arrayidx, align 8
   ret void
 }
 
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/2011-10-11-VectorMemset.ll b/llvm/test/Transforms/ScalarRepl/2011-10-11-VectorMemset.ll
index 3ea4971..9e31231 100644
--- a/llvm/test/Transforms/ScalarRepl/2011-10-11-VectorMemset.ll
+++ b/llvm/test/Transforms/ScalarRepl/2011-10-11-VectorMemset.ll
@@ -12,11 +12,11 @@
 
 for_test158.preheader:                            ; preds = %entry
   %a156286305 = bitcast [4 x <4 x float>]* %a156286 to i8*
-  call void @llvm.memset.p0i8.i64(i8* %a156286305, i8 -1, i64 64, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %a156286305, i8 -1, i64 64, i32 16, i1 false)
   unreachable
 
 cif_done:                                         ; preds = %entry
   ret void
 }
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll b/llvm/test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll
index abe0422..51d1d14 100644
--- a/llvm/test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll
+++ b/llvm/test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll
@@ -17,10 +17,10 @@
   store double 1.000000e+00, double* %b, align 8
   %0 = bitcast %struct.S* %retval to i8*
   %1 = bitcast %struct.S* %ret to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 8, i32 8, i1 false)
   %2 = bitcast %struct.S* %retval to double*
   %3 = load double, double* %2, align 1
   ret double %3
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/address-space.ll b/llvm/test/Transforms/ScalarRepl/address-space.ll
index b34e5e34..b8b90ef 100644
--- a/llvm/test/Transforms/ScalarRepl/address-space.ll
+++ b/llvm/test/Transforms/ScalarRepl/address-space.ll
@@ -16,7 +16,7 @@
   %arrayidx = getelementptr inbounds %struct.anon, %struct.anon addrspace(2)* %pPtr, i64 0 ; <%struct.anon addrspace(2)*> [#uses=1]
   %tmp1 = bitcast %struct.anon* %s to i8*         ; <i8*> [#uses=1]
   %tmp2 = bitcast %struct.anon addrspace(2)* %arrayidx to i8 addrspace(2)* ; <i8 addrspace(2)*> [#uses=1]
-  call void @llvm.memcpy.p0i8.p2i8.i64(i8* %tmp1, i8 addrspace(2)* %tmp2, i64 4, i1 false)
+  call void @llvm.memcpy.p0i8.p2i8.i64(i8* %tmp1, i8 addrspace(2)* %tmp2, i64 4, i32 4, i1 false)
   %tmp3 = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 0 ; <[1 x float]*> [#uses=1]
   %arrayidx4 = getelementptr inbounds [1 x float], [1 x float]* %tmp3, i32 0, i64 0 ; <float*> [#uses=2]
   %tmp5 = load float, float* %arrayidx4                  ; <float> [#uses=1]
@@ -25,11 +25,11 @@
   %arrayidx7 = getelementptr inbounds %struct.anon, %struct.anon addrspace(2)* %pPtr, i64 0 ; <%struct.anon addrspace(2)*> [#uses=1]
   %tmp8 = bitcast %struct.anon addrspace(2)* %arrayidx7 to i8 addrspace(2)* ; <i8 addrspace(2)*> [#uses=1]
   %tmp9 = bitcast %struct.anon* %s to i8*         ; <i8*> [#uses=1]
-  call void @llvm.memcpy.p2i8.p0i8.i64(i8 addrspace(2)* %tmp8, i8* %tmp9, i64 4, i1 false)
+  call void @llvm.memcpy.p2i8.p0i8.i64(i8 addrspace(2)* %tmp8, i8* %tmp9, i64 4, i32 4, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p2i8.i64(i8* nocapture, i8 addrspace(2)* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p2i8.i64(i8* nocapture, i8 addrspace(2)* nocapture, i64, i32, i1) nounwind
 
-declare void @llvm.memcpy.p2i8.p0i8.i64(i8 addrspace(2)* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p2i8.p0i8.i64(i8 addrspace(2)* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
diff --git a/llvm/test/Transforms/ScalarRepl/badarray.ll b/llvm/test/Transforms/ScalarRepl/badarray.ll
index ed6ab2e..6f5bc95 100644
--- a/llvm/test/Transforms/ScalarRepl/badarray.ll
+++ b/llvm/test/Transforms/ScalarRepl/badarray.ll
@@ -48,10 +48,10 @@
   %callret = call %padded *@test3f() ; <i32> [#uses=2]
   %callretcast = bitcast %padded* %callret to i8*                     ; <i8*> [#uses=1]
   %var_11 = bitcast %padded* %var_1 to i8*        ; <i8*> [#uses=1]
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %callretcast, i8* %var_11, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %callretcast, i8* %var_11, i32 8, i32 4, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
 declare %padded* @test3f()
diff --git a/llvm/test/Transforms/ScalarRepl/copy-aggregate.ll b/llvm/test/Transforms/ScalarRepl/copy-aggregate.ll
index 79273dd..97977db 100644
--- a/llvm/test/Transforms/ScalarRepl/copy-aggregate.ll
+++ b/llvm/test/Transforms/ScalarRepl/copy-aggregate.ll
@@ -75,9 +75,9 @@
   %var = alloca [4 x %padded], align 4
   %vari8 = bitcast [4 x %padded]* %var to i8*
   %pi8 = bitcast [4 x %padded]* %p to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %vari8, i8* %pi8, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %vari8, i8* %pi8, i32 16, i32 4, i1 false)
   %qi8 = bitcast [4 x %padded]* %q to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %qi8, i8* %vari8, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %qi8, i8* %vari8, i32 16, i32 4, i1 false)
   ret void
 }
 
@@ -98,10 +98,10 @@
   store %homogeneous %res, %homogeneous* %varcast
   %tmp1 = bitcast %wrapped_array* %arr to i8*
   %tmp2 = bitcast %wrapped_array* %var to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp1, i8* %tmp2, i32 48, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp1, i8* %tmp2, i32 48, i32 16, i1 false)
   ret void
 }
 
 declare %homogeneous @test6callee(i8* nocapture) nounwind
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/crash.ll b/llvm/test/Transforms/ScalarRepl/crash.ll
index 715f739..72e9f09 100644
--- a/llvm/test/Transforms/ScalarRepl/crash.ll
+++ b/llvm/test/Transforms/ScalarRepl/crash.ll
@@ -157,7 +157,7 @@
 
 cond_next:              ; preds = %cond_true
         %SB19 = bitcast %struct.aal_spanbucket_t* %SB to i8*            ; <i8*> [#uses=1]
-        call void @llvm.memcpy.p0i8.p0i8.i32(i8* %SB19, i8* null, i32 12, i1 false)
+        call void @llvm.memcpy.p0i8.p0i8.i32(i8* %SB19, i8* null, i32 12, i32 0, i1 false)
         br i1 false, label %cond_next34, label %cond_next79
 
 cond_next34:            ; preds = %cond_next
@@ -195,7 +195,7 @@
         %.compoundliteral = alloca %0           
         %tmp228 = getelementptr %0, %0* %.compoundliteral, i32 0, i32 7
         %tmp229 = bitcast [0 x i16]* %tmp228 to i8*             
-        call void @llvm.memset.p0i8.i64(i8* %tmp229, i8 0, i64 0, i1 false)
+        call void @llvm.memset.p0i8.i64(i8* %tmp229, i8 0, i64 0, i32 2, i1 false)
         unreachable
 }
 
@@ -282,5 +282,5 @@
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/inline-vector.ll b/llvm/test/Transforms/ScalarRepl/inline-vector.ll
index 422d4dd..85f3741 100644
--- a/llvm/test/Transforms/ScalarRepl/inline-vector.ll
+++ b/llvm/test/Transforms/ScalarRepl/inline-vector.ll
@@ -16,7 +16,7 @@
   %vector = alloca %struct.Vector4, align 16
   %agg.tmp = alloca %struct.Vector4, align 16
   %tmp = bitcast %struct.Vector4* %vector to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* bitcast (%struct.Vector4* @f.vector to i8*), i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* bitcast (%struct.Vector4* @f.vector to i8*), i32 16, i32 16, i1 false)
   br label %for.cond
 
 for.cond:                                         ; preds = %for.body, %entry
@@ -28,7 +28,7 @@
 for.body:                                         ; preds = %for.cond
   %tmp2 = bitcast %struct.Vector4* %agg.tmp to i8*
   %tmp3 = bitcast %struct.Vector4* %vector to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 16, i1 false)
   %0 = bitcast %struct.Vector4* %agg.tmp to [2 x i64]*
   %1 = load [2 x i64], [2 x i64]* %0, align 16
   %tmp2.i = extractvalue [2 x i64] %1, 0
@@ -49,5 +49,5 @@
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 declare i32 @printf(...)
diff --git a/llvm/test/Transforms/ScalarRepl/memcpy-align.ll b/llvm/test/Transforms/ScalarRepl/memcpy-align.ll
index 4c9621a..29a1bb8 100644
--- a/llvm/test/Transforms/ScalarRepl/memcpy-align.ll
+++ b/llvm/test/Transforms/ScalarRepl/memcpy-align.ll
@@ -13,9 +13,9 @@
 entry:
   %x0 = alloca %struct.anon, align 4              ; <%struct.anon*> [#uses=2]
   %tmp = bitcast %struct.anon* %x0 to i8*         ; <i8*> [#uses=1]
-  call void @llvm.memset.p0i8.i32(i8* %tmp, i8 0, i32 4, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %tmp, i8 0, i32 4, i32 4, i1 false)
   %tmp1 = bitcast %struct.anon* %x0 to i8*        ; <i8*> [#uses=1]
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds (%0, %0* @c, i32
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%0, %0* @c, i32
 0, i32 0, i32 0, i32 0), i8* %tmp1, i32 4, i32 4, i1 false)
   ret void
   
@@ -25,7 +25,7 @@
 ; CHECK: store i8 0, i8*{{.*}}, align 1
 }
 
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
 
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32,
 i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll b/llvm/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll
index dc01d2b..e8088c1 100644
--- a/llvm/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll
+++ b/llvm/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll
@@ -14,10 +14,10 @@
 	%L = alloca %struct.foo, align 2		; <%struct.foo*> [#uses=1]
 	%L2 = getelementptr %struct.foo, %struct.foo* %L, i32 0, i32 0		; <i8*> [#uses=2]
 	%tmp13 = getelementptr %struct.foo, %struct.foo* %P, i32 0, i32 0		; <i8*> [#uses=1]
-	call void @llvm.memcpy.p0i8.p0i8.i32( i8* %L2, i8* %tmp13, i32 2, i1 false)
+	call void @llvm.memcpy.p0i8.p0i8.i32( i8* %L2, i8* %tmp13, i32 2, i32 1, i1 false)
 	%tmp5 = load i8, i8* %L2		; <i8> [#uses=1]
 	%tmp56 = sext i8 %tmp5 to i32		; <i32> [#uses=1]
 	ret i32 %tmp56
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1)
diff --git a/llvm/test/Transforms/ScalarRepl/memset-aggregate.ll b/llvm/test/Transforms/ScalarRepl/memset-aggregate.ll
index abdfeab..98e2ddd 100644
--- a/llvm/test/Transforms/ScalarRepl/memset-aggregate.ll
+++ b/llvm/test/Transforms/ScalarRepl/memset-aggregate.ll
@@ -14,7 +14,7 @@
 	%L = alloca %struct.foo, align 8		; <%struct.foo*> [#uses=2]
 	%L2 = bitcast %struct.foo* %L to i8*		; <i8*> [#uses=1]
 	%tmp13 = bitcast %struct.foo* %P to i8*		; <i8*> [#uses=1]
-        call void @llvm.memcpy.p0i8.p0i8.i32(i8* %L2, i8* %tmp13, i32 8, i1 false)
+        call void @llvm.memcpy.p0i8.p0i8.i32(i8* %L2, i8* %tmp13, i32 8, i32 4, i1 false)
 	%tmp4 = getelementptr %struct.foo, %struct.foo* %L, i32 0, i32 0		; <i32*> [#uses=1]
 	%tmp5 = load i32, i32* %tmp4		; <i32> [#uses=1]
 	ret i32 %tmp5
@@ -25,7 +25,7 @@
 entry:
 	%L = alloca [4 x %struct.foo], align 16		; <[4 x %struct.foo]*> [#uses=2]
 	%L12 = bitcast [4 x %struct.foo]* %L to i8*		; <i8*> [#uses=1]
-        call void @llvm.memset.p0i8.i32(i8* %L12, i8 0, i32 32, i1 false)
+        call void @llvm.memset.p0i8.i32(i8* %L12, i8 0, i32 32, i32 16, i1 false)
 	%tmp4 = getelementptr [4 x %struct.foo], [4 x %struct.foo]* %L, i32 0, i32 0, i32 0		; <i32*> [#uses=1]
 	%tmp5 = load i32, i32* %tmp4		; <i32> [#uses=1]
 	ret i32 %tmp5
@@ -36,7 +36,7 @@
 entry:
 	%B = alloca %struct.bar, align 16		; <%struct.bar*> [#uses=4]
 	%B1 = bitcast %struct.bar* %B to i8*		; <i8*> [#uses=1]
-	call void @llvm.memset.p0i8.i32(i8* %B1, i8 1, i32 24, i1 false)
+	call void @llvm.memset.p0i8.i32(i8* %B1, i8 1, i32 24, i32 16, i1 false)
 	%tmp3 = getelementptr %struct.bar, %struct.bar* %B, i32 0, i32 0, i32 0		; <i32*> [#uses=1]
 	store i32 1, i32* %tmp3
 	%tmp4 = getelementptr %struct.bar, %struct.bar* %B, i32 0, i32 2		; <double*> [#uses=1]
@@ -56,12 +56,12 @@
 	store i32 1, i32* %0, align 8
 	%1 = getelementptr %struct.f, %struct.f* %A, i32 0, i32 1		; <i32*> [#uses=1]
 	%2 = bitcast i32* %1 to i8*		; <i8*> [#uses=1]
-	call void @llvm.memset.p0i8.i32(i8* %2, i8 2, i32 12, i1 false)
+	call void @llvm.memset.p0i8.i32(i8* %2, i8 2, i32 12, i32 4, i1 false)
 	%3 = getelementptr %struct.f, %struct.f* %A, i32 0, i32 2		; <i32*> [#uses=1]
 	%4 = load i32, i32* %3, align 8		; <i32> [#uses=1]
 	%retval12 = trunc i32 %4 to i16		; <i16> [#uses=1]
 	ret i16 %retval12
 }
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/negative-memset.ll b/llvm/test/Transforms/ScalarRepl/negative-memset.ll
index 480a607..458d961 100644
--- a/llvm/test/Transforms/ScalarRepl/negative-memset.ll
+++ b/llvm/test/Transforms/ScalarRepl/negative-memset.ll
@@ -11,10 +11,10 @@
   %buff = alloca [1 x i8], align 1
   store i32 0, i32* %retval
   %0 = bitcast [1 x i8]* %buff to i8*
-  call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 1, i32 1, i1 false)
   %arraydecay = getelementptr inbounds [1 x i8], [1 x i8]* %buff, i32 0, i32 0
-  call void @llvm.memset.p0i8.i32(i8* %arraydecay, i8 -1, i32 -8, i1 false)	; Negative 8!
+  call void @llvm.memset.p0i8.i32(i8* %arraydecay, i8 -1, i32 -8, i32 1, i1 false)	; Negative 8!
   ret i32 0
 }
 
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/only-memcpy-uses.ll b/llvm/test/Transforms/ScalarRepl/only-memcpy-uses.ll
index 72d135d..d0ed20b 100644
--- a/llvm/test/Transforms/ScalarRepl/only-memcpy-uses.ll
+++ b/llvm/test/Transforms/ScalarRepl/only-memcpy-uses.ll
@@ -14,14 +14,14 @@
   %agg.tmp = alloca %struct.S, align 4
   %tmp = bitcast %struct.S* %t to i8*
   %tmp1 = bitcast %struct.S* %s to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 48, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 48, i32 4, i1 false)
   %tmp2 = bitcast %struct.S* %agg.tmp to i8*
   %tmp3 = bitcast %struct.S* %t to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* %tmp3, i64 48, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* %tmp3, i64 48, i32 4, i1 false)
   %call = call i32 (...) @bazz(%struct.S* byval %agg.tmp)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
 declare i32 @bazz(...)
diff --git a/llvm/test/Transforms/ScalarRepl/vector_memcpy.ll b/llvm/test/Transforms/ScalarRepl/vector_memcpy.ll
index 04d0659..031ad5e 100644
--- a/llvm/test/Transforms/ScalarRepl/vector_memcpy.ll
+++ b/llvm/test/Transforms/ScalarRepl/vector_memcpy.ll
@@ -9,7 +9,7 @@
 	store <16 x float> %A, <16 x float>* %tmp
 	%s = bitcast <16 x float>* %tmp to i8*
 	%s2 = bitcast <16 x float>* %tmp2 to i8*
-	call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s2, i8* %s, i64 64, i1 false)
+	call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s2, i8* %s, i64 64, i32 16, i1 false)
 	%R = load <16 x float>, <16 x float>* %tmp2
 	ret <16 x float> %R
 }
@@ -18,11 +18,11 @@
 	%tmp2 = alloca <16 x float>, align 16
 
 	%s2 = bitcast <16 x float>* %tmp2 to i8*
-	call void @llvm.memset.p0i8.i64(i8* %s2, i8 0, i64 64, i1 false)
+	call void @llvm.memset.p0i8.i64(i8* %s2, i8 0, i64 64, i32 16, i1 false)
 	
 	%R = load <16 x float>, <16 x float>* %tmp2
 	ret <16 x float> %R
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/llvm/test/Transforms/Util/combine-alias-scope-metadata.ll b/llvm/test/Transforms/Util/combine-alias-scope-metadata.ll
index 97da9e7..fd0a3d5 100644
--- a/llvm/test/Transforms/Util/combine-alias-scope-metadata.ll
+++ b/llvm/test/Transforms/Util/combine-alias-scope-metadata.ll
@@ -4,16 +4,16 @@
 define void @test(i8* noalias dereferenceable(1) %in, i8* noalias dereferenceable(1) %out) {
   %tmp = alloca i8
   %tmp2 = alloca i8
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %in, i64 1, i1 false), !alias.scope !4
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* %tmp, i64 1, i1 false), !alias.scope !5
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 1, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %in, i64 1, i32 8, i1 false), !alias.scope !4
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* %tmp, i64 1, i32 8, i1 false), !alias.scope !5
 
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %tmp2, i64 1, i1 false), !noalias !6
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %tmp2, i64 1, i32 8, i1 false), !noalias !6
 
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
 
 !0 = !{!0}
 !1 = distinct !{!1, !0, !"in"}