Change memcpy/memset/memmove to have dest and source alignments.

Note, this was reviewed (and more details are in) http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151109/312083.html

These intrinsics currently have an explicit alignment argument which is
required to be a constant integer.  It represents the alignment of the
source and dest, and so must be the minimum of those.

This change allows source and dest to each have their own alignments
by using the alignment attribute on their arguments.  The alignment
argument itself is removed.

There are a few places in the code for which the code needs to be
checked by an expert as to whether using only src/dest alignment is
safe.  For those places, they currently take the minimum of src/dest
alignments which matches the current behaviour.

For example, code which used to read:
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 8, i1 false)
will now read:
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 500, i1 false)

For out of tree owners, I was able to strip alignment from calls using sed by replacing:
  (call.*llvm\.memset.*)i32\ [0-9]*\,\ i1 false\)
with:
  $1i1 false)

and similarly for memmove and memcpy.

I then added back in alignment to test cases which needed it.

A similar commit will be made to clang which actually has many differences in alignment as now
IRBuilder can generate different source/dest alignments on calls.

In IRBuilder itself, a new argument was added.  Instead of calling:
  CreateMemCpy(Dst, Src, getInt64(Size), DstAlign, /* isVolatile */ false)
you now call
  CreateMemCpy(Dst, Src, getInt64(Size), DstAlign, SrcAlign, /* isVolatile */ false)

There is a temporary class (IntegerAlignment) which takes the source alignment and rejects
implicit conversion from bool.  This is to prevent isVolatile here from passing its default
parameter to the source alignment.

Note, changes in future can now be made to codegen.  I didn't change anything here, but this
change should enable better memcpy code sequences.

Reviewed by Hal Finkel.

llvm-svn: 253511
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll b/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll
index 851e6dc..b3884ff 100644
--- a/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll
@@ -180,11 +180,11 @@
   %maskcond = icmp eq i64 %maskedptr, 0
   tail call void @llvm.assume(i1 %maskcond)
   %0 = bitcast i32* %a to i8*
-  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i32 4, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i1 false)
   ret i32 undef
 
 ; CHECK-LABEL: @moo
-; CHECK: @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i32 32, i1 false)
+; CHECK: @llvm.memset.p0i8.i64(i8* align 32 %0, i8 0, i64 64, i1 false)
 ; CHECK: ret i32 undef
 }
 
@@ -200,16 +200,16 @@
   tail call void @llvm.assume(i1 %maskcond4)
   %0 = bitcast i32* %a to i8*
   %1 = bitcast i32* %b to i8*
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i32 4, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i1 false)
   ret i32 undef
 
 ; CHECK-LABEL: @moo2
-; CHECK: @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i32 32, i1 false)
+; CHECK: @llvm.memcpy.p0i8.p0i8.i64(i8* align 32 %0, i8* align 32 %1, i64 64, i1 false)
 ; CHECK: ret i32 undef
 }
 
 declare void @llvm.assume(i1) nounwind
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
 
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll b/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll
index 2edc2e9..01e71e5 100644
--- a/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll
@@ -180,11 +180,11 @@
   %maskcond = icmp eq i64 %maskedptr, 0
   tail call void @llvm.assume(i1 %maskcond)
   %0 = bitcast i32* %a to i8*
-  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i32 4, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i1 false)
   ret i32 undef
 
 ; CHECK-LABEL: @moo
-; CHECK: @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i32 32, i1 false)
+; CHECK: @llvm.memset.p0i8.i64(i8* align 32 %0, i8 0, i64 64, i1 false)
 ; CHECK: ret i32 undef
 }
 
@@ -200,16 +200,16 @@
   tail call void @llvm.assume(i1 %maskcond4)
   %0 = bitcast i32* %a to i8*
   %1 = bitcast i32* %b to i8*
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i32 4, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i1 false)
   ret i32 undef
 
 ; CHECK-LABEL: @moo2
-; CHECK: @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i32 32, i1 false)
+; CHECK: @llvm.memcpy.p0i8.p0i8.i64(i8* align 32 %0, i8* align 32 %1, i64 64, i1 false)
 ; CHECK: ret i32 undef
 }
 
 declare void @llvm.assume(i1) nounwind
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
 
diff --git a/llvm/test/Transforms/BBVectorize/X86/wr-aliases.ll b/llvm/test/Transforms/BBVectorize/X86/wr-aliases.ll
index a6ea27f..cace3da 100644
--- a/llvm/test/Transforms/BBVectorize/X86/wr-aliases.ll
+++ b/llvm/test/Transforms/BBVectorize/X86/wr-aliases.ll
@@ -5,7 +5,7 @@
 %class.QBezier.15 = type { double, double, double, double, double, double, double, double }
 
 ; Function Attrs: nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #0
 
 ; Function Attrs: uwtable
 declare fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval nocapture readonly align 8) #1
@@ -55,7 +55,7 @@
   %v2 = bitcast %class.QBezier.15* %agg.tmp56.i to i8*
   call void @llvm.lifetime.start(i64 64, i8* %v2)
   %v3 = bitcast [10 x %class.QBezier.15]* %beziers to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v0, i8* %v3, i64 64, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v0, i8* %v3, i64 64, i1 false)
   call fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval align 8 %agg.tmp.i)
   %x2.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 2
   %v4 = load double, double* %x2.i, align 16
@@ -130,9 +130,9 @@
   %y454.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 7
   store double %mul52.i, double* %y454.i, align 8
   %v22 = bitcast %class.QBezier.15* %add.ptr to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v1, i8* %v22, i64 64, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v1, i8* %v22, i64 64, i1 false)
   call fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval align 8 %agg.tmp55.i)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v2, i8* %v3, i64 64, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v2, i8* %v3, i64 64, i1 false)
   call fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval align 8 %agg.tmp56.i)
   call void @llvm.lifetime.end(i64 64, i8* %v0)
   call void @llvm.lifetime.end(i64 64, i8* %v1)
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/memset_chk-simplify-nobuiltin.ll b/llvm/test/Transforms/CodeGenPrepare/X86/memset_chk-simplify-nobuiltin.ll
index 1e12c01..f4c1af5 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/memset_chk-simplify-nobuiltin.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/memset_chk-simplify-nobuiltin.ll
@@ -9,7 +9,7 @@
 ; - TLI::has (always returns false thanks to -disable-simplify-libcalls)
 
 ; CHECK-NOT: _chk
-; CHECK: call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %len, i32 1, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 %len, i1 false)
 define void @test_nobuiltin(i8* %dst, i64 %len) {
   call i8* @__memset_chk(i8* %dst, i32 0, i64 %len, i64 -1) nobuiltin
   ret void
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/non-null.ll b/llvm/test/Transforms/CorrelatedValuePropagation/non-null.ll
index 6fb4cb6..e5882f12 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/non-null.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/non-null.ll
@@ -30,10 +30,10 @@
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
 define void @test4(i8* %dest, i8* %src) {
 ; CHECK: test4
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i1 false)
   br label %bb
 bb:
   icmp ne i8* %dest, null
@@ -42,10 +42,10 @@
   ret void
 }
 
-declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i1)
 define void @test5(i8* %dest, i8* %src) {
 ; CHECK: test5
-  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i32 1, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i1 false)
   br label %bb
 bb:
   icmp ne i8* %dest, null
@@ -54,10 +54,10 @@
   ret void
 }
 
-declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1)
+declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1)
 define void @test6(i8* %dest) {
 ; CHECK: test6
-  call void @llvm.memset.p0i8.i32(i8* %dest, i8 255, i32 1, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %dest, i8 255, i32 1, i1 false)
   br label %bb
 bb:
   icmp ne i8* %dest, null
@@ -67,7 +67,7 @@
 
 define void @test7(i8* %dest, i8* %src, i32 %len) {
 ; CHECK: test7
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i1 false)
   br label %bb
 bb:
   %KEEP1 = icmp ne i8* %dest, null
@@ -77,10 +77,10 @@
   ret void
 }
 
-declare void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1) *, i8 addrspace(1) *, i32, i32, i1)
+declare void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1) *, i8 addrspace(1) *, i32, i1)
 define void @test8(i8 addrspace(1) * %dest, i8 addrspace(1) * %src) {
 ; CHECK: test8
-  call void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1) * %dest, i8 addrspace(1) * %src, i32 1, i32 1, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1) * %dest, i8 addrspace(1) * %src, i32 1, i1 false)
   br label %bb
 bb:
   %KEEP1 = icmp ne i8 addrspace(1) * %dest, null
@@ -92,7 +92,7 @@
 
 define void @test9(i8* %dest, i8* %src) {
 ; CHECK: test9
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i32 1, i1 true)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i1 true)
   br label %bb
 bb:
   %KEEP1 = icmp ne i8* %dest, null
diff --git a/llvm/test/Transforms/DeadStoreElimination/2011-09-06-MemCpy.ll b/llvm/test/Transforms/DeadStoreElimination/2011-09-06-MemCpy.ll
index d30e9a2..665d772 100644
--- a/llvm/test/Transforms/DeadStoreElimination/2011-09-06-MemCpy.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/2011-09-06-MemCpy.ll
@@ -60,7 +60,7 @@
 %struct.AttrListPtr = type { %struct.AttributeListImpl* }
 %struct.AttributeListImpl = type opaque
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
 
 ; CHECK: _ZSt9iter_swapIPSt4pairIPN4llvm10BasicBlockEjES5_EvT_T0_
 ; CHECK: store
@@ -78,8 +78,8 @@
   store i32 %5, i32* %3, align 8
   %6 = bitcast %struct.pair.162* %__a to i8*
   %7 = bitcast %struct.pair.162* %__b to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %6, i8* %7, i64 12, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %6, i8* %7, i64 12, i1 false)
   %8 = bitcast %struct.pair.162* %memtmp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* %8, i64 12, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* %8, i64 12, i1 false)
   ret void
 }
diff --git a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll b/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
index de7a4cc..863fc9e 100644
--- a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
@@ -12,8 +12,8 @@
 entry:
   %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
   %p3 = bitcast i32* %arrayidx0 to i8*
-; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 24, i32 4, i1 false)
-  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 24, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i1 false)
   %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 7
   store i32 1, i32* %arrayidx1, align 4
   ret void
@@ -23,8 +23,8 @@
 ; CHECK-LABEL: @write28to32(
 entry:
   %p3 = bitcast i32* %p to i8*
-; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
-  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 4, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i1 false)
   %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 7
   store i32 1, i32* %arrayidx1, align 4
   ret void
@@ -34,8 +34,8 @@
 ; CHECK-LABEL: @dontwrite28to32memset(
 entry:
   %p3 = bitcast i32* %p to i8*
-; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 16, i1 false)
-  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 16, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 %p3, i8 0, i64 32, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* align 16 %p3, i8 0, i64 32, i1 false)
   %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 7
   store i32 1, i32* %arrayidx1, align 4
   ret void
@@ -45,8 +45,8 @@
 ; CHECK-LABEL: @write32to36(
 entry:
   %0 = bitcast %struct.vec2plusi* %p to i8*
-; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 32, i32 16, i1 false)
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 36, i32 16, i1 false)
+; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 32, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 36, i1 false)
   %c = getelementptr inbounds %struct.vec2plusi, %struct.vec2plusi* %p, i64 0, i32 2
   store i32 1, i32* %c, align 4
   ret void
@@ -56,8 +56,8 @@
 ; CHECK-LABEL: @write16to32(
 entry:
   %0 = bitcast %struct.vec2* %p to i8*
-; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 16, i32 16, i1 false)
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
+; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 16, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i1 false)
   %c = getelementptr inbounds %struct.vec2, %struct.vec2* %p, i64 0, i32 1
   store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* %c, align 4
   ret void
@@ -67,15 +67,15 @@
 ; CHECK-LABEL: @dontwrite28to32memcpy(
 entry:
   %0 = bitcast %struct.vec2* %p to i8*
-; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
+; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i1 false)
   %arrayidx1 = getelementptr inbounds %struct.vec2, %struct.vec2* %p, i64 0, i32 0, i64 7
   store i32 1, i32* %arrayidx1, align 4
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
 
 %struct.trapframe = type { i64, i64, i64 }
 
@@ -87,8 +87,8 @@
   %add.ptr = getelementptr inbounds %struct.trapframe, %struct.trapframe* %0, i64 -1
   %1 = bitcast %struct.trapframe* %add.ptr to i8*
   %2 = bitcast %struct.trapframe* %md_regs to i8*
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 24, i32 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 24, i32 1, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 24, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 24, i1 false)
   %tf_trapno = getelementptr inbounds %struct.trapframe, %struct.trapframe* %0, i64 -1, i32 1
   store i64 3, i64* %tf_trapno, align 8
   ret void
diff --git a/llvm/test/Transforms/DeadStoreElimination/crash.ll b/llvm/test/Transforms/DeadStoreElimination/crash.ll
index 78cb842..9276569 100644
--- a/llvm/test/Transforms/DeadStoreElimination/crash.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/crash.ll
@@ -36,11 +36,11 @@
   %6 = getelementptr inbounds i16, i16* %2, i64 undef  ; <i16*> [#uses=1]
   store i16 undef, i16* %6, align 2
   %7 = getelementptr inbounds i8, i8* %5, i64 undef   ; <i8*> [#uses=1]
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* undef, i64 undef, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* undef, i64 undef, i1 false)
   unreachable
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
 
 
 ; rdar://7635088
diff --git a/llvm/test/Transforms/DeadStoreElimination/cs-cs-aliasing.ll b/llvm/test/Transforms/DeadStoreElimination/cs-cs-aliasing.ll
index 30bb96f..2086bc9 100644
--- a/llvm/test/Transforms/DeadStoreElimination/cs-cs-aliasing.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/cs-cs-aliasing.ll
@@ -9,7 +9,7 @@
 %union.anon = type { i64, [8 x i8] }
 
 ; Function Attrs: nounwind
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #0
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #0
 
 ; Function Attrs: noinline nounwind readonly uwtable
 declare zeroext i1 @callee_takes_string(%class.basic_string* nonnull) #1 align 2
@@ -61,10 +61,10 @@
 ; CHECK: store i8 0, i8* %tmp14, align 1
 
   %tmp17 = call zeroext i1 @callee_takes_string(%class.basic_string* nonnull %tmp1)
-  call void @llvm.memset.p0i8.i64(i8* %tmp11, i8 -51, i64 16, i32 8, i1 false) #0
-  call void @llvm.memset.p0i8.i64(i8* %tmp15, i8 -51, i64 32, i32 8, i1 false) #0
-  call void @llvm.memset.p0i8.i64(i8* %tmp4, i8 -51, i64 16, i32 8, i1 false) #0
-  call void @llvm.memset.p0i8.i64(i8* %tmp8, i8 -51, i64 32, i32 8, i1 false) #0
+  call void @llvm.memset.p0i8.i64(i8* %tmp11, i8 -51, i64 16, i1 false) #0
+  call void @llvm.memset.p0i8.i64(i8* %tmp15, i8 -51, i64 32, i1 false) #0
+  call void @llvm.memset.p0i8.i64(i8* %tmp4, i8 -51, i64 16, i1 false) #0
+  call void @llvm.memset.p0i8.i64(i8* %tmp8, i8 -51, i64 32, i1 false) #0
   ret i1 %tmp17
 }
 
diff --git a/llvm/test/Transforms/DeadStoreElimination/lifetime.ll b/llvm/test/Transforms/DeadStoreElimination/lifetime.ll
index 305c916..87c3104 100644
--- a/llvm/test/Transforms/DeadStoreElimination/lifetime.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/lifetime.ll
@@ -4,7 +4,7 @@
 
 declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
 declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
-declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i1) nounwind
 
 define void @test1() {
 ; CHECK-LABEL: @test1(
@@ -14,7 +14,7 @@
   call void @llvm.lifetime.end(i64 1, i8* %A)
 ; CHECK: lifetime.end
 
-  call void @llvm.memset.p0i8.i8(i8* %A, i8 0, i8 -1, i32 0, i1 false)
+  call void @llvm.memset.p0i8.i8(i8* %A, i8 0, i8 -1, i1 false)
 ; CHECK-NOT: memset
 
   ret void
diff --git a/llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll
index 5bbb8e0..aa9f1a1 100644
--- a/llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/memintrinsics.ll
@@ -1,8 +1,8 @@
 ; RUN: opt -S -dse < %s | FileCheck %s
 
-declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i32, i1) nounwind
-declare void @llvm.memmove.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i32, i1) nounwind
-declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i1) nounwind
+declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i1) nounwind
 
 define void @test1() {
 ; CHECK-LABEL: @test1(
@@ -12,7 +12,7 @@
   store i8 0, i8* %A  ;; Written to by memcpy
 ; CHECK-NOT: store
 
-  call void @llvm.memcpy.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i1 false)
 
   ret void
 ; CHECK: ret void
@@ -26,7 +26,7 @@
   store i8 0, i8* %A  ;; Written to by memmove
 ; CHECK-NOT: store
 
-  call void @llvm.memmove.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i32 0, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i1 false)
 
   ret void
 ; CHECK: ret void
@@ -40,7 +40,7 @@
   store i8 0, i8* %A  ;; Written to by memset
 ; CHECK-NOT: store
 
-  call void @llvm.memset.p0i8.i8(i8* %A, i8 0, i8 -1, i32 0, i1 false)
+  call void @llvm.memset.p0i8.i8(i8* %A, i8 0, i8 -1, i1 false)
 
   ret void
 ; CHECK: ret void
diff --git a/llvm/test/Transforms/DeadStoreElimination/no-targetdata.ll b/llvm/test/Transforms/DeadStoreElimination/no-targetdata.ll
index f9262ed..b66b75e 100644
--- a/llvm/test/Transforms/DeadStoreElimination/no-targetdata.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/no-targetdata.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -basicaa -dse -S < %s | FileCheck %s
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
 
 define void @fn(i8* nocapture %buf) #0 {
 entry:
@@ -13,9 +13,9 @@
 ; CHECK: ret void
 
   %arrayidx = getelementptr i8, i8* %buf, i64 18
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arrayidx, i8* %buf, i64 18, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arrayidx, i8* %buf, i64 18, i1 false)
   store i8 1, i8* %arrayidx, align 1
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %buf, i8* %arrayidx, i64 18, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %buf, i8* %arrayidx, i64 18, i1 false)
   ret void
 }
 
diff --git a/llvm/test/Transforms/DeadStoreElimination/pr11390.ll b/llvm/test/Transforms/DeadStoreElimination/pr11390.ll
index faf3b8b..6105a2e 100644
--- a/llvm/test/Transforms/DeadStoreElimination/pr11390.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/pr11390.ll
@@ -17,12 +17,12 @@
   br i1 %tobool, label %return, label %if.end
 
 if.end:                                           ; preds = %entry
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %call4, i8* %name, i64 %call, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %call4, i8* %name, i64 %call, i1 false)
   %arrayidx = getelementptr inbounds i8, i8* %call4, i64 %call
   store i8 46, i8* %arrayidx, align 1
 ; CHECK: store i8 46
   %add.ptr5 = getelementptr inbounds i8, i8* %call4, i64 %add
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %add.ptr5, i8* %domain, i64 %call1, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %add.ptr5, i8* %domain, i64 %call1, i1 false)
   %arrayidx8 = getelementptr inbounds i8, i8* %call4, i64 %add2
   store i8 0, i8* %arrayidx8, align 1
   br label %return
@@ -35,4 +35,4 @@
 
 declare noalias i8* @malloc(i64) nounwind
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
diff --git a/llvm/test/Transforms/DeadStoreElimination/simple.ll b/llvm/test/Transforms/DeadStoreElimination/simple.ll
index 4f6221d..349713e 100644
--- a/llvm/test/Transforms/DeadStoreElimination/simple.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/simple.ll
@@ -1,8 +1,8 @@
 ; RUN: opt < %s -basicaa -dse -S | FileCheck %s
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
 declare void @llvm.init.trampoline(i8*, i8*, i8*)
 
 define void @test1(i32* %Q, i32* %P) {
@@ -63,7 +63,7 @@
 ; alias).
 define void @test6(i32 *%p, i8 *%q) {
   store i32 10, i32* %p, align 4       ;; dead.
-  call void @llvm.memset.p0i8.i64(i8* %q, i8 42, i64 900, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %q, i8 42, i64 900, i1 false)
   store i32 30, i32* %p, align 4
   ret void
 ; CHECK-LABEL: @test6(
@@ -74,7 +74,7 @@
 ; alias).
 define void @test7(i32 *%p, i8 *%q, i8* noalias %r) {
   store i32 10, i32* %p, align 4       ;; dead.
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %r, i64 900, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %r, i64 900, i1 false)
   store i32 30, i32* %p, align 4
   ret void
 ; CHECK-LABEL: @test7(
@@ -208,8 +208,8 @@
 
 ;; Fully dead overwrite of memcpy.
 define void @test15(i8* %P, i8* %Q) nounwind ssp {
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
   ret void
 ; CHECK-LABEL: @test15(
 ; CHECK-NEXT: call void @llvm.memcpy
@@ -218,8 +218,8 @@
 
 ;; Full overwrite of smaller memcpy.
 define void @test16(i8* %P, i8* %Q) nounwind ssp {
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
   ret void
 ; CHECK-LABEL: @test16(
 ; CHECK-NEXT: call void @llvm.memcpy
@@ -228,8 +228,8 @@
 
 ;; Overwrite of memset by memcpy.
 define void @test17(i8* %P, i8* noalias %Q) nounwind ssp {
-  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false)
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
   ret void
 ; CHECK-LABEL: @test17(
 ; CHECK-NEXT: call void @llvm.memcpy
@@ -238,8 +238,8 @@
 
 ; Should not delete the volatile memset.
 define void @test17v(i8* %P, i8* %Q) nounwind ssp {
-  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 true)
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 true)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
   ret void
 ; CHECK-LABEL: @test17v(
 ; CHECK-NEXT: call void @llvm.memset
@@ -252,8 +252,8 @@
 ; A = B
 ; A = A
 define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp {
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
   ret void
 ; CHECK-LABEL: @test18(
 ; CHECK-NEXT: call void @llvm.memcpy
diff --git a/llvm/test/Transforms/GVN/nonescaping-malloc.ll b/llvm/test/Transforms/GVN/nonescaping-malloc.ll
index f83b317..ea85924 100644
--- a/llvm/test/Transforms/GVN/nonescaping-malloc.ll
+++ b/llvm/test/Transforms/GVN/nonescaping-malloc.ll
@@ -80,7 +80,7 @@
 _ZN4llvm14StringMapEntryIPvE6CreateINS_15MallocAllocatorES1_EEPS2_PKcS7_RT_T0_.exit.i: ; preds = %bb.i.i, %bb4.i
   %tmp.i18.i.i = getelementptr inbounds i8, i8* %tmp.i20.i.i, i64 16
   %tmp15.i.i = zext i32 %tmp4.i.i to i64
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp.i18.i.i, i8* %tmp41.i, i64 %tmp15.i.i, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp.i18.i.i, i8* %tmp41.i, i64 %tmp15.i.i, i1 false)
   %tmp.i18.sum.i.i = add i64 %tmp15.i.i, 16
   %tmp17.i.i = getelementptr inbounds i8, i8* %tmp.i20.i.i, i64 %tmp.i18.sum.i.i
   store i8 0, i8* %tmp17.i.i, align 1
@@ -106,4 +106,4 @@
   ret %"struct.llvm::StringMapEntry<void*>"* %tmp10.i.i
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
diff --git a/llvm/test/Transforms/GVN/pr17732.ll b/llvm/test/Transforms/GVN/pr17732.ll
index 6c40ccf..1410683 100644
--- a/llvm/test/Transforms/GVN/pr17732.ll
+++ b/llvm/test/Transforms/GVN/pr17732.ll
@@ -14,10 +14,10 @@
 
 define i32 @main() {
 entry:
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds (%struct.with_array, %struct.with_array* @array_with_zeroinit, i64 0, i32 0, i64 0), i8* getelementptr inbounds ({ [2 x i8], i32, i8, [3 x i8] }, { [2 x i8], i32, i8, [3 x i8] }* @main.obj_with_array, i64 0, i32 0, i64 0), i64 12, i32 4, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds (%struct.with_array, %struct.with_array* @array_with_zeroinit, i64 0, i32 0, i64 0), i8* getelementptr inbounds ({ [2 x i8], i32, i8, [3 x i8] }, { [2 x i8], i32, i8, [3 x i8] }* @main.obj_with_array, i64 0, i32 0, i64 0), i64 12, i1 false)
   %0 = load i8, i8* getelementptr inbounds (%struct.with_array, %struct.with_array* @array_with_zeroinit, i64 0, i32 2), align 4
 
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds (%struct.with_vector, %struct.with_vector* @vector_with_zeroinit, i64 0, i32 0, i64 0), i8* getelementptr inbounds ({ <2 x i8>, i32, i8, [3 x i8] }, { <2 x i8>, i32, i8, [3 x i8] }* @main.obj_with_vector, i64 0, i32 0, i64 0), i64 12, i32 4, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds (%struct.with_vector, %struct.with_vector* @vector_with_zeroinit, i64 0, i32 0, i64 0), i8* getelementptr inbounds ({ <2 x i8>, i32, i8, [3 x i8] }, { <2 x i8>, i32, i8, [3 x i8] }* @main.obj_with_vector, i64 0, i32 0, i64 0), i64 12, i1 false)
   %1 = load i8, i8* getelementptr inbounds (%struct.with_vector, %struct.with_vector* @vector_with_zeroinit, i64 0, i32 2), align 4
   %conv0 = sext i8 %0 to i32
   %conv1 = sext i8 %1 to i32
@@ -27,4 +27,4 @@
 ; CHECK: ret i32 1
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
diff --git a/llvm/test/Transforms/GVN/rle.ll b/llvm/test/Transforms/GVN/rle.ll
index 3f42135..bc514d6 100644
--- a/llvm/test/Transforms/GVN/rle.ll
+++ b/llvm/test/Transforms/GVN/rle.ll
@@ -27,7 +27,7 @@
 ;; No PR filed, crashed in CaptureTracker.
 declare void @helper()
 define void @crash1() {
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* undef, i64 undef, i32 1, i1 false) nounwind
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* undef, i64 undef, i1 false) nounwind
   %tmp = load i8, i8* bitcast (void ()* @helper to i8*)
   %x = icmp eq i8 %tmp, 15
   ret void
@@ -142,7 +142,7 @@
 define signext i16 @memset_to_i16_local(i16* %A) nounwind ssp {
 entry:
   %conv = bitcast i16* %A to i8* 
-  tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 1, i64 200, i32 1, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 1, i64 200, i1 false)
   %arrayidx = getelementptr inbounds i16, i16* %A, i64 42
   %tmp2 = load i16, i16* %arrayidx
   ret i16 %tmp2
@@ -155,7 +155,7 @@
 define float @memset_to_float_local(float* %A, i8 %Val) nounwind ssp {
 entry:
   %conv = bitcast float* %A to i8*                ; <i8*> [#uses=1]
-  tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 %Val, i64 400, i32 1, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 %Val, i64 400, i1 false)
   %arrayidx = getelementptr inbounds float, float* %A, i64 42 ; <float*> [#uses=1]
   %tmp2 = load float, float* %arrayidx                   ; <float> [#uses=1]
   ret float %tmp2
@@ -175,11 +175,11 @@
   %P3 = bitcast i16* %P to i8*
   br i1 %cond, label %T, label %F
 T:
-  tail call void @llvm.memset.p0i8.i64(i8* %P3, i8 1, i64 400, i32 1, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %P3, i8 1, i64 400, i1 false)
   br label %Cont
   
 F:
-  tail call void @llvm.memset.p0i8.i64(i8* %P3, i8 2, i64 400, i32 1, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %P3, i8 2, i64 400, i1 false)
   br label %Cont
 
 Cont:
@@ -201,7 +201,7 @@
 define float @memcpy_to_float_local(float* %A) nounwind ssp {
 entry:
   %conv = bitcast float* %A to i8*                ; <i8*> [#uses=1]
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %conv, i8* bitcast ({i32, float, i32 }* @GCst to i8*), i64 12, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %conv, i8* bitcast ({i32, float, i32 }* @GCst to i8*), i64 12, i1 false)
   %arrayidx = getelementptr inbounds float, float* %A, i64 1 ; <float*> [#uses=1]
   %tmp2 = load float, float* %arrayidx                   ; <float> [#uses=1]
   ret float %tmp2
@@ -214,7 +214,7 @@
 define float @memcpy_to_float_local_as1(float* %A) nounwind ssp {
 entry:
   %conv = bitcast float* %A to i8*                ; <i8*> [#uses=1]
-  tail call void @llvm.memcpy.p0i8.p1i8.i64(i8* %conv, i8 addrspace(1)* bitcast ({i32, float, i32 } addrspace(1)* @GCst_as1 to i8 addrspace(1)*), i64 12, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p1i8.i64(i8* %conv, i8 addrspace(1)* bitcast ({i32, float, i32 } addrspace(1)* @GCst_as1 to i8 addrspace(1)*), i64 12, i1 false)
   %arrayidx = getelementptr inbounds float, float* %A, i64 1 ; <float*> [#uses=1]
   %tmp2 = load float, float* %arrayidx                   ; <float> [#uses=1]
   ret float %tmp2
@@ -565,7 +565,7 @@
 entry:
   %x = alloca [256 x i32], align 4                ; <[256 x i32]*> [#uses=2]
   %tmp = bitcast [256 x i32]* %x to i8*           ; <i8*> [#uses=1]
-  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 1024, i32 4, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 1024, i1 false)
   %arraydecay = getelementptr inbounds [256 x i32], [256 x i32]* %x, i32 0, i32 0 ; <i32*>
   %tmp1 = load i32, i32* %arraydecay                   ; <i32> [#uses=1]
   ret i32 %tmp1
@@ -669,10 +669,10 @@
 ; CHECK: ret i32
 }
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
-declare void @llvm.memcpy.p0i8.p1i8.i64(i8* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p1i8.i64(i8* nocapture, i8 addrspace(1)* nocapture, i64, i1) nounwind
 
 
 ;;===----------------------------------------------------------------------===;;
diff --git a/llvm/test/Transforms/GlobalOpt/crash.ll b/llvm/test/Transforms/GlobalOpt/crash.ll
index 8e39931..883b72e 100644
--- a/llvm/test/Transforms/GlobalOpt/crash.ll
+++ b/llvm/test/Transforms/GlobalOpt/crash.ll
@@ -60,10 +60,10 @@
 
 @data8 = internal global [8000 x i8] zeroinitializer, align 16
 define void @memset_with_strange_user() ssp {
-  call void @llvm.memset.p0i8.i64(i8* getelementptr inbounds ([8000 x i8], [8000 x i8]* @data8, i64 0, i64 0), i8 undef, i64 ptrtoint (i8* getelementptr ([8000 x i8], [8000 x i8]* @data8, i64 1, i64 sub (i64 0, i64 ptrtoint ([8000 x i8]* @data8 to i64))) to i64), i32 16, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* getelementptr inbounds ([8000 x i8], [8000 x i8]* @data8, i64 0, i64 0), i8 undef, i64 ptrtoint (i8* getelementptr ([8000 x i8], [8000 x i8]* @data8, i64 1, i64 sub (i64 0, i64 ptrtoint ([8000 x i8]* @data8 to i64))) to i64), i1 false)
   ret void
 }
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
 
 
 ; PR9856
diff --git a/llvm/test/Transforms/GlobalOpt/memcpy.ll b/llvm/test/Transforms/GlobalOpt/memcpy.ll
index 437142e..b2b192b 100644
--- a/llvm/test/Transforms/GlobalOpt/memcpy.ll
+++ b/llvm/test/Transforms/GlobalOpt/memcpy.ll
@@ -6,8 +6,8 @@
 define void @foo() {
   %Blah = alloca [58 x i8]
   %tmp.0 = getelementptr [58 x i8], [58 x i8]* %Blah, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp.0, i8* getelementptr inbounds ([58 x i8], [58 x i8]* @G1, i32 0, i32 0), i32 58, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp.0, i8* getelementptr inbounds ([58 x i8], [58 x i8]* @G1, i32 0, i32 0), i32 58, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
diff --git a/llvm/test/Transforms/GlobalOpt/memset-null.ll b/llvm/test/Transforms/GlobalOpt/memset-null.ll
index 838ac09..1a1f550 100644
--- a/llvm/test/Transforms/GlobalOpt/memset-null.ll
+++ b/llvm/test/Transforms/GlobalOpt/memset-null.ll
@@ -8,12 +8,12 @@
 @a = global %struct.A zeroinitializer, align 4
 @llvm.global_ctors = appending global [2 x %0] [%0 { i32 65535, void ()* @_GLOBAL__I_a }, %0 { i32 65535, void ()* @_GLOBAL__I_b }]
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
 
 ; CHECK-NOT: GLOBAL__I_a
 define internal void @_GLOBAL__I_a() nounwind {
 entry:
-  tail call void @llvm.memset.p0i8.i64(i8* bitcast (%struct.A* @a to i8*), i8 0, i64 400, i32 4, i1 false) nounwind
+  tail call void @llvm.memset.p0i8.i64(i8* bitcast (%struct.A* @a to i8*), i8 0, i64 400, i1 false) nounwind
   ret void
 }
 
@@ -24,6 +24,6 @@
 define internal void @_GLOBAL__I_b() nounwind {
 entry:
   %tmp.i.i.i = load i8*, i8** @y, align 8
-  tail call void @llvm.memset.p0i8.i64(i8* %tmp.i.i.i, i8 0, i64 10, i32 1, i1 false) nounwind
+  tail call void @llvm.memset.p0i8.i64(i8* %tmp.i.i.i, i8 0, i64 10, i1 false) nounwind
   ret void
 }
diff --git a/llvm/test/Transforms/GlobalOpt/memset.ll b/llvm/test/Transforms/GlobalOpt/memset.ll
index 1dfdd64..02bef2c 100644
--- a/llvm/test/Transforms/GlobalOpt/memset.ll
+++ b/llvm/test/Transforms/GlobalOpt/memset.ll
@@ -10,8 +10,8 @@
 define void @foo() {
   %Blah = alloca [58 x i8]
   %tmp3 = bitcast [58 x i8]* %Blah to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([4 x i32]* @G1 to i8*), i8* %tmp3, i32 16, i32 1, i1 false)
-  call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([58 x i8], [58 x i8]* @G0, i32 0, i32 0), i8 17, i32 58, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([4 x i32]* @G1 to i8*), i8* %tmp3, i32 16, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([58 x i8], [58 x i8]* @G0, i32 0, i32 0), i8 17, i32 58, i1 false)
   ret void
 }
 
@@ -21,11 +21,11 @@
 define void @foo_as1() {
   %Blah = alloca [58 x i8]
   %tmp3 = bitcast [58 x i8]* %Blah to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* addrspacecast ([4 x i32] addrspace(1)* @G1_as1 to i8*), i8* %tmp3, i32 16, i32 1, i1 false)
-  call void @llvm.memset.p1i8.i32(i8 addrspace(1)* getelementptr inbounds ([58 x i8], [58 x i8] addrspace(1)* @G0_as1, i32 0, i32 0), i8 17, i32 58, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* addrspacecast ([4 x i32] addrspace(1)* @G1_as1 to i8*), i8* %tmp3, i32 16, i1 false)
+  call void @llvm.memset.p1i8.i32(i8 addrspace(1)* getelementptr inbounds ([58 x i8], [58 x i8] addrspace(1)* @G0_as1, i32 0, i32 0), i8 17, i32 58, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
-declare void @llvm.memset.p1i8.i32(i8 addrspace(1)* nocapture, i8, i32, i32, i1) nounwind
\ No newline at end of file
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
+declare void @llvm.memset.p1i8.i32(i8 addrspace(1)* nocapture, i8, i32, i1) nounwind
\ No newline at end of file
diff --git a/llvm/test/Transforms/Inline/alloca-dbgdeclare.ll b/llvm/test/Transforms/Inline/alloca-dbgdeclare.ll
index 3957531..67b5789 100644
--- a/llvm/test/Transforms/Inline/alloca-dbgdeclare.ll
+++ b/llvm/test/Transforms/Inline/alloca-dbgdeclare.ll
@@ -31,7 +31,7 @@
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 ; Function Attrs: nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #2
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #2
 
 ; Function Attrs: nounwind
 define void @_Z3fn4v() #0 !dbg !21 {
@@ -46,14 +46,14 @@
   %agg.tmp.sroa.0.0.copyload = load i32, i32* getelementptr inbounds (%struct.A, %struct.A* @b, i64 0, i32 0), align 8, !dbg !50
   tail call void @llvm.dbg.value(metadata i32 %agg.tmp.sroa.0.0.copyload, i64 0, metadata !46, metadata !51), !dbg !49
   %agg.tmp.sroa.3.0..sroa_idx = getelementptr inbounds [20 x i8], [20 x i8]* %agg.tmp.sroa.3, i64 0, i64 0, !dbg !50
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %agg.tmp.sroa.3.0..sroa_idx, i8* getelementptr (i8, i8* bitcast (%struct.A* @b to i8*), i64 4), i64 20, i32 4, i1 false), !dbg !50
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %agg.tmp.sroa.3.0..sroa_idx, i8* getelementptr (i8, i8* bitcast (%struct.A* @b to i8*), i64 4), i64 20, i1 false), !dbg !50
   tail call void @llvm.dbg.declare(metadata %struct.A* undef, metadata !46, metadata !31) #2, !dbg !49
   %tobool.i = icmp eq i32 %agg.tmp.sroa.0.0.copyload, 0, !dbg !52
   br i1 %tobool.i, label %_Z3fn31A.exit, label %if.then.i, !dbg !53
 
 if.then.i:                                        ; preds = %entry
   store i32 %agg.tmp.sroa.0.0.copyload, i32* getelementptr inbounds (%struct.A, %struct.A* @a, i64 0, i32 0), align 8, !dbg !54
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr (i8, i8* bitcast (%struct.A* @a to i8*), i64 4), i8* %agg.tmp.sroa.3.0..sroa_idx, i64 20, i32 4, i1 false), !dbg !54
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr (i8, i8* bitcast (%struct.A* @a to i8*), i64 4), i8* %agg.tmp.sroa.3.0..sroa_idx, i64 20, i1 false), !dbg !54
   br label %_Z3fn31A.exit, !dbg !54
 
 _Z3fn31A.exit:                                    ; preds = %entry, %if.then.i
diff --git a/llvm/test/Transforms/Inline/inline-invoke-tail.ll b/llvm/test/Transforms/Inline/inline-invoke-tail.ll
index f4b8065..2fa1fe1 100644
--- a/llvm/test/Transforms/Inline/inline-invoke-tail.ll
+++ b/llvm/test/Transforms/Inline/inline-invoke-tail.ll
@@ -4,7 +4,7 @@
 define internal void @foo(i32* %p, i32* %q) {
 	%pp = bitcast i32* %p to i8*
 	%qq = bitcast i32* %q to i8*
-	tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %pp, i8* %qq, i32 4, i32 1, i1 false)
+	tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %pp, i8* %qq, i32 4, i1 false)
 	ret void
 }
 
@@ -30,4 +30,4 @@
 
 declare i32 @__gxx_personality_v0(...)
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
diff --git a/llvm/test/Transforms/Inline/inline-vla.ll b/llvm/test/Transforms/Inline/inline-vla.ll
index df21b3f..5d82067 100644
--- a/llvm/test/Transforms/Inline/inline-vla.ll
+++ b/llvm/test/Transforms/Inline/inline-vla.ll
@@ -21,13 +21,13 @@
 entry:
   %vla = alloca i64, i64 %size, align 16
   %0 = bitcast i64* %vla to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %src, i64 %size, i32 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %0, i64 %size, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %src, i64 %size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %0, i64 %size, i1 false)
   ret void
 }
 
 ; Function Attrs: nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #2
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #2
 
 attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { inlinehint nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/llvm/test/Transforms/Inline/noalias-calls.ll b/llvm/test/Transforms/Inline/noalias-calls.ll
index 56d5c6d..23d545d 100644
--- a/llvm/test/Transforms/Inline/noalias-calls.ll
+++ b/llvm/test/Transforms/Inline/noalias-calls.ll
@@ -2,17 +2,17 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #0
 declare void @hey() #0
 
 define void @hello(i8* noalias nocapture %a, i8* noalias nocapture readonly %c, i8* nocapture %b) #1 {
 entry:
   %l = alloca i8, i32 512, align 1
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 16, i32 16, i1 0)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %c, i64 16, i32 16, i1 0)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %c, i64 16, i32 16, i1 0)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 16, i1 0)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %c, i64 16, i1 0)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %c, i64 16, i1 0)
   call void @hey()
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l, i8* %c, i64 16, i32 16, i1 0)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l, i8* %c, i64 16, i1 0)
   ret void
 }
 
@@ -24,11 +24,11 @@
 
 ; CHECK: define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #2 {
 ; CHECK: entry:
-; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 16, i32 16, i1 false) #1, !noalias !0
-; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %c, i64 16, i32 16, i1 false) #1, !noalias !3
-; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %c, i64 16, i32 16, i1 false) #1, !alias.scope !5
+; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 16, i1 false) #1, !noalias !0
+; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %c, i64 16, i1 false) #1, !noalias !3
+; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %c, i64 16, i1 false) #1, !alias.scope !5
 ; CHECK:   call void @hey() #1, !noalias !5
-; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* %c, i64 16, i32 16, i1 false) #1, !noalias !3
+; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* %c, i64 16, i1 false) #1, !noalias !3
 ; CHECK:   ret void
 ; CHECK: }
 
diff --git a/llvm/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll b/llvm/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
index 67a94e5..427d0e3 100644
--- a/llvm/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
+++ b/llvm/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
@@ -10,11 +10,11 @@
   store i8* %P, i8** %P_addr
   %tmp = load i8*, i8** %P_addr, align 4
   %tmp1 = getelementptr [4 x i8], [4 x i8]* @.str, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* %tmp1, i32 4, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* %tmp1, i32 4, i1 false)
   br label %return
 
 return:                                           ; preds = %entry
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
diff --git a/llvm/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll b/llvm/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
index 0f8b38c..c976ce0 100644
--- a/llvm/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
+++ b/llvm/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
@@ -64,7 +64,7 @@
   %val.i = load i32*, i32** %elt.i
   %tmp.i = bitcast %"struct.std::bidirectional_iterator_tag"* %unnamed_arg.i to i8*
   %tmp9.i = bitcast %"struct.std::bidirectional_iterator_tag"* %2 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp.i, i8* %tmp9.i, i64 1, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp.i, i8* %tmp9.i, i64 1, i1 false)
   %26 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
   store i32* %val.i, i32** %26
   %27 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0
@@ -276,4 +276,4 @@
   ret i32* %tmp14
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
diff --git a/llvm/test/Transforms/InstCombine/addrspacecast.ll b/llvm/test/Transforms/InstCombine/addrspacecast.ll
index 27f6b72..e3423fd 100644
--- a/llvm/test/Transforms/InstCombine/addrspacecast.ll
+++ b/llvm/test/Transforms/InstCombine/addrspacecast.ll
@@ -3,9 +3,9 @@
 target datalayout = "e-p:64:64:64-p1:32:32:32-p2:16:16:16-n8:16:32:64"
 
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) nounwind
-declare void @llvm.memcpy.p0i8.p1i8.i32(i8*, i8 addrspace(1)*, i32, i32, i1) nounwind
-declare void @llvm.memcpy.p0i8.p2i8.i32(i8*, i8 addrspace(2)*, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p1i8.i32(i8*, i8 addrspace(1)*, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p2i8.i32(i8*, i8 addrspace(2)*, i32, i1) nounwind
 
 
 define i32* @combine_redundant_addrspacecast(i32 addrspace(1)* %x) nounwind {
@@ -127,7 +127,7 @@
 define i32 @memcpy_addrspacecast() nounwind {
 entry:
   %alloca = alloca i8, i32 48
-  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %alloca, i8 addrspace(1)* addrspacecast (i8 addrspace(2)* getelementptr inbounds ([60 x i8], [60 x i8] addrspace(2)* @const_array, i16 0, i16 4) to i8 addrspace(1)*), i32 48, i32 4, i1 false) nounwind
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %alloca, i8 addrspace(1)* addrspacecast (i8 addrspace(2)* getelementptr inbounds ([60 x i8], [60 x i8] addrspace(2)* @const_array, i16 0, i16 4) to i8 addrspace(1)*), i32 48, i1 false) nounwind
   br label %loop.body
 
 loop.body:
diff --git a/llvm/test/Transforms/InstCombine/align-addr.ll b/llvm/test/Transforms/InstCombine/align-addr.ll
index ab0ae39..d92dadd 100644
--- a/llvm/test/Transforms/InstCombine/align-addr.ll
+++ b/llvm/test/Transforms/InstCombine/align-addr.ll
@@ -80,7 +80,7 @@
   ret double %t
 }
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
 
 declare void @use(i8*)
 
@@ -90,8 +90,8 @@
 ; Check that the alignment is bumped up the alignment of the sret type.
 ; CHECK-LABEL: @test3(
   %a4.cast = bitcast %struct.s* %a4 to i8*
-  call void @llvm.memset.p0i8.i64(i8* %a4.cast, i8 0, i64 16, i32 1, i1 false)
-; CHECK: call void @llvm.memset.p0i8.i64(i8* %a4.cast, i8 0, i64 16, i32 4, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %a4.cast, i8 0, i64 16, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %a4.cast, i8 0, i64 16, i1 false)
   call void @use(i8* %a4.cast)
   ret void
 }
diff --git a/llvm/test/Transforms/InstCombine/alloca.ll b/llvm/test/Transforms/InstCombine/alloca.ll
index b61b75e..9975d1d 100644
--- a/llvm/test/Transforms/InstCombine/alloca.ll
+++ b/llvm/test/Transforms/InstCombine/alloca.ll
@@ -105,11 +105,11 @@
 entry:
   %0 = alloca %real_type, align 4
   %1 = bitcast %real_type* %0 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* bitcast (%opaque_type* @opaque_global to i8*), i32 8, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* bitcast (%opaque_type* @opaque_global to i8*), i32 8, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
 
 
 ; Check that the GEP indices use the pointer size, or 64 if unknown
@@ -145,7 +145,7 @@
   %0 = getelementptr inbounds <{ %struct_type }>, <{ %struct_type }>* %argmem, i32 0, i32 0
   %1 = bitcast %struct_type* %0 to i8*
   %2 = bitcast %struct_type* %a to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %2, i32 8, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %2, i32 8, i1 false)
   call void @test9_aux(<{ %struct_type }>* inalloca %argmem)
   call void @llvm.stackrestore(i8* %inalloca.save)
   ret void
diff --git a/llvm/test/Transforms/InstCombine/call-intrinsics.ll b/llvm/test/Transforms/InstCombine/call-intrinsics.ll
index 3e37a71..b9bbcb7 100644
--- a/llvm/test/Transforms/InstCombine/call-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/call-intrinsics.ll
@@ -3,17 +3,17 @@
 @X = global i8 0                ; <i8*> [#uses=3]
 @Y = global i8 12               ; <i8*> [#uses=2]
 
-declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i1)
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
 
-declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1)
+declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1)
 
 define void @zero_byte_test() {
         ; These process zero bytes, so they are a noop.
-        call void @llvm.memmove.p0i8.p0i8.i32( i8* @X, i8* @Y, i32 0, i32 128, i1 false )
-        call void @llvm.memcpy.p0i8.p0i8.i32( i8* @X, i8* @Y, i32 0, i32 128, i1 false )
-        call void @llvm.memset.p0i8.i32( i8* @X, i8 123, i32 0, i32 128, i1 false )
+        call void @llvm.memmove.p0i8.p0i8.i32( i8* @X, i8* @Y, i32 0, i1 false )
+        call void @llvm.memcpy.p0i8.p0i8.i32( i8* @X, i8* @Y, i32 0, i1 false )
+        call void @llvm.memset.p0i8.i32( i8* @X, i8 123, i32 0, i1 false )
         ret void
 }
 
diff --git a/llvm/test/Transforms/InstCombine/malloc-free-delete.ll b/llvm/test/Transforms/InstCombine/malloc-free-delete.ll
index 138001a..4720fe1 100644
--- a/llvm/test/Transforms/InstCombine/malloc-free-delete.ll
+++ b/llvm/test/Transforms/InstCombine/malloc-free-delete.ll
@@ -27,9 +27,9 @@
 declare void @llvm.lifetime.start(i64, i8*)
 declare void @llvm.lifetime.end(i64, i8*)
 declare i64 @llvm.objectsize.i64(i8*, i1)
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
-declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
-declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1) nounwind
 
 define void @test3(i8* %src) {
 ; CHECK-LABEL: @test3(
@@ -39,9 +39,9 @@
   call void @llvm.lifetime.end(i64 10, i8* %a)
   %size = call i64 @llvm.objectsize.i64(i8* %a, i1 true)
   store i8 42, i8* %a
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %src, i32 32, i32 1, i1 false)
-  call void @llvm.memmove.p0i8.p0i8.i32(i8* %a, i8* %src, i32 32, i32 1, i1 false)
-  call void @llvm.memset.p0i8.i32(i8* %a, i8 5, i32 32, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %src, i32 32, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %a, i8* %src, i32 32, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %a, i8 5, i32 32, i1 false)
   %alloc2 = call noalias i8* @calloc(i32 5, i32 7) nounwind
   %z = icmp ne i8* %alloc2, null
   ret void
@@ -82,12 +82,12 @@
   %e = call i8* @malloc(i32 700)
   %f = call i8* @malloc(i32 700)
   %g = call i8* @malloc(i32 700)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %a, i32 32, i32 1, i1 false)
-  call void @llvm.memmove.p0i8.p0i8.i32(i8* %ptr, i8* %b, i32 32, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %a, i32 32, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %ptr, i8* %b, i32 32, i1 false)
   store i8* %c, i8** %esc
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %ptr, i32 32, i32 1, i1 true)
-  call void @llvm.memmove.p0i8.p0i8.i32(i8* %e, i8* %ptr, i32 32, i32 1, i1 true)
-  call void @llvm.memset.p0i8.i32(i8* %f, i8 5, i32 32, i32 1, i1 true)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %ptr, i32 32, i1 true)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %e, i8* %ptr, i32 32, i1 true)
+  call void @llvm.memset.p0i8.i32(i8* %f, i8 5, i32 32, i1 true)
   store volatile i8 4, i8* %g
   ret void
 }
diff --git a/llvm/test/Transforms/InstCombine/memcpy-from-global.ll b/llvm/test/Transforms/InstCombine/memcpy-from-global.ll
index da38087..edf95b7 100644
--- a/llvm/test/Transforms/InstCombine/memcpy-from-global.ll
+++ b/llvm/test/Transforms/InstCombine/memcpy-from-global.ll
@@ -6,7 +6,7 @@
 entry:
 	%lookupTable = alloca [128 x float], align 16		; <[128 x float]*> [#uses=5]
 	%lookupTable1 = bitcast [128 x float]* %lookupTable to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.p0i8.p0i8.i64(i8* %lookupTable1, i8* bitcast ([128 x float]* @C.0.1248 to i8*), i64 512, i32 16, i1 false)
+	call void @llvm.memcpy.p0i8.p0i8.i64(i8* %lookupTable1, i8* bitcast ([128 x float]* @C.0.1248 to i8*), i64 512, i1 false)
 
 ; CHECK-LABEL: @test1(
 ; CHECK-NOT: alloca
@@ -36,10 +36,10 @@
 	ret float %tmp43
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
-declare void @llvm.memcpy.p1i8.p0i8.i64(i8 addrspace(1)* nocapture, i8* nocapture, i64, i32, i1) nounwind
-declare void @llvm.memcpy.p0i8.p1i8.i64(i8* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind
-declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p1i8.p0i8.i64(i8 addrspace(1)* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p1i8.i64(i8* nocapture, i8 addrspace(1)* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i1) nounwind
 
 %T = type { i8, [123 x i8] }
 %U = type { i32, i32, i32, i32, i32 }
@@ -60,9 +60,9 @@
 ; CHECK-NEXT: getelementptr inbounds [124 x i8], [124 x i8]*
 
 ; use @G instead of %A
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* getelementptr inbounds (%T, %T* @G, i64 0, i32 0)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 124, i32 4, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %{{.*}}, i8* align 16 getelementptr inbounds (%T, %T* @G, i64 0, i32 0)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 124, i1 false)
   call void @bar(i8* %b)
   ret void
 }
@@ -83,9 +83,9 @@
 ; CHECK-NEXT: addrspacecast
 
 ; use @G instead of %A
-; CHECK-NEXT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %{{.*}},
-  call void @llvm.memcpy.p1i8.p0i8.i64(i8 addrspace(1)* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
-  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %b, i8 addrspace(1)* %a, i64 124, i32 4, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 8 %{{.*}},
+  call void @llvm.memcpy.p1i8.p0i8.i64(i8 addrspace(1)* %a, i8* bitcast (%T* @G to i8*), i64 124, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %b, i8 addrspace(1)* %a, i64 124, i1 false)
   call void @bar_as1(i8 addrspace(1)* %b)
   ret void
 }
@@ -98,7 +98,7 @@
 define void @test3() {
   %A = alloca %T
   %a = bitcast %T* %A to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i1 false)
   call void @bar(i8* %a) readonly
 ; CHECK-LABEL: @test3(
 ; CHECK-NEXT: call void @bar(i8* getelementptr inbounds (%T, %T* @G, i64 0, i32 0))
@@ -108,7 +108,7 @@
 define void @test3_addrspacecast() {
   %A = alloca %T
   %a = bitcast %T* %A to i8*
-  call void @llvm.memcpy.p0i8.p1i8.i64(i8* %a, i8 addrspace(1)* addrspacecast (%T* @G to i8 addrspace(1)*), i64 124, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p1i8.i64(i8* %a, i8 addrspace(1)* addrspacecast (%T* @G to i8 addrspace(1)*), i64 124, i1 false)
   call void @bar(i8* %a) readonly
 ; CHECK-LABEL: @test3_addrspacecast(
 ; CHECK-NEXT: call void @bar(i8* getelementptr inbounds (%T, %T* @G, i64 0, i32 0))
@@ -119,7 +119,7 @@
 define void @test4() {
   %A = alloca %T
   %a = bitcast %T* %A to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i1 false)
   call void @baz(i8* byval %a)
 ; CHECK-LABEL: @test4(
 ; CHECK-NEXT: call void @baz(i8* byval getelementptr inbounds (%T, %T* @G, i64 0, i32 0))
@@ -131,7 +131,7 @@
   %A = alloca %T
   %a = bitcast %T* %A to i8*
   call void @llvm.lifetime.start(i64 -1, i8* %a)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i1 false)
   call void @baz(i8* byval %a)
 ; CHECK-LABEL: @test5(
 ; CHECK-NEXT: call void @baz(i8* byval getelementptr inbounds (%T, %T* @G, i64 0, i32 0))
@@ -145,7 +145,7 @@
 define void @test6() {
   %A = alloca %U, align 16
   %a = bitcast %U* %A to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast ([2 x %U]* @H to i8*), i64 20, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast ([2 x %U]* @H to i8*), i64 20, i1 false)
   call void @bar(i8* %a) readonly
 ; CHECK-LABEL: @test6(
 ; CHECK-NEXT: call void @bar(i8* bitcast ([2 x %U]* @H to i8*))
@@ -155,7 +155,7 @@
 define void @test7() {
   %A = alloca %U, align 16
   %a = bitcast %U* %A to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U], [2 x %U]* @H, i64 0, i32 0) to i8*), i64 20, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U], [2 x %U]* @H, i64 0, i32 0) to i8*), i64 20, i1 false)
   call void @bar(i8* %a) readonly
 ; CHECK-LABEL: @test7(
 ; CHECK-NEXT: call void @bar(i8* bitcast ([2 x %U]* @H to i8*))
@@ -165,7 +165,7 @@
 define void @test8() {
   %A = alloca %U, align 16
   %a = bitcast %U* %A to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U], [2 x %U]* @H, i64 0, i32 1) to i8*), i64 20, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U], [2 x %U]* @H, i64 0, i32 1) to i8*), i64 20, i1 false)
   call void @bar(i8* %a) readonly
 ; CHECK-LABEL: @test8(
 ; CHECK: llvm.memcpy
@@ -177,7 +177,7 @@
 define void @test8_addrspacecast() {
   %A = alloca %U, align 16
   %a = bitcast %U* %A to i8*
-  call void @llvm.memcpy.p0i8.p1i8.i64(i8* %a, i8 addrspace(1)* addrspacecast (%U* getelementptr ([2 x %U], [2 x %U]* @H, i64 0, i32 1) to i8 addrspace(1)*), i64 20, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p1i8.i64(i8* %a, i8 addrspace(1)* addrspacecast (%U* getelementptr ([2 x %U], [2 x %U]* @H, i64 0, i32 1) to i8 addrspace(1)*), i64 20, i1 false)
   call void @bar(i8* %a) readonly
 ; CHECK-LABEL: @test8_addrspacecast(
 ; CHECK: llvm.memcpy
@@ -188,7 +188,7 @@
 define void @test9() {
   %A = alloca %U, align 4
   %a = bitcast %U* %A to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U], [2 x %U]* @H, i64 0, i32 1) to i8*), i64 20, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U], [2 x %U]* @H, i64 0, i32 1) to i8*), i64 20, i1 false)
   call void @bar(i8* %a) readonly
 ; CHECK-LABEL: @test9(
 ; CHECK-NEXT: call void @bar(i8* bitcast (%U* getelementptr inbounds ([2 x %U], [2 x %U]* @H, i64 0, i64 1) to i8*))
@@ -198,7 +198,7 @@
 define void @test9_addrspacecast() {
   %A = alloca %U, align 4
   %a = bitcast %U* %A to i8*
-  call void @llvm.memcpy.p0i8.p1i8.i64(i8* %a, i8 addrspace(1)* addrspacecast (%U* getelementptr ([2 x %U], [2 x %U]* @H, i64 0, i32 1) to i8 addrspace(1)*), i64 20, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p1i8.i64(i8* %a, i8 addrspace(1)* addrspacecast (%U* getelementptr ([2 x %U], [2 x %U]* @H, i64 0, i32 1) to i8 addrspace(1)*), i64 20, i1 false)
   call void @bar(i8* %a) readonly
 ; CHECK-LABEL: @test9_addrspacecast(
 ; CHECK-NEXT: call void @bar(i8* bitcast (%U* getelementptr inbounds ([2 x %U], [2 x %U]* @H, i64 0, i64 1) to i8*))
diff --git a/llvm/test/Transforms/InstCombine/memcpy-to-load.ll b/llvm/test/Transforms/InstCombine/memcpy-to-load.ll
index bcc9e18..353f362 100644
--- a/llvm/test/Transforms/InstCombine/memcpy-to-load.ll
+++ b/llvm/test/Transforms/InstCombine/memcpy-to-load.ll
@@ -6,8 +6,8 @@
 entry:
   %tmp2 = bitcast double* %X to i8*
   %tmp13 = bitcast double* %Y to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp13, i32 8, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp13, i32 8, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
diff --git a/llvm/test/Transforms/InstCombine/memcpy.ll b/llvm/test/Transforms/InstCombine/memcpy.ll
index f66e14c..9d502e2 100644
--- a/llvm/test/Transforms/InstCombine/memcpy.ll
+++ b/llvm/test/Transforms/InstCombine/memcpy.ll
@@ -1,10 +1,10 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
 
 define void @test1(i8* %a) {
-        tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 100, i32 1, i1 false)
+        tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 100, i1 false)
         ret void
 ; CHECK-LABEL: define void @test1(
 ; CHECK-NEXT: ret void
@@ -13,14 +13,14 @@
 
 ; PR8267
 define void @test2(i8* %a) {
-        tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 100, i32 1, i1 true)
+        tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 100, i1 true)
         ret void
 ; CHECK-LABEL: define void @test2(
 ; CHECK-NEXT: call void @llvm.memcpy
 }
 
 define void @test3(i8* %d, i8* %s) {
-        tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 17179869184, i32 4, i1 false)
+        tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 17179869184, i1 false)
         ret void
 ; CHECK-LABEL: define void @test3(
 ; CHECK-NEXT: call void @llvm.memcpy
diff --git a/llvm/test/Transforms/InstCombine/memcpy_chk-1.ll b/llvm/test/Transforms/InstCombine/memcpy_chk-1.ll
index ddaaf82..a372ef2 100644
--- a/llvm/test/Transforms/InstCombine/memcpy_chk-1.ll
+++ b/llvm/test/Transforms/InstCombine/memcpy_chk-1.ll
@@ -20,7 +20,7 @@
   %dst = bitcast %struct.T1* @t1 to i8*
   %src = bitcast %struct.T2* @t2 to i8*
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.T1* @t1 to i8*), i8* bitcast (%struct.T2* @t2 to i8*), i64 1824, i32 4, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 bitcast (%struct.T1* @t1 to i8*), i8* align 4 bitcast (%struct.T2* @t2 to i8*), i64 1824, i1 false)
 ; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*)
   %ret = call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 1824)
   ret i8* %ret
@@ -31,7 +31,7 @@
   %dst = bitcast %struct.T1* @t1 to i8*
   %src = bitcast %struct.T3* @t3 to i8*
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.T1* @t1 to i8*), i8* bitcast (%struct.T3* @t3 to i8*), i64 1824, i32 4, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 bitcast (%struct.T1* @t1 to i8*), i8* align 4 bitcast (%struct.T3* @t3 to i8*), i64 1824, i1 false)
 ; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*)
   %ret = call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 2848)
   ret i8* %ret
diff --git a/llvm/test/Transforms/InstCombine/memmove.ll b/llvm/test/Transforms/InstCombine/memmove.ll
index 96f230e..7f43405 100644
--- a/llvm/test/Transforms/InstCombine/memmove.ll
+++ b/llvm/test/Transforms/InstCombine/memmove.ll
@@ -8,13 +8,13 @@
 @hello_u = constant [8 x i8] c"hello_u\00"		; <[8 x i8]*> [#uses=1]
 
 define void @test1(i8* %A, i8* %B, i32 %N) {
-	call void @llvm.memmove.p0i8.p0i8.i32(i8* %A, i8* %B, i32 0, i32 1, i1 false)
+	call void @llvm.memmove.p0i8.p0i8.i32(i8* %A, i8* %B, i32 0, i1 false)
 	ret void
 }
 
 define void @test2(i8* %A, i32 %N) {
         ;; dest can't alias source since we can't write to source!
-	call void @llvm.memmove.p0i8.p0i8.i32(i8* %A, i8* getelementptr inbounds ([33 x i8], [33 x i8]* @S, i32 0, i32 0), i32 %N, i32 1, i1 false)
+	call void @llvm.memmove.p0i8.p0i8.i32(i8* %A, i8* getelementptr inbounds ([33 x i8], [33 x i8]* @S, i32 0, i32 0), i32 %N, i1 false)
 	ret void
 }
 
@@ -24,16 +24,16 @@
 	%hello_u_p = getelementptr [8 x i8], [8 x i8]* @hello_u, i32 0, i32 0		; <i8*> [#uses=1]
 	%target = alloca [1024 x i8]		; <[1024 x i8]*> [#uses=1]
 	%target_p = getelementptr [1024 x i8], [1024 x i8]* %target, i32 0, i32 0		; <i8*> [#uses=3]
-        call void @llvm.memmove.p0i8.p0i8.i32(i8* %target_p, i8* %h_p, i32 2, i32 2, i1 false)
-        call void @llvm.memmove.p0i8.p0i8.i32(i8* %target_p, i8* %hel_p, i32 4, i32 4, i1 false)
-        call void @llvm.memmove.p0i8.p0i8.i32(i8* %target_p, i8* %hello_u_p, i32 8, i32 8, i1 false)
+        call void @llvm.memmove.p0i8.p0i8.i32(i8* %target_p, i8* %h_p, i32 2, i1 false)
+        call void @llvm.memmove.p0i8.p0i8.i32(i8* %target_p, i8* %hel_p, i32 4, i1 false)
+        call void @llvm.memmove.p0i8.p0i8.i32(i8* %target_p, i8* %hello_u_p, i32 8, i1 false)
 	ret i32 0
 }
 
 ; PR2370
 define void @test4(i8* %a) {
-  tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %a, i8* %a, i32 100, i32 1, i1 false)
+  tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %a, i8* %a, i32 100, i1 false)
   ret void
 }
 
-declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
diff --git a/llvm/test/Transforms/InstCombine/memmove_chk-1.ll b/llvm/test/Transforms/InstCombine/memmove_chk-1.ll
index e4e1f6e..f006985 100644
--- a/llvm/test/Transforms/InstCombine/memmove_chk-1.ll
+++ b/llvm/test/Transforms/InstCombine/memmove_chk-1.ll
@@ -20,7 +20,7 @@
   %dst = bitcast %struct.T1* @t1 to i8*
   %src = bitcast %struct.T2* @t2 to i8*
 
-; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* bitcast (%struct.T1* @t1 to i8*), i8* bitcast (%struct.T2* @t2 to i8*), i64 1824, i32 4, i1 false)
+; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 4 bitcast (%struct.T1* @t1 to i8*), i8* align 4 bitcast (%struct.T2* @t2 to i8*), i64 1824, i1 false)
 ; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*)
   %ret = call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 1824)
   ret i8* %ret
@@ -31,7 +31,7 @@
   %dst = bitcast %struct.T1* @t1 to i8*
   %src = bitcast %struct.T3* @t3 to i8*
 
-; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* bitcast (%struct.T1* @t1 to i8*), i8* bitcast (%struct.T3* @t3 to i8*), i64 1824, i32 4, i1 false)
+; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 4 bitcast (%struct.T1* @t1 to i8*), i8* align 4 bitcast (%struct.T3* @t3 to i8*), i64 1824, i1 false)
 ; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*)
   %ret = call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 2848)
   ret i8* %ret
diff --git a/llvm/test/Transforms/InstCombine/memset.ll b/llvm/test/Transforms/InstCombine/memset.ll
index dfafcf9..3f34465 100644
--- a/llvm/test/Transforms/InstCombine/memset.ll
+++ b/llvm/test/Transforms/InstCombine/memset.ll
@@ -3,12 +3,12 @@
 define i32 @main() {
   %target = alloca [1024 x i8]
   %target_p = getelementptr [1024 x i8], [1024 x i8]* %target, i32 0, i32 0
-  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 0, i32 1, i1 false)
-  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 1, i32 1, i1 false)
-  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 2, i32 2, i1 false)
-  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 4, i32 4, i1 false)
-  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 8, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 0, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 2, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 4, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 8, i1 false)
   ret i32 0
 }
 
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
diff --git a/llvm/test/Transforms/InstCombine/memset2.ll b/llvm/test/Transforms/InstCombine/memset2.ll
index ad4c225..5999c5a5 100644
--- a/llvm/test/Transforms/InstCombine/memset2.ll
+++ b/llvm/test/Transforms/InstCombine/memset2.ll
@@ -8,8 +8,8 @@
 entry:
 ; CHECK: bitcast i8 addrspace(1)* %gep to i64 addrspace(1)*
 	%gep = getelementptr inbounds %struct.Moves, %struct.Moves addrspace(1)* %moves, i32 1, i32 0, i32 9
-	 call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %gep, i8 0, i64 8, i32 1, i1 false)                                                                     
+	 call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %gep, i8 0, i64 8, i1 false)                                                                     
 	ret i32 0
 }
 
-declare void @llvm.memset.p1i8.i64(i8addrspace(1)* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memset.p1i8.i64(i8addrspace(1)* nocapture, i8, i64, i1) nounwind
diff --git a/llvm/test/Transforms/InstCombine/memset_chk-1.ll b/llvm/test/Transforms/InstCombine/memset_chk-1.ll
index 56ea14c..e0d7376 100644
--- a/llvm/test/Transforms/InstCombine/memset_chk-1.ll
+++ b/llvm/test/Transforms/InstCombine/memset_chk-1.ll
@@ -15,7 +15,7 @@
 ; CHECK-LABEL: @test_simplify1(
   %dst = bitcast %struct.T* @t to i8*
 
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i32 4, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i1 false)
 ; CHECK-NEXT: ret i8* bitcast (%struct.T* @t to i8*)
   %ret = call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 1824)
   ret i8* %ret
@@ -25,7 +25,7 @@
 ; CHECK-LABEL: @test_simplify2(
   %dst = bitcast %struct.T* @t to i8*
 
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i32 4, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i1 false)
 ; CHECK-NEXT: ret i8* bitcast (%struct.T* @t to i8*)
   %ret = call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 3648)
   ret i8* %ret
@@ -35,7 +35,7 @@
 ; CHECK-LABEL: @test_simplify3(
   %dst = bitcast %struct.T* @t to i8*
 
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i32 4, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i1 false)
 ; CHECK-NEXT: ret i8* bitcast (%struct.T* @t to i8*)
   %ret = call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 -1)
   ret i8* %ret
@@ -79,7 +79,7 @@
   %sub183 = ptrtoint i8* %b to i64
   %sub184 = sub i64 %sub182, %sub183
   %add52.i.i = add nsw i64 %sub184, 1
-; CHECK: call void @llvm.memset.p0i8.i64(i8* %strchr2
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %strchr2
   %call185 = call i8* @__memset_chk(i8* %call51i, i32 0, i64 %add52.i.i, i64 -1)
   ret i32 4
 }
diff --git a/llvm/test/Transforms/InstCombine/objsize.ll b/llvm/test/Transforms/InstCombine/objsize.ll
index 2af391f..14ea2ed 100644
--- a/llvm/test/Transforms/InstCombine/objsize.ll
+++ b/llvm/test/Transforms/InstCombine/objsize.ll
@@ -112,7 +112,7 @@
   %1 = bitcast %struct.data* %0 to i8*
   %2 = call i32 @llvm.objectsize.i32.p0i8(i8* %1, i1 false) nounwind
 ; CHECK-NOT: @llvm.objectsize
-; CHECK: @llvm.memset.p0i8.i32(i8* %1, i8 0, i32 1824, i32 8, i1 false)
+; CHECK: @llvm.memset.p0i8.i32(i8* align 8 %1, i8 0, i32 1824, i1 false)
   %3 = call i8* @__memset_chk(i8* %1, i32 0, i32 1824, i32 %2) nounwind
   store i8* %1, i8** %esc
   ret i32 0
@@ -128,7 +128,7 @@
   %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %0, i1 false)
   %2 = load i8*, i8** @s, align 8
 ; CHECK-NOT: @llvm.objectsize
-; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 10, i32 1, i1 false)
+; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %0, i8* align 1 %1, i32 10, i1 false)
   %3 = tail call i8* @__memcpy_chk(i8* %0, i8* %2, i32 10, i32 %1) nounwind
   ret i8* %0
 }
diff --git a/llvm/test/Transforms/InstCombine/simplify-libcalls.ll b/llvm/test/Transforms/InstCombine/simplify-libcalls.ll
index 7cc6710..9d0d8e2 100644
--- a/llvm/test/Transforms/InstCombine/simplify-libcalls.ll
+++ b/llvm/test/Transforms/InstCombine/simplify-libcalls.ll
@@ -120,9 +120,9 @@
   %hello_u_p = getelementptr [8 x i8], [8 x i8]* @hello_u, i32 0, i32 0
   %target = alloca [1024 x i8]
   %target_p = getelementptr [1024 x i8], [1024 x i8]* %target, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %target_p, i8* %h_p, i32 2, i32 2, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %target_p, i8* %hel_p, i32 4, i32 4, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %target_p, i8* %hello_u_p, i32 8, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %target_p, i8* %h_p, i32 2, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %target_p, i8* %hel_p, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %target_p, i8* %hello_u_p, i32 8, i1 false)
   ret i32 0
 
 ; CHECK-LABEL: @MemCpy(
@@ -130,7 +130,7 @@
 ; CHECK: ret i32 0
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
 
 declare i32 @strcmp(i8*, i8*) #0
 
diff --git a/llvm/test/Transforms/InstCombine/sprintf-1.ll b/llvm/test/Transforms/InstCombine/sprintf-1.ll
index ddf2f2f..1fbdc43 100644
--- a/llvm/test/Transforms/InstCombine/sprintf-1.ll
+++ b/llvm/test/Transforms/InstCombine/sprintf-1.ll
@@ -22,7 +22,7 @@
 ; CHECK-LABEL: @test_simplify1(
   %fmt = getelementptr [13 x i8], [13 x i8]* @hello_world, i32 0, i32 0
   call i32 (i8*, i8*, ...) @sprintf(i8* %dst, i8* %fmt)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @hello_world, i32 0, i32 0), i32 13, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 getelementptr inbounds ([13 x i8], [13 x i8]* @hello_world, i32 0, i32 0), i32 13, i1 false)
   ret void
 ; CHECK-NEXT: ret void
 }
@@ -66,7 +66,7 @@
   call i32 (i8*, i8*, ...) @sprintf(i8* %dst, i8* %fmt, i8* %str)
 ; CHECK-NEXT: [[STRLEN:%[a-z0-9]+]] = call i32 @strlen(i8* %str)
 ; CHECK-NEXT: [[LENINC:%[a-z0-9]+]] = add i32 [[STRLEN]], 1
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %str, i32 [[LENINC]], i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 %str, i32 [[LENINC]], i1 false)
   ret void
 ; CHECK-NEXT: ret void
 }
diff --git a/llvm/test/Transforms/InstCombine/stack-overalign.ll b/llvm/test/Transforms/InstCombine/stack-overalign.ll
index a8f086e..c7d9d51 100644
--- a/llvm/test/Transforms/InstCombine/stack-overalign.ll
+++ b/llvm/test/Transforms/InstCombine/stack-overalign.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep "align 32" | count 1
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
 ; It's tempting to have an instcombine in which the src pointer of a
 ; memcpy is aligned up to the alignment of the destination, however
@@ -15,15 +15,20 @@
 
 @dst = global [1024 x i8] zeroinitializer, align 32
 
+; Make sure the dest gets the align 32 from the global
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 32
+; But that the source continues to not have an alignment set.
+; CHECK: i8* %src1
+
 define void @foo() nounwind {
 entry:
   %src = alloca [1024 x i8], align 1
   %src1 = getelementptr [1024 x i8], [1024 x i8]* %src, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dst, i32 0, i32 0), i8* %src1, i32 1024, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @dst, i32 0, i32 0), i8* %src1, i32 1024, i1 false)
   call void @frob(i8* %src1) nounwind
   ret void
 }
 
 declare void @frob(i8*)
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
diff --git a/llvm/test/Transforms/InstCombine/stpcpy_chk-1.ll b/llvm/test/Transforms/InstCombine/stpcpy_chk-1.ll
index 2fcc34b..c8b6647 100644
--- a/llvm/test/Transforms/InstCombine/stpcpy_chk-1.ll
+++ b/llvm/test/Transforms/InstCombine/stpcpy_chk-1.ll
@@ -16,7 +16,7 @@
   %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
 ; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 11)
   %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 60)
   ret i8* %ret
@@ -27,7 +27,7 @@
   %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
 ; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 11)
   %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 12)
   ret i8* %ret
@@ -38,7 +38,7 @@
   %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
 ; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 11)
   %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 -1)
   ret i8* %ret
diff --git a/llvm/test/Transforms/InstCombine/strcpy_chk-1.ll b/llvm/test/Transforms/InstCombine/strcpy_chk-1.ll
index 7a21a49..23c6998 100644
--- a/llvm/test/Transforms/InstCombine/strcpy_chk-1.ll
+++ b/llvm/test/Transforms/InstCombine/strcpy_chk-1.ll
@@ -16,7 +16,7 @@
   %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
 ; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0)
   %ret = call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 60)
   ret i8* %ret
@@ -27,7 +27,7 @@
   %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
 ; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0)
   %ret = call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 12)
   ret i8* %ret
@@ -38,7 +38,7 @@
   %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
 ; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0)
   %ret = call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1)
   ret i8* %ret
diff --git a/llvm/test/Transforms/InstCombine/strncpy_chk-1.ll b/llvm/test/Transforms/InstCombine/strncpy_chk-1.ll
index 03690b9..bc3ff35 100644
--- a/llvm/test/Transforms/InstCombine/strncpy_chk-1.ll
+++ b/llvm/test/Transforms/InstCombine/strncpy_chk-1.ll
@@ -16,7 +16,7 @@
   %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
 ; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0)
   %ret = call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 12, i32 60)
   ret i8* %ret
@@ -27,7 +27,7 @@
   %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
   %src = getelementptr inbounds [12 x i8], [12 x i8]* @.str, i32 0, i32 0
 
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 12, i1 false)
 ; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0)
   %ret = call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 12, i32 12)
   ret i8* %ret
diff --git a/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll b/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll
index c75a839..c237881 100644
--- a/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll
+++ b/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll
@@ -2,7 +2,7 @@
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
 
 ; Verify that instcombine preserves TBAA tags when converting a memcpy into
 ; a scalar load and store.
@@ -17,7 +17,7 @@
 entry:
   %0 = bitcast %struct.test1* %a to i8*
   %1 = bitcast %struct.test1* %b to i8*
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 4, i32 4, i1 false), !tbaa.struct !3
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 4, i1 false), !tbaa.struct !3
   ret void
 }
 
@@ -29,7 +29,7 @@
 ; CHECK: ret
   %tmp = alloca %struct.test2, align 8
   %tmp1 = bitcast %struct.test2* %tmp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* undef, i64 8, i32 8, i1 false), !tbaa.struct !4
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* undef, i64 8, i1 false), !tbaa.struct !4
   %tmp2 = getelementptr %struct.test2, %struct.test2* %tmp, i32 0, i32 0
   %tmp3 = load i32 (i8*, i32*, double*)**, i32 (i8*, i32*, double*)*** %tmp2
   ret i32 (i8*, i32*, double*)*** %tmp2
diff --git a/llvm/test/Transforms/LoopIdiom/basic-address-space.ll b/llvm/test/Transforms/LoopIdiom/basic-address-space.ll
index 67cabf3..c709b9a 100644
--- a/llvm/test/Transforms/LoopIdiom/basic-address-space.ll
+++ b/llvm/test/Transforms/LoopIdiom/basic-address-space.ll
@@ -7,7 +7,7 @@
 define void @test10(i8 addrspace(2)* %X) nounwind ssp {
 ; CHECK-LABEL: @test10(
 ; CHECK: entry:
-; CHECK-NEXT: call void @llvm.memset.p2i8.i16(i8 addrspace(2)* %X, i8 0, i16 10000, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p2i8.i16(i8 addrspace(2)* align 1 %X, i8 0, i16 10000, i1 false)
 ; CHECK-NOT: store
 ; CHECK: ret void
 
diff --git a/llvm/test/Transforms/LoopIdiom/basic.ll b/llvm/test/Transforms/LoopIdiom/basic.ll
index 27a9551..529a8f9 100644
--- a/llvm/test/Transforms/LoopIdiom/basic.ll
+++ b/llvm/test/Transforms/LoopIdiom/basic.ll
@@ -24,7 +24,7 @@
 for.end:                                          ; preds = %for.body, %entry
   ret void
 ; CHECK-LABEL: @test1(
-; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %Base, i8 0, i64 %Size, i1 false)
 ; CHECK-NOT: store
 }
 
@@ -47,7 +47,7 @@
 for.end:                                          ; preds = %for.body, %entry
   ret void
 ; CHECK-LABEL: @test1a(
-; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %Base, i8 0, i64 %Size, i1 false)
 ; CHECK-NOT: store
 }
 
@@ -70,7 +70,7 @@
 ; CHECK-LABEL: @test2(
 ; CHECK: br i1 %cmp10,
 ; CHECK: %0 = shl i64 %Size, 2
-; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base1, i8 1, i64 %0, i32 4, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %Base1, i8 1, i64 %0, i1 false)
 ; CHECK-NOT: store
 }
 
@@ -119,7 +119,7 @@
 for.end:                                          ; preds = %for.body, %entry
   ret void
 ; CHECK-TODO-LABEL: @test4(
-; CHECK-TODO: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 100, i32 1, i1 false)
+; CHECK-TODO: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 100, i1 false)
 ; CHECK-TODO-NOT: store
 }
 
@@ -166,7 +166,7 @@
 for.end:                                          ; preds = %for.body, %entry
   ret void
 ; CHECK-LABEL: @test6(
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %Dest, i8* %Base, i64 %Size, i32 1, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i1 false)
 ; CHECK-NOT: store
 ; CHECK: ret void
 }
@@ -191,7 +191,7 @@
 for.end:                                          ; preds = %for.body, %entry
   ret void
 ; CHECK-LABEL: @test7(
-; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base, i8 0, i64 %Size, i32 1, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %Base, i8 0, i64 %Size, i1 false)
 ; CHECK-NOT: store
 }
 
@@ -276,7 +276,7 @@
   ret void
 ; CHECK-LABEL: @test10(
 ; CHECK: entry:
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %X, i8 0, i64 10000, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 %X, i8 0, i64 10000, i1 false)
 ; CHECK-NOT: store
 ; CHECK: ret void
 }
@@ -324,7 +324,7 @@
 ; CHECK-LABEL: @test12(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: bitcast
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %P1, i8 0, i64 80000, i32 4, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 %P1, i8 0, i64 80000, i1 false)
 ; CHECK-NOT: store
 ; CHECK: ret void
 }
@@ -441,7 +441,7 @@
 for.cond.cleanup:
   ret void
 ; CHECK-LABEL: @test15(
-; CHECK: call void @llvm.memset.p0i8.i64(i8* %f1, i8 0, i64 262148, i32 4, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %f1, i8 0, i64 262148, i1 false)
 ; CHECK-NOT: store
 ; CHECK: ret void
 }
diff --git a/llvm/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll b/llvm/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
index f73addd..d784d35 100644
--- a/llvm/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
+++ b/llvm/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
@@ -25,12 +25,12 @@
   call fastcc void @initialize(%0* noalias sret %memtmp)
   %tmp1 = bitcast %0* %tmp to i8*
   %memtmp2 = bitcast %0* %memtmp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp1, i8* %memtmp2, i32 24, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp1, i8* %memtmp2, i32 24, i1 false)
   %z3 = bitcast %0* %z to i8*
   %tmp4 = bitcast %0* %tmp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %z3, i8* %tmp4, i32 24, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %z3, i8* %tmp4, i32 24, i1 false)
   %tmp5 = call fastcc x86_fp80 @passed_uninitialized(%0* %z)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
diff --git a/llvm/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll b/llvm/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
index ffbb299..26c221d 100644
--- a/llvm/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
+++ b/llvm/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
@@ -13,10 +13,10 @@
   call void @g(%a* %a_var)
   %a_i8 = bitcast %a* %a_var to i8*
   %b_i8 = bitcast %b* %b_var to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b_i8, i8* %a_i8, i32 4, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b_i8, i8* %a_i8, i32 4, i1 false)
   %tmp1 = getelementptr %b, %b* %b_var, i32 0, i32 0
   %tmp2 = load float, float* %tmp1
   ret float %tmp2
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
diff --git a/llvm/test/Transforms/MemCpyOpt/align.ll b/llvm/test/Transforms/MemCpyOpt/align.ll
index 9074684..550bc5d 100644
--- a/llvm/test/Transforms/MemCpyOpt/align.ll
+++ b/llvm/test/Transforms/MemCpyOpt/align.ll
@@ -1,15 +1,15 @@
 ; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
 
 ; The resulting memset is only 4-byte aligned, despite containing
 ; a 16-byte aligned store in the middle.
 
 define void @foo(i32* %p) {
 ; CHECK-LABEL: @foo(
-; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 16, i32 4, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 16, i1 false)
   %a0 = getelementptr i32, i32* %p, i64 0
   store i32 0, i32* %a0, align 4
   %a1 = getelementptr i32, i32* %p, i64 1
@@ -31,7 +31,7 @@
   %a8 = alloca i32, align 8
   %a8.cast = bitcast i32* %a8 to i8*
   %a4.cast = bitcast i32* %a4 to i8*
-  call void @llvm.memset.p0i8.i64(i8* %a8.cast, i8 0, i64 4, i32 8, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a4.cast, i8* %a8.cast, i64 4, i32 4, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %a8.cast, i8 0, i64 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a4.cast, i8* %a8.cast, i64 4, i1 false)
   ret void
 }
diff --git a/llvm/test/Transforms/MemCpyOpt/atomic.ll b/llvm/test/Transforms/MemCpyOpt/atomic.ll
index 5be6b15..38a0e12 100644
--- a/llvm/test/Transforms/MemCpyOpt/atomic.ll
+++ b/llvm/test/Transforms/MemCpyOpt/atomic.ll
@@ -7,7 +7,7 @@
 
 declare void @otherf(i32*)
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
 
 ; memcpyopt should not touch atomic ops
 define void @test1() nounwind uwtable ssp {
@@ -15,7 +15,7 @@
 ; CHECK: store atomic
   %x = alloca [101 x i32], align 16
   %bc = bitcast [101 x i32]* %x to i8*
-  call void @llvm.memset.p0i8.i64(i8* %bc, i8 0, i64 400, i32 16, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %bc, i8 0, i64 400, i1 false)
   %gep1 = getelementptr inbounds [101 x i32], [101 x i32]* %x, i32 0, i32 100
   store atomic i32 0, i32* %gep1 unordered, align 4
   %gep2 = getelementptr inbounds [101 x i32], [101 x i32]* %x, i32 0, i32 0
diff --git a/llvm/test/Transforms/MemCpyOpt/callslot_aa.ll b/llvm/test/Transforms/MemCpyOpt/callslot_aa.ll
index b6ea129..9a376d6 100644
--- a/llvm/test/Transforms/MemCpyOpt/callslot_aa.ll
+++ b/llvm/test/Transforms/MemCpyOpt/callslot_aa.ll
@@ -6,14 +6,14 @@
 define void @test(i8* %src) {
   %tmp = alloca i8
   %dst = alloca i8
-; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 1, i32 8, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %src, i64 1, i32 8, i1 false), !noalias !2
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %tmp, i64 1, i32 8, i1 false)
+; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %src, i64 1, i1 false), !noalias !2
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %tmp, i64 1, i1 false)
 
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
 
 ; Check that the noalias for "dst" was removed by checking that the metadata is gone
 ; CHECK-NOT: "dst"
diff --git a/llvm/test/Transforms/MemCpyOpt/callslot_deref.ll b/llvm/test/Transforms/MemCpyOpt/callslot_deref.ll
index 4c3e3e8..a1ba2ba 100644
--- a/llvm/test/Transforms/MemCpyOpt/callslot_deref.ll
+++ b/llvm/test/Transforms/MemCpyOpt/callslot_deref.ll
@@ -1,8 +1,8 @@
 ; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s
 target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) unnamed_addr nounwind
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) unnamed_addr nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
 
 ; all bytes of %dst that are touch by the memset are dereferenceable
 define void @must_remove_memcpy(i8* noalias nocapture dereferenceable(4096) %dst) {
@@ -11,8 +11,8 @@
 ; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64
   %src = alloca [4096 x i8], align 1
   %p = getelementptr inbounds [4096 x i8], [4096 x i8]* %src, i64 0, i64 0
-  call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i32 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 4096, i32 1, i1 false) #2
+  call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 4096, i1 false) #2
   ret void
 }
 
@@ -24,7 +24,7 @@
 ; CHECK: call void @llvm.memset.p0i8.i64
   %src = alloca [4096 x i8], align 1
   %p = getelementptr inbounds [4096 x i8], [4096 x i8]* %src, i64 0, i64 0
-  call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i32 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 4096, i32 1, i1 false) #2
+  call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 4096, i1 false) #2
   ret void
 }
diff --git a/llvm/test/Transforms/MemCpyOpt/capturing-func.ll b/llvm/test/Transforms/MemCpyOpt/capturing-func.ll
index 17614fd..2671a9a 100644
--- a/llvm/test/Transforms/MemCpyOpt/capturing-func.ll
+++ b/llvm/test/Transforms/MemCpyOpt/capturing-func.ll
@@ -3,13 +3,13 @@
 target datalayout = "e"
 
 declare void @foo(i8*)
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
 
 define void @test() {
   %ptr1 = alloca i8
   %ptr2 = alloca i8
   call void @foo(i8* %ptr2)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr1, i8* %ptr2, i32 1, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr1, i8* %ptr2, i32 1, i1 false)
   call void @foo(i8* %ptr1)
   ret void
 
diff --git a/llvm/test/Transforms/MemCpyOpt/form-memset.ll b/llvm/test/Transforms/MemCpyOpt/form-memset.ll
index 7d7f3a6..836a610 100644
--- a/llvm/test/Transforms/MemCpyOpt/form-memset.ll
+++ b/llvm/test/Transforms/MemCpyOpt/form-memset.ll
@@ -152,11 +152,11 @@
         
 ; CHECK-LABEL: @test2(
 ; CHECK-NOT: store
-; CHECK: call void @llvm.memset.p0i8.i64(i8* %tmp41, i8 -1, i64 8, i32 1, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %tmp41, i8 -1, i64 8, i1 false)
 ; CHECK-NOT: store
-; CHECK: call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 32, i32 8, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 32, i1 false)
 ; CHECK-NOT: store
-; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 32, i32 8, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 %1, i8 0, i64 32, i1 false)
 ; CHECK-NOT: store
 ; CHECK: ret
 }
@@ -171,11 +171,11 @@
   store i32 0, i32* %arrayidx, align 4
   %add.ptr = getelementptr inbounds i32, i32* %P, i64 2
   %0 = bitcast i32* %add.ptr to i8*
-  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i1 false)
   ret void
 ; CHECK-LABEL: @test3(
 ; CHECK-NOT: store
-; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 0, i64 15, i1 false)
 }
 
 ; store followed by memset, different offset scenario
@@ -184,40 +184,40 @@
   store i32 0, i32* %P, align 4
   %add.ptr = getelementptr inbounds i32, i32* %P, i64 1
   %0 = bitcast i32* %add.ptr to i8*
-  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i1 false)
   ret void
 ; CHECK-LABEL: @test4(
 ; CHECK-NOT: store
-; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 0, i64 15, i1 false)
 }
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
 
 ; Memset followed by store.
 define void @test5(i32* nocapture %P) nounwind ssp {
 entry:
   %add.ptr = getelementptr inbounds i32, i32* %P, i64 2
   %0 = bitcast i32* %add.ptr to i8*
-  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i1 false)
   %arrayidx = getelementptr inbounds i32, i32* %P, i64 1
   store i32 0, i32* %arrayidx, align 4
   ret void
 ; CHECK-LABEL: @test5(
 ; CHECK-NOT: store
-; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 0, i64 15, i1 false)
 }
 
 ;; Memset followed by memset.
 define void @test6(i32* nocapture %P) nounwind ssp {
 entry:
   %0 = bitcast i32* %P to i8*
-  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 12, i32 1, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 12, i1 false)
   %add.ptr = getelementptr inbounds i32, i32* %P, i64 3
   %1 = bitcast i32* %add.ptr to i8*
-  tail call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 12, i32 1, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 12, i1 false)
   ret void
 ; CHECK-LABEL: @test6(
-; CHECK: call void @llvm.memset.p0i8.i64(i8* %2, i8 0, i64 24, i32 1, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %2, i8 0, i64 24, i1 false)
 }
 
 ; More aggressive heuristic
@@ -233,7 +233,7 @@
   %4 = getelementptr inbounds i32, i32* %c, i32 4
   store i32 -1, i32* %4, align 4
 ; CHECK-LABEL: @test7(
-; CHECK: call void @llvm.memset.p0i8.i64(i8* %5, i8 -1, i64 20, i32 4, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %5, i8 -1, i64 20, i1 false)
   ret void
 }
 
@@ -270,17 +270,17 @@
   store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 15), align 1
   ret void
 ; CHECK-LABEL: @test9(
-; CHECK: call void @llvm.memset.p0i8.i64(i8* bitcast ([16 x i64]* @test9buf to i8*), i8 -1, i64 16, i32 16, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 bitcast ([16 x i64]* @test9buf to i8*), i8 -1, i64 16, i1 false)
 }
 
 ; PR19092
 define void @test10(i8* nocapture %P) nounwind {
-  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 42, i32 1, i1 false)
-  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 23, i32 1, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 42, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 23, i1 false)
   ret void
 ; CHECK-LABEL: @test10(
 ; CHECK-NOT: memset
-; CHECK: call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 42, i32 1, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %P, i8 0, i64 42, i1 false)
 ; CHECK-NOT: memset
 ; CHECK: ret void
 }
@@ -290,12 +290,12 @@
 entry:
   %add.ptr = getelementptr inbounds i32, i32* %P, i64 3
   %0 = bitcast i32* %add.ptr to i8*
-  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 1, i64 11, i32 1, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 1, i64 11, i1 false)
   %arrayidx = getelementptr inbounds i32, i32* %P, i64 0
   %arrayidx.cast = bitcast i32* %arrayidx to i96*
   store i96 310698676526526814092329217, i96* %arrayidx.cast, align 4
   ret void
 ; CHECK-LABEL: @test11(
 ; CHECK-NOT: store
-; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 1, i64 23, i32 4, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 1, i64 23, i1 false)
 }
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll
index e3e57f0..fee7409 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll
@@ -8,15 +8,15 @@
   %a = alloca [8 x i64], align 8
   %a.cast = bitcast [8 x i64]* %a to i8*
   call void @llvm.lifetime.start(i64 64, i8* %a.cast)
-  call void @llvm.memset.p0i8.i64(i8* %a.cast, i8 0, i64 64, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %a.cast, i8 0, i64 64, i1 false)
   %sret.cast = bitcast [8 x i64]* %sret to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %sret.cast, i8* %a.cast, i64 64, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %sret.cast, i8* %a.cast, i64 64, i1 false)
   call void @llvm.lifetime.end(i64 64, i8* %a.cast)
   ret void
 
 ; CHECK-LABEL: @foo(
 ; CHECK:         %[[sret_cast:[^=]+]] = bitcast [8 x i64]* %sret to i8*
-; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* %[[sret_cast]], i8 0, i64 64
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 %[[sret_cast]], i8 0, i64 64
 ; CHECK-NOT: call void @llvm.memcpy
 ; CHECK: ret void
 }
@@ -26,24 +26,24 @@
   %a = alloca [8 x i64], align 8
   %a.cast = bitcast [8 x i64]* %a to i8*
   call void @llvm.lifetime.start(i64 64, i8* %a.cast)
-  call void @llvm.memset.p0i8.i64(i8* %a.cast, i8 0, i64 64, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* align 8 %a.cast, i8 0, i64 64, i1 false)
   %sret.cast = bitcast [8 x i64]* %sret to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %sret.cast, i8* %a.cast, i64 64, i32 8, i1 false)
-  call void @llvm.memset.p0i8.i64(i8* %a.cast, i8 42, i64 32, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %sret.cast, i8* align 8 %a.cast, i64 64, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* align 8 %a.cast, i8 42, i64 32, i1 false)
   %out.cast = bitcast [8 x i64]* %out to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out.cast, i8* %a.cast, i64 64, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %out.cast, i8* align 8 %a.cast, i64 64, i1 false)
   call void @llvm.lifetime.end(i64 64, i8* %a.cast)
   ret void
 
 ; CHECK-LABEL: @bar(
 ; CHECK:         %[[a:[^=]+]] = alloca [8 x i64]
 ; CHECK:         %[[a_cast:[^=]+]] = bitcast [8 x i64]* %[[a]] to i8*
-; CHECK:         call void @llvm.memset.p0i8.i64(i8* %[[a_cast]], i8 0, i64 64
+; CHECK:         call void @llvm.memset.p0i8.i64(i8* align 8 %[[a_cast]], i8 0, i64 64
 ; CHECK:         %[[sret_cast:[^=]+]] = bitcast [8 x i64]* %sret to i8*
-; CHECK:         call void @llvm.memset.p0i8.i64(i8* %[[sret_cast]], i8 0, i64 64
-; CHECK:         call void @llvm.memset.p0i8.i64(i8* %[[a_cast]], i8 42, i64 32
+; CHECK:         call void @llvm.memset.p0i8.i64(i8* align 8 %[[sret_cast]], i8 0, i64 64
+; CHECK:         call void @llvm.memset.p0i8.i64(i8* align 8 %[[a_cast]], i8 42, i64 32
 ; CHECK:         %[[out_cast:[^=]+]] = bitcast [8 x i64]* %out to i8*
-; CHECK:         call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[out_cast]], i8* %[[a_cast]], i64 64
+; CHECK:         call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[out_cast]], i8* align 8 %[[a_cast]], i64 64
 ; CHECK-NOT: call void @llvm.memcpy
 ; CHECK: ret void
 }
@@ -51,5 +51,5 @@
 declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
 declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) nounwind
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset.ll
index fd8b93c..8f27fa9 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset.ll
@@ -2,13 +2,13 @@
 
 @cst = internal constant [3 x i32] [i32 -1, i32 -1, i32 -1], align 4
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
 declare void @foo(i32*) nounwind
 
 define void @test1() nounwind {
   %arr = alloca [3 x i32], align 4
   %arr_i8 = bitcast [3 x i32]* %arr to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arr_i8, i8* bitcast ([3 x i32]* @cst to i8*), i64 12, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arr_i8, i8* bitcast ([3 x i32]* @cst to i8*), i64 12, i1 false)
   %arraydecay = getelementptr inbounds [3 x i32], [3 x i32]* %arr, i64 0, i64 0
   call void @foo(i32* %arraydecay) nounwind
   ret void
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-undef.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-undef.ll
index c75d020..9d8db7e 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy-undef.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy-undef.ll
@@ -11,7 +11,7 @@
   store i8 98, i8* %1, align 4
   %2 = getelementptr inbounds %struct.foo, %struct.foo* %foobie, i64 0, i32 1, i64 0
   %3 = getelementptr inbounds [7 x i8], [7 x i8]* %bletch.sroa.1, i64 0, i64 0
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 7, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 7, i1 false)
   %4 = getelementptr inbounds %struct.foo, %struct.foo* %foobie, i64 0, i32 2
   store i32 20, i32* %4, align 4
   ret i32 undef
@@ -23,7 +23,7 @@
 
 define void @test2(i8* sret noalias nocapture %out, i8* %in) nounwind noinline ssp uwtable {
   call void @llvm.lifetime.start(i64 8, i8* %in)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 8, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 8, i1 false)
   ret void
 
 ; Check that the memcpy is removed.
@@ -33,7 +33,7 @@
 
 define void @test3(i8* sret noalias nocapture %out, i8* %in) nounwind noinline ssp uwtable {
   call void @llvm.lifetime.start(i64 4, i8* %in)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 8, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 8, i1 false)
   ret void
 
 ; Check that the memcpy is not removed.
@@ -41,6 +41,6 @@
 ; CHECK: call void @llvm.memcpy
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
 
 declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
index 6181543..e5a8b62 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
@@ -14,9 +14,9 @@
   call void @ccoshl(%0* sret %memtmp, x86_fp80 %tmp5, x86_fp80 %z.0) nounwind
   %tmp219 = bitcast %0* %tmp2 to i8*
   %memtmp20 = bitcast %0* %memtmp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp219, i8* %memtmp20, i32 32, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %tmp219, i8* align 16 %memtmp20, i32 32, i1 false)
   %agg.result21 = bitcast %0* %agg.result to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result21, i8* %tmp219, i32 32, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %agg.result21, i8* align 16 %tmp219, i32 32, i1 false)
   ret void
 
 ; Check that one of the memcpy's are removed.
@@ -37,12 +37,12 @@
 define void @test2(i8* %P, i8* %Q) nounwind  {
   %memtmp = alloca %0, align 16
   %R = bitcast %0* %memtmp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %R, i8* %P, i32 32, i32 16, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %Q, i8* %R, i32 32, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %R, i8* align 16 %P, i32 32, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %Q, i8* align 16 %R, i32 32, i1 false)
   ret void
         
 ; CHECK-LABEL: @test2(
-; CHECK-NEXT: call void @llvm.memmove{{.*}}(i8* %Q, i8* %P
+; CHECK-NEXT: call void @llvm.memmove{{.*}}(i8* align 16 %Q, i8* align 16 %P
 ; CHECK-NEXT: ret void
 }
 
@@ -54,9 +54,9 @@
 define void @test3(%0* noalias sret %agg.result) nounwind  {
   %x.0 = alloca %0
   %x.01 = bitcast %0* %x.0 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x.01, i8* bitcast (%0* @x to i8*), i32 32, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %x.01, i8* align 16 bitcast (%0* @x to i8*), i32 32, i1 false)
   %agg.result2 = bitcast %0* %agg.result to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result2, i8* %x.01, i32 32, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %agg.result2, i8* align 16 %x.01, i32 32, i1 false)
   ret void
 ; CHECK-LABEL: @test3(
 ; CHECK-NEXT: %agg.result1 = bitcast 
@@ -69,7 +69,7 @@
 define void @test4(i8 *%P) {
   %A = alloca %1
   %a = bitcast %1* %A to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %P, i64 8, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a, i8* align 4 %P, i64 8, i1 false)
   call void @test4a(i8* align 1 byval %a)
   ret void
 ; CHECK-LABEL: @test4(
@@ -77,8 +77,8 @@
 }
 
 declare void @test4a(i8* align 1 byval)
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
-declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i1) nounwind
 
 %struct.S = type { i128, [4 x i8]}
 
@@ -92,7 +92,7 @@
 entry:
   %y = alloca %struct.S, align 16
   %tmp = bitcast %struct.S* %y to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* bitcast (%struct.S* @sS to i8*), i64 32, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* bitcast (%struct.S* @sS to i8*), i64 32, i1 false)
   %a = getelementptr %struct.S, %struct.S* %y, i64 0, i32 1, i64 0
   store i8 4, i8* %a
   call void @test5a(%struct.S* align 16 byval %y)
@@ -104,7 +104,7 @@
 
 ;; Noop memcpy should be zapped.
 define void @test6(i8 *%P) {
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %P, i64 8, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %P, i64 8, i1 false)
   ret void
 ; CHECK-LABEL: @test6(
 ; CHECK-NEXT: ret void
@@ -120,7 +120,7 @@
   %agg.tmp = alloca %struct.p, align 4
   %tmp = bitcast %struct.p* %agg.tmp to i8*
   %tmp1 = bitcast %struct.p* %q to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 48, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 48, i1 false)
   %call = call i32 @g(%struct.p* align 8 byval %agg.tmp) nounwind
   ret i32 %call
 ; CHECK-LABEL: @test7(
@@ -129,7 +129,7 @@
 
 declare i32 @g(%struct.p* align 8 byval)
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
 
 ; PR11142 - When looking for a memcpy-memcpy dependency, don't get stuck on
 ; instructions between the memcpy's that only affect the destination pointer.
@@ -140,10 +140,10 @@
 ; CHECK-NOT: memcpy
   %A = tail call i8* @malloc(i32 10)
   %B = getelementptr inbounds i8, i8* %A, i64 2
-  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %B, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @test8.str, i64 0, i64 0), i32 7, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %B, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @test8.str, i64 0, i64 0), i32 7, i1 false)
   %C = tail call i8* @malloc(i32 10)
   %D = getelementptr inbounds i8, i8* %C, i64 2
-  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %D, i8* %B, i32 7, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %D, i8* %B, i32 7, i1 false)
   ret void
 ; CHECK: ret void
 }
@@ -164,7 +164,7 @@
   call void @f1(%struct.big* sret %tmp)
   %0 = addrspacecast %struct.big* %b to i8 addrspace(1)*
   %1 = addrspacecast %struct.big* %tmp to i8 addrspace(1)*
-  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %0, i8 addrspace(1)* %1, i64 200, i32 4, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %0, i8 addrspace(1)* %1, i64 200, i1 false)
   call void @f2(%struct.big* %b)
   ret void
 }
@@ -180,7 +180,7 @@
   call void @f1(%struct.big* sret %tmp)
   %0 = bitcast %struct.big* %b to i8*
   %1 = bitcast %struct.big* %tmp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 200, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 200, i1 false)
   call void @f2(%struct.big* %b)
   ret void
 }
diff --git a/llvm/test/Transforms/MemCpyOpt/memmove.ll b/llvm/test/Transforms/MemCpyOpt/memmove.ll
index 1af85a1..91f2851 100644
--- a/llvm/test/Transforms/MemCpyOpt/memmove.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memmove.ll
@@ -4,7 +4,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin9.0"
 
-declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
 
 define i8* @test1(i8* nocapture %src) nounwind {
 entry:
@@ -14,7 +14,7 @@
   %malloccall = tail call i8* @malloc(i32 trunc (i64 mul nuw (i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64), i64 13) to i32))
   %call3 = bitcast i8* %malloccall to [13 x i8]*
   %call3.sub = getelementptr inbounds [13 x i8], [13 x i8]* %call3, i64 0, i64 0
-  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %call3.sub, i8* %src, i64 13, i32 1, i1 false)
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %call3.sub, i8* %src, i64 13, i1 false)
   ret i8* %call3.sub
 }
 declare noalias i8* @malloc(i32)
@@ -25,7 +25,7 @@
 ; CHECK-LABEL: @test2(
 ; CHECK: call void @llvm.memcpy
   %add.ptr = getelementptr i8, i8* %P, i64 16
-  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 16, i32 1, i1 false)
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 16, i1 false)
   ret void
 }
 
@@ -35,6 +35,6 @@
 ; CHECK-LABEL: @test3(
 ; CHECK: call void @llvm.memmove
   %add.ptr = getelementptr i8, i8* %P, i64 16
-  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 17, i32 1, i1 false)
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 17, i1 false)
   ret void
 }
diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll
index 993c8a1..4f50010 100644
--- a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll
@@ -7,12 +7,12 @@
 ; CHECK-DAG: [[ULE:%[0-9]+]] = icmp ule i64 %dst_size, %src_size
 ; CHECK-DAG: [[SIZEDIFF:%[0-9]+]] = sub i64 %dst_size, %src_size
 ; CHECK-DAG: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]]
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST]], i8 %c, i64 [[SIZE]], i32 1, i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[DST]], i8 %c, i64 [[SIZE]], i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %src_size, i1 false)
 ; CHECK-NEXT: ret void
 define void @test(i8* %src, i64 %src_size, i8* %dst, i64 %dst_size, i8 %c) {
-  call void @llvm.memset.p0i8.i64(i8* %dst, i8 %c, i64 %dst_size, i32 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %c, i64 %dst_size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %src_size, i1 false)
   ret void
 }
 
@@ -22,12 +22,12 @@
 ; CHECK-DAG: [[ULE:%[0-9]+]] = icmp ule i64 [[DSTSIZE]], %src_size
 ; CHECK-DAG: [[SIZEDIFF:%[0-9]+]] = sub i64 [[DSTSIZE]], %src_size
 ; CHECK-DAG: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]]
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST]], i8 %c, i64 [[SIZE]], i32 1, i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[DST]], i8 %c, i64 [[SIZE]], i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %src_size, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_different_types_i32_i64(i8* %dst, i8* %src, i32 %dst_size, i64 %src_size, i8 %c) {
-  call void @llvm.memset.p0i8.i32(i8* %dst, i8 %c, i32 %dst_size, i32 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* align 1 %dst, i8 %c, i32 %dst_size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %src_size, i1 false)
   ret void
 }
 
@@ -37,12 +37,12 @@
 ; CHECK-DAG: [[ULE:%[0-9]+]] = icmp ule i128 %dst_size, [[SRCSIZE]]
 ; CHECK-DAG: [[SIZEDIFF:%[0-9]+]] = sub i128 %dst_size, [[SRCSIZE]]
 ; CHECK-DAG: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i128 0, i128 [[SIZEDIFF]]
-; CHECK-NEXT: call void @llvm.memset.p0i8.i128(i8* [[DST]], i8 %c, i128 [[SIZE]], i32 1, i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i128(i8* align 1 [[DST]], i8 %c, i128 [[SIZE]], i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 %src, i32 %src_size, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_different_types_i128_i32(i8* %dst, i8* %src, i128 %dst_size, i32 %src_size, i8 %c) {
-  call void @llvm.memset.p0i8.i128(i8* %dst, i8 %c, i128 %dst_size, i32 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i128(i8* align 1 %dst, i8 %c, i128 %dst_size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 %src, i32 %src_size, i1 false)
   ret void
 }
 
@@ -52,12 +52,12 @@
 ; CHECK-DAG: [[ULE:%[0-9]+]] = icmp ule i128 [[DSTSIZE]], %src_size
 ; CHECK-DAG: [[SIZEDIFF:%[0-9]+]] = sub i128 [[DSTSIZE]], %src_size
 ; CHECK-DAG: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i128 0, i128 [[SIZEDIFF]]
-; CHECK-NEXT: call void @llvm.memset.p0i8.i128(i8* [[DST]], i8 %c, i128 [[SIZE]], i32 1, i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i128(i8* %dst, i8* %src, i128 %src_size, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i128(i8* align 1 [[DST]], i8 %c, i128 [[SIZE]], i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i128(i8* align 1 %dst, i8* align 1 %src, i128 %src_size, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_different_types_i32_i128(i8* %dst, i8* %src, i32 %dst_size, i128 %src_size, i8 %c) {
-  call void @llvm.memset.p0i8.i32(i8* %dst, i8 %c, i32 %dst_size, i32 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i128(i8* %dst, i8* %src, i128 %src_size, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* align 1 %dst, i8 %c, i32 %dst_size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i128(i8* align 1 %dst, i8* align 1 %src, i128 %src_size, i1 false)
   ret void
 }
 
@@ -67,36 +67,36 @@
 ; CHECK-DAG: [[ULE:%[0-9]+]] = icmp ule i64 %dst_size, [[SRCSIZE]]
 ; CHECK-DAG: [[SIZEDIFF:%[0-9]+]] = sub i64 %dst_size, [[SRCSIZE]]
 ; CHECK-DAG: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]]
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST]], i8 %c, i64 [[SIZE]], i32 1, i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[DST]], i8 %c, i64 [[SIZE]], i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 %src, i32 %src_size, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_different_types_i64_i32(i8* %dst, i8* %src, i64 %dst_size, i32 %src_size, i8 %c) {
-  call void @llvm.memset.p0i8.i64(i8* %dst, i8 %c, i64 %dst_size, i32 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %c, i64 %dst_size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 %src, i32 %src_size, i1 false)
   ret void
 }
 
 ; CHECK-LABEL: define void @test_align_same
-; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 {{.*}}, i32 8, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 {{.*}}, i8 0, i64 {{.*}}, i1 false)
 define void @test_align_same(i8* %src, i8* %dst, i64 %dst_size) {
-  call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 8, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 80, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* align 8 %dst, i8 0, i64 %dst_size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 80, i1 false)
   ret void
 }
 
 ; CHECK-LABEL: define void @test_align_min
-; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 {{.*}}, i32 4, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 0, i64 {{.*}}, i1 false)
 define void @test_align_min(i8* %src, i8* %dst, i64 %dst_size) {
-  call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 8, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 36, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* align 8 %dst, i8 0, i64 %dst_size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 36, i1 false)
   ret void
 }
 
 ; CHECK-LABEL: define void @test_align_memcpy
-; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 {{.*}}, i32 8, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 {{.*}}, i8 0, i64 {{.*}}, i1 false)
 define void @test_align_memcpy(i8* %src, i8* %dst, i64 %dst_size) {
-  call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 80, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 %dst_size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 80, i1 false)
   ret void
 }
 
@@ -106,37 +106,37 @@
 ; CHECK-DAG: [[ULE:%[0-9]+]] = icmp ule i64 %dst_size, %src_size
 ; CHECK-DAG: [[SIZEDIFF:%[0-9]+]] = sub i64 %dst_size, %src_size
 ; CHECK-DAG: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]]
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST]], i8 %c, i64 [[SIZE]], i32 1, i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[DST]], i8 %c, i64 [[SIZE]], i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %src_size, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_non_i8_dst_type(i8* %src, i64 %src_size, i64* %dst_pi64, i64 %dst_size, i8 %c) {
   %dst = bitcast i64* %dst_pi64 to i8*
-  call void @llvm.memset.p0i8.i64(i8* %dst, i8 %c, i64 %dst_size, i32 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %c, i64 %dst_size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %src_size, i1 false)
   ret void
 }
 
 ; CHECK-LABEL: define void @test_different_dst
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 1, i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src, i64 %src_size, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 %dst_size, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst2, i8* align 1 %src, i64 %src_size, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_different_dst(i8* %dst2, i8* %src, i64 %src_size, i8* %dst, i64 %dst_size) {
-  call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src, i64 %src_size, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 %dst_size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst2, i8* align 1 %src, i64 %src_size, i1 false)
   ret void
 }
 
 ; Make sure we also take into account dependencies on the destination.
 
 ; CHECK-LABEL: define i8 @test_intermediate_read
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 64, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 %a, i8 0, i64 64, i1 false)
 ; CHECK-NEXT: %r = load i8, i8* %a
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 24, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %a, i8* align 1 %b, i64 24, i1 false)
 ; CHECK-NEXT: ret i8 %r
 define i8 @test_intermediate_read(i8* %a, i8* %b) #0 {
-  call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 64, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* align 1 %a, i8 0, i64 64, i1 false)
   %r = load i8, i8* %a
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 24, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %a, i8* align 1 %b, i64 24, i1 false)
   ret i8 %r
 }
 
@@ -146,23 +146,23 @@
 ; CHECK-NEXT: %a = alloca %struct
 ; CHECK-NEXT: %a0 = getelementptr %struct, %struct* %a, i32 0, i32 0, i32 0
 ; CHECK-NEXT: %a1 = getelementptr %struct, %struct* %a, i32 0, i32 1, i32 0
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %a0, i8 0, i64 16, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 %a0, i8 0, i64 16, i1 false)
 ; CHECK-NEXT: store i8 1, i8* %a1
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a0, i8* %b, i64 8, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %a0, i8* align 1 %b, i64 8, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_intermediate_write(i8* %b) #0 {
   %a = alloca %struct
   %a0 = getelementptr %struct, %struct* %a, i32 0, i32 0, i32 0
   %a1 = getelementptr %struct, %struct* %a, i32 0, i32 1, i32 0
-  call void @llvm.memset.p0i8.i64(i8* %a0, i8 0, i64 16, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* align 1 %a0, i8 0, i64 16, i1 false)
   store i8 1, i8* %a1
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a0, i8* %b, i64 8, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %a0, i8* align 1 %b, i64 8, i1 false)
   ret void
 }
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1)
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
-declare void @llvm.memset.p0i8.i128(i8* nocapture, i8, i128, i32, i1)
-declare void @llvm.memcpy.p0i8.p0i8.i128(i8* nocapture, i8* nocapture readonly, i128, i32, i1)
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1)
+declare void @llvm.memset.p0i8.i128(i8* nocapture, i8, i128, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i128(i8* nocapture, i8* nocapture readonly, i128, i1)
diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll
index 1c56704..b0abab8 100644
--- a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll
@@ -3,99 +3,99 @@
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
 ; CHECK-LABEL: define void @test(
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false)
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 128, i32 8, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 128, i1 false)
 ; CHECK-NEXT: ret void
 define void @test(i8* %dst1, i8* %dst2, i8 %c) {
-  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i1 false)
   ret void
 }
 
 ; CHECK-LABEL: define void @test_smaller_memcpy(
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false)
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 100, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 100, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_smaller_memcpy(i8* %dst1, i8* %dst2, i8 %c) {
-  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 100, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 100, i1 false)
   ret void
 }
 
 ; CHECK-LABEL: define void @test_smaller_memset(
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 100, i32 1, i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 100, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_smaller_memset(i8* %dst1, i8* %dst2, i8 %c) {
-  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 100, i32 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 100, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i1 false)
   ret void
 }
 
 ; CHECK-LABEL: define void @test_align_memset(
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 8, i1 false)
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 128, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 128, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_align_memset(i8* %dst1, i8* %dst2, i8 %c) {
-  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 8, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i1 false)
   ret void
 }
 
 ; CHECK-LABEL: define void @test_different_types(
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 8, i1 false)
-; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dst2, i8 %c, i32 100, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dst2, i8 %c, i32 100, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_different_types(i8* %dst1, i8* %dst2, i8 %c) {
-  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 8, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst2, i8* %dst1, i32 100, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst2, i8* %dst1, i32 100, i1 false)
   ret void
 }
 
 ; CHECK-LABEL: define void @test_different_types_2(
-; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dst1, i8 %c, i32 128, i32 8, i1 false)
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 100, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dst1, i8 %c, i32 128, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 100, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_different_types_2(i8* %dst1, i8* %dst2, i8 %c) {
-  call void @llvm.memset.p0i8.i32(i8* %dst1, i8 %c, i32 128, i32 8, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 100, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %dst1, i8 %c, i32 128, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 100, i1 false)
   ret void
 }
 
 ; CHECK-LABEL: define void @test_different_source_gep(
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
 ; CHECK-NEXT: %p = getelementptr i8, i8* %dst1, i64 64
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %p, i64 64, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %p, i64 64, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_different_source_gep(i8* %dst1, i8* %dst2, i8 %c) {
-  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
   ; FIXME: We could optimize this as well.
   %p = getelementptr i8, i8* %dst1, i64 64
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %p, i64 64, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %p, i64 64, i1 false)
   ret void
 }
 
 ; CHECK-LABEL: define void @test_variable_size_1(
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 %dst1_size, i32 1, i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 %dst1_size, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_variable_size_1(i8* %dst1, i64 %dst1_size, i8* %dst2, i8 %c) {
-  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 %dst1_size, i32 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 %dst1_size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i1 false)
   ret void
 }
 
 ; CHECK-LABEL: define void @test_variable_size_2(
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 %dst2_size, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 %dst2_size, i1 false)
 ; CHECK-NEXT: ret void
 define void @test_variable_size_2(i8* %dst1, i8* %dst2, i64 %dst2_size, i8 %c) {
-  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 %dst2_size, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 %dst2_size, i1 false)
   ret void
 }
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1)
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1)
diff --git a/llvm/test/Transforms/MemCpyOpt/smaller.ll b/llvm/test/Transforms/MemCpyOpt/smaller.ll
index 8f6cafa6..17aca87 100644
--- a/llvm/test/Transforms/MemCpyOpt/smaller.ll
+++ b/llvm/test/Transforms/MemCpyOpt/smaller.ll
@@ -4,7 +4,7 @@
 ; Memcpyopt shouldn't optimize the second memcpy using the first
 ; because the first has a smaller size.
 
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 0, i32 0), i32 16, i32 4, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 0, i32 0), i32 16, i1 false)
 
 target datalayout = "e-p:32:32:32"
 
@@ -14,15 +14,15 @@
 @cell = external global %struct.s
 
 declare void @check(%struct.s* byval %p) nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
 
 define void @foo() nounwind {
 entry:
   %agg.tmp = alloca %struct.s, align 4
   store i32 99, i32* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 1), align 4
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 0, i32 0), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 11, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 0, i32 0), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 11, i1 false)
   %tmp = getelementptr inbounds %struct.s, %struct.s* %agg.tmp, i32 0, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 0, i32 0), i32 16, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 0, i32 0), i32 16, i1 false)
   call void @check(%struct.s* byval %agg.tmp)
   ret void
 }
diff --git a/llvm/test/Transforms/MemCpyOpt/sret.ll b/llvm/test/Transforms/MemCpyOpt/sret.ll
index 34ba4c4..a99b52d 100644
--- a/llvm/test/Transforms/MemCpyOpt/sret.ll
+++ b/llvm/test/Transforms/MemCpyOpt/sret.ll
@@ -21,10 +21,10 @@
   call void @ccoshl(%0* noalias sret %memtmp, %0* byval align 8 %iz) nounwind
   %memtmp14 = bitcast %0* %memtmp to i8*
   %agg.result15 = bitcast %0* %agg.result to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result15, i8* %memtmp14, i32 32, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %agg.result15, i8* align 16 %memtmp14, i32 32, i1 false)
   ret void
 }
 
 declare void @ccoshl(%0* noalias nocapture sret, %0* byval) nounwind
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
diff --git a/llvm/test/Transforms/MergeFunc/vector.ll b/llvm/test/Transforms/MergeFunc/vector.ll
index ef13753..db95ec7 100644
--- a/llvm/test/Transforms/MergeFunc/vector.ll
+++ b/llvm/test/Transforms/MergeFunc/vector.ll
@@ -59,7 +59,7 @@
 
 declare void @_ZdlPv(i8*) nounwind
 
-declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
 
 declare void @_ZSt17__throw_bad_allocv() noreturn
 
diff --git a/llvm/test/Transforms/MetaRenamer/metarenamer.ll b/llvm/test/Transforms/MetaRenamer/metarenamer.ll
index 213fbe3..90d861d 100644
--- a/llvm/test/Transforms/MetaRenamer/metarenamer.ll
+++ b/llvm/test/Transforms/MetaRenamer/metarenamer.ll
@@ -35,11 +35,11 @@
   store double 4.000000e+00, double* %6, align 8
   %7 = bitcast %struct.foo_xxx* %agg.result to i8*
   %8 = bitcast %struct.foo_xxx* %1 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* %8, i64 24, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* %8, i64 24, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
 
 define i32 @func_5_xxx(i32 %arg_1_xxx, i32 %arg_2_xxx, i32 %arg_3_xxx, i32 %arg_4_xxx) nounwind uwtable ssp {
   %1 = alloca i32, align 4
diff --git a/llvm/test/Transforms/ObjCARC/nested.ll b/llvm/test/Transforms/ObjCARC/nested.ll
index cf14a1f9..589300c 100644
--- a/llvm/test/Transforms/ObjCARC/nested.ll
+++ b/llvm/test/Transforms/ObjCARC/nested.ll
@@ -12,7 +12,7 @@
 declare i8* @objc_retainAutoreleasedReturnValue(i8*)
 declare i8* @objc_retain(i8*)
 declare void @objc_enumerationMutation(i8*)
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
 declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
 declare void @use(i8*)
 declare void @objc_release(i8*)
@@ -35,7 +35,7 @@
   %items.ptr = alloca [16 x i8*], align 8
   %0 = call i8* @objc_retain(i8* %a) nounwind
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
-  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i1 false)
   %1 = call i8* @objc_retain(i8* %0) nounwind
   %tmp2 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp2, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
@@ -100,7 +100,7 @@
   %call = call i8* @returner()
   %0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
-  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i1 false)
   %1 = call i8* @objc_retain(i8* %0) nounwind
   %tmp2 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call3 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp2, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
@@ -165,7 +165,7 @@
   %tmp = load i8*, i8** @g, align 8
   %0 = call i8* @objc_retain(i8* %tmp) nounwind
   %tmp2 = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
-  call void @llvm.memset.p0i8.i64(i8* %tmp2, i8 0, i64 64, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %tmp2, i8 0, i64 64, i1 false)
   %1 = call i8* @objc_retain(i8* %0) nounwind
   %tmp4 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp4, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
@@ -230,7 +230,7 @@
   %call = call i8* @returner()
   %0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
-  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i1 false)
   %1 = call i8* @objc_retain(i8* %0) nounwind
   %tmp2 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call3 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp2, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
@@ -296,7 +296,7 @@
   %call = call i8* @returner()
   %0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
-  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i1 false)
   %1 = call i8* @objc_retain(i8* %0) nounwind
   %tmp2 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call3 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp2, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
@@ -365,7 +365,7 @@
   %0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
   call void @callee()
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
-  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i1 false)
   %1 = call i8* @objc_retain(i8* %0) nounwind
   %tmp2 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call3 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp2, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
@@ -431,7 +431,7 @@
   %call = call i8* @returner()
   %0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
-  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i1 false)
   %1 = call i8* @objc_retain(i8* %0) nounwind
   %tmp2 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call3 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp2, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
@@ -508,7 +508,7 @@
   %call1 = call i8* @returner()
   %1 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call1) nounwind
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
-  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i1 false)
   %2 = call i8* @objc_retain(i8* %0) nounwind
   %tmp3 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call4 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp3, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
@@ -575,7 +575,7 @@
   %call1 = call i8* @returner()
   %1 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call1) nounwind
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
-  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i1 false)
   %2 = call i8* @objc_retain(i8* %0) nounwind
   %tmp3 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call4 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp3, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
@@ -642,7 +642,7 @@
   %1 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call1) nounwind
   call void @callee()
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
-  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i1 false)
   %2 = call i8* @objc_retain(i8* %0) nounwind
   %tmp3 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call4 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp3, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
@@ -710,7 +710,7 @@
   %1 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call1) nounwind
   call void @callee()
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
-  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i1 false)
   %2 = call i8* @objc_retain(i8* %0) nounwind
   %tmp3 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call4 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp3, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
diff --git a/llvm/test/Transforms/PlaceSafepoints/memset.ll b/llvm/test/Transforms/PlaceSafepoints/memset.ll
index 534b2f1..e630fd0 100644
--- a/llvm/test/Transforms/PlaceSafepoints/memset.ll
+++ b/llvm/test/Transforms/PlaceSafepoints/memset.ll
@@ -5,13 +5,13 @@
 ; CHECK-NEXT: llvm.memset
 ; CHECK: do_safepoint
 ; CHECK: @foo
-  call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %ptr, i8 0, i64 24, i32 8, i1 false)
+  call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %ptr, i8 0, i64 24, i1 false)
   call void @foo()
   ret void
 }
 
 declare void @foo()
-declare void @llvm.memset.p1i8.i64(i8 addrspace(1)*, i8, i64, i32, i1)
+declare void @llvm.memset.p1i8.i64(i8 addrspace(1)*, i8, i64, i1)
 
 declare void @do_safepoint()
 define void @gc.safepoint_poll() {
diff --git a/llvm/test/Transforms/SROA/address-spaces.ll b/llvm/test/Transforms/SROA/address-spaces.ll
index 004695d..f76bd46 100644
--- a/llvm/test/Transforms/SROA/address-spaces.ll
+++ b/llvm/test/Transforms/SROA/address-spaces.ll
@@ -1,10 +1,10 @@
 ; RUN: opt < %s -sroa -S | FileCheck %s
 target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
-declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture readonly, i32, i32, i1)
-declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture readonly, i32, i32, i1)
-declare void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1)
+declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture readonly, i32, i1)
+declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture readonly, i32, i1)
+declare void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i1)
 
 
 ; Make sure an illegal bitcast isn't introduced
@@ -16,9 +16,9 @@
   %aa = alloca <2 x i64>, align 16
   %aptr = bitcast <2 x i64> addrspace(1)* %a to i8 addrspace(1)*
   %aaptr = bitcast <2 x i64>* %aa to i8*
-  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %aaptr, i8 addrspace(1)* %aptr, i32 16, i32 2, i1 false)
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* align 2 %aaptr, i8 addrspace(1)* align 2 %aptr, i32 16, i1 false)
   %bptr = bitcast i16 addrspace(1)* %b to i8 addrspace(1)*
-  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %bptr, i8* %aaptr, i32 16, i32 2, i1 false)
+  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 2 %bptr, i8* align 2 %aaptr, i32 16, i1 false)
   ret void
 }
 
@@ -30,9 +30,9 @@
   %aa = alloca <2 x i64>, align 16
   %aptr = bitcast <2 x i64> addrspace(1)* %a to i8 addrspace(1)*
   %aaptr = bitcast <2 x i64>* %aa to i8*
-  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %aaptr, i8 addrspace(1)* %aptr, i32 16, i32 2, i1 false)
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* align 2 %aaptr, i8 addrspace(1)* align 2 %aptr, i32 16, i1 false)
   %bptr = bitcast i16* %b to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %bptr, i8* %aaptr, i32 16, i32 2, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %bptr, i8* align 2 %aaptr, i32 16, i1 false)
   ret void
 }
 
@@ -44,9 +44,9 @@
   %aa = alloca <2 x i64>, align 16
   %aptr = bitcast <2 x i64>* %a to i8*
   %aaptr = bitcast <2 x i64>* %aa to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %aaptr, i8* %aptr, i32 16, i32 2, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %aaptr, i8* align 2 %aptr, i32 16, i1 false)
   %bptr = bitcast i16 addrspace(1)* %b to i8 addrspace(1)*
-  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %bptr, i8* %aaptr, i32 16, i32 2, i1 false)
+  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 2 %bptr, i8* align 2 %aaptr, i32 16, i1 false)
   ret void
 }
 
@@ -62,7 +62,7 @@
   store [5 x i64] %in.coerce, [5 x i64]* %0, align 8
   %scevgep9 = getelementptr %struct.struct_test_27.0.13, %struct.struct_test_27.0.13* %in, i32 0, i32 4, i32 0
   %scevgep910 = bitcast i32* %scevgep9 to i8*
-  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* undef, i8* %scevgep910, i32 16, i32 4, i1 false)
+  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 4 undef, i8* align 4 %scevgep910, i32 16, i1 false)
   ret void
 }
  
diff --git a/llvm/test/Transforms/SROA/alignment.ll b/llvm/test/Transforms/SROA/alignment.ll
index 455d142..dd06613 100644
--- a/llvm/test/Transforms/SROA/alignment.ll
+++ b/llvm/test/Transforms/SROA/alignment.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -sroa -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
 
 define void @test1({ i8, i8 }* %a, { i8, i8 }* %b) {
 ; CHECK-LABEL: @test1(
@@ -23,8 +23,8 @@
 
   store i8 420, i8* %gep_alloca, align 16
 
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %gep_alloca, i8* %gep_a, i32 2, i32 16, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %gep_b, i8* %gep_alloca, i32 2, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %gep_alloca, i8* align 16 %gep_a, i32 2, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %gep_b, i8* align 16 %gep_alloca, i32 2, i1 false)
   ret void
 }
 
@@ -57,9 +57,9 @@
   %aa = alloca <2 x i64>, align 16
   %aptr = bitcast <2 x i64>* %a to i8*
   %aaptr = bitcast <2 x i64>* %aa to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %aaptr, i8* %aptr, i32 16, i32 2, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %aaptr, i8* align 2 %aptr, i32 16, i1 false)
   %bptr = bitcast i16* %b to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %bptr, i8* %aaptr, i32 16, i32 2, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %bptr, i8* align 2 %aaptr, i32 16, i1 false)
   ret void
 }
 
@@ -77,10 +77,10 @@
   %a = alloca { i8*, i8*, i8* }
   %b = alloca { i8*, i8*, i8* }
   %a_raw = bitcast { i8*, i8*, i8* }* %a to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a_raw, i8* %x, i32 22, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %a_raw, i8* align 8 %x, i32 22, i1 false)
   %b_raw = bitcast { i8*, i8*, i8* }* %b to i8*
   %b_gep = getelementptr i8, i8* %b_raw, i32 6
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b_gep, i8* %x, i32 18, i32 2, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %b_gep, i8* align 2 %x, i32 18, i1 false)
   ret void
 }
 
@@ -155,7 +155,7 @@
   %raw2 = getelementptr inbounds [16 x i8], [16 x i8]* %a, i32 0, i32 8
   %ptr2 = bitcast i8* %raw2 to double*
 
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %raw1, i8* %out, i32 16, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %raw1, i8* %out, i32 16, i1 false)
 ; CHECK: %[[val2:.*]] = load double, double* %{{.*}}, align 1
 ; CHECK: %[[val1:.*]] = load double, double* %{{.*}}, align 1
 
@@ -165,7 +165,7 @@
   store double %val1, double* %ptr1, align 1
   store double %val2, double* %ptr2, align 1
 
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %raw1, i32 16, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %raw1, i32 16, i1 false)
 ; CHECK: store double %[[val1]], double* %{{.*}}, align 1
 ; CHECK: store double %[[val2]], double* %{{.*}}, align 1
 
diff --git a/llvm/test/Transforms/SROA/basictest.ll b/llvm/test/Transforms/SROA/basictest.ll
index 7b5daa9..d9d87a3 100644
--- a/llvm/test/Transforms/SROA/basictest.ll
+++ b/llvm/test/Transforms/SROA/basictest.ll
@@ -80,31 +80,31 @@
 ; CHECK-NEXT: %[[test3_a7:.*]] = alloca [85 x i8]
 
   %b = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b, i8* %src, i32 300, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b, i8* %src, i32 300, i1 false)
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a1]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %src, i32 42
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %src, i32 42
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 42
 ; CHECK-NEXT: %[[test3_r1:.*]] = load i8, i8* %[[gep]]
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 43
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [99 x i8], [99 x i8]* %[[test3_a2]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 99
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 99
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 142
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 16
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 16
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 158
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a4]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 42
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 42
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 200
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 207
 ; CHECK-NEXT: %[[test3_r2:.*]] = load i8, i8* %[[gep]]
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 208
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 215
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [85 x i8], [85 x i8]* %[[test3_a7]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 85
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 85
 
   ; Clobber a single element of the array, this should be promotable.
   %c = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 42
@@ -244,59 +244,59 @@
 ; CHECK-NEXT: store i32 4, i32* %[[bitcast]]
 
   %overlap2.prefix = getelementptr i8, i8* %overlap2.1.1.i8, i64 -4
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.prefix, i8* %src, i32 8, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.prefix, i8* %src, i32 8, i1 false)
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a4]], i64 0, i64 39
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %src, i32 3
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %src, i32 3
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 3
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 5
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 5
 
   ; Bridge between the overlapping areas
-  call void @llvm.memset.p0i8.i32(i8* %overlap2.1.2.i8, i8 42, i32 8, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %overlap2.1.2.i8, i8 42, i32 8, i1 false)
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 2
-; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %[[gep]], i8 42, i32 5
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 %[[gep]], i8 42, i32 5
 ; ...promoted i8 store...
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %[[gep]], i8 42, i32 2
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 %[[gep]], i8 42, i32 2
 
   ; Entirely within the second overlap.
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.2.1.i8, i8* %src, i32 5, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.2.1.i8, i8* %src, i32 5, i1 false)
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 1
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep]], i8* %src, i32 5
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep]], i8* align 1 %src, i32 5
 
   ; Trailing past the second overlap.
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.2.2.i8, i8* %src, i32 8, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.2.2.i8, i8* %src, i32 8, i1 false)
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 2
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep]], i8* %src, i32 5
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep]], i8* align 1 %src, i32 5
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 5
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [85 x i8], [85 x i8]* %[[test3_a7]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 3
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 3
 
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %b, i32 300, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %b, i32 300, i1 false)
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a1]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[gep]], i32 42
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 %[[gep]], i32 42
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 42
 ; CHECK-NEXT: store i8 0, i8* %[[gep]]
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 43
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [99 x i8], [99 x i8]* %[[test3_a2]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 99
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 99
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 142
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 16
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 16
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 158
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a4]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 42
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 42
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 200
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 207
 ; CHECK-NEXT: store i8 42, i8* %[[gep]]
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 208
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 215
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [85 x i8], [85 x i8]* %[[test3_a7]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 85
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 85
 
   ret void
 }
@@ -315,9 +315,9 @@
 ; CHECK-NEXT: %[[test4_a6:.*]] = alloca [40 x i8]
 
   %b = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b, i8* %src, i32 100, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b, i8* %src, i32 100, i1 false)
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [20 x i8], [20 x i8]* %[[test4_a1]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep]], i8* %src, i32 20
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep]], i8* align 1 %src, i32 20
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 20
 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
 ; CHECK-NEXT: %[[test4_r1:.*]] = load i16, i16* %[[bitcast]]
@@ -325,10 +325,10 @@
 ; CHECK-NEXT: %[[test4_r2:.*]] = load i8, i8* %[[gep]]
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 23
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a2]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 30
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [10 x i8], [10 x i8]* %[[test4_a3]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 10
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 10
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 40
 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
 ; CHECK-NEXT: %[[test4_r3:.*]] = load i16, i16* %[[bitcast]]
@@ -336,7 +336,7 @@
 ; CHECK-NEXT: %[[test4_r4:.*]] = load i8, i8* %[[gep]]
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 43
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a4]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 50
 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
 ; CHECK-NEXT: %[[test4_r5:.*]] = load i16, i16* %[[bitcast]]
@@ -344,31 +344,31 @@
 ; CHECK-NEXT: %[[test4_r6:.*]] = load i8, i8* %[[gep]]
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 53
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a5]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 60
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [40 x i8], [40 x i8]* %[[test4_a6]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 40
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 40
 
   %a.src.1 = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 20
   %a.dst.1 = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 40
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.1, i32 10, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.1, i32 10, i1 false)
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a4]], i64 0, i64 0
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a2]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7
 
   ; Clobber a single element of the array, this should be promotable, and be deleted.
   %c = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 42
   store i8 0, i8* %c
 
   %a.src.2 = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 50
-  call void @llvm.memmove.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.2, i32 10, i32 1, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.2, i32 10, i1 false)
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a4]], i64 0, i64 0
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a5]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7
 
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %b, i32 100, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %b, i32 100, i1 false)
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [20 x i8], [20 x i8]* %[[test4_a1]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[gep]], i32 20
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 %[[gep]], i32 20
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 20
 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
 ; CHECK-NEXT: store i16 %[[test4_r1]], i16* %[[bitcast]]
@@ -376,10 +376,10 @@
 ; CHECK-NEXT: store i8 %[[test4_r2]], i8* %[[gep]]
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 23
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a2]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 30
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [10 x i8], [10 x i8]* %[[test4_a3]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 10
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 10
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 40
 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
 ; CHECK-NEXT: store i16 %[[test4_r5]], i16* %[[bitcast]]
@@ -387,7 +387,7 @@
 ; CHECK-NEXT: store i8 %[[test4_r6]], i8* %[[gep]]
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 43
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a4]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 50
 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
 ; CHECK-NEXT: store i16 %[[test4_r5]], i16* %[[bitcast]]
@@ -395,18 +395,18 @@
 ; CHECK-NEXT: store i8 %[[test4_r6]], i8* %[[gep]]
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 53
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a5]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 60
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [40 x i8], [40 x i8]* %[[test4_a6]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 40
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 40
 
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
-declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i32, i1) nounwind
-declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
 
 define i16 @test5() {
 ; CHECK-LABEL: @test5(
@@ -436,7 +436,7 @@
 entry:
   %a = alloca [4 x i8]
   %ptr = getelementptr [4 x i8], [4 x i8]* %a, i32 0, i32 0
-  call void @llvm.memset.p0i8.i32(i8* %ptr, i8 42, i32 4, i32 1, i1 true)
+  call void @llvm.memset.p0i8.i32(i8* %ptr, i8 42, i32 4, i1 true)
   %iptr = bitcast i8* %ptr to i32*
   %val = load i32, i32* %iptr
   ret i32 %val
@@ -456,8 +456,8 @@
 entry:
   %a = alloca [4 x i8]
   %ptr = getelementptr [4 x i8], [4 x i8]* %a, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i32 1, i1 true)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i32 1, i1 true)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i1 true)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i1 true)
   ret void
 }
 
@@ -543,7 +543,7 @@
 entry:
   %a = alloca [8 x i8]
   %ptr = getelementptr [8 x i8], [8 x i8]* %a, i32 0, i32 0
-  call void @llvm.memset.p0i8.i32(i8* %ptr, i8 0, i32 8, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %ptr, i8 0, i32 8, i1 false)
   %s2ptrptr = bitcast i8* %ptr to %S2**
   %s2ptr = load %S2*, %S2** %s2ptrptr
   ret %S2* %s2ptr
@@ -743,10 +743,10 @@
 entry:
   %a = alloca [3 x i8]
   %ptr = getelementptr [3 x i8], [3 x i8]* %a, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i1 false)
   %cast = bitcast i8* %ptr to i24*
   store i24 0, i24* %cast
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i1 false)
   ret void
 }
 
@@ -763,8 +763,8 @@
 entry:
   %a = alloca [3 x i8]
   %ptr = getelementptr [3 x i8], [3 x i8]* %a, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i32 1, i1 true)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i32 1, i1 true)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i1 true)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i1 true)
   ret void
 }
 
@@ -780,7 +780,7 @@
 ; CHECK-NEXT: %[[agep1:.*]] = getelementptr inbounds [34 x i8], [34 x i8]* %[[a]], i64 0, i64 0
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[agep1]], i8* %src, i32 %size,
 ; CHECK-NEXT: %[[agep2:.*]] = getelementptr inbounds [34 x i8], [34 x i8]* %[[a]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %[[agep2]], i8 42, i32 %size,
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 %[[agep2]], i8 42, i32 %size,
 ; CHECK-NEXT: %[[dstcast1:.*]] = bitcast i8* %dst to i32*
 ; CHECK-NEXT: store i32 42, i32* %[[dstcast1]]
 ; CHECK-NEXT: %[[dstgep1:.*]] = getelementptr inbounds i8, i8* %dst, i64 4
@@ -793,14 +793,14 @@
 entry:
   %a = alloca [42 x i8]
   %ptr = getelementptr [42 x i8], [42 x i8]* %a, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 8, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 8, i1 false)
   %ptr2 = getelementptr [42 x i8], [42 x i8]* %a, i32 0, i32 8
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr2, i8* %src, i32 %size, i32 1, i1 false)
-  call void @llvm.memset.p0i8.i32(i8* %ptr2, i8 42, i32 %size, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr2, i8* %src, i32 %size, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %ptr2, i8 42, i32 %size, i1 false)
   %cast = bitcast i8* %ptr to i32*
   store i32 42, i32* %cast
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 8, i32 1, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr2, i32 %size, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr2, i32 %size, i1 false)
   ret void
 }
 
@@ -819,7 +819,7 @@
   %a = alloca { i64, i8* }
   %cast1 = bitcast %opaque* %x to i8*
   %cast2 = bitcast { i64, i8* }* %a to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast2, i8* %cast1, i32 16, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast2, i8* %cast1, i32 16, i1 false)
   %gep = getelementptr inbounds { i64, i8* }, { i64, i8* }* %a, i32 0, i32 0
   %val = load i64, i64* %gep
   ret i32 undef
@@ -853,7 +853,7 @@
   ret i32 %sum2
 }
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
 
 define i8 @test21() {
 ; Test allocations and offsets which border on overflow of the int64_t used
@@ -869,7 +869,7 @@
   store i8 255, i8* %gep0
   %gep1 = getelementptr [2305843009213693951 x i8], [2305843009213693951 x i8]* %a, i64 0, i64 -9223372036854775807
   %gep2 = getelementptr i8, i8* %gep1, i64 -1
-  call void @llvm.memset.p0i8.i64(i8* %gep2, i8 0, i64 18446744073709551615, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %gep2, i8 0, i64 18446744073709551615, i1 false)
   %gep3 = getelementptr i8, i8* %gep1, i64 9223372036854775807
   %gep4 = getelementptr i8, i8* %gep3, i64 9223372036854775807
   %gep5 = getelementptr i8, i8* %gep4, i64 -6917529027641081857
@@ -894,7 +894,7 @@
 
 entry:
   %a = alloca i8
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 1, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 1, i1 false)
   %tmp2 = load i8, i8* %a
   ret void
 }
@@ -914,7 +914,7 @@
 if.then:
   %tmp0 = bitcast %PR13916.struct* %a to i8*
   %tmp1 = bitcast %PR13916.struct* %a to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp0, i8* %tmp1, i32 1, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp0, i8* %tmp1, i32 1, i1 false)
   br label %if.end
 
 if.end:
@@ -992,7 +992,7 @@
   store %PR14034.list* undef, %PR14034.list** %prev
   %cast0 = bitcast %PR14034.struct* undef to i8*
   %cast1 = bitcast %PR14034.struct* %a to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast0, i8* %cast1, i32 12, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast0, i8* %cast1, i32 12, i1 false)
   ret void
 }
 
@@ -1065,15 +1065,15 @@
 
   ; Also use a memset to the middle 32-bits for fun.
   %X.sroa.0.2.raw_idx2.i = getelementptr inbounds i8, i8* %0, i32 2
-  call void @llvm.memset.p0i8.i64(i8* %X.sroa.0.2.raw_idx2.i, i8 0, i64 4, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %X.sroa.0.2.raw_idx2.i, i8 0, i64 4, i1 false)
 
   ; Or a memset of the whole thing.
-  call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 8, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 8, i1 false)
 
   ; Write to the high 32-bits with a memcpy.
   %X.sroa.0.4.raw_idx4.i = getelementptr inbounds i8, i8* %0, i32 4
   %d.raw = bitcast double* %d to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %X.sroa.0.4.raw_idx4.i, i8* %d.raw, i32 4, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %X.sroa.0.4.raw_idx4.i, i8* %d.raw, i32 4, i1 false)
 
   ; Store to the high 32-bits...
   %X.sroa.0.4.cast5.i = bitcast i8* %X.sroa.0.4.raw_idx4.i to i32*
@@ -1146,7 +1146,7 @@
 
   %cast1 = bitcast { [16 x i8 ] }* %gep to i8*
   %cast2 = bitcast { [16 x i8 ] }* %a to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast1, i8* %cast2, i32 16, i32 8, i1 true)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast1, i8* %cast2, i32 16, i1 true)
   ret void
 ; CHECK: ret
 }
@@ -1164,7 +1164,7 @@
 
   %cast1 = bitcast { [16 x i8 ] } addrspace(1)* %gep to i8 addrspace(1)*
   %cast2 = bitcast { [16 x i8 ] }* %a to i8*
-  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %cast1, i8* %cast2, i32 16, i32 8, i1 true)
+  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %cast1, i8* %cast2, i32 16, i1 true)
   ret void
 ; CHECK: ret
 }
@@ -1177,7 +1177,7 @@
   %stack = alloca [1048576 x i32], align 16
 ; CHECK: alloca [1048576 x i32]
   %cast = bitcast [1048576 x i32]* %stack to i8*
-  call void @llvm.memset.p0i8.i64(i8* %cast, i8 -2, i64 4194304, i32 16, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %cast, i8 -2, i64 4194304, i1 false)
   ret void
 ; CHECK: ret
 }
@@ -1206,7 +1206,7 @@
 ; CHECK-NEXT: {{.*}} = load i8, i8* %[[b]], align 8
 
   %a.i8 = bitcast <{ i1 }>* %a to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.i8, i8* %b.i8, i32 1, i32 1, i1 false) nounwind
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.i8, i8* %b.i8, i32 1, i1 false) nounwind
   %bar = load i8, i8* %a.i8, align 1
   %a.i1 = getelementptr inbounds <{ i1 }>, <{ i1 }>* %a, i32 0, i32 0
   %baz = load i1, i1* %a.i1, align 1
@@ -1261,7 +1261,7 @@
 ; CHECK: alloca
 
   %a.i8 = bitcast i32* %a to i8*
-  call void @llvm.memset.p0i8.i32(i8* %a.i8, i8 0, i32 %x, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %a.i8, i8 0, i32 %x, i1 false)
   %v = load i32, i32* %a
   ret i32 %v
 }
@@ -1323,7 +1323,7 @@
 
 end:
   %tmp.raw = bitcast [4 x i8]* %tmp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %data, i8* %tmp.raw, i32 %size, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %data, i8* %tmp.raw, i32 %size, i1 false)
   ret void
 ; CHECK: ret void
 }
@@ -1376,7 +1376,7 @@
 entry:
   %b = alloca i32, align 4
   %b.cast = bitcast i32* %b to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b.cast, i8* %a, i32 4, i32 4, i1 true)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b.cast, i8* %a, i32 4, i1 true)
   %b.gep = getelementptr inbounds i8, i8* %b.cast, i32 2
   load i8, i8* %b.gep, align 2
   unreachable
@@ -1413,7 +1413,7 @@
   %gep0 = getelementptr inbounds i32, i32* %a, i32 0
   %cast1 = bitcast i32* %gep1 to i8*
   %cast0 = bitcast i32* %gep0 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast1, i8* %cast0, i32 4, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast1, i8* %cast0, i32 4, i1 false)
   ret void
 }
 
@@ -1424,7 +1424,7 @@
 entry:
   %f = alloca i8
   %gep = getelementptr i8, i8* %f, i64 -1
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* %gep, i32 1, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* %gep, i32 1, i1 false)
   ret void
 }
 
@@ -1439,8 +1439,8 @@
 entry:
   %a = alloca i64, align 16
   %ptr = bitcast i64* %a to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 8, i32 1, i1 true)
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 8, i32 1, i1 true)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 8, i1 true)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 8, i1 true)
   ret void
 }
 
diff --git a/llvm/test/Transforms/SROA/big-endian.ll b/llvm/test/Transforms/SROA/big-endian.ll
index ea41a20..1b724f3 100644
--- a/llvm/test/Transforms/SROA/big-endian.ll
+++ b/llvm/test/Transforms/SROA/big-endian.ll
@@ -210,7 +210,7 @@
   store i64 34494054408, i64* %a2
   %tmp0 = bitcast { i32, i24 }* %a to i8*
   %tmp1 = bitcast i64* %a2 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp0, i8* %tmp1, i64 8, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp0, i8* %tmp1, i64 8, i1 false)
 ; CHECK: %[[LO_SHR:.*]] = lshr i64 34494054408, 32
 ; CHECK: %[[LO_START:.*]] = trunc i64 %[[LO_SHR]] to i32
 ; CHECK: %[[HI_START:.*]] = trunc i64 34494054408 to i32
@@ -233,4 +233,4 @@
 ; CHECK: ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
diff --git a/llvm/test/Transforms/SROA/slice-order-independence.ll b/llvm/test/Transforms/SROA/slice-order-independence.ll
index 7d57be6d..faf9750 100644
--- a/llvm/test/Transforms/SROA/slice-order-independence.ll
+++ b/llvm/test/Transforms/SROA/slice-order-independence.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -sroa -S | FileCheck %s
 target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
 
 ; Check that the chosen type for a split is independent from the order of
 ; slices even in case of types that are skipped because their width is not a
@@ -12,7 +12,7 @@
   %arg = alloca { i16*, i32 }, align 8
   %2 = bitcast { i16*, i32 }* %0 to i8*
   %3 = bitcast { i16*, i32 }* %arg to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* %2, i32 16, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* %2, i32 16, i1 false)
   %b = getelementptr inbounds { i16*, i32 }, { i16*, i32 }* %arg, i64 0, i32 0
   %pb0 = bitcast i16** %b to i63*
   %b0 = load i63, i63* %pb0
@@ -27,7 +27,7 @@
   %arg = alloca { i16*, i32 }, align 8
   %2 = bitcast { i16*, i32 }* %0 to i8*
   %3 = bitcast { i16*, i32 }* %arg to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* %2, i32 16, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* %2, i32 16, i1 false)
   %b = getelementptr inbounds { i16*, i32 }, { i16*, i32 }* %arg, i64 0, i32 0
   %pb1 = bitcast i16** %b to i8**
   %b1 = load i8*, i8** %pb1
diff --git a/llvm/test/Transforms/SROA/slice-width.ll b/llvm/test/Transforms/SROA/slice-width.ll
index 6b6ab93..68f891f 100644
--- a/llvm/test/Transforms/SROA/slice-width.ll
+++ b/llvm/test/Transforms/SROA/slice-width.ll
@@ -1,8 +1,8 @@
 ; RUN: opt < %s -sroa -S | FileCheck %s
 target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-f80:128-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
 
 define void @no_split_on_non_byte_width(i32) {
 ; This tests that allocas are not split into slices that are not byte width multiple
@@ -39,7 +39,7 @@
 
   ; Copy from a global.
   %x_i8 = bitcast %union.Foo* %x to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x_i8, i8* bitcast (%union.Foo* @foo_copy_source to i8*), i32 32, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x_i8, i8* bitcast (%union.Foo* @foo_copy_source to i8*), i32 32, i1 false)
 
   ; Access a slice of the alloca to trigger SROA.
   %mid_p = getelementptr %union.Foo, %union.Foo* %x, i32 0, i32 1
@@ -58,7 +58,7 @@
 
   ; Set to all ones.
   %x_i8 = bitcast %union.Foo* %x to i8*
-  call void @llvm.memset.p0i8.i32(i8* %x_i8, i8 -1, i32 32, i32 16, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %x_i8, i8 -1, i32 32, i1 false)
 
   ; Access a slice of the alloca to trigger SROA.
   %mid_p = getelementptr %union.Foo, %union.Foo* %x, i32 0, i32 1
@@ -68,7 +68,7 @@
 }
 ; CHECK-LABEL: define void @memset_fp80_padding
 ; CHECK: alloca x86_fp80
-; CHECK: call void @llvm.memset.p0i8.i32(i8* %{{.*}}, i8 -1, i32 16, i32 16, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i32(i8* align 16 %{{.*}}, i8 -1, i32 16, i1 false)
 ; CHECK: store i64 -1, i64* @i64_sink
 
 %S.vec3float = type { float, float, float }
@@ -85,7 +85,7 @@
   %tmp1 = alloca %S.vec3float, align 4
   %0 = bitcast %S.vec3float* %tmp1 to i8*
   %1 = bitcast %S.vec3float* %x to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 12, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 12, i1 false)
 
   ; The following block does nothing; but appears to confuse SROA
   %unused1 = bitcast %S.vec3float* %tmp1 to %U.vec3float*
@@ -98,7 +98,7 @@
   %3 = bitcast %S.vec3float* %tmp1 to i8*
 ; CHECK: alloca
 ; CHECK-NOT: store <4 x float>
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %2, i8* %3, i32 12, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %2, i8* %3, i32 12, i1 false)
 
   %result = call i32 @memcpy_vec3float_helper(%S.vec3float* %tmp2)
   ret i32 %result
diff --git a/llvm/test/Transforms/SROA/vector-promotion.ll b/llvm/test/Transforms/SROA/vector-promotion.ll
index 2d9b26b..0bf2d23 100644
--- a/llvm/test/Transforms/SROA/vector-promotion.ll
+++ b/llvm/test/Transforms/SROA/vector-promotion.ll
@@ -81,12 +81,12 @@
 ; CHECK-NOT: store
 
   %a.y.cast = bitcast <4 x i32>* %a.y to i8*
-  call void @llvm.memset.p0i8.i32(i8* %a.y.cast, i8 0, i32 16, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %a.y.cast, i8 0, i32 16, i1 false)
 ; CHECK-NOT: memset
 
   %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
   %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
-  call void @llvm.memset.p0i8.i32(i8* %a.tmp1.cast, i8 -1, i32 4, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %a.tmp1.cast, i8 -1, i32 4, i1 false)
   %tmp1 = load i32, i32* %a.tmp1
   %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
   %tmp2 = load i32, i32* %a.tmp2
@@ -120,14 +120,14 @@
 
   %a.y.cast = bitcast <4 x i32>* %a.y to i8*
   %z.cast = bitcast <4 x i32>* %z to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.y.cast, i8* %z.cast, i32 16, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.y.cast, i8* %z.cast, i32 16, i1 false)
 ; CHECK-NOT: memcpy
 
   %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
   %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
   %z.tmp1 = getelementptr inbounds <4 x i32>, <4 x i32>* %z, i64 0, i64 2
   %z.tmp1.cast = bitcast i32* %z.tmp1 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.tmp1.cast, i8* %z.tmp1.cast, i32 4, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.tmp1.cast, i8* %z.tmp1.cast, i32 4, i1 false)
   %tmp1 = load i32, i32* %a.tmp1
   %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
   %tmp2 = load i32, i32* %a.tmp2
@@ -150,7 +150,7 @@
 ; CHECK-NEXT: ret
 }
 
-declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i1) nounwind
 
 ; Same as test4 with a different sized address  space pointer source.
 define i32 @test4_as1(<4 x i32> %x, <4 x i32> %y, <4 x i32> addrspace(1)* %z) {
@@ -167,14 +167,14 @@
 
   %a.y.cast = bitcast <4 x i32>* %a.y to i8*
   %z.cast = bitcast <4 x i32> addrspace(1)* %z to i8 addrspace(1)*
-  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %a.y.cast, i8 addrspace(1)* %z.cast, i32 16, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %a.y.cast, i8 addrspace(1)* %z.cast, i32 16, i1 false)
 ; CHECK-NOT: memcpy
 
   %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
   %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
   %z.tmp1 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %z, i16 0, i16 2
   %z.tmp1.cast = bitcast i32 addrspace(1)* %z.tmp1 to i8 addrspace(1)*
-  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %a.tmp1.cast, i8 addrspace(1)* %z.tmp1.cast, i32 4, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %a.tmp1.cast, i8 addrspace(1)* %z.tmp1.cast, i32 4, i1 false)
   %tmp1 = load i32, i32* %a.tmp1
   %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
   %tmp2 = load i32, i32* %a.tmp2
@@ -213,14 +213,14 @@
 
   %a.y.cast = bitcast <4 x i32>* %a.y to i8*
   %a.x.cast = bitcast <4 x i32>* %a.x to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.x.cast, i8* %a.y.cast, i32 16, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.x.cast, i8* %a.y.cast, i32 16, i1 false)
 ; CHECK-NOT: memcpy
 
   %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
   %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
   %z.tmp1 = getelementptr inbounds <4 x i32>, <4 x i32>* %z, i64 0, i64 2
   %z.tmp1.cast = bitcast i32* %z.tmp1 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %z.tmp1.cast, i8* %a.tmp1.cast, i32 4, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %z.tmp1.cast, i8* %a.tmp1.cast, i32 4, i1 false)
   %tmp1 = load i32, i32* %a.tmp1
   %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
   %tmp2 = load i32, i32* %a.tmp2
@@ -242,8 +242,8 @@
 ; CHECK-NEXT: ret
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
 
 define i64 @test6(<4 x i64> %x, <4 x i64> %y, i64 %n) {
 ; CHECK-LABEL: @test6(
@@ -326,7 +326,7 @@
 ; CHECK-NEXT: ret <4 x i32> %[[ret]]
 }
 
-declare void @llvm.memset.p0i32.i32(i32* nocapture, i32, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i32.i32(i32* nocapture, i32, i32, i1) nounwind
 
 define <4 x float> @test_subvec_memset() {
 ; CHECK-LABEL: @test_subvec_memset(
@@ -336,23 +336,23 @@
 
   %a.gep0 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 0
   %a.cast0 = bitcast float* %a.gep0 to i8*
-  call void @llvm.memset.p0i8.i32(i8* %a.cast0, i8 0, i32 8, i32 0, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %a.cast0, i8 0, i32 8, i1 false)
 ; CHECK-NOT: store
 ; CHECK: select <4 x i1> <i1 true, i1 true, i1 false, i1 false>
 
   %a.gep1 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 1
   %a.cast1 = bitcast float* %a.gep1 to i8*
-  call void @llvm.memset.p0i8.i32(i8* %a.cast1, i8 1, i32 8, i32 0, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %a.cast1, i8 1, i32 8, i1 false)
 ; CHECK-NEXT: select <4 x i1> <i1 false, i1 true, i1 true, i1 false>
 
   %a.gep2 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 2
   %a.cast2 = bitcast float* %a.gep2 to i8*
-  call void @llvm.memset.p0i8.i32(i8* %a.cast2, i8 3, i32 8, i32 0, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %a.cast2, i8 3, i32 8, i1 false)
 ; CHECK-NEXT: select <4 x i1> <i1 false, i1 false, i1 true, i1 true>
 
   %a.gep3 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 3
   %a.cast3 = bitcast float* %a.gep3 to i8*
-  call void @llvm.memset.p0i8.i32(i8* %a.cast3, i8 7, i32 4, i32 0, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %a.cast3, i8 7, i32 4, i1 false)
 ; CHECK-NEXT: insertelement <4 x float> 
 
   %ret = load <4 x float>, <4 x float>* %a
@@ -369,7 +369,7 @@
 
   %a.gep0 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 0
   %a.cast0 = bitcast float* %a.gep0 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast0, i8* %x, i32 8, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast0, i8* %x, i32 8, i1 false)
 ; CHECK:      %[[xptr:.*]] = bitcast i8* %x to <2 x float>*
 ; CHECK-NEXT: %[[x:.*]] = load <2 x float>, <2 x float>* %[[xptr]]
 ; CHECK-NEXT: %[[expand_x:.*]] = shufflevector <2 x float> %[[x]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -377,7 +377,7 @@
 
   %a.gep1 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 1
   %a.cast1 = bitcast float* %a.gep1 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast1, i8* %y, i32 8, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast1, i8* %y, i32 8, i1 false)
 ; CHECK-NEXT: %[[yptr:.*]] = bitcast i8* %y to <2 x float>*
 ; CHECK-NEXT: %[[y:.*]] = load <2 x float>, <2 x float>* %[[yptr]]
 ; CHECK-NEXT: %[[expand_y:.*]] = shufflevector <2 x float> %[[y]], <2 x float> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 undef>
@@ -385,7 +385,7 @@
 
   %a.gep2 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 2
   %a.cast2 = bitcast float* %a.gep2 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast2, i8* %z, i32 8, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast2, i8* %z, i32 8, i1 false)
 ; CHECK-NEXT: %[[zptr:.*]] = bitcast i8* %z to <2 x float>*
 ; CHECK-NEXT: %[[z:.*]] = load <2 x float>, <2 x float>* %[[zptr]]
 ; CHECK-NEXT: %[[expand_z:.*]] = shufflevector <2 x float> %[[z]], <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
@@ -393,12 +393,12 @@
 
   %a.gep3 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 3
   %a.cast3 = bitcast float* %a.gep3 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast3, i8* %f, i32 4, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast3, i8* %f, i32 4, i1 false)
 ; CHECK-NEXT: %[[fptr:.*]] = bitcast i8* %f to float*
 ; CHECK-NEXT: %[[f:.*]] = load float, float* %[[fptr]]
 ; CHECK-NEXT: %[[insert_f:.*]] = insertelement <4 x float> 
 
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %a.cast2, i32 8, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %a.cast2, i32 8, i1 false)
 ; CHECK-NEXT: %[[outptr:.*]] = bitcast i8* %out to <2 x float>*
 ; CHECK-NEXT: %[[extract_out:.*]] = shufflevector <4 x float> %[[insert_f]], <4 x float> undef, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT: store <2 x float> %[[extract_out]], <2 x float>* %[[outptr]]
diff --git a/llvm/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll b/llvm/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll
index 966b179..a4b469f 100644
--- a/llvm/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll
+++ b/llvm/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll
@@ -13,12 +13,12 @@
   %tmp = alloca %struct.UnionType, align 8
   %tmp2 = getelementptr %struct.UnionType, %struct.UnionType* %tmp, i32 0, i32 0, i32 0
   %tmp13 = getelementptr %struct.UnionType, %struct.UnionType* %p, i32 0, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp13, i32 8, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp13, i32 8, i1 false)
   %tmp5 = load %struct.UnionType*, %struct.UnionType** %pointerToUnion
   %tmp56 = getelementptr %struct.UnionType, %struct.UnionType* %tmp5, i32 0, i32 0, i32 0
   %tmp7 = getelementptr %struct.UnionType, %struct.UnionType* %tmp, i32 0, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp56, i8* %tmp7, i32 8, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp56, i8* %tmp7, i32 8, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll b/llvm/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
index f597613..2117ce3 100644
--- a/llvm/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
+++ b/llvm/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
@@ -8,10 +8,10 @@
 entry:
   %temp = alloca [200 x i8]
   %temp1 = bitcast [200 x i8]* %temp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %temp1, i8* %src, i32 200, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %temp1, i8* %src, i32 200, i1 false)
   %temp3 = bitcast [200 x i8]* %temp to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %temp3, i32 200, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %temp3, i32 200, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll b/llvm/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
index c0ff25f..e245524 100644
--- a/llvm/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
+++ b/llvm/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
@@ -12,7 +12,7 @@
   %0 = call i32 @a(%struct.x* %s) nounwind
   %r1 = bitcast %struct.x* %r to i8*
   %s2 = bitcast %struct.x* %s to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %r1, i8* %s2, i32 12, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %r1, i8* %s2, i32 12, i1 false)
   %1 = getelementptr %struct.x, %struct.x* %r, i32 0, i32 0, i32 1
   %2 = load i32, i32* %1, align 4
   ret i32 %2
@@ -20,4 +20,4 @@
 
 declare i32 @a(%struct.x*)
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll b/llvm/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
index 16d9108..f713be4 100644
--- a/llvm/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
+++ b/llvm/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
@@ -18,8 +18,8 @@
         ; because the type of the first element in %struct.two is i8.
 	%tmpS = getelementptr %struct.two, %struct.two* %S, i32 0, i32 0, i32 0 
 	%tmpD = bitcast %struct.two* %D to i8*
-        call void @llvm.memmove.p0i8.p0i8.i32(i8* %tmpD, i8* %tmpS, i32 4, i32 1, i1 false)
+        call void @llvm.memmove.p0i8.p0i8.i32(i8* %tmpD, i8* %tmpS, i32 4, i1 false)
         ret void
 }
 
-declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll b/llvm/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
index 025578c..0fff595 100644
--- a/llvm/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
+++ b/llvm/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
@@ -12,8 +12,8 @@
         %0 = getelementptr %struct.st, %struct.st* %s, i32 0, i32 0  ; <i16*> [#uses=1]
         store i16 1, i16* %0, align 4
         %s1 = bitcast %struct.st* %s to i8*  ; <i8*> [#uses=1]
-        call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %s1, i32 2, i32 1, i1 false)
+        call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %p, i8* align 1 %s1, i32 2, i1 false)
         ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll b/llvm/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
index d1cc424..f8f48a1 100644
--- a/llvm/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
+++ b/llvm/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
@@ -51,14 +51,14 @@
   %18 = getelementptr inbounds %union..0anon, %union..0anon* %__rv, i32 0, i32 0
   %19 = bitcast %struct.int16x8x2_t* %0 to i8*
   %20 = bitcast %struct.int16x8x2_t* %18 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %19, i8* %20, i32 32, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %19, i8* %20, i32 32, i1 false)
   %tmp21 = bitcast %struct.int16x8x2_t* %tmp2 to i8*
   %21 = bitcast %struct.int16x8x2_t* %0 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp21, i8* %21, i32 32, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp21, i8* %21, i32 32, i1 false)
   %22 = load %struct.int16x8x2_t*, %struct.int16x8x2_t** %dst_addr, align 4
   %23 = bitcast %struct.int16x8x2_t* %22 to i8*
   %tmp22 = bitcast %struct.int16x8x2_t* %tmp2 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %23, i8* %tmp22, i32 32, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %23, i8* %tmp22, i32 32, i1 false)
   br label %return
 
 return:                                           ; preds = %entry
@@ -76,7 +76,7 @@
 cond.true:                                        ; preds = %entry
   %tmp3 = bitcast %struct._NSRange* %range to i8*
   %tmp4 = bitcast %struct._NSRange* %range to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp3, i8* %tmp4, i32 8, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp3, i8* %tmp4, i32 8, i1 false)
   ret void
 
 cond.false:                                       ; preds = %entry
@@ -87,4 +87,4 @@
 ; CHECK: br i1
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll b/llvm/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll
index b926b02..8472d44 100644
--- a/llvm/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll
+++ b/llvm/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll
@@ -11,8 +11,8 @@
 ; CHECK: ret void
   %1 = alloca %struct.test
   %2 = bitcast %struct.test* %1 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %2, i8* %2, i32 24, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %2, i8* %2, i32 24, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/2011-05-06-CapturedAlloca.ll b/llvm/test/Transforms/ScalarRepl/2011-05-06-CapturedAlloca.ll
index 997d03b..4401307 100644
--- a/llvm/test/Transforms/ScalarRepl/2011-05-06-CapturedAlloca.ll
+++ b/llvm/test/Transforms/ScalarRepl/2011-05-06-CapturedAlloca.ll
@@ -15,7 +15,7 @@
 entry:
   %l_10 = alloca [4 x i32], align 16
   %tmp = bitcast [4 x i32]* %l_10 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* bitcast ([4 x i32]* @func_1.l_10 to i8*), i64 16, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* bitcast ([4 x i32]* @func_1.l_10 to i8*), i64 16, i1 false)
 ; CHECK: call void @llvm.memcpy
   %arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* %l_10, i64 0, i64 0
   %call = call i32* @noop(i32* %arrayidx)
@@ -23,4 +23,4 @@
   ret i32 0
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll b/llvm/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll
index af6d1f3..f641f46 100644
--- a/llvm/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll
+++ b/llvm/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll
@@ -13,7 +13,7 @@
 entry:
   %a = alloca <4 x float>, align 16
   %p = bitcast <4 x float>* %a to i8*
-  call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 3, i32 16, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 3, i1 false)
   %vec = load <4 x float>, <4 x float>* %a, align 8
   %val = extractelement <4 x float> %vec, i32 0
   ret float %val
@@ -27,11 +27,11 @@
 entry:
   %a = alloca { <4 x float> }, align 16
   %p = bitcast { <4 x float> }* %a to i8*
-  call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 16, i32 16, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 16, i1 false)
   %q = bitcast { <4 x float> }* %a to [2 x <2 x float>]*
   %arrayidx = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* %q, i32 0, i32 0
   store <2 x float> undef, <2 x float>* %arrayidx, align 8
   ret void
 }
 
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/2011-10-11-VectorMemset.ll b/llvm/test/Transforms/ScalarRepl/2011-10-11-VectorMemset.ll
index 9e31231..3ea4971 100644
--- a/llvm/test/Transforms/ScalarRepl/2011-10-11-VectorMemset.ll
+++ b/llvm/test/Transforms/ScalarRepl/2011-10-11-VectorMemset.ll
@@ -12,11 +12,11 @@
 
 for_test158.preheader:                            ; preds = %entry
   %a156286305 = bitcast [4 x <4 x float>]* %a156286 to i8*
-  call void @llvm.memset.p0i8.i64(i8* %a156286305, i8 -1, i64 64, i32 16, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %a156286305, i8 -1, i64 64, i1 false)
   unreachable
 
 cif_done:                                         ; preds = %entry
   ret void
 }
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll b/llvm/test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll
index 51d1d14..abe0422 100644
--- a/llvm/test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll
+++ b/llvm/test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll
@@ -17,10 +17,10 @@
   store double 1.000000e+00, double* %b, align 8
   %0 = bitcast %struct.S* %retval to i8*
   %1 = bitcast %struct.S* %ret to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 8, i32 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 8, i1 false)
   %2 = bitcast %struct.S* %retval to double*
   %3 = load double, double* %2, align 1
   ret double %3
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/address-space.ll b/llvm/test/Transforms/ScalarRepl/address-space.ll
index b8b90ef..b34e5e34 100644
--- a/llvm/test/Transforms/ScalarRepl/address-space.ll
+++ b/llvm/test/Transforms/ScalarRepl/address-space.ll
@@ -16,7 +16,7 @@
   %arrayidx = getelementptr inbounds %struct.anon, %struct.anon addrspace(2)* %pPtr, i64 0 ; <%struct.anon addrspace(2)*> [#uses=1]
   %tmp1 = bitcast %struct.anon* %s to i8*         ; <i8*> [#uses=1]
   %tmp2 = bitcast %struct.anon addrspace(2)* %arrayidx to i8 addrspace(2)* ; <i8 addrspace(2)*> [#uses=1]
-  call void @llvm.memcpy.p0i8.p2i8.i64(i8* %tmp1, i8 addrspace(2)* %tmp2, i64 4, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p2i8.i64(i8* %tmp1, i8 addrspace(2)* %tmp2, i64 4, i1 false)
   %tmp3 = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 0 ; <[1 x float]*> [#uses=1]
   %arrayidx4 = getelementptr inbounds [1 x float], [1 x float]* %tmp3, i32 0, i64 0 ; <float*> [#uses=2]
   %tmp5 = load float, float* %arrayidx4                  ; <float> [#uses=1]
@@ -25,11 +25,11 @@
   %arrayidx7 = getelementptr inbounds %struct.anon, %struct.anon addrspace(2)* %pPtr, i64 0 ; <%struct.anon addrspace(2)*> [#uses=1]
   %tmp8 = bitcast %struct.anon addrspace(2)* %arrayidx7 to i8 addrspace(2)* ; <i8 addrspace(2)*> [#uses=1]
   %tmp9 = bitcast %struct.anon* %s to i8*         ; <i8*> [#uses=1]
-  call void @llvm.memcpy.p2i8.p0i8.i64(i8 addrspace(2)* %tmp8, i8* %tmp9, i64 4, i32 4, i1 false)
+  call void @llvm.memcpy.p2i8.p0i8.i64(i8 addrspace(2)* %tmp8, i8* %tmp9, i64 4, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p2i8.i64(i8* nocapture, i8 addrspace(2)* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p2i8.i64(i8* nocapture, i8 addrspace(2)* nocapture, i64, i1) nounwind
 
-declare void @llvm.memcpy.p2i8.p0i8.i64(i8 addrspace(2)* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p2i8.p0i8.i64(i8 addrspace(2)* nocapture, i8* nocapture, i64, i1) nounwind
 
diff --git a/llvm/test/Transforms/ScalarRepl/badarray.ll b/llvm/test/Transforms/ScalarRepl/badarray.ll
index 6f5bc95..ed6ab2e 100644
--- a/llvm/test/Transforms/ScalarRepl/badarray.ll
+++ b/llvm/test/Transforms/ScalarRepl/badarray.ll
@@ -48,10 +48,10 @@
   %callret = call %padded *@test3f() ; <i32> [#uses=2]
   %callretcast = bitcast %padded* %callret to i8*                     ; <i8*> [#uses=1]
   %var_11 = bitcast %padded* %var_1 to i8*        ; <i8*> [#uses=1]
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %callretcast, i8* %var_11, i32 8, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %callretcast, i8* %var_11, i32 8, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
 
 declare %padded* @test3f()
diff --git a/llvm/test/Transforms/ScalarRepl/copy-aggregate.ll b/llvm/test/Transforms/ScalarRepl/copy-aggregate.ll
index 97977db..79273dd 100644
--- a/llvm/test/Transforms/ScalarRepl/copy-aggregate.ll
+++ b/llvm/test/Transforms/ScalarRepl/copy-aggregate.ll
@@ -75,9 +75,9 @@
   %var = alloca [4 x %padded], align 4
   %vari8 = bitcast [4 x %padded]* %var to i8*
   %pi8 = bitcast [4 x %padded]* %p to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %vari8, i8* %pi8, i32 16, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %vari8, i8* %pi8, i32 16, i1 false)
   %qi8 = bitcast [4 x %padded]* %q to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %qi8, i8* %vari8, i32 16, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %qi8, i8* %vari8, i32 16, i1 false)
   ret void
 }
 
@@ -98,10 +98,10 @@
   store %homogeneous %res, %homogeneous* %varcast
   %tmp1 = bitcast %wrapped_array* %arr to i8*
   %tmp2 = bitcast %wrapped_array* %var to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp1, i8* %tmp2, i32 48, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp1, i8* %tmp2, i32 48, i1 false)
   ret void
 }
 
 declare %homogeneous @test6callee(i8* nocapture) nounwind
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/crash.ll b/llvm/test/Transforms/ScalarRepl/crash.ll
index 72e9f09..715f739 100644
--- a/llvm/test/Transforms/ScalarRepl/crash.ll
+++ b/llvm/test/Transforms/ScalarRepl/crash.ll
@@ -157,7 +157,7 @@
 
 cond_next:              ; preds = %cond_true
         %SB19 = bitcast %struct.aal_spanbucket_t* %SB to i8*            ; <i8*> [#uses=1]
-        call void @llvm.memcpy.p0i8.p0i8.i32(i8* %SB19, i8* null, i32 12, i32 0, i1 false)
+        call void @llvm.memcpy.p0i8.p0i8.i32(i8* %SB19, i8* null, i32 12, i1 false)
         br i1 false, label %cond_next34, label %cond_next79
 
 cond_next34:            ; preds = %cond_next
@@ -195,7 +195,7 @@
         %.compoundliteral = alloca %0           
         %tmp228 = getelementptr %0, %0* %.compoundliteral, i32 0, i32 7
         %tmp229 = bitcast [0 x i16]* %tmp228 to i8*             
-        call void @llvm.memset.p0i8.i64(i8* %tmp229, i8 0, i64 0, i32 2, i1 false)
+        call void @llvm.memset.p0i8.i64(i8* %tmp229, i8 0, i64 0, i1 false)
         unreachable
 }
 
@@ -282,5 +282,5 @@
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/inline-vector.ll b/llvm/test/Transforms/ScalarRepl/inline-vector.ll
index 85f3741..422d4dd 100644
--- a/llvm/test/Transforms/ScalarRepl/inline-vector.ll
+++ b/llvm/test/Transforms/ScalarRepl/inline-vector.ll
@@ -16,7 +16,7 @@
   %vector = alloca %struct.Vector4, align 16
   %agg.tmp = alloca %struct.Vector4, align 16
   %tmp = bitcast %struct.Vector4* %vector to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* bitcast (%struct.Vector4* @f.vector to i8*), i32 16, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* bitcast (%struct.Vector4* @f.vector to i8*), i32 16, i1 false)
   br label %for.cond
 
 for.cond:                                         ; preds = %for.body, %entry
@@ -28,7 +28,7 @@
 for.body:                                         ; preds = %for.cond
   %tmp2 = bitcast %struct.Vector4* %agg.tmp to i8*
   %tmp3 = bitcast %struct.Vector4* %vector to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 16, i1 false)
   %0 = bitcast %struct.Vector4* %agg.tmp to [2 x i64]*
   %1 = load [2 x i64], [2 x i64]* %0, align 16
   %tmp2.i = extractvalue [2 x i64] %1, 0
@@ -49,5 +49,5 @@
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
 declare i32 @printf(...)
diff --git a/llvm/test/Transforms/ScalarRepl/memcpy-align.ll b/llvm/test/Transforms/ScalarRepl/memcpy-align.ll
index 29a1bb8..4c9621a 100644
--- a/llvm/test/Transforms/ScalarRepl/memcpy-align.ll
+++ b/llvm/test/Transforms/ScalarRepl/memcpy-align.ll
@@ -13,9 +13,9 @@
 entry:
   %x0 = alloca %struct.anon, align 4              ; <%struct.anon*> [#uses=2]
   %tmp = bitcast %struct.anon* %x0 to i8*         ; <i8*> [#uses=1]
-  call void @llvm.memset.p0i8.i32(i8* %tmp, i8 0, i32 4, i32 4, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %tmp, i8 0, i32 4, i1 false)
   %tmp1 = bitcast %struct.anon* %x0 to i8*        ; <i8*> [#uses=1]
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%0, %0* @c, i32
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds (%0, %0* @c, i32
 0, i32 0, i32 0, i32 0), i8* %tmp1, i32 4, i32 4, i1 false)
   ret void
   
@@ -25,7 +25,7 @@
 ; CHECK: store i8 0, i8*{{.*}}, align 1
 }
 
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
 
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32,
 i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll b/llvm/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll
index e8088c1..dc01d2b 100644
--- a/llvm/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll
+++ b/llvm/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll
@@ -14,10 +14,10 @@
 	%L = alloca %struct.foo, align 2		; <%struct.foo*> [#uses=1]
 	%L2 = getelementptr %struct.foo, %struct.foo* %L, i32 0, i32 0		; <i8*> [#uses=2]
 	%tmp13 = getelementptr %struct.foo, %struct.foo* %P, i32 0, i32 0		; <i8*> [#uses=1]
-	call void @llvm.memcpy.p0i8.p0i8.i32( i8* %L2, i8* %tmp13, i32 2, i32 1, i1 false)
+	call void @llvm.memcpy.p0i8.p0i8.i32( i8* %L2, i8* %tmp13, i32 2, i1 false)
 	%tmp5 = load i8, i8* %L2		; <i8> [#uses=1]
 	%tmp56 = sext i8 %tmp5 to i32		; <i32> [#uses=1]
 	ret i32 %tmp56
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1)
diff --git a/llvm/test/Transforms/ScalarRepl/memset-aggregate.ll b/llvm/test/Transforms/ScalarRepl/memset-aggregate.ll
index 98e2ddd..abdfeab 100644
--- a/llvm/test/Transforms/ScalarRepl/memset-aggregate.ll
+++ b/llvm/test/Transforms/ScalarRepl/memset-aggregate.ll
@@ -14,7 +14,7 @@
 	%L = alloca %struct.foo, align 8		; <%struct.foo*> [#uses=2]
 	%L2 = bitcast %struct.foo* %L to i8*		; <i8*> [#uses=1]
 	%tmp13 = bitcast %struct.foo* %P to i8*		; <i8*> [#uses=1]
-        call void @llvm.memcpy.p0i8.p0i8.i32(i8* %L2, i8* %tmp13, i32 8, i32 4, i1 false)
+        call void @llvm.memcpy.p0i8.p0i8.i32(i8* %L2, i8* %tmp13, i32 8, i1 false)
 	%tmp4 = getelementptr %struct.foo, %struct.foo* %L, i32 0, i32 0		; <i32*> [#uses=1]
 	%tmp5 = load i32, i32* %tmp4		; <i32> [#uses=1]
 	ret i32 %tmp5
@@ -25,7 +25,7 @@
 entry:
 	%L = alloca [4 x %struct.foo], align 16		; <[4 x %struct.foo]*> [#uses=2]
 	%L12 = bitcast [4 x %struct.foo]* %L to i8*		; <i8*> [#uses=1]
-        call void @llvm.memset.p0i8.i32(i8* %L12, i8 0, i32 32, i32 16, i1 false)
+        call void @llvm.memset.p0i8.i32(i8* %L12, i8 0, i32 32, i1 false)
 	%tmp4 = getelementptr [4 x %struct.foo], [4 x %struct.foo]* %L, i32 0, i32 0, i32 0		; <i32*> [#uses=1]
 	%tmp5 = load i32, i32* %tmp4		; <i32> [#uses=1]
 	ret i32 %tmp5
@@ -36,7 +36,7 @@
 entry:
 	%B = alloca %struct.bar, align 16		; <%struct.bar*> [#uses=4]
 	%B1 = bitcast %struct.bar* %B to i8*		; <i8*> [#uses=1]
-	call void @llvm.memset.p0i8.i32(i8* %B1, i8 1, i32 24, i32 16, i1 false)
+	call void @llvm.memset.p0i8.i32(i8* %B1, i8 1, i32 24, i1 false)
 	%tmp3 = getelementptr %struct.bar, %struct.bar* %B, i32 0, i32 0, i32 0		; <i32*> [#uses=1]
 	store i32 1, i32* %tmp3
 	%tmp4 = getelementptr %struct.bar, %struct.bar* %B, i32 0, i32 2		; <double*> [#uses=1]
@@ -56,12 +56,12 @@
 	store i32 1, i32* %0, align 8
 	%1 = getelementptr %struct.f, %struct.f* %A, i32 0, i32 1		; <i32*> [#uses=1]
 	%2 = bitcast i32* %1 to i8*		; <i8*> [#uses=1]
-	call void @llvm.memset.p0i8.i32(i8* %2, i8 2, i32 12, i32 4, i1 false)
+	call void @llvm.memset.p0i8.i32(i8* %2, i8 2, i32 12, i1 false)
 	%3 = getelementptr %struct.f, %struct.f* %A, i32 0, i32 2		; <i32*> [#uses=1]
 	%4 = load i32, i32* %3, align 8		; <i32> [#uses=1]
 	%retval12 = trunc i32 %4 to i16		; <i16> [#uses=1]
 	ret i16 %retval12
 }
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
 
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/negative-memset.ll b/llvm/test/Transforms/ScalarRepl/negative-memset.ll
index 458d961..480a607 100644
--- a/llvm/test/Transforms/ScalarRepl/negative-memset.ll
+++ b/llvm/test/Transforms/ScalarRepl/negative-memset.ll
@@ -11,10 +11,10 @@
   %buff = alloca [1 x i8], align 1
   store i32 0, i32* %retval
   %0 = bitcast [1 x i8]* %buff to i8*
-  call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 1, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 1, i1 false)
   %arraydecay = getelementptr inbounds [1 x i8], [1 x i8]* %buff, i32 0, i32 0
-  call void @llvm.memset.p0i8.i32(i8* %arraydecay, i8 -1, i32 -8, i32 1, i1 false)	; Negative 8!
+  call void @llvm.memset.p0i8.i32(i8* %arraydecay, i8 -1, i32 -8, i1 false)	; Negative 8!
   ret i32 0
 }
 
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
diff --git a/llvm/test/Transforms/ScalarRepl/only-memcpy-uses.ll b/llvm/test/Transforms/ScalarRepl/only-memcpy-uses.ll
index d0ed20b..72d135d 100644
--- a/llvm/test/Transforms/ScalarRepl/only-memcpy-uses.ll
+++ b/llvm/test/Transforms/ScalarRepl/only-memcpy-uses.ll
@@ -14,14 +14,14 @@
   %agg.tmp = alloca %struct.S, align 4
   %tmp = bitcast %struct.S* %t to i8*
   %tmp1 = bitcast %struct.S* %s to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 48, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 48, i1 false)
   %tmp2 = bitcast %struct.S* %agg.tmp to i8*
   %tmp3 = bitcast %struct.S* %t to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* %tmp3, i64 48, i32 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* %tmp3, i64 48, i1 false)
   %call = call i32 (...) @bazz(%struct.S* byval %agg.tmp)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
 
 declare i32 @bazz(...)
diff --git a/llvm/test/Transforms/ScalarRepl/vector_memcpy.ll b/llvm/test/Transforms/ScalarRepl/vector_memcpy.ll
index 031ad5e..04d0659 100644
--- a/llvm/test/Transforms/ScalarRepl/vector_memcpy.ll
+++ b/llvm/test/Transforms/ScalarRepl/vector_memcpy.ll
@@ -9,7 +9,7 @@
 	store <16 x float> %A, <16 x float>* %tmp
 	%s = bitcast <16 x float>* %tmp to i8*
 	%s2 = bitcast <16 x float>* %tmp2 to i8*
-	call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s2, i8* %s, i64 64, i32 16, i1 false)
+	call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s2, i8* %s, i64 64, i1 false)
 	%R = load <16 x float>, <16 x float>* %tmp2
 	ret <16 x float> %R
 }
@@ -18,11 +18,11 @@
 	%tmp2 = alloca <16 x float>, align 16
 
 	%s2 = bitcast <16 x float>* %tmp2 to i8*
-	call void @llvm.memset.p0i8.i64(i8* %s2, i8 0, i64 64, i32 16, i1 false)
+	call void @llvm.memset.p0i8.i64(i8* %s2, i8 0, i64 64, i1 false)
 	
 	%R = load <16 x float>, <16 x float>* %tmp2
 	ret <16 x float> %R
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
diff --git a/llvm/test/Transforms/Util/combine-alias-scope-metadata.ll b/llvm/test/Transforms/Util/combine-alias-scope-metadata.ll
index fd0a3d5..97da9e7 100644
--- a/llvm/test/Transforms/Util/combine-alias-scope-metadata.ll
+++ b/llvm/test/Transforms/Util/combine-alias-scope-metadata.ll
@@ -4,16 +4,16 @@
 define void @test(i8* noalias dereferenceable(1) %in, i8* noalias dereferenceable(1) %out) {
   %tmp = alloca i8
   %tmp2 = alloca i8
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 1, i32 8, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %in, i64 1, i32 8, i1 false), !alias.scope !4
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* %tmp, i64 1, i32 8, i1 false), !alias.scope !5
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %in, i64 1, i1 false), !alias.scope !4
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* %tmp, i64 1, i1 false), !alias.scope !5
 
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %tmp2, i64 1, i32 8, i1 false), !noalias !6
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %tmp2, i64 1, i1 false), !noalias !6
 
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
 
 !0 = !{!0}
 !1 = distinct !{!1, !0, !"in"}