Remove alignment argument from memcpy/memmove/memset in favour of alignment attributes (Step 1)

Summary:
 This is a resurrection of work first proposed and discussed in Aug 2015:
   http://lists.llvm.org/pipermail/llvm-dev/2015-August/089384.html
and initially landed (but then backed out) in Nov 2015:
   http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151109/312083.html

 The @llvm.memcpy/memmove/memset intrinsics currently have an explicit argument
which is required to be a constant integer. It represents the alignment of the
dest (and source), and so must be the minimum of the actual alignment of the
two.

 This change is the first in a series that allows source and dest to each
have their own alignments by using the alignment attribute on their arguments.

 In this change we:
1) Remove the alignment argument.
2) Add alignment attributes to the source & dest arguments. We, temporarily,
   require that the alignments for source & dest be equal.

 For example, code which used to read:
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 100, i32 4, i1 false)
will now read
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 100, i1 false)

 Downstream users may have to update their lit tests that check for
@llvm.memcpy/memmove/memset call/declaration patterns. The following extended sed script
may help with updating the majority of your tests, but it does not catch all possible
patterns so some manual checking and updating will be required.

s~declare void @llvm\.mem(set|cpy|move)\.p([^(]*)\((.*), i32, i1\)~declare void @llvm.mem\1.p\2(\3, i1)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* \3, i8 \4, i8 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* \3, i8 \4, i16 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* \3, i8 \4, i32 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* \3, i8 \4, i64 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* \3, i8 \4, i128 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* align \6 \3, i8 \4, i8 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* align \6 \3, i8 \4, i16 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* align \6 \3, i8 \4, i32 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* align \6 \3, i8 \4, i64 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* align \6 \3, i8 \4, i128 \5, i1 \7)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* \4, i8\5* \6, i8 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* \4, i8\5* \6, i16 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* \4, i8\5* \6, i32 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* \4, i8\5* \6, i64 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* \4, i8\5* \6, i128 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* align \8 \4, i8\5* align \8 \6, i8 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* align \8 \4, i8\5* align \8 \6, i16 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* align \8 \4, i8\5* align \8 \6, i32 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* align \8 \4, i8\5* align \8 \6, i64 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* align \8 \4, i8\5* align \8 \6, i128 \7, i1 \9)~g

 The remaining changes in the series will:
Step 2) Expand the IRBuilder API to allow creation of memcpy/memmove with differing
   source and dest alignments.
Step 3) Update Clang to use the new IRBuilder API.
Step 4) Update Polly to use the new IRBuilder API.
Step 5) Update LLVM passes that create memcpy/memmove calls to use the new IRBuilder API,
        and those that use use MemIntrinsicInst::[get|set]Alignment() to use
        getDestAlignment() and getSourceAlignment() instead.
Step 6) Remove the single-alignment IRBuilder API for memcpy/memmove, and the
        MemIntrinsicInst::[get|set]Alignment() methods.

Reviewers: pete, hfinkel, lhames, reames, bollu

Reviewed By: reames

Subscribers: niosHD, reames, jholewinski, qcolombet, jfb, sanjoy, arsenm, dschuff, dylanmckay, mehdi_amini, sdardis, nemanjai, david2050, nhaehnle, javed.absar, sbc100, jgravelle-google, eraman, aheejin, kbarton, JDevlieghere, asb, rbar, johnrusso, simoncook, jordy.potman.lists, apazos, sabuasal, llvm-commits

Differential Revision: https://reviews.llvm.org/D41675

llvm-svn: 322965
diff --git a/llvm/test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll b/llvm/test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll
index 24bcfae..2964b19 100644
--- a/llvm/test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll
+++ b/llvm/test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll
@@ -4,8 +4,8 @@
 
 define void @t(i8* %ptr) {
 entry:
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %ptr, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i64 0, i64 0), i64 7, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %ptr, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i64 0, i64 0), i64 7, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/memtest1.ll b/llvm/test/CodeGen/Mips/Fast-ISel/memtest1.ll
index aca6aa5..3e30f75 100644
--- a/llvm/test/CodeGen/Mips/Fast-ISel/memtest1.ll
+++ b/llvm/test/CodeGen/Mips/Fast-ISel/memtest1.ll
@@ -10,9 +10,9 @@
 @i = global i32 12, align 4
 @dest = common global [50 x i8] zeroinitializer, align 1
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
-declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1)
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1)
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1)
 
 define void @cpy(i8* %src, i32 %i) {
   ; ALL-LABEL:  cpy:
@@ -28,8 +28,7 @@
   ; ALL:            jalr  $[[T2]]
   ; ALL-NEXT:       nop
   ; ALL-NOT:        {{.*}}$2{{.*}}
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([50 x i8], [50 x i8]* @dest, i32 0, i32 0),
-                                       i8* %src, i32 %i, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([50 x i8], [50 x i8]* @dest, i32 0, i32 0), i8* %src, i32 %i, i1 false)
   ret void
 }
 
@@ -48,8 +47,7 @@
   ; ALL:            jalr  $[[T2]]
   ; ALL-NEXT:       nop
   ; ALL-NOT:        {{.*}}$2{{.*}}
-  call void @llvm.memmove.p0i8.p0i8.i32(i8* getelementptr inbounds ([50 x i8], [50 x i8]* @dest, i32 0, i32 0),
-                                        i8* %src, i32 %i, i32 1, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* getelementptr inbounds ([50 x i8], [50 x i8]* @dest, i32 0, i32 0), i8* %src, i32 %i, i1 false)
   ret void
 }
 
@@ -68,7 +66,6 @@
   ; ALL:            jalr  $[[T2]]
   ; ALL-NEXT:       nop
   ; ALL-NOT:        {{.*}}$2{{.*}}
-  call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([50 x i8], [50 x i8]* @dest, i32 0, i32 0),
-                                  i8 42, i32 %i, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([50 x i8], [50 x i8]* @dest, i32 0, i32 0), i8 42, i32 %i, i1 false)
   ret void
 }
diff --git a/llvm/test/CodeGen/Mips/biggot.ll b/llvm/test/CodeGen/Mips/biggot.ll
index b266b5e..305dcf8 100644
--- a/llvm/test/CodeGen/Mips/biggot.ll
+++ b/llvm/test/CodeGen/Mips/biggot.ll
@@ -48,8 +48,8 @@
 
   %0 = bitcast i32* %d to i8*
   %1 = bitcast i32* %s to i8*
-  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 %n, i32 4, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 %1, i32 %n, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
diff --git a/llvm/test/CodeGen/Mips/cconv/arguments-small-structures-bigger-than-32bits.ll b/llvm/test/CodeGen/Mips/cconv/arguments-small-structures-bigger-than-32bits.ll
index 56f9a64..33d1a4f 100644
--- a/llvm/test/CodeGen/Mips/cconv/arguments-small-structures-bigger-than-32bits.ll
+++ b/llvm/test/CodeGen/Mips/cconv/arguments-small-structures-bigger-than-32bits.ll
@@ -43,7 +43,7 @@
 declare void @fS1(i48 inreg) #1
 declare void @fS2(i40 inreg) #1
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #2
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #2
 
 define void @f1() #0 {
 entry:
@@ -51,7 +51,7 @@
   %s1_1.coerce = alloca { i48 }
   %0 = bitcast { i48 }* %s1_1.coerce to i8*
   %1 = bitcast %struct.S1* %s1_1 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 6, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 6, i1 false)
   %2 = getelementptr { i48 }, { i48 }* %s1_1.coerce, i32 0, i32 0
   %3 = load i48, i48* %2, align 1
   call void @fS1(i48 inreg %3)
@@ -68,7 +68,7 @@
   %s2_1.coerce = alloca { i40 }
   %0 = bitcast { i40 }* %s2_1.coerce to i8*
   %1 = bitcast %struct.S2* %s2_1 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 5, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 5, i1 false)
   %2 = getelementptr { i40 }, { i40 }* %s2_1.coerce, i32 0, i32 0
   %3 = load i40, i40* %2, align 1
   call void @fS2(i40 inreg %3)
diff --git a/llvm/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-byte.ll b/llvm/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-byte.ll
index b41b5b7..5009c9e 100644
--- a/llvm/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-byte.ll
+++ b/llvm/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-byte.ll
@@ -172,7 +172,7 @@
   %0 = load %struct.SmallStruct_3b*, %struct.SmallStruct_3b** %ss.addr, align 8
   %1 = bitcast { i24 }* %.coerce to i8*
   %2 = bitcast %struct.SmallStruct_3b* %0 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 3, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 3, i1 false)
   %3 = getelementptr { i24 }, { i24 }* %.coerce, i32 0, i32 0
   %4 = load i24, i24* %3, align 1
   call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i24 inreg %4)
@@ -181,7 +181,7 @@
  ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 40
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #1
 
 define void @smallStruct_4b(%struct.SmallStruct_4b* %ss) #0 {
 entry:
@@ -205,7 +205,7 @@
   %0 = load %struct.SmallStruct_5b*, %struct.SmallStruct_5b** %ss.addr, align 8
   %1 = bitcast { i40 }* %.coerce to i8*
   %2 = bitcast %struct.SmallStruct_5b* %0 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 5, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 5, i1 false)
   %3 = getelementptr { i40 }, { i40 }* %.coerce, i32 0, i32 0
   %4 = load i40, i40* %3, align 1
   call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i40 inreg %4)
@@ -222,7 +222,7 @@
   %0 = load %struct.SmallStruct_6b*, %struct.SmallStruct_6b** %ss.addr, align 8
   %1 = bitcast { i48 }* %.coerce to i8*
   %2 = bitcast %struct.SmallStruct_6b* %0 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 6, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 6, i1 false)
   %3 = getelementptr { i48 }, { i48 }* %.coerce, i32 0, i32 0
   %4 = load i48, i48* %3, align 1
   call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i48 inreg %4)
@@ -239,7 +239,7 @@
   %0 = load %struct.SmallStruct_7b*, %struct.SmallStruct_7b** %ss.addr, align 8
   %1 = bitcast { i56 }* %.coerce to i8*
   %2 = bitcast %struct.SmallStruct_7b* %0 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 7, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 7, i1 false)
   %3 = getelementptr { i56 }, { i56 }* %.coerce, i32 0, i32 0
   %4 = load i56, i56* %3, align 1
   call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i56 inreg %4)
@@ -272,7 +272,7 @@
   %0 = load %struct.SmallStruct_9b*, %struct.SmallStruct_9b** %ss.addr, align 8
   %1 = bitcast { i64, i8 }* %.coerce to i8*
   %2 = bitcast %struct.SmallStruct_9b* %0 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 9, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 9, i1 false)
   %3 = getelementptr { i64, i8 }, { i64, i8 }* %.coerce, i32 0, i32 0
   %4 = load i64, i64* %3, align 1
   %5 = getelementptr { i64, i8 }, { i64, i8 }* %.coerce, i32 0, i32 1
diff --git a/llvm/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-combinations.ll b/llvm/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-combinations.ll
index 8a20f5e..d3c8f28 100644
--- a/llvm/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-combinations.ll
+++ b/llvm/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-combinations.ll
@@ -107,7 +107,7 @@
   %0 = load %struct.SmallStruct_1b1s1b*, %struct.SmallStruct_1b1s1b** %ss.addr, align 8
   %1 = bitcast { i48 }* %.coerce to i8*
   %2 = bitcast %struct.SmallStruct_1b1s1b* %0 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 6, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 6, i1 false)
   %3 = getelementptr { i48 }, { i48 }* %.coerce, i32 0, i32 0
   %4 = load i48, i48* %3, align 1
   call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i48 inreg %4)
@@ -116,7 +116,7 @@
  ; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 16
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #1
 
 define void @smallStruct_1s1i(%struct.SmallStruct_1s1i* %ss) #0 {
 entry:
@@ -141,7 +141,7 @@
   %0 = load %struct.SmallStruct_3b1s*, %struct.SmallStruct_3b1s** %ss.addr, align 8
   %1 = bitcast { i48 }* %.coerce to i8*
   %2 = bitcast %struct.SmallStruct_3b1s* %0 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 6, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 6, i1 false)
   %3 = getelementptr { i48 }, { i48 }* %.coerce, i32 0, i32 0
   %4 = load i48, i48* %3, align 1
   call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i48 inreg %4)
diff --git a/llvm/test/CodeGen/Mips/cconv/return-struct.ll b/llvm/test/CodeGen/Mips/cconv/return-struct.ll
index 0997cfb..3ccef26 100644
--- a/llvm/test/CodeGen/Mips/cconv/return-struct.ll
+++ b/llvm/test/CodeGen/Mips/cconv/return-struct.ll
@@ -18,7 +18,7 @@
 @struct_6xi32 = global {[6 x i32]} zeroinitializer
 @struct_128xi16 = global {[128 x i16]} zeroinitializer
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
 
 define inreg {i8} @ret_struct_i8() nounwind {
 entry:
@@ -50,7 +50,7 @@
 entry:
         %retval = alloca {i8,i8}, align 1
         %0 = bitcast {i8,i8}* %retval to i8*
-        call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds ({i8,i8}, {i8,i8}* @struct_2byte, i32 0, i32 0), i64 2, i32 1, i1 false)
+        call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds ({i8,i8}, {i8,i8}* @struct_2byte, i32 0, i32 0), i64 2, i1 false)
         %1 = bitcast {i8,i8}* %retval to {i16}*
         %2 = load volatile {i16}, {i16}* %1
         ret {i16} %2
@@ -144,7 +144,7 @@
 define void @ret_struct_128xi16({[128 x i16]}* sret %returnval) {
 entry:
         %0 = bitcast {[128 x i16]}* %returnval to i8*
-        call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ({[128 x i16]}* @struct_128xi16 to i8*), i64 256, i32 2, i1 false)
+        call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 %0, i8* align 2 bitcast ({[128 x i16]}* @struct_128xi16 to i8*), i64 256, i1 false)
         ret void
 }
 
diff --git a/llvm/test/CodeGen/Mips/largeimmprinting.ll b/llvm/test/CodeGen/Mips/largeimmprinting.ll
index f27e114..6460260 100644
--- a/llvm/test/CodeGen/Mips/largeimmprinting.ll
+++ b/llvm/test/CodeGen/Mips/largeimmprinting.ll
@@ -26,11 +26,11 @@
 
   %agg.tmp = alloca %struct.S1, align 1
   %tmp = getelementptr inbounds %struct.S1, %struct.S1* %agg.tmp, i32 0, i32 0, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* getelementptr inbounds (%struct.S1, %struct.S1* @s1, i32 0, i32 0, i32 0), i32 65536, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %tmp, i8* align 1 getelementptr inbounds (%struct.S1, %struct.S1* @s1, i32 0, i32 0, i32 0), i32 65536, i1 false)
   call void @f2(%struct.S1* byval %agg.tmp) nounwind
   ret void
 }
 
 declare void @f2(%struct.S1* byval)
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
diff --git a/llvm/test/CodeGen/Mips/long-calls.ll b/llvm/test/CodeGen/Mips/long-calls.ll
index 8a95e9b..d4652a5 100644
--- a/llvm/test/CodeGen/Mips/long-calls.ll
+++ b/llvm/test/CodeGen/Mips/long-calls.ll
@@ -17,7 +17,7 @@
 ; RUN:   | FileCheck -check-prefix=ON64 %s
 
 declare void @callee()
-declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i32, i1)
+declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1)
 
 @val = internal unnamed_addr global [20 x i32] zeroinitializer, align 4
 
@@ -52,6 +52,6 @@
 ; ON64: jalr    $25
 
   call void @callee()
-  call void @llvm.memset.p0i8.i32(i8* bitcast ([20 x i32]* @val to i8*), i8 0, i32 80, i32 4, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* align 4 bitcast ([20 x i32]* @val to i8*), i8 0, i32 80, i1 false)
   ret  void
 }
diff --git a/llvm/test/CodeGen/Mips/memcpy.ll b/llvm/test/CodeGen/Mips/memcpy.ll
index 5c4ebb2..0feb1fc 100644
--- a/llvm/test/CodeGen/Mips/memcpy.ll
+++ b/llvm/test/CodeGen/Mips/memcpy.ll
@@ -9,11 +9,11 @@
 ; CHECK-NOT: call16(memcpy
 
   %arraydecay = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 1, i32 0
-  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %arraydecay, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str, i32 0, i32 0), i32 31, i32 1, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %arraydecay, i8* align 1 getelementptr inbounds ([31 x i8], [31 x i8]* @.str, i32 0, i32 0), i32 31, i1 false)
   %arrayidx = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 1, i32 40
   store i8 %n, i8* %arrayidx, align 1
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
 
diff --git a/llvm/test/CodeGen/Mips/pr33978.ll b/llvm/test/CodeGen/Mips/pr33978.ll
index 19fa171..c3d6ee5 100644
--- a/llvm/test/CodeGen/Mips/pr33978.ll
+++ b/llvm/test/CodeGen/Mips/pr33978.ll
@@ -11,10 +11,10 @@
   %b = alloca [22 x i8]
   %c = bitcast [22 x i8]* %a to i8*
   %d = getelementptr inbounds [22 x i8], [22 x i8]* %b, i32 0, i32 2
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %c, i8* %d, i32 20, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %c, i8* %d, i32 20, i1 false)
   %e = getelementptr inbounds [22 x i8], [22 x i8]* %b, i32 0, i32 6
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %e, i32 12, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %e, i32 12, i1 false)
   ret void
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
diff --git a/llvm/test/CodeGen/Mips/tailcall/tailcall.ll b/llvm/test/CodeGen/Mips/tailcall/tailcall.ll
index 02556fb..b17f9ef 100644
--- a/llvm/test/CodeGen/Mips/tailcall/tailcall.ll
+++ b/llvm/test/CodeGen/Mips/tailcall/tailcall.ll
@@ -267,7 +267,7 @@
 
 declare i32 @callee12()
 
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
 
 define i32 @caller12(%struct.S* nocapture byval %a0) nounwind {
 entry:
@@ -283,7 +283,7 @@
 ; PIC16: jalrc
 
   %0 = bitcast %struct.S* %a0 to i8*
-  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast (%struct.S* @gs1 to i8*), i8* %0, i32 8, i32 4, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 bitcast (%struct.S* @gs1 to i8*), i8* align 4 %0, i32 8, i1 false)
   %call = tail call i32 @callee12() nounwind
   ret i32 %call
 }