Re-land MachineInstr: Reason locally about some memory objects before going to AA.

Summary:
Reverts r311008 to reinstate r310825 with a fix.

Refine alias checking for pseudo vs value to be conservative.
This fixes the original failure in the buildbot unit test SingleSource/UnitTests/2003-07-09-SignedArgs.

Reviewers: hfinkel, nemanjai, efriedma

Reviewed By: efriedma

Subscribers: bjope, mcrosier, nhaehnle, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D36900

llvm-svn: 312126
diff --git a/llvm/test/CodeGen/AArch64/func-calls.ll b/llvm/test/CodeGen/AArch64/func-calls.ll
index 40ed607..54d38a9 100644
--- a/llvm/test/CodeGen/AArch64/func-calls.ll
+++ b/llvm/test/CodeGen/AArch64/func-calls.ll
@@ -130,11 +130,11 @@
                                    i32 42, i128 %val)
 ; CHECK: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
 ; CHECK: ldp [[I128LO:x[0-9]+]], [[I128HI:x[0-9]+]], [x[[VAR128]]]
-; CHECK: stp [[I128LO]], [[I128HI]], [sp, #16]
+; CHECK: stp [[I128HI]], {{x[0-9]+}}, [sp, #24]
 
 ; CHECK-NONEON: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
 ; CHECK-NONEON: ldp [[I128LO:x[0-9]+]], [[I128HI:x[0-9]+]], [x[[VAR128]]]
-; CHECK-NONEON: stp [[I128LO]], [[I128HI]], [sp, #16]
+; CHECK-NONEON: stp [[I128HI]], {{x[0-9]+}}, [sp, #24]
 ; CHECK: bl check_i128_stackalign
 
   call void @check_i128_regalign(i32 0, i128 42)
diff --git a/llvm/test/CodeGen/AArch64/ldst-opt.ll b/llvm/test/CodeGen/AArch64/ldst-opt.ll
index 975e5ae..9307b6a 100644
--- a/llvm/test/CodeGen/AArch64/ldst-opt.ll
+++ b/llvm/test/CodeGen/AArch64/ldst-opt.ll
@@ -1531,7 +1531,7 @@
 ; CHECK-LABEL: merge_zr64_unalign:
 ; CHECK: // %entry
 ; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
-; STRICTALIGN: strb wzr,
+; STRICTALIGN: strb
 ; STRICTALIGN: strb
 ; STRICTALIGN: strb
 ; STRICTALIGN: strb
diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
index 589b333..740a74a 100644
--- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
@@ -452,15 +452,15 @@
 ; HSA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[0:3], s33 offset:8
 ; HSA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[0:3], s33 offset:12
 
-; HSA: buffer_store_dword [[RELOAD_VAL1]], off, s[0:3], [[SP]] offset:8
 ; HSA: buffer_store_dword [[RELOAD_VAL0]], off, s[0:3], [[SP]] offset:4
+; HSA: buffer_store_dword [[RELOAD_VAL1]], off, s[0:3], [[SP]] offset:8
 
 
 ; MESA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[36:39], s33 offset:8
 ; MESA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[36:39], s33 offset:12
 
-; MESA: buffer_store_dword [[RELOAD_VAL1]], off, s[36:39], [[SP]] offset:8
 ; MESA: buffer_store_dword [[RELOAD_VAL0]], off, s[36:39], [[SP]] offset:4
+; MESA: buffer_store_dword [[RELOAD_VAL1]], off, s[36:39], [[SP]] offset:8
 
 ; GCN-NEXT: s_swappc_b64
 ; GCN-NEXT: s_sub_u32 [[SP]], [[SP]], 0x200
@@ -487,8 +487,8 @@
 ; GCN-DAG: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:12
 
 ; GCN-DAG: s_add_u32 [[SP]], [[SP]], 0x200
-; GCN: buffer_store_dword [[RELOAD_VAL1]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:8
 ; GCN: buffer_store_dword [[RELOAD_VAL0]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:4
+; GCN: buffer_store_dword [[RELOAD_VAL1]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:8
 ; GCN-NEXT: s_swappc_b64
 ; GCN-DAG: buffer_load_ubyte [[LOAD_OUT_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:16
 ; GCN-DAG: buffer_load_dword [[LOAD_OUT_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:20
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
index cb2495d..6d24334 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
@@ -179,8 +179,8 @@
 ; GCN-NOHSA: buffer_load_dwordx2
 ; GCN-HSA: flat_load_dwordx2
 
-; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}}
 ; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}}
+; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}}
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}},
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9]\.[XYZW]}},
 ; EGCM-DAG: VTX_READ_32 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 0, #1
@@ -188,8 +188,6 @@
 ; TODO: This should use DST, but for some there are redundant MOVs
 ; EGCM: LSHR {{[* ]*}}[[ST_LO]].Y, {{T[0-9]\.[XYZW]}}, literal
 ; EGCM: 16
-; EGCM: AND_INT {{[* ]*}}[[ST_LO]].X, {{T[0-9]\.[XYZW]}}, literal
-; EGCM: AND_INT {{[* ]*}}[[ST_HI]].X, [[DST_HI]], literal
 define amdgpu_kernel void @global_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
 entry:
   %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in
@@ -202,8 +200,8 @@
 ; GCN-NOHSA: buffer_load_dwordx2
 ; GCN-HSA: flat_load_dwordx2
 
-; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}}
 ; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}}
+; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}}
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}},
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9]\.[XYZW]}},
 ; EGCM-DAG: VTX_READ_32 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9].[XYZW]}}, 0, #1
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i8.ll b/llvm/test/CodeGen/AMDGPU/load-global-i8.ll
index 3fe6bd26..d7ebd46 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i8.ll
@@ -352,22 +352,22 @@
 
 ; EG: VTX_READ_128 [[DST:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1
 ; TODO: These should use DST, but for some there are redundant MOVs
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
-; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
 ; EG-DAG: 8
 ; EG-DAG: 8
 ; EG-DAG: 8
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
index 7de3f3b..875af80 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
@@ -530,7 +530,6 @@
 ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
 ; EG-DAG: LDS_WRITE
-; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
 define amdgpu_kernel void @local_zextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
   %a = load i16, i16 addrspace(3)* %in
   %ext = zext i16 %a to i64
@@ -572,7 +571,6 @@
 ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
 ; EG-DAG: LDS_WRITE
-; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
 define amdgpu_kernel void @local_zextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
   %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
   %ext = zext <1 x i16> %load to <1 x i64>
diff --git a/llvm/test/CodeGen/ARM/2009-10-27-double-align.ll b/llvm/test/CodeGen/ARM/2009-10-27-double-align.ll
index 39f3292..98a89a0 100644
--- a/llvm/test/CodeGen/ARM/2009-10-27-double-align.ll
+++ b/llvm/test/CodeGen/ARM/2009-10-27-double-align.ll
@@ -1,13 +1,15 @@
-; RUN: llc < %s  -mtriple=arm-linux-gnueabi | FileCheck %s
-; RUN: llc < %s  -mtriple=arm-linux-gnueabi -regalloc=basic | FileCheck %s
+; RUN: llc < %s  -mtriple=arm-linux-gnueabi | FileCheck %s --check-prefix=NOREGALLOC
+; RUN: llc < %s  -mtriple=arm-linux-gnueabi -regalloc=basic | FileCheck %s --check-prefix=REGALLOC
 
 @.str = private constant [1 x i8] zeroinitializer, align 1
 
 define void @g() {
 entry:
 ;CHECK: [sp, #8]
-;CHECK: [sp, #12]
-;CHECK: [sp]
+;NOREGALLOC: [sp, #12]
+;NOREGALLOC: [sp]
+;REGALLOC: [sp]
+;REGALLOC: [sp, #12]
         tail call  void (i8*, ...) @f(i8* getelementptr ([1 x i8], [1 x i8]* @.str, i32 0, i32 0), i32 1, double 2.000000e+00, i32 3, double 4.000000e+00)
         ret void
 }
diff --git a/llvm/test/CodeGen/ARM/illegal-bitfield-loadstore.ll b/llvm/test/CodeGen/ARM/illegal-bitfield-loadstore.ll
index a633c02..6d62fd3 100644
--- a/llvm/test/CodeGen/ARM/illegal-bitfield-loadstore.ll
+++ b/llvm/test/CodeGen/ARM/illegal-bitfield-loadstore.ll
@@ -124,10 +124,10 @@
 ; BE-LABEL: i56_and_or:
 ; BE:       @ BB#0:
 ; BE-NEXT:    mov r1, r0
-; BE-NEXT:    mov r3, #128
-; BE-NEXT:    ldrh r2, [r1, #4]!
-; BE-NEXT:    strb r3, [r1, #2]
 ; BE-NEXT:    ldr r12, [r0]
+; BE-NEXT:    ldrh r2, [r1, #4]!
+; BE-NEXT:    mov r3, #128
+; BE-NEXT:    strb r3, [r1, #2]
 ; BE-NEXT:    lsl r2, r2, #8
 ; BE-NEXT:    orr r2, r2, r12, lsl #24
 ; BE-NEXT:    orr r2, r2, #384
diff --git a/llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll b/llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll
index 5425670..8059e4a 100644
--- a/llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll
+++ b/llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll
@@ -118,17 +118,17 @@
 ; X64:       # BB#0:
 ; X64-NEXT:    movzwl 4(%rdi), %eax
 ; X64-NEXT:    movzbl 6(%rdi), %ecx
-; X64-NEXT:    movl (%rdi), %edx
 ; X64-NEXT:    movb %cl, 6(%rdi)
 ; X64-NEXT:    # kill: %ECX<def> %ECX<kill> %RCX<kill> %RCX<def>
 ; X64-NEXT:    shll $16, %ecx
 ; X64-NEXT:    orl %eax, %ecx
 ; X64-NEXT:    shlq $32, %rcx
-; X64-NEXT:    orq %rcx, %rdx
-; X64-NEXT:    orq $384, %rdx # imm = 0x180
-; X64-NEXT:    movl %edx, (%rdi)
-; X64-NEXT:    shrq $32, %rdx
-; X64-NEXT:    movw %dx, 4(%rdi)
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    orq %rcx, %rax
+; X64-NEXT:    orq $384, %rax # imm = 0x180
+; X64-NEXT:    movl %eax, (%rdi)
+; X64-NEXT:    shrq $32, %rax
+; X64-NEXT:    movw %ax, 4(%rdi)
 ; X64-NEXT:    retq
   %aa = load i56, i56* %a, align 1
   %b = or i56 %aa, 384
@@ -150,19 +150,19 @@
 ; X64:       # BB#0:
 ; X64-NEXT:    movzwl 4(%rdi), %eax
 ; X64-NEXT:    movzbl 6(%rdi), %ecx
-; X64-NEXT:    movl (%rdi), %edx
 ; X64-NEXT:    movb %cl, 6(%rdi)
 ; X64-NEXT:    # kill: %ECX<def> %ECX<kill> %RCX<kill> %RCX<def>
 ; X64-NEXT:    shll $16, %ecx
 ; X64-NEXT:    orl %eax, %ecx
 ; X64-NEXT:    shlq $32, %rcx
-; X64-NEXT:    orq %rcx, %rdx
-; X64-NEXT:    orq $384, %rdx # imm = 0x180
-; X64-NEXT:    movabsq $72057594037927808, %rax # imm = 0xFFFFFFFFFFFF80
-; X64-NEXT:    andq %rdx, %rax
-; X64-NEXT:    movl %eax, (%rdi)
-; X64-NEXT:    shrq $32, %rax
-; X64-NEXT:    movw %ax, 4(%rdi)
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    orq %rcx, %rax
+; X64-NEXT:    orq $384, %rax # imm = 0x180
+; X64-NEXT:    movabsq $72057594037927808, %rcx # imm = 0xFFFFFFFFFFFF80
+; X64-NEXT:    andq %rax, %rcx
+; X64-NEXT:    movl %ecx, (%rdi)
+; X64-NEXT:    shrq $32, %rcx
+; X64-NEXT:    movw %cx, 4(%rdi)
 ; X64-NEXT:    retq
   %b = load i56, i56* %a, align 1
   %c = and i56 %b, -128
@@ -188,20 +188,20 @@
 ; X64-NEXT:    movzbl %sil, %eax
 ; X64-NEXT:    movzwl 4(%rdi), %ecx
 ; X64-NEXT:    movzbl 6(%rdi), %edx
-; X64-NEXT:    movl (%rdi), %esi
 ; X64-NEXT:    movb %dl, 6(%rdi)
 ; X64-NEXT:    # kill: %EDX<def> %EDX<kill> %RDX<kill> %RDX<def>
 ; X64-NEXT:    shll $16, %edx
 ; X64-NEXT:    orl %ecx, %edx
 ; X64-NEXT:    shlq $32, %rdx
-; X64-NEXT:    orq %rdx, %rsi
+; X64-NEXT:    movl (%rdi), %ecx
+; X64-NEXT:    orq %rdx, %rcx
 ; X64-NEXT:    shlq $13, %rax
-; X64-NEXT:    movabsq $72057594037919743, %rcx # imm = 0xFFFFFFFFFFDFFF
-; X64-NEXT:    andq %rsi, %rcx
-; X64-NEXT:    orq %rax, %rcx
-; X64-NEXT:    movl %ecx, (%rdi)
-; X64-NEXT:    shrq $32, %rcx
-; X64-NEXT:    movw %cx, 4(%rdi)
+; X64-NEXT:    movabsq $72057594037919743, %rdx # imm = 0xFFFFFFFFFFDFFF
+; X64-NEXT:    andq %rcx, %rdx
+; X64-NEXT:    orq %rax, %rdx
+; X64-NEXT:    movl %edx, (%rdi)
+; X64-NEXT:    shrq $32, %rdx
+; X64-NEXT:    movw %dx, 4(%rdi)
 ; X64-NEXT:    retq
   %extbit = zext i1 %bit to i56
   %b = load i56, i56* %a, align 1
diff --git a/llvm/test/CodeGen/X86/memcpy-2.ll b/llvm/test/CodeGen/X86/memcpy-2.ll
index 7ef61c9..bd8f6e91 100644
--- a/llvm/test/CodeGen/X86/memcpy-2.ll
+++ b/llvm/test/CodeGen/X86/memcpy-2.ll
@@ -12,23 +12,23 @@
 define void @t1(i32 %argc, i8** %argv) nounwind  {
 entry:
 ; SSE2-Darwin-LABEL: t1:
-; SSE2-Darwin: movsd _.str+16, %xmm0
-; SSE2-Darwin: movsd %xmm0, 16(%esp)
 ; SSE2-Darwin: movaps _.str, %xmm0
 ; SSE2-Darwin: movaps %xmm0
+; SSE2-Darwin: movsd _.str+16, %xmm0
+; SSE2-Darwin: movsd %xmm0, 16(%esp)
 ; SSE2-Darwin: movb $0, 24(%esp)
 
 ; SSE2-Mingw32-LABEL: t1:
-; SSE2-Mingw32: movsd _.str+16, %xmm0
-; SSE2-Mingw32: movsd %xmm0, 16(%esp)
 ; SSE2-Mingw32: movaps _.str, %xmm0
 ; SSE2-Mingw32: movups %xmm0
+; SSE2-Mingw32: movsd _.str+16, %xmm0
+; SSE2-Mingw32: movsd %xmm0, 16(%esp)
 ; SSE2-Mingw32: movb $0, 24(%esp)
 
 ; SSE1-LABEL: t1:
 ; SSE1: movaps _.str, %xmm0
-; SSE1: movaps %xmm0
 ; SSE1: movb $0, 24(%esp)
+; SSE1: movaps %xmm0
 ; SSE1: movl $0, 20(%esp)
 ; SSE1: movl $0, 16(%esp)
 
diff --git a/llvm/test/CodeGen/X86/pr34088.ll b/llvm/test/CodeGen/X86/pr34088.ll
index d3667e3..259c735 100644
--- a/llvm/test/CodeGen/X86/pr34088.ll
+++ b/llvm/test/CodeGen/X86/pr34088.ll
@@ -25,8 +25,8 @@
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    movaps %xmm0, (%esp)
 ; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT:    movaps %xmm1, (%esp)
 ; CHECK-NEXT:    movl $-842150451, {{[0-9]+}}(%esp) # imm = 0xCDCDCDCD
+; CHECK-NEXT:    movaps %xmm1, (%esp)
 ; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    movl %ebp, %esp
 ; CHECK-NEXT:    popl %ebp
diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll
index 34a2d22..8c5c0ed 100644
--- a/llvm/test/CodeGen/X86/select.ll
+++ b/llvm/test/CodeGen/X86/select.ll
@@ -349,8 +349,8 @@
 ; ATOM-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
 ; ATOM-NEXT:    paddd %xmm2, %xmm0
 ; ATOM-NEXT:    paddd %xmm2, %xmm1
-; ATOM-NEXT:    movq %xmm1, 16(%rsi)
 ; ATOM-NEXT:    movdqa %xmm0, (%rsi)
+; ATOM-NEXT:    movq %xmm1, 16(%rsi)
 ; ATOM-NEXT:    retq
 ; ATOM-NEXT:    ## -- End function
 ;
diff --git a/llvm/test/CodeGen/X86/widen_arith-3.ll b/llvm/test/CodeGen/X86/widen_arith-3.ll
index e363a82..d53e828 100644
--- a/llvm/test/CodeGen/X86/widen_arith-3.ll
+++ b/llvm/test/CodeGen/X86/widen_arith-3.ll
@@ -16,9 +16,9 @@
 ; CHECK-NEXT:    movl {{\.LCPI.*}}, %eax
 ; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
 ; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT:    movw $1, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; CHECK-NEXT:    movw $1, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    jmp .LBB0_1
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB0_2: # %forbody