Fix several accidental DOS line endings in source files

Summary:
A number of files in the tree were accidentally checked in with DOS (CRLF) line endings. Convert these to native line endings.

There are also a few files which have DOS line endings on purpose, and I have set the svn:eol-style property to 'CRLF' on those.

Reviewers: joerg, aaron.ballman

Subscribers: aaron.ballman, sanjoy, dsanders, llvm-commits

Differential Revision: http://reviews.llvm.org/D15848

llvm-svn: 256707
diff --git a/llvm/test/CodeGen/AArch64/arm64-vector-ext.ll b/llvm/test/CodeGen/AArch64/arm64-vector-ext.ll
index 994a995..921cf6a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vector-ext.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vector-ext.ll
@@ -1,27 +1,27 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s

-

-;CHECK: @func30

-;CHECK: movi.4h v1, #0x1

-;CHECK: and.8b v0, v0, v1

-;CHECK: ushll.4s  v0, v0, #0

-;CHECK: str  q0, [x0]

-;CHECK: ret

-

-%T0_30 = type <4 x i1>

-%T1_30 = type <4 x i32>

-define void @func30(%T0_30 %v0, %T1_30* %p1) {

-  %r = zext %T0_30 %v0 to %T1_30

-  store %T1_30 %r, %T1_30* %p1

-  ret void

-}

-

-; Extend from v1i1 was crashing things (PR20791). Make sure we do something

-; sensible instead.

-define <1 x i32> @autogen_SD7918() {

-; CHECK-LABEL: autogen_SD7918

-; CHECK: movi d0, #0000000000000000

-; CHECK-NEXT: ret

-  %I29 = insertelement <1 x i1> zeroinitializer, i1 false, i32 0

-  %ZE = zext <1 x i1> %I29 to <1 x i32>

-  ret <1 x i32> %ZE

-}

+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+;CHECK: @func30
+;CHECK: movi.4h v1, #0x1
+;CHECK: and.8b v0, v0, v1
+;CHECK: ushll.4s  v0, v0, #0
+;CHECK: str  q0, [x0]
+;CHECK: ret
+
+%T0_30 = type <4 x i1>
+%T1_30 = type <4 x i32>
+define void @func30(%T0_30 %v0, %T1_30* %p1) {
+  %r = zext %T0_30 %v0 to %T1_30
+  store %T1_30 %r, %T1_30* %p1
+  ret void
+}
+
+; Extend from v1i1 was crashing things (PR20791). Make sure we do something
+; sensible instead.
+define <1 x i32> @autogen_SD7918() {
+; CHECK-LABEL: autogen_SD7918
+; CHECK: movi d0, #0000000000000000
+; CHECK-NEXT: ret
+  %I29 = insertelement <1 x i1> zeroinitializer, i1 false, i32 0
+  %ZE = zext <1 x i1> %I29 to <1 x i32>
+  ret <1 x i32> %ZE
+}
diff --git a/llvm/test/CodeGen/ARM/debugtrap.ll b/llvm/test/CodeGen/ARM/debugtrap.ll
index 9ce7393..3d8cdea 100644
--- a/llvm/test/CodeGen/ARM/debugtrap.ll
+++ b/llvm/test/CodeGen/ARM/debugtrap.ll
@@ -1,17 +1,17 @@
-; This test ensures the @llvm.debugtrap() call is not removed when generating

-; the 'pop' instruction to restore the callee saved registers on ARM.

-

-; RUN: llc < %s -mtriple=armv7 -O0 -filetype=asm | FileCheck %s 

-

-declare void @llvm.debugtrap() nounwind

-declare void @foo() nounwind

-

-define void @test() nounwind {

-entry:

-  ; CHECK: bl foo

-  ; CHECK-NEXT: pop

-  ; CHECK-NEXT: trap

-  call void @foo()

-  call void @llvm.debugtrap()

-  ret void

-}

+; This test ensures the @llvm.debugtrap() call is not removed when generating
+; the 'pop' instruction to restore the callee saved registers on ARM.
+
+; RUN: llc < %s -mtriple=armv7 -O0 -filetype=asm | FileCheck %s 
+
+declare void @llvm.debugtrap() nounwind
+declare void @foo() nounwind
+
+define void @test() nounwind {
+entry:
+  ; CHECK: bl foo
+  ; CHECK-NEXT: pop
+  ; CHECK-NEXT: trap
+  call void @foo()
+  call void @llvm.debugtrap()
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/2011-11-30-or.ll b/llvm/test/CodeGen/X86/2011-11-30-or.ll
index 4260e81..8378a02 100644
--- a/llvm/test/CodeGen/X86/2011-11-30-or.ll
+++ b/llvm/test/CodeGen/X86/2011-11-30-or.ll
@@ -2,13 +2,13 @@
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
 target triple = "x86_64-apple-macosx10.6.6"
-

-; Test that the order of operands is correct

-; CHECK: select_func

-; CHECK: pblendvb        {{LCPI0_[0-9]*}}(%rip), %xmm1

-; CHECK: ret

-

-define void @select_func(<8 x i16> %in) {

+
+; Test that the order of operands is correct
+; CHECK: select_func
+; CHECK: pblendvb        {{LCPI0_[0-9]*}}(%rip), %xmm1
+; CHECK: ret
+
+define void @select_func(<8 x i16> %in) {
 entry:
   %c.lobit.i.i.i = ashr <8 x i16> %in, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
   %and.i56.i.i.i = and <8 x i16> %c.lobit.i.i.i, <i16 25, i16 8, i16 65, i16 25, i16 8, i16 95, i16 15, i16 45>
diff --git a/llvm/test/CodeGen/X86/avx512cd-intrinsics.ll b/llvm/test/CodeGen/X86/avx512cd-intrinsics.ll
index 29f17bb..febd3d6 100644
--- a/llvm/test/CodeGen/X86/avx512cd-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512cd-intrinsics.ll
@@ -1,18 +1,18 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd | FileCheck %s

-

-define <16 x i32> @test_x86_vbroadcastmw_512(i16 %a0) {

-  ; CHECK: test_x86_vbroadcastmw_512

-  ; CHECK: vpbroadcastmw2d %k0, %zmm0

-  %res = call <16 x i32> @llvm.x86.avx512.broadcastmw.512(i16 %a0) ; 

-  ret <16 x i32> %res

-}

-declare <16 x i32> @llvm.x86.avx512.broadcastmw.512(i16)

-

-define <8 x i64> @test_x86_broadcastmb_512(i8 %a0) {

-  ; CHECK: test_x86_broadcastmb_512

-  ; CHECK: vpbroadcastmb2q %k0, %zmm0

-  %res = call <8 x i64> @llvm.x86.avx512.broadcastmb.512(i8 %a0) ; 

-  ret <8 x i64> %res

-}

-declare <8 x i64> @llvm.x86.avx512.broadcastmb.512(i8)

-

+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd | FileCheck %s
+
+define <16 x i32> @test_x86_vbroadcastmw_512(i16 %a0) {
+  ; CHECK: test_x86_vbroadcastmw_512
+  ; CHECK: vpbroadcastmw2d %k0, %zmm0
+  %res = call <16 x i32> @llvm.x86.avx512.broadcastmw.512(i16 %a0) ; 
+  ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.broadcastmw.512(i16)
+
+define <8 x i64> @test_x86_broadcastmb_512(i8 %a0) {
+  ; CHECK: test_x86_broadcastmb_512
+  ; CHECK: vpbroadcastmb2q %k0, %zmm0
+  %res = call <8 x i64> @llvm.x86.avx512.broadcastmb.512(i8 %a0) ; 
+  ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.broadcastmb.512(i8)
+
diff --git a/llvm/test/CodeGen/X86/fpcmp-soft-fp.ll b/llvm/test/CodeGen/X86/fpcmp-soft-fp.ll
index 58d5701..dac468e 100644
--- a/llvm/test/CodeGen/X86/fpcmp-soft-fp.ll
+++ b/llvm/test/CodeGen/X86/fpcmp-soft-fp.ll
@@ -1,127 +1,127 @@
-; RUN: llc < %s -march=x86 -mcpu=pentium -mtriple=x86-linux-gnu -float-abi=soft | FileCheck %s 

-

-define i1 @test1(double %d) #0 {

-entry:

-  %cmp = fcmp ule double %d, 0.000000e+00

-  ret i1 %cmp

-}

-; CHECK-LABEL: test1:

-; CHECK: calll __gtdf2

-; CHECK: setle

-; CHECK: retl

- 

-define i1 @test2(double %d) #0 {

-entry:

-  %cmp = fcmp ult double %d, 0.000000e+00

-  ret i1 %cmp

-}

-; CHECK-LABEL: test2:

-; CHECK: calll __gedf2

-; CHECK: sets

-; CHECK: retl

-

-define i1 @test3(double %d) #0 {

-entry:

-  %cmp = fcmp ugt double %d, 0.000000e+00

-  ret i1 %cmp

-}

-; CHECK-LABEL: test3:

-; CHECK: calll __ledf2

-; CHECK: setg

-; CHECK: retl

-

-define i1 @test4(double %d) #0 {

-entry:

-  %cmp = fcmp uge double %d, 0.000000e+00

-  ret i1 %cmp

-}

-; CHECK-LABEL: test4:

-; CHECK: calll __ltdf2

-; CHECK: setns

-; CHECK: retl

-

-define i1 @test5(double %d) #0 {

-entry:

-  %cmp = fcmp ole double %d, 0.000000e+00

-  ret i1 %cmp

-}

-; CHECK-LABEL: test5:  

-; CHECK: calll __ledf2

-; CHECK: setle

-; CHECK: retl

-

-define i1 @test6(double %d) #0 {

-entry:

-  %cmp = fcmp olt double %d, 0.000000e+00

-  ret i1 %cmp

-}

-; CHECK-LABEL: test6:

-; CHECK: calll __ltdf2

-; CHECK: sets

-; CHECK: retl

-

-define i1 @test7(double %d) #0 {

-entry:

-  %cmp = fcmp ogt double %d, 0.000000e+00

-  ret i1 %cmp

-}

-; CHECK-LABEL: test7:

-; CHECK: calll __gtdf2

-; CHECK: setg

-; CHECK: retl

-

-define i1 @test8(double %d) #0 {

-entry:

-  %cmp = fcmp oge double %d, 0.000000e+00

-  ret i1 %cmp

-}

-; CHECK-LABEL: test8:

-; CHECK: calll __gedf2

-; CHECK: setns

-; CHECK: retl

-

-define i1 @test9(double %d) #0 {

-entry:

-  %cmp = fcmp oeq double %d, 0.000000e+00

-  ret i1 %cmp

-}

-; CHECK-LABEL: test9:

-; CHECK: calll __eqdf2

-; CHECK: sete

-; CHECK: retl

-

-define i1 @test10(double %d) #0 {

-entry:

-  %cmp = fcmp ueq double %d, 0.000000e+00

-  ret i1 %cmp

-}

-; CHECK-LABEL: test10:

-; CHECK: calll __eqdf2

-; CHECK: sete

-; CHECK: calll __unorddf2

-; CHECK: setne

-; CHECK: retl

-

-define i1 @test11(double %d) #0 {

-entry:

-  %cmp = fcmp one double %d, 0.000000e+00

-  ret i1 %cmp

-}

-; CHECK-LABEL: test11:

-; CHECK: calll __gtdf2

-; CHECK: setg

-; CHECK: calll __ltdf2

-; CHECK: sets

-; CHECK: retl

-

-define i1 @test12(double %d) #0 {

-entry:

-  %cmp = fcmp une double %d, 0.000000e+00

-  ret i1 %cmp

-}

-; CHECK-LABEL: test12:

-; CHECK: calll __nedf2

-; CHECK: setne

-; CHECK: retl

-

-attributes #0 = { "use-soft-float"="true" }

+; RUN: llc < %s -march=x86 -mcpu=pentium -mtriple=x86-linux-gnu -float-abi=soft | FileCheck %s 
+
+define i1 @test1(double %d) #0 {
+entry:
+  %cmp = fcmp ule double %d, 0.000000e+00
+  ret i1 %cmp
+}
+; CHECK-LABEL: test1:
+; CHECK: calll __gtdf2
+; CHECK: setle
+; CHECK: retl
+ 
+define i1 @test2(double %d) #0 {
+entry:
+  %cmp = fcmp ult double %d, 0.000000e+00
+  ret i1 %cmp
+}
+; CHECK-LABEL: test2:
+; CHECK: calll __gedf2
+; CHECK: sets
+; CHECK: retl
+
+define i1 @test3(double %d) #0 {
+entry:
+  %cmp = fcmp ugt double %d, 0.000000e+00
+  ret i1 %cmp
+}
+; CHECK-LABEL: test3:
+; CHECK: calll __ledf2
+; CHECK: setg
+; CHECK: retl
+
+define i1 @test4(double %d) #0 {
+entry:
+  %cmp = fcmp uge double %d, 0.000000e+00
+  ret i1 %cmp
+}
+; CHECK-LABEL: test4:
+; CHECK: calll __ltdf2
+; CHECK: setns
+; CHECK: retl
+
+define i1 @test5(double %d) #0 {
+entry:
+  %cmp = fcmp ole double %d, 0.000000e+00
+  ret i1 %cmp
+}
+; CHECK-LABEL: test5:  
+; CHECK: calll __ledf2
+; CHECK: setle
+; CHECK: retl
+
+define i1 @test6(double %d) #0 {
+entry:
+  %cmp = fcmp olt double %d, 0.000000e+00
+  ret i1 %cmp
+}
+; CHECK-LABEL: test6:
+; CHECK: calll __ltdf2
+; CHECK: sets
+; CHECK: retl
+
+define i1 @test7(double %d) #0 {
+entry:
+  %cmp = fcmp ogt double %d, 0.000000e+00
+  ret i1 %cmp
+}
+; CHECK-LABEL: test7:
+; CHECK: calll __gtdf2
+; CHECK: setg
+; CHECK: retl
+
+define i1 @test8(double %d) #0 {
+entry:
+  %cmp = fcmp oge double %d, 0.000000e+00
+  ret i1 %cmp
+}
+; CHECK-LABEL: test8:
+; CHECK: calll __gedf2
+; CHECK: setns
+; CHECK: retl
+
+define i1 @test9(double %d) #0 {
+entry:
+  %cmp = fcmp oeq double %d, 0.000000e+00
+  ret i1 %cmp
+}
+; CHECK-LABEL: test9:
+; CHECK: calll __eqdf2
+; CHECK: sete
+; CHECK: retl
+
+define i1 @test10(double %d) #0 {
+entry:
+  %cmp = fcmp ueq double %d, 0.000000e+00
+  ret i1 %cmp
+}
+; CHECK-LABEL: test10:
+; CHECK: calll __eqdf2
+; CHECK: sete
+; CHECK: calll __unorddf2
+; CHECK: setne
+; CHECK: retl
+
+define i1 @test11(double %d) #0 {
+entry:
+  %cmp = fcmp one double %d, 0.000000e+00
+  ret i1 %cmp
+}
+; CHECK-LABEL: test11:
+; CHECK: calll __gtdf2
+; CHECK: setg
+; CHECK: calll __ltdf2
+; CHECK: sets
+; CHECK: retl
+
+define i1 @test12(double %d) #0 {
+entry:
+  %cmp = fcmp une double %d, 0.000000e+00
+  ret i1 %cmp
+}
+; CHECK-LABEL: test12:
+; CHECK: calll __nedf2
+; CHECK: setne
+; CHECK: retl
+
+attributes #0 = { "use-soft-float"="true" }
diff --git a/llvm/test/CodeGen/X86/pku.ll b/llvm/test/CodeGen/X86/pku.ll
index cbb83ae..8568cf4 100644
--- a/llvm/test/CodeGen/X86/pku.ll
+++ b/llvm/test/CodeGen/X86/pku.ll
@@ -1,25 +1,25 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s

-declare i32 @llvm.x86.rdpkru()

-declare void @llvm.x86.wrpkru(i32)

-

-define void @test_x86_wrpkru(i32 %src) {

-; CHECK-LABEL: test_x86_wrpkru:

-; CHECK:       ## BB#0:

-; CHECK-NEXT:    xorl    %ecx, %ecx

-; CHECK-NEXT:    xorl    %edx, %edx

-; CHECK-NEXT:    movl    %edi, %eax

-; CHECK-NEXT:    wrpkru

-; CHECK-NEXT:    retq

-  call void @llvm.x86.wrpkru(i32 %src) 

-  ret void

-}

-

-define i32 @test_x86_rdpkru() {

-; CHECK-LABEL: test_x86_rdpkru:

-; CHECK:      ## BB#0:

-; CHECK-NEXT: xorl    %ecx, %ecx

-; CHECK-NEXT: rdpkru

-; CHECK-NEXT: retq

-  %res = call i32 @llvm.x86.rdpkru() 

-  ret i32 %res 

-}

+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s
+declare i32 @llvm.x86.rdpkru()
+declare void @llvm.x86.wrpkru(i32)
+
+define void @test_x86_wrpkru(i32 %src) {
+; CHECK-LABEL: test_x86_wrpkru:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    xorl    %ecx, %ecx
+; CHECK-NEXT:    xorl    %edx, %edx
+; CHECK-NEXT:    movl    %edi, %eax
+; CHECK-NEXT:    wrpkru
+; CHECK-NEXT:    retq
+  call void @llvm.x86.wrpkru(i32 %src) 
+  ret void
+}
+
+define i32 @test_x86_rdpkru() {
+; CHECK-LABEL: test_x86_rdpkru:
+; CHECK:      ## BB#0:
+; CHECK-NEXT: xorl    %ecx, %ecx
+; CHECK-NEXT: rdpkru
+; CHECK-NEXT: retq
+  %res = call i32 @llvm.x86.rdpkru() 
+  ret i32 %res 
+}
diff --git a/llvm/test/CodeGen/X86/pr21792.ll b/llvm/test/CodeGen/X86/pr21792.ll
index 59866c0..f6dca60 100644
--- a/llvm/test/CodeGen/X86/pr21792.ll
+++ b/llvm/test/CodeGen/X86/pr21792.ll
@@ -1,41 +1,41 @@
-; RUN: llc -mtriple=x86_64-linux -mcpu=corei7 < %s | FileCheck %s

-; This fixes a missing cases in the MI scheduler's constrainLocalCopy exposed by

-; PR21792

-

-@stuff = external constant [256 x double], align 16

-

-define void @func(<4 x float> %vx) {

-entry:

-  %tmp2 = bitcast <4 x float> %vx to <2 x i64>

-  %and.i = and <2 x i64> %tmp2, <i64 8727373547504, i64 8727373547504>

-  %tmp3 = bitcast <2 x i64> %and.i to <4 x i32>

-  %index.sroa.0.0.vec.extract = extractelement <4 x i32> %tmp3, i32 0

-  %idx.ext = sext i32 %index.sroa.0.0.vec.extract to i64

-  %add.ptr = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext

-  %tmp4 = bitcast i8* %add.ptr to double*

-  %index.sroa.0.4.vec.extract = extractelement <4 x i32> %tmp3, i32 1

-  %idx.ext5 = sext i32 %index.sroa.0.4.vec.extract to i64

-  %add.ptr6 = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext5

-  %tmp5 = bitcast i8* %add.ptr6 to double*

-  %index.sroa.0.8.vec.extract = extractelement <4 x i32> %tmp3, i32 2

-  %idx.ext14 = sext i32 %index.sroa.0.8.vec.extract to i64

-  %add.ptr15 = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext14

-  %tmp6 = bitcast i8* %add.ptr15 to double*

-  %index.sroa.0.12.vec.extract = extractelement <4 x i32> %tmp3, i32 3

-  %idx.ext19 = sext i32 %index.sroa.0.12.vec.extract to i64

-  %add.ptr20 = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext19

-  %tmp7 = bitcast i8* %add.ptr20 to double*

-  %add.ptr46 = getelementptr inbounds i8, i8* bitcast (double* getelementptr inbounds ([256 x double], [256 x double]* @stuff, i64 0, i64 1) to i8*), i64 %idx.ext

-  %tmp16 = bitcast i8* %add.ptr46 to double*

-  %add.ptr51 = getelementptr inbounds i8, i8* bitcast (double* getelementptr inbounds ([256 x double], [256 x double]* @stuff, i64 0, i64 1) to i8*), i64 %idx.ext5

-  %tmp17 = bitcast i8* %add.ptr51 to double*

-  call void @toto(double* %tmp4, double* %tmp5, double* %tmp6, double* %tmp7, double* %tmp16, double* %tmp17)

-  ret void

-; CHECK-LABEL: func:

-; CHECK: pextrq  $1, %xmm0,

-; CHECK-NEXT: movd    %xmm0, %r[[AX:..]]

-; CHECK-NEXT: movslq  %e[[AX]],

-; CHECK-NEXT: sarq    $32, %r[[AX]]

-}

-

-declare void @toto(double*, double*, double*, double*, double*, double*)

+; RUN: llc -mtriple=x86_64-linux -mcpu=corei7 < %s | FileCheck %s
+; This fixes a missing cases in the MI scheduler's constrainLocalCopy exposed by
+; PR21792
+
+@stuff = external constant [256 x double], align 16
+
+define void @func(<4 x float> %vx) {
+entry:
+  %tmp2 = bitcast <4 x float> %vx to <2 x i64>
+  %and.i = and <2 x i64> %tmp2, <i64 8727373547504, i64 8727373547504>
+  %tmp3 = bitcast <2 x i64> %and.i to <4 x i32>
+  %index.sroa.0.0.vec.extract = extractelement <4 x i32> %tmp3, i32 0
+  %idx.ext = sext i32 %index.sroa.0.0.vec.extract to i64
+  %add.ptr = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext
+  %tmp4 = bitcast i8* %add.ptr to double*
+  %index.sroa.0.4.vec.extract = extractelement <4 x i32> %tmp3, i32 1
+  %idx.ext5 = sext i32 %index.sroa.0.4.vec.extract to i64
+  %add.ptr6 = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext5
+  %tmp5 = bitcast i8* %add.ptr6 to double*
+  %index.sroa.0.8.vec.extract = extractelement <4 x i32> %tmp3, i32 2
+  %idx.ext14 = sext i32 %index.sroa.0.8.vec.extract to i64
+  %add.ptr15 = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext14
+  %tmp6 = bitcast i8* %add.ptr15 to double*
+  %index.sroa.0.12.vec.extract = extractelement <4 x i32> %tmp3, i32 3
+  %idx.ext19 = sext i32 %index.sroa.0.12.vec.extract to i64
+  %add.ptr20 = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext19
+  %tmp7 = bitcast i8* %add.ptr20 to double*
+  %add.ptr46 = getelementptr inbounds i8, i8* bitcast (double* getelementptr inbounds ([256 x double], [256 x double]* @stuff, i64 0, i64 1) to i8*), i64 %idx.ext
+  %tmp16 = bitcast i8* %add.ptr46 to double*
+  %add.ptr51 = getelementptr inbounds i8, i8* bitcast (double* getelementptr inbounds ([256 x double], [256 x double]* @stuff, i64 0, i64 1) to i8*), i64 %idx.ext5
+  %tmp17 = bitcast i8* %add.ptr51 to double*
+  call void @toto(double* %tmp4, double* %tmp5, double* %tmp6, double* %tmp7, double* %tmp16, double* %tmp17)
+  ret void
+; CHECK-LABEL: func:
+; CHECK: pextrq  $1, %xmm0,
+; CHECK-NEXT: movd    %xmm0, %r[[AX:..]]
+; CHECK-NEXT: movslq  %e[[AX]],
+; CHECK-NEXT: sarq    $32, %r[[AX]]
+}
+
+declare void @toto(double*, double*, double*, double*, double*, double*)
diff --git a/llvm/test/CodeGen/X86/pr24139.ll b/llvm/test/CodeGen/X86/pr24139.ll
index fbe55ab..ec56345 100644
--- a/llvm/test/CodeGen/X86/pr24139.ll
+++ b/llvm/test/CodeGen/X86/pr24139.ll
@@ -1,148 +1,148 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s

-

-; Check that we do not get excessive spilling from splitting of constant live ranges.

-

-; CHECK-LABEL: PR24139:

-; CHECK: # 16-byte Spill

-; CHECK-NOT: # 16-byte Spill

-; CHECK: retq

-

-define <2 x double> @PR24139(<2 x double> %arg, <2 x double> %arg1, <2 x double> %arg2) {

-  %tmp = bitcast <2 x double> %arg to <4 x float>

-  %tmp3 = fmul <4 x float> %tmp, <float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000>

-  %tmp4 = bitcast <2 x double> %arg to <4 x i32>

-  %tmp5 = and <4 x i32> %tmp4, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>

-  %tmp6 = or <4 x i32> %tmp5, <i32 1056964608, i32 1056964608, i32 1056964608, i32 1056964608>

-  %tmp7 = bitcast <4 x i32> %tmp6 to <4 x float>

-  %tmp8 = fadd <4 x float> %tmp3, %tmp7

-  %tmp9 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %tmp8) #2

-  %tmp10 = bitcast <4 x i32> %tmp9 to <2 x i64>

-  %tmp11 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp9) #2

-  %tmp12 = fmul <4 x float> %tmp11, <float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000>

-  %tmp13 = fsub <4 x float> %tmp, %tmp12

-  %tmp14 = fmul <4 x float> %tmp11, <float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000>

-  %tmp15 = fsub <4 x float> %tmp13, %tmp14

-  %tmp16 = fmul <4 x float> %tmp15, %tmp15

-  %tmp17 = fmul <4 x float> %tmp15, %tmp16

-  %tmp18 = fmul <4 x float> %tmp16, <float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000>

-  %tmp19 = fadd <4 x float> %tmp18, <float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000>

-  %tmp20 = fmul <4 x float> %tmp16, <float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000>

-  %tmp21 = fadd <4 x float> %tmp20, <float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000>

-  %tmp22 = fmul <4 x float> %tmp16, %tmp19

-  %tmp23 = fadd <4 x float> %tmp22, <float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000>

-  %tmp24 = fmul <4 x float> %tmp16, %tmp21

-  %tmp25 = fadd <4 x float> %tmp24, <float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000>

-  %tmp26 = fmul <4 x float> %tmp16, %tmp23

-  %tmp27 = fadd <4 x float> %tmp26, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>

-  %tmp28 = fmul <4 x float> %tmp17, %tmp25

-  %tmp29 = fadd <4 x float> %tmp15, %tmp28

-  %tmp30 = and <2 x i64> %tmp10, <i64 4294967297, i64 4294967297>

-  %tmp31 = bitcast <2 x i64> %tmp30 to <4 x i32>

-  %tmp32 = icmp eq <4 x i32> %tmp31, zeroinitializer

-  %tmp33 = sext <4 x i1> %tmp32 to <4 x i32>

-  %tmp34 = bitcast <4 x i32> %tmp33 to <4 x float>

-  %tmp35 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp27, <4 x float> %tmp29, <4 x float> %tmp34) #2

-  %tmp36 = and <2 x i64> %tmp10, <i64 8589934594, i64 8589934594>

-  %tmp37 = bitcast <2 x i64> %tmp36 to <4 x i32>

-  %tmp38 = icmp eq <4 x i32> %tmp37, zeroinitializer

-  %tmp39 = sext <4 x i1> %tmp38 to <4 x i32>

-  %tmp40 = bitcast <4 x float> %tmp35 to <4 x i32>

-  %tmp41 = xor <4 x i32> %tmp40, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>

-  %tmp42 = bitcast <4 x i32> %tmp41 to <4 x float>

-  %tmp43 = bitcast <4 x i32> %tmp39 to <4 x float>

-  %tmp44 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp42, <4 x float> %tmp35, <4 x float> %tmp43) #2

-  %tmp45 = bitcast <2 x double> %arg1 to <4 x float>

-  %tmp46 = fmul <4 x float> %tmp45, <float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000>

-  %tmp47 = bitcast <2 x double> %arg1 to <4 x i32>

-  %tmp48 = and <4 x i32> %tmp47, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>

-  %tmp49 = or <4 x i32> %tmp48, <i32 1056964608, i32 1056964608, i32 1056964608, i32 1056964608>

-  %tmp50 = bitcast <4 x i32> %tmp49 to <4 x float>

-  %tmp51 = fadd <4 x float> %tmp46, %tmp50

-  %tmp52 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %tmp51) #2

-  %tmp53 = bitcast <4 x i32> %tmp52 to <2 x i64>

-  %tmp54 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp52) #2

-  %tmp55 = fmul <4 x float> %tmp54, <float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000>

-  %tmp56 = fsub <4 x float> %tmp45, %tmp55

-  %tmp57 = fmul <4 x float> %tmp54, <float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000>

-  %tmp58 = fsub <4 x float> %tmp56, %tmp57

-  %tmp59 = fmul <4 x float> %tmp58, %tmp58

-  %tmp60 = fmul <4 x float> %tmp58, %tmp59

-  %tmp61 = fmul <4 x float> %tmp59, <float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000>

-  %tmp62 = fadd <4 x float> %tmp61, <float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000>

-  %tmp63 = fmul <4 x float> %tmp59, <float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000>

-  %tmp64 = fadd <4 x float> %tmp63, <float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000>

-  %tmp65 = fmul <4 x float> %tmp59, %tmp62

-  %tmp66 = fadd <4 x float> %tmp65, <float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000>

-  %tmp67 = fmul <4 x float> %tmp59, %tmp64

-  %tmp68 = fadd <4 x float> %tmp67, <float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000>

-  %tmp69 = fmul <4 x float> %tmp59, %tmp66

-  %tmp70 = fadd <4 x float> %tmp69, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>

-  %tmp71 = fmul <4 x float> %tmp60, %tmp68

-  %tmp72 = fadd <4 x float> %tmp58, %tmp71

-  %tmp73 = and <2 x i64> %tmp53, <i64 4294967297, i64 4294967297>

-  %tmp74 = bitcast <2 x i64> %tmp73 to <4 x i32>

-  %tmp75 = icmp eq <4 x i32> %tmp74, zeroinitializer

-  %tmp76 = sext <4 x i1> %tmp75 to <4 x i32>

-  %tmp77 = bitcast <4 x i32> %tmp76 to <4 x float>

-  %tmp78 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp70, <4 x float> %tmp72, <4 x float> %tmp77) #2

-  %tmp79 = and <2 x i64> %tmp53, <i64 8589934594, i64 8589934594>

-  %tmp80 = bitcast <2 x i64> %tmp79 to <4 x i32>

-  %tmp81 = icmp eq <4 x i32> %tmp80, zeroinitializer

-  %tmp82 = sext <4 x i1> %tmp81 to <4 x i32>

-  %tmp83 = bitcast <4 x float> %tmp78 to <4 x i32>

-  %tmp84 = xor <4 x i32> %tmp83, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>

-  %tmp85 = bitcast <4 x i32> %tmp84 to <4 x float>

-  %tmp86 = bitcast <4 x i32> %tmp82 to <4 x float>

-  %tmp87 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp85, <4 x float> %tmp78, <4 x float> %tmp86) #2

-  %tmp88 = fadd <4 x float> %tmp44, %tmp87

-  %tmp89 = bitcast <2 x double> %arg2 to <4 x float>

-  %tmp90 = fmul <4 x float> %tmp89, <float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000>

-  %tmp91 = bitcast <2 x double> %arg2 to <4 x i32>

-  %tmp92 = and <4 x i32> %tmp91, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>

-  %tmp93 = or <4 x i32> %tmp92, <i32 1056964608, i32 1056964608, i32 1056964608, i32 1056964608>

-  %tmp94 = bitcast <4 x i32> %tmp93 to <4 x float>

-  %tmp95 = fadd <4 x float> %tmp90, %tmp94

-  %tmp96 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %tmp95) #2

-  %tmp97 = bitcast <4 x i32> %tmp96 to <2 x i64>

-  %tmp98 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp96) #2

-  %tmp99 = fmul <4 x float> %tmp98, <float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000>

-  %tmp100 = fsub <4 x float> %tmp89, %tmp99

-  %tmp101 = fmul <4 x float> %tmp98, <float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000>

-  %tmp102 = fsub <4 x float> %tmp100, %tmp101

-  %tmp103 = fmul <4 x float> %tmp102, %tmp102

-  %tmp104 = fmul <4 x float> %tmp102, %tmp103

-  %tmp105 = fmul <4 x float> %tmp103, <float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000>

-  %tmp106 = fadd <4 x float> %tmp105, <float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000>

-  %tmp107 = fmul <4 x float> %tmp103, <float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000>

-  %tmp108 = fadd <4 x float> %tmp107, <float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000>

-  %tmp109 = fmul <4 x float> %tmp103, %tmp106

-  %tmp110 = fadd <4 x float> %tmp109, <float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000>

-  %tmp111 = fmul <4 x float> %tmp103, %tmp108

-  %tmp112 = fadd <4 x float> %tmp111, <float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000>

-  %tmp113 = fmul <4 x float> %tmp103, %tmp110

-  %tmp114 = fadd <4 x float> %tmp113, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>

-  %tmp115 = fmul <4 x float> %tmp104, %tmp112

-  %tmp116 = fadd <4 x float> %tmp102, %tmp115

-  %tmp117 = and <2 x i64> %tmp97, <i64 4294967297, i64 4294967297>

-  %tmp118 = bitcast <2 x i64> %tmp117 to <4 x i32>

-  %tmp119 = icmp eq <4 x i32> %tmp118, zeroinitializer

-  %tmp120 = sext <4 x i1> %tmp119 to <4 x i32>

-  %tmp121 = bitcast <4 x i32> %tmp120 to <4 x float>

-  %tmp122 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp114, <4 x float> %tmp116, <4 x float> %tmp121) #2

-  %tmp123 = and <2 x i64> %tmp97, <i64 8589934594, i64 8589934594>

-  %tmp124 = bitcast <2 x i64> %tmp123 to <4 x i32>

-  %tmp125 = icmp eq <4 x i32> %tmp124, zeroinitializer

-  %tmp126 = sext <4 x i1> %tmp125 to <4 x i32>

-  %tmp127 = bitcast <4 x float> %tmp122 to <4 x i32>

-  %tmp128 = xor <4 x i32> %tmp127, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>

-  %tmp129 = bitcast <4 x i32> %tmp128 to <4 x float>

-  %tmp130 = bitcast <4 x i32> %tmp126 to <4 x float>

-  %tmp131 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp129, <4 x float> %tmp122, <4 x float> %tmp130) #2

-  %tmp132 = fadd <4 x float> %tmp88, %tmp131

-  %tmp133 = bitcast <4 x float> %tmp132 to <2 x double>

-  ret <2 x double> %tmp133

-}

-

-declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>)

-declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>)

-declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)

+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
+
+; Check that we do not get excessive spilling from splitting of constant live ranges.
+
+; CHECK-LABEL: PR24139:
+; CHECK: # 16-byte Spill
+; CHECK-NOT: # 16-byte Spill
+; CHECK: retq
+
+define <2 x double> @PR24139(<2 x double> %arg, <2 x double> %arg1, <2 x double> %arg2) {
+  %tmp = bitcast <2 x double> %arg to <4 x float>
+  %tmp3 = fmul <4 x float> %tmp, <float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000>
+  %tmp4 = bitcast <2 x double> %arg to <4 x i32>
+  %tmp5 = and <4 x i32> %tmp4, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+  %tmp6 = or <4 x i32> %tmp5, <i32 1056964608, i32 1056964608, i32 1056964608, i32 1056964608>
+  %tmp7 = bitcast <4 x i32> %tmp6 to <4 x float>
+  %tmp8 = fadd <4 x float> %tmp3, %tmp7
+  %tmp9 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %tmp8) #2
+  %tmp10 = bitcast <4 x i32> %tmp9 to <2 x i64>
+  %tmp11 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp9) #2
+  %tmp12 = fmul <4 x float> %tmp11, <float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000>
+  %tmp13 = fsub <4 x float> %tmp, %tmp12
+  %tmp14 = fmul <4 x float> %tmp11, <float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000>
+  %tmp15 = fsub <4 x float> %tmp13, %tmp14
+  %tmp16 = fmul <4 x float> %tmp15, %tmp15
+  %tmp17 = fmul <4 x float> %tmp15, %tmp16
+  %tmp18 = fmul <4 x float> %tmp16, <float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000>
+  %tmp19 = fadd <4 x float> %tmp18, <float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000>
+  %tmp20 = fmul <4 x float> %tmp16, <float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000>
+  %tmp21 = fadd <4 x float> %tmp20, <float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000>
+  %tmp22 = fmul <4 x float> %tmp16, %tmp19
+  %tmp23 = fadd <4 x float> %tmp22, <float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000>
+  %tmp24 = fmul <4 x float> %tmp16, %tmp21
+  %tmp25 = fadd <4 x float> %tmp24, <float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000>
+  %tmp26 = fmul <4 x float> %tmp16, %tmp23
+  %tmp27 = fadd <4 x float> %tmp26, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+  %tmp28 = fmul <4 x float> %tmp17, %tmp25
+  %tmp29 = fadd <4 x float> %tmp15, %tmp28
+  %tmp30 = and <2 x i64> %tmp10, <i64 4294967297, i64 4294967297>
+  %tmp31 = bitcast <2 x i64> %tmp30 to <4 x i32>
+  %tmp32 = icmp eq <4 x i32> %tmp31, zeroinitializer
+  %tmp33 = sext <4 x i1> %tmp32 to <4 x i32>
+  %tmp34 = bitcast <4 x i32> %tmp33 to <4 x float>
+  %tmp35 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp27, <4 x float> %tmp29, <4 x float> %tmp34) #2
+  %tmp36 = and <2 x i64> %tmp10, <i64 8589934594, i64 8589934594>
+  %tmp37 = bitcast <2 x i64> %tmp36 to <4 x i32>
+  %tmp38 = icmp eq <4 x i32> %tmp37, zeroinitializer
+  %tmp39 = sext <4 x i1> %tmp38 to <4 x i32>
+  %tmp40 = bitcast <4 x float> %tmp35 to <4 x i32>
+  %tmp41 = xor <4 x i32> %tmp40, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+  %tmp42 = bitcast <4 x i32> %tmp41 to <4 x float>
+  %tmp43 = bitcast <4 x i32> %tmp39 to <4 x float>
+  %tmp44 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp42, <4 x float> %tmp35, <4 x float> %tmp43) #2
+  %tmp45 = bitcast <2 x double> %arg1 to <4 x float>
+  %tmp46 = fmul <4 x float> %tmp45, <float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000>
+  %tmp47 = bitcast <2 x double> %arg1 to <4 x i32>
+  %tmp48 = and <4 x i32> %tmp47, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+  %tmp49 = or <4 x i32> %tmp48, <i32 1056964608, i32 1056964608, i32 1056964608, i32 1056964608>
+  %tmp50 = bitcast <4 x i32> %tmp49 to <4 x float>
+  %tmp51 = fadd <4 x float> %tmp46, %tmp50
+  %tmp52 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %tmp51) #2
+  %tmp53 = bitcast <4 x i32> %tmp52 to <2 x i64>
+  %tmp54 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp52) #2
+  %tmp55 = fmul <4 x float> %tmp54, <float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000>
+  %tmp56 = fsub <4 x float> %tmp45, %tmp55
+  %tmp57 = fmul <4 x float> %tmp54, <float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000>
+  %tmp58 = fsub <4 x float> %tmp56, %tmp57
+  %tmp59 = fmul <4 x float> %tmp58, %tmp58
+  %tmp60 = fmul <4 x float> %tmp58, %tmp59
+  %tmp61 = fmul <4 x float> %tmp59, <float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000>
+  %tmp62 = fadd <4 x float> %tmp61, <float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000>
+  %tmp63 = fmul <4 x float> %tmp59, <float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000>
+  %tmp64 = fadd <4 x float> %tmp63, <float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000>
+  %tmp65 = fmul <4 x float> %tmp59, %tmp62
+  %tmp66 = fadd <4 x float> %tmp65, <float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000>
+  %tmp67 = fmul <4 x float> %tmp59, %tmp64
+  %tmp68 = fadd <4 x float> %tmp67, <float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000>
+  %tmp69 = fmul <4 x float> %tmp59, %tmp66
+  %tmp70 = fadd <4 x float> %tmp69, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+  %tmp71 = fmul <4 x float> %tmp60, %tmp68
+  %tmp72 = fadd <4 x float> %tmp58, %tmp71
+  %tmp73 = and <2 x i64> %tmp53, <i64 4294967297, i64 4294967297>
+  %tmp74 = bitcast <2 x i64> %tmp73 to <4 x i32>
+  %tmp75 = icmp eq <4 x i32> %tmp74, zeroinitializer
+  %tmp76 = sext <4 x i1> %tmp75 to <4 x i32>
+  %tmp77 = bitcast <4 x i32> %tmp76 to <4 x float>
+  %tmp78 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp70, <4 x float> %tmp72, <4 x float> %tmp77) #2
+  %tmp79 = and <2 x i64> %tmp53, <i64 8589934594, i64 8589934594>
+  %tmp80 = bitcast <2 x i64> %tmp79 to <4 x i32>
+  %tmp81 = icmp eq <4 x i32> %tmp80, zeroinitializer
+  %tmp82 = sext <4 x i1> %tmp81 to <4 x i32>
+  %tmp83 = bitcast <4 x float> %tmp78 to <4 x i32>
+  %tmp84 = xor <4 x i32> %tmp83, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+  %tmp85 = bitcast <4 x i32> %tmp84 to <4 x float>
+  %tmp86 = bitcast <4 x i32> %tmp82 to <4 x float>
+  %tmp87 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp85, <4 x float> %tmp78, <4 x float> %tmp86) #2
+  %tmp88 = fadd <4 x float> %tmp44, %tmp87
+  %tmp89 = bitcast <2 x double> %arg2 to <4 x float>
+  %tmp90 = fmul <4 x float> %tmp89, <float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000>
+  %tmp91 = bitcast <2 x double> %arg2 to <4 x i32>
+  %tmp92 = and <4 x i32> %tmp91, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+  %tmp93 = or <4 x i32> %tmp92, <i32 1056964608, i32 1056964608, i32 1056964608, i32 1056964608>
+  %tmp94 = bitcast <4 x i32> %tmp93 to <4 x float>
+  %tmp95 = fadd <4 x float> %tmp90, %tmp94
+  %tmp96 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %tmp95) #2
+  %tmp97 = bitcast <4 x i32> %tmp96 to <2 x i64>
+  %tmp98 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp96) #2
+  %tmp99 = fmul <4 x float> %tmp98, <float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000>
+  %tmp100 = fsub <4 x float> %tmp89, %tmp99
+  %tmp101 = fmul <4 x float> %tmp98, <float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000>
+  %tmp102 = fsub <4 x float> %tmp100, %tmp101
+  %tmp103 = fmul <4 x float> %tmp102, %tmp102
+  %tmp104 = fmul <4 x float> %tmp102, %tmp103
+  %tmp105 = fmul <4 x float> %tmp103, <float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000>
+  %tmp106 = fadd <4 x float> %tmp105, <float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000>
+  %tmp107 = fmul <4 x float> %tmp103, <float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000>
+  %tmp108 = fadd <4 x float> %tmp107, <float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000>
+  %tmp109 = fmul <4 x float> %tmp103, %tmp106
+  %tmp110 = fadd <4 x float> %tmp109, <float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000>
+  %tmp111 = fmul <4 x float> %tmp103, %tmp108
+  %tmp112 = fadd <4 x float> %tmp111, <float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000>
+  %tmp113 = fmul <4 x float> %tmp103, %tmp110
+  %tmp114 = fadd <4 x float> %tmp113, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+  %tmp115 = fmul <4 x float> %tmp104, %tmp112
+  %tmp116 = fadd <4 x float> %tmp102, %tmp115
+  %tmp117 = and <2 x i64> %tmp97, <i64 4294967297, i64 4294967297>
+  %tmp118 = bitcast <2 x i64> %tmp117 to <4 x i32>
+  %tmp119 = icmp eq <4 x i32> %tmp118, zeroinitializer
+  %tmp120 = sext <4 x i1> %tmp119 to <4 x i32>
+  %tmp121 = bitcast <4 x i32> %tmp120 to <4 x float>
+  %tmp122 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp114, <4 x float> %tmp116, <4 x float> %tmp121) #2
+  %tmp123 = and <2 x i64> %tmp97, <i64 8589934594, i64 8589934594>
+  %tmp124 = bitcast <2 x i64> %tmp123 to <4 x i32>
+  %tmp125 = icmp eq <4 x i32> %tmp124, zeroinitializer
+  %tmp126 = sext <4 x i1> %tmp125 to <4 x i32>
+  %tmp127 = bitcast <4 x float> %tmp122 to <4 x i32>
+  %tmp128 = xor <4 x i32> %tmp127, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+  %tmp129 = bitcast <4 x i32> %tmp128 to <4 x float>
+  %tmp130 = bitcast <4 x i32> %tmp126 to <4 x float>
+  %tmp131 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp129, <4 x float> %tmp122, <4 x float> %tmp130) #2
+  %tmp132 = fadd <4 x float> %tmp88, %tmp131
+  %tmp133 = bitcast <4 x float> %tmp132 to <2 x double>
+  ret <2 x double> %tmp133
+}
+
+declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>)
+declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>)
+declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)
diff --git a/llvm/test/CodeGen/X86/statepoint-far-call.ll b/llvm/test/CodeGen/X86/statepoint-far-call.ll
index 2ebf38c..dc49061 100644
--- a/llvm/test/CodeGen/X86/statepoint-far-call.ll
+++ b/llvm/test/CodeGen/X86/statepoint-far-call.ll
@@ -1,22 +1,22 @@
-; RUN: llc < %s | FileCheck %s

-; Test to check that Statepoints with X64 far-immediate targets

-; are lowered correctly to an indirect call via a scratch register.

-

-target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"

-target triple = "x86_64-pc-win64"

-

-define void @test_far_call() gc "statepoint-example" {

-; CHECK-LABEL: test_far_call

-; CHECK: pushq %rax

-; CHECK: movabsq $140727162896504, %rax 

-; CHECK: callq *%rax

-; CHECK: popq %rax

-; CHECK: retq

-

-entry:

-  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* inttoptr (i64 140727162896504 to void ()*), i32 0, i32 0, i32 0, i32 0)  

-  ret void

-}

-

-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)

-

+; RUN: llc < %s | FileCheck %s
+; Test to check that Statepoints with X64 far-immediate targets
+; are lowered correctly to an indirect call via a scratch register.
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-win64"
+
+define void @test_far_call() gc "statepoint-example" {
+; CHECK-LABEL: test_far_call
+; CHECK: pushq %rax
+; CHECK: movabsq $140727162896504, %rax 
+; CHECK: callq *%rax
+; CHECK: popq %rax
+; CHECK: retq
+
+entry:
+  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* inttoptr (i64 140727162896504 to void ()*), i32 0, i32 0, i32 0, i32 0)  
+  ret void
+}
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+
diff --git a/llvm/test/CodeGen/X86/system-intrinsics-64-xsave.ll b/llvm/test/CodeGen/X86/system-intrinsics-64-xsave.ll
index feec951..62cd625 100644
--- a/llvm/test/CodeGen/X86/system-intrinsics-64-xsave.ll
+++ b/llvm/test/CodeGen/X86/system-intrinsics-64-xsave.ll
@@ -1,41 +1,41 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xsave | FileCheck %s

-

-define void @test_xsave(i8* %ptr, i32 %hi, i32 %lo) {

-; CHECK-LABEL: test_xsave

-; CHECK: movl  %edx, %eax

-; CHECK: movl  %esi, %edx

-; CHECK: xsave (%rdi)

-  call void @llvm.x86.xsave(i8* %ptr, i32 %hi, i32 %lo)

-  ret void;

-}

-declare void @llvm.x86.xsave(i8*, i32, i32)

-

-define void @test_xsave64(i8* %ptr, i32 %hi, i32 %lo) {

-; CHECK-LABEL: test_xsave64

-; CHECK: movl    %edx, %eax

-; CHECK: movl    %esi, %edx

-; CHECK: xsave64 (%rdi)

-  call void @llvm.x86.xsave64(i8* %ptr, i32 %hi, i32 %lo)

-  ret void;

-}

-declare void @llvm.x86.xsave64(i8*, i32, i32)

-

-define void @test_xrstor(i8* %ptr, i32 %hi, i32 %lo) {

-; CHECK-LABEL: test_xrstor

-; CHECK: movl   %edx, %eax

-; CHECK: movl   %esi, %edx

-; CHECK: xrstor (%rdi)

-  call void @llvm.x86.xrstor(i8* %ptr, i32 %hi, i32 %lo)

-  ret void;

-}

-declare void @llvm.x86.xrstor(i8*, i32, i32)

-

-define void @test_xrstor64(i8* %ptr, i32 %hi, i32 %lo) {

-; CHECK-LABEL: test_xrstor64

-; CHECK: movl     %edx, %eax

-; CHECK: movl     %esi, %edx

-; CHECK: xrstor64 (%rdi)

-  call void @llvm.x86.xrstor64(i8* %ptr, i32 %hi, i32 %lo)

-  ret void;

-}

-declare void @llvm.x86.xrstor64(i8*, i32, i32)

+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xsave | FileCheck %s
+
+define void @test_xsave(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsave
+; CHECK: movl  %edx, %eax
+; CHECK: movl  %esi, %edx
+; CHECK: xsave (%rdi)
+  call void @llvm.x86.xsave(i8* %ptr, i32 %hi, i32 %lo)
+  ret void;
+}
+declare void @llvm.x86.xsave(i8*, i32, i32)
+
+define void @test_xsave64(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsave64
+; CHECK: movl    %edx, %eax
+; CHECK: movl    %esi, %edx
+; CHECK: xsave64 (%rdi)
+  call void @llvm.x86.xsave64(i8* %ptr, i32 %hi, i32 %lo)
+  ret void;
+}
+declare void @llvm.x86.xsave64(i8*, i32, i32)
+
+define void @test_xrstor(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xrstor
+; CHECK: movl   %edx, %eax
+; CHECK: movl   %esi, %edx
+; CHECK: xrstor (%rdi)
+  call void @llvm.x86.xrstor(i8* %ptr, i32 %hi, i32 %lo)
+  ret void;
+}
+declare void @llvm.x86.xrstor(i8*, i32, i32)
+
+define void @test_xrstor64(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xrstor64
+; CHECK: movl     %edx, %eax
+; CHECK: movl     %esi, %edx
+; CHECK: xrstor64 (%rdi)
+  call void @llvm.x86.xrstor64(i8* %ptr, i32 %hi, i32 %lo)
+  ret void;
+}
+declare void @llvm.x86.xrstor64(i8*, i32, i32)
diff --git a/llvm/test/CodeGen/X86/system-intrinsics-64-xsavec.ll b/llvm/test/CodeGen/X86/system-intrinsics-64-xsavec.ll
index 0680348..c1c5cbd 100644
--- a/llvm/test/CodeGen/X86/system-intrinsics-64-xsavec.ll
+++ b/llvm/test/CodeGen/X86/system-intrinsics-64-xsavec.ll
@@ -1,21 +1,21 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xsave,+xsavec | FileCheck %s

-

-define void @test_xsavec(i8* %ptr, i32 %hi, i32 %lo) {

-; CHECK-LABEL: test_xsavec

-; CHECK: movl   %edx, %eax

-; CHECK: movl   %esi, %edx

-; CHECK: xsavec (%rdi)

-  call void @llvm.x86.xsavec(i8* %ptr, i32 %hi, i32 %lo)

-  ret void;

-}

-declare void @llvm.x86.xsavec(i8*, i32, i32)

-

-define void @test_xsavec64(i8* %ptr, i32 %hi, i32 %lo) {

-; CHECK-LABEL: test_xsavec64

-; CHECK: movl     %edx, %eax

-; CHECK: movl     %esi, %edx

-; CHECK: xsavec64 (%rdi)

-  call void @llvm.x86.xsavec64(i8* %ptr, i32 %hi, i32 %lo)

-  ret void;

-}

-declare void @llvm.x86.xsavec64(i8*, i32, i32)

+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xsave,+xsavec | FileCheck %s
+
+define void @test_xsavec(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsavec
+; CHECK: movl   %edx, %eax
+; CHECK: movl   %esi, %edx
+; CHECK: xsavec (%rdi)
+  call void @llvm.x86.xsavec(i8* %ptr, i32 %hi, i32 %lo)
+  ret void;
+}
+declare void @llvm.x86.xsavec(i8*, i32, i32)
+
+define void @test_xsavec64(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsavec64
+; CHECK: movl     %edx, %eax
+; CHECK: movl     %esi, %edx
+; CHECK: xsavec64 (%rdi)
+  call void @llvm.x86.xsavec64(i8* %ptr, i32 %hi, i32 %lo)
+  ret void;
+}
+declare void @llvm.x86.xsavec64(i8*, i32, i32)
diff --git a/llvm/test/CodeGen/X86/system-intrinsics-64-xsaveopt.ll b/llvm/test/CodeGen/X86/system-intrinsics-64-xsaveopt.ll
index ee0a536..49603d4 100644
--- a/llvm/test/CodeGen/X86/system-intrinsics-64-xsaveopt.ll
+++ b/llvm/test/CodeGen/X86/system-intrinsics-64-xsaveopt.ll
@@ -1,21 +1,21 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xsaveopt | FileCheck %s

-

-define void @test_xsaveopt(i8* %ptr, i32 %hi, i32 %lo) {

-; CHECK-LABEL: test_xsaveopt

-; CHECK: movl     %edx, %eax

-; CHECK: movl     %esi, %edx

-; CHECK: xsaveopt (%rdi)

-  call void @llvm.x86.xsaveopt(i8* %ptr, i32 %hi, i32 %lo)

-  ret void;

-}

-declare void @llvm.x86.xsaveopt(i8*, i32, i32)

-

-define void @test_xsaveopt64(i8* %ptr, i32 %hi, i32 %lo) {

-; CHECK-LABEL: test_xsaveopt64

-; CHECK: movl       %edx, %eax

-; CHECK: movl       %esi, %edx

-; CHECK: xsaveopt64 (%rdi)

-  call void @llvm.x86.xsaveopt64(i8* %ptr, i32 %hi, i32 %lo)

-  ret void;

-}

-declare void @llvm.x86.xsaveopt64(i8*, i32, i32)

+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xsaveopt | FileCheck %s
+
+define void @test_xsaveopt(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsaveopt
+; CHECK: movl     %edx, %eax
+; CHECK: movl     %esi, %edx
+; CHECK: xsaveopt (%rdi)
+  call void @llvm.x86.xsaveopt(i8* %ptr, i32 %hi, i32 %lo)
+  ret void;
+}
+declare void @llvm.x86.xsaveopt(i8*, i32, i32)
+
+define void @test_xsaveopt64(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsaveopt64
+; CHECK: movl       %edx, %eax
+; CHECK: movl       %esi, %edx
+; CHECK: xsaveopt64 (%rdi)
+  call void @llvm.x86.xsaveopt64(i8* %ptr, i32 %hi, i32 %lo)
+  ret void;
+}
+declare void @llvm.x86.xsaveopt64(i8*, i32, i32)
diff --git a/llvm/test/CodeGen/X86/system-intrinsics-64-xsaves.ll b/llvm/test/CodeGen/X86/system-intrinsics-64-xsaves.ll
index 5c1c5be..08d90f5 100644
--- a/llvm/test/CodeGen/X86/system-intrinsics-64-xsaves.ll
+++ b/llvm/test/CodeGen/X86/system-intrinsics-64-xsaves.ll
@@ -1,41 +1,41 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xsave,+xsaves | FileCheck %s

-

-define void @test_xsaves(i8* %ptr, i32 %hi, i32 %lo) {

-; CHECK-LABEL: test_xsaves

-; CHECK: movl   %edx, %eax

-; CHECK: movl   %esi, %edx

-; CHECK: xsaves (%rdi)

-  call void @llvm.x86.xsaves(i8* %ptr, i32 %hi, i32 %lo)

-  ret void;

-}

-declare void @llvm.x86.xsaves(i8*, i32, i32)

-

-define void @test_xsaves64(i8* %ptr, i32 %hi, i32 %lo) {

-; CHECK-LABEL: test_xsaves64

-; CHECK: movl     %edx, %eax

-; CHECK: movl     %esi, %edx

-; CHECK: xsaves64 (%rdi)

-  call void @llvm.x86.xsaves64(i8* %ptr, i32 %hi, i32 %lo)

-  ret void;

-}

-declare void @llvm.x86.xsaves64(i8*, i32, i32)

-

-define void @test_xrstors(i8* %ptr, i32 %hi, i32 %lo) {

-; CHECK-LABEL: test_xrstors

-; CHECK: movl    %edx, %eax

-; CHECK: movl    %esi, %edx

-; CHECK: xrstors (%rdi)

-  call void @llvm.x86.xrstors(i8* %ptr, i32 %hi, i32 %lo)

-  ret void;

-}

-declare void @llvm.x86.xrstors(i8*, i32, i32)

-

-define void @test_xrstors64(i8* %ptr, i32 %hi, i32 %lo) {

-; CHECK-LABEL: test_xrstors64

-; CHECK: movl      %edx, %eax

-; CHECK: movl      %esi, %edx

-; CHECK: xrstors64 (%rdi)

-  call void @llvm.x86.xrstors64(i8* %ptr, i32 %hi, i32 %lo)

-  ret void;

-}

-declare void @llvm.x86.xrstors64(i8*, i32, i32)

+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xsave,+xsaves | FileCheck %s
+
+define void @test_xsaves(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsaves
+; CHECK: movl   %edx, %eax
+; CHECK: movl   %esi, %edx
+; CHECK: xsaves (%rdi)
+  call void @llvm.x86.xsaves(i8* %ptr, i32 %hi, i32 %lo)
+  ret void;
+}
+declare void @llvm.x86.xsaves(i8*, i32, i32)
+
+define void @test_xsaves64(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsaves64
+; CHECK: movl     %edx, %eax
+; CHECK: movl     %esi, %edx
+; CHECK: xsaves64 (%rdi)
+  call void @llvm.x86.xsaves64(i8* %ptr, i32 %hi, i32 %lo)
+  ret void;
+}
+declare void @llvm.x86.xsaves64(i8*, i32, i32)
+
+define void @test_xrstors(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xrstors
+; CHECK: movl    %edx, %eax
+; CHECK: movl    %esi, %edx
+; CHECK: xrstors (%rdi)
+  call void @llvm.x86.xrstors(i8* %ptr, i32 %hi, i32 %lo)
+  ret void;
+}
+declare void @llvm.x86.xrstors(i8*, i32, i32)
+
+define void @test_xrstors64(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xrstors64
+; CHECK: movl      %edx, %eax
+; CHECK: movl      %esi, %edx
+; CHECK: xrstors64 (%rdi)
+  call void @llvm.x86.xrstors64(i8* %ptr, i32 %hi, i32 %lo)
+  ret void;
+}
+declare void @llvm.x86.xrstors64(i8*, i32, i32)
diff --git a/llvm/test/CodeGen/X86/system-intrinsics-xsave.ll b/llvm/test/CodeGen/X86/system-intrinsics-xsave.ll
index ff9fb7e..deaf1be 100644
--- a/llvm/test/CodeGen/X86/system-intrinsics-xsave.ll
+++ b/llvm/test/CodeGen/X86/system-intrinsics-xsave.ll
@@ -1,23 +1,23 @@
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+xsave | FileCheck %s

-

-define void @test_xsave(i8* %ptr, i32 %hi, i32 %lo) {

-; CHECK-LABEL: test_xsave

-; CHECK: movl  8(%esp), %edx

-; CHECK: movl  12(%esp), %eax

-; CHECK: movl  4(%esp), %ecx

-; CHECK: xsave (%ecx)

-  call void @llvm.x86.xsave(i8* %ptr, i32 %hi, i32 %lo)

-  ret void;

-}

-declare void @llvm.x86.xsave(i8*, i32, i32)

-

-define void @test_xrstor(i8* %ptr, i32 %hi, i32 %lo) {

-; CHECK-LABEL: test_xrstor

-; CHECK: movl   8(%esp), %edx

-; CHECK: movl   12(%esp), %eax

-; CHECK: movl   4(%esp), %ecx

-; CHECK: xrstor (%ecx)

-  call void @llvm.x86.xrstor(i8* %ptr, i32 %hi, i32 %lo)

-  ret void;

-}

-declare void @llvm.x86.xrstor(i8*, i32, i32)

+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+xsave | FileCheck %s
+
+define void @test_xsave(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsave
+; CHECK: movl  8(%esp), %edx
+; CHECK: movl  12(%esp), %eax
+; CHECK: movl  4(%esp), %ecx
+; CHECK: xsave (%ecx)
+  call void @llvm.x86.xsave(i8* %ptr, i32 %hi, i32 %lo)
+  ret void;
+}
+declare void @llvm.x86.xsave(i8*, i32, i32)
+
+define void @test_xrstor(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xrstor
+; CHECK: movl   8(%esp), %edx
+; CHECK: movl   12(%esp), %eax
+; CHECK: movl   4(%esp), %ecx
+; CHECK: xrstor (%ecx)
+  call void @llvm.x86.xrstor(i8* %ptr, i32 %hi, i32 %lo)
+  ret void;
+}
+declare void @llvm.x86.xrstor(i8*, i32, i32)
diff --git a/llvm/test/CodeGen/X86/system-intrinsics-xsavec.ll b/llvm/test/CodeGen/X86/system-intrinsics-xsavec.ll
index 4a55ea9..a457607 100644
--- a/llvm/test/CodeGen/X86/system-intrinsics-xsavec.ll
+++ b/llvm/test/CodeGen/X86/system-intrinsics-xsavec.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+xsave,+xsavec | FileCheck %s

-

-define void @test_xsavec(i8* %ptr, i32 %hi, i32 %lo) {

-; CHECK-LABEL: test_xsavec

-; CHECK: movl   8(%esp), %edx

-; CHECK: movl   12(%esp), %eax

-; CHECK: movl   4(%esp), %ecx

-; CHECK: xsavec (%ecx)

-  call void @llvm.x86.xsavec(i8* %ptr, i32 %hi, i32 %lo)

-  ret void;

-}

-declare void @llvm.x86.xsavec(i8*, i32, i32)

+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+xsave,+xsavec | FileCheck %s
+
+define void @test_xsavec(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsavec
+; CHECK: movl   8(%esp), %edx
+; CHECK: movl   12(%esp), %eax
+; CHECK: movl   4(%esp), %ecx
+; CHECK: xsavec (%ecx)
+  call void @llvm.x86.xsavec(i8* %ptr, i32 %hi, i32 %lo)
+  ret void;
+}
+declare void @llvm.x86.xsavec(i8*, i32, i32)
diff --git a/llvm/test/CodeGen/X86/system-intrinsics-xsaveopt.ll b/llvm/test/CodeGen/X86/system-intrinsics-xsaveopt.ll
index f9bd7ac..4bef3fd 100644
--- a/llvm/test/CodeGen/X86/system-intrinsics-xsaveopt.ll
+++ b/llvm/test/CodeGen/X86/system-intrinsics-xsaveopt.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+xsave,+xsaveopt | FileCheck %s

-

-define void @test_xsaveopt(i8* %ptr, i32 %hi, i32 %lo) {

-; CHECK-LABEL: test_xsaveopt

-; CHECK: movl     8(%esp), %edx

-; CHECK: movl     12(%esp), %eax

-; CHECK: movl     4(%esp), %ecx

-; CHECK: xsaveopt (%ecx)

-  call void @llvm.x86.xsaveopt(i8* %ptr, i32 %hi, i32 %lo)

-  ret void;

-}

-declare void @llvm.x86.xsaveopt(i8*, i32, i32)

+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+xsave,+xsaveopt | FileCheck %s
+
+define void @test_xsaveopt(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsaveopt
+; CHECK: movl     8(%esp), %edx
+; CHECK: movl     12(%esp), %eax
+; CHECK: movl     4(%esp), %ecx
+; CHECK: xsaveopt (%ecx)
+  call void @llvm.x86.xsaveopt(i8* %ptr, i32 %hi, i32 %lo)
+  ret void;
+}
+declare void @llvm.x86.xsaveopt(i8*, i32, i32)
diff --git a/llvm/test/CodeGen/X86/system-intrinsics-xsaves.ll b/llvm/test/CodeGen/X86/system-intrinsics-xsaves.ll
index ca1c5c1..840bbbc 100644
--- a/llvm/test/CodeGen/X86/system-intrinsics-xsaves.ll
+++ b/llvm/test/CodeGen/X86/system-intrinsics-xsaves.ll
@@ -1,23 +1,23 @@
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+xsave,+xsaves | FileCheck %s

-

-define void @test_xsaves(i8* %ptr, i32 %hi, i32 %lo) {

-; CHECK-LABEL: test_xsaves

-; CHECK: movl   8(%esp), %edx

-; CHECK: movl   12(%esp), %eax

-; CHECK: movl   4(%esp), %ecx

-; CHECK: xsaves (%ecx)

-  call void @llvm.x86.xsaves(i8* %ptr, i32 %hi, i32 %lo)

-  ret void;

-}

-declare void @llvm.x86.xsaves(i8*, i32, i32)

-

-define void @test_xrstors(i8* %ptr, i32 %hi, i32 %lo) {

-; CHECK-LABEL: test_xrstors

-; CHECK: movl    8(%esp), %edx

-; CHECK: movl    12(%esp), %eax

-; CHECK: movl    4(%esp), %ecx

-; CHECK: xrstors (%ecx)

-  call void @llvm.x86.xrstors(i8* %ptr, i32 %hi, i32 %lo)

-  ret void;

-}

-declare void @llvm.x86.xrstors(i8*, i32, i32)

+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+xsave,+xsaves | FileCheck %s
+
+define void @test_xsaves(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsaves
+; CHECK: movl   8(%esp), %edx
+; CHECK: movl   12(%esp), %eax
+; CHECK: movl   4(%esp), %ecx
+; CHECK: xsaves (%ecx)
+  call void @llvm.x86.xsaves(i8* %ptr, i32 %hi, i32 %lo)
+  ret void;
+}
+declare void @llvm.x86.xsaves(i8*, i32, i32)
+
+define void @test_xrstors(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xrstors
+; CHECK: movl    8(%esp), %edx
+; CHECK: movl    12(%esp), %eax
+; CHECK: movl    4(%esp), %ecx
+; CHECK: xrstors (%ecx)
+  call void @llvm.x86.xrstors(i8* %ptr, i32 %hi, i32 %lo)
+  ret void;
+}
+declare void @llvm.x86.xrstors(i8*, i32, i32)
diff --git a/llvm/test/CodeGen/X86/vec_partial.ll b/llvm/test/CodeGen/X86/vec_partial.ll
index 709f326..469667a 100644
--- a/llvm/test/CodeGen/X86/vec_partial.ll
+++ b/llvm/test/CodeGen/X86/vec_partial.ll
@@ -1,32 +1,32 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s

-

-; PR11580

-define <3 x float> @addf3(<3 x float> %x) {

-; CHECK-LABEL: addf3

-; CHECK:       # BB#0:

-; CHECK-NEXT:  addps .LCPI0_0(%rip), %xmm0

-; CHECK-NEXT:  retq

-entry:

-  %add = fadd <3 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>

-  ret <3 x float> %add

-}

-

-; PR11580

-define <4 x float> @cvtf3_f4(<3 x float> %x) {

-; CHECK-LABEL: cvtf3_f4

-; CHECK:       # BB#0:

-; CHECK-NEXT:  retq

-entry:

-  %extractVec = shufflevector <3 x float> %x, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>

-  ret <4 x float> %extractVec

-}

-

-; PR11580

-define <3 x float> @cvtf4_f3(<4 x float> %x) {

-; CHECK-LABEL: cvtf4_f3

-; CHECK:       # BB#0:

-; CHECK-NEXT:  retq

-entry:

-  %extractVec = shufflevector <4 x float> %x, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>

-  ret <3 x float> %extractVec

-}

+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+; PR11580
+define <3 x float> @addf3(<3 x float> %x) {
+; CHECK-LABEL: addf3
+; CHECK:       # BB#0:
+; CHECK-NEXT:  addps .LCPI0_0(%rip), %xmm0
+; CHECK-NEXT:  retq
+entry:
+  %add = fadd <3 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+  ret <3 x float> %add
+}
+
+; PR11580
+define <4 x float> @cvtf3_f4(<3 x float> %x) {
+; CHECK-LABEL: cvtf3_f4
+; CHECK:       # BB#0:
+; CHECK-NEXT:  retq
+entry:
+  %extractVec = shufflevector <3 x float> %x, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  ret <4 x float> %extractVec
+}
+
+; PR11580
+define <3 x float> @cvtf4_f3(<4 x float> %x) {
+; CHECK-LABEL: cvtf4_f3
+; CHECK:       # BB#0:
+; CHECK-NEXT:  retq
+entry:
+  %extractVec = shufflevector <4 x float> %x, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  ret <3 x float> %extractVec
+}
diff --git a/llvm/test/CodeGen/X86/vec_reassociate.ll b/llvm/test/CodeGen/X86/vec_reassociate.ll
index bf2053f..0d33735 100644
--- a/llvm/test/CodeGen/X86/vec_reassociate.ll
+++ b/llvm/test/CodeGen/X86/vec_reassociate.ll
@@ -1,119 +1,119 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s

-

-define <4 x i32> @add_4i32(<4 x i32> %a0, <4 x i32> %a1) {

-  ;CHECK-LABEL:  @add_4i32

-  ;CHECK:        # BB#0:

-  ;CHECK-NEXT:   paddd %xmm1, %xmm0

-  ;CHECK-NEXT:   retq

-  %1 = add <4 x i32> %a0, <i32  1, i32 -2, i32  3, i32 -4>

-  %2 = add <4 x i32> %a1, <i32 -1, i32  2, i32 -3, i32  4>

-  %3 = add <4 x i32> %1, %2

-  ret <4 x i32> %3

-}

-

-define <4 x i32> @add_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {

-  ;CHECK-LABEL:  @add_4i32_commute

-  ;CHECK:        # BB#0:

-  ;CHECK-NEXT:   paddd %xmm1, %xmm0

-  ;CHECK-NEXT:   retq

-  %1 = add <4 x i32> <i32  1, i32 -2, i32  3, i32 -4>, %a0

-  %2 = add <4 x i32> <i32 -1, i32  2, i32 -3, i32  4>, %a1

-  %3 = add <4 x i32> %1, %2

-  ret <4 x i32> %3

-}

-

-define <4 x i32> @mul_4i32(<4 x i32> %a0, <4 x i32> %a1) {

-  ;CHECK-LABEL:  @mul_4i32

-  ;CHECK:        # BB#0:

-  ;CHECK-NEXT:   pmulld %xmm1, %xmm0

-  ;CHECK-NEXT:   pmulld .LCPI2_0(%rip), %xmm0

-  ;CHECK-NEXT:   retq

-  %1 = mul <4 x i32> %a0, <i32 1, i32 2, i32 3, i32 4>

-  %2 = mul <4 x i32> %a1, <i32 4, i32 3, i32 2, i32 1>

-  %3 = mul <4 x i32> %1, %2

-  ret <4 x i32> %3

-}

-

-define <4 x i32> @mul_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {

-  ;CHECK-LABEL:  @mul_4i32_commute

-  ;CHECK:        # BB#0:

-  ;CHECK-NEXT:   pmulld %xmm1, %xmm0

-  ;CHECK-NEXT:   pmulld .LCPI3_0(%rip), %xmm0

-  ;CHECK-NEXT:   retq

-  %1 = mul <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %a0

-  %2 = mul <4 x i32> <i32 4, i32 3, i32 2, i32 1>, %a1

-  %3 = mul <4 x i32> %1, %2

-  ret <4 x i32> %3

-}

-

-define <4 x i32> @and_4i32(<4 x i32> %a0, <4 x i32> %a1) {

-  ;CHECK-LABEL:  @and_4i32

-  ;CHECK:        # BB#0:

-  ;CHECK-NEXT:   andps %xmm1, %xmm0

-  ;CHECK-NEXT:   andps .LCPI4_0(%rip), %xmm0

-  ;CHECK-NEXT:   retq

-  %1 = and <4 x i32> %a0, <i32 -2, i32 -2, i32  3, i32  3>

-  %2 = and <4 x i32> %a1, <i32 -1, i32 -1, i32  1, i32  1>

-  %3 = and <4 x i32> %1, %2

-  ret <4 x i32> %3

-}

-

-define <4 x i32> @and_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {

-  ;CHECK-LABEL:  @and_4i32_commute

-  ;CHECK:        # BB#0:

-  ;CHECK-NEXT:   andps %xmm1, %xmm0

-  ;CHECK-NEXT:   andps .LCPI5_0(%rip), %xmm0

-  ;CHECK-NEXT:   retq

-  %1 = and <4 x i32> <i32 -2, i32 -2, i32  3, i32  3>, %a0

-  %2 = and <4 x i32> <i32 -1, i32 -1, i32  1, i32  1>, %a1

-  %3 = and <4 x i32> %1, %2

-  ret <4 x i32> %3

-}

-

-define <4 x i32> @or_4i32(<4 x i32> %a0, <4 x i32> %a1) {

-  ;CHECK-LABEL:  @or_4i32

-  ;CHECK:        # BB#0:

-  ;CHECK-NEXT:   orps %xmm1, %xmm0

-  ;CHECK-NEXT:   orps .LCPI6_0(%rip), %xmm0

-  ;CHECK-NEXT:   retq

-  %1 = or <4 x i32> %a0, <i32 -2, i32 -2, i32  3, i32  3>

-  %2 = or <4 x i32> %a1, <i32 -1, i32 -1, i32  1, i32  1>

-  %3 = or <4 x i32> %1, %2

-  ret <4 x i32> %3

-}

-

-define <4 x i32> @or_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {

-  ;CHECK-LABEL:  @or_4i32_commute

-  ;CHECK:        # BB#0:

-  ;CHECK-NEXT:   orps %xmm1, %xmm0

-  ;CHECK-NEXT:   orps .LCPI7_0(%rip), %xmm0

-  ;CHECK-NEXT:   retq

-  %1 = or <4 x i32> <i32 -2, i32 -2, i32  3, i32  3>, %a0 

-  %2 = or <4 x i32> <i32 -1, i32 -1, i32  1, i32  1>, %a1

-  %3 = or <4 x i32> %1, %2

-  ret <4 x i32> %3

-}

-

-define <4 x i32> @xor_4i32(<4 x i32> %a0, <4 x i32> %a1) {

-  ;CHECK-LABEL:  @xor_4i32

-  ;CHECK:        # BB#0:

-  ;CHECK-NEXT:   xorps %xmm1, %xmm0

-  ;CHECK-NEXT:   xorps .LCPI8_0(%rip), %xmm0

-  ;CHECK-NEXT:   retq

-  %1 = xor <4 x i32> %a0, <i32 -2, i32 -2, i32  3, i32  3>

-  %2 = xor <4 x i32> %a1, <i32 -1, i32 -1, i32  1, i32  1>

-  %3 = xor <4 x i32> %1, %2

-  ret <4 x i32> %3

-}

-

-define <4 x i32> @xor_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {

-  ;CHECK-LABEL:  @xor_4i32_commute

-  ;CHECK:        # BB#0:

-  ;CHECK-NEXT:   xorps %xmm1, %xmm0

-  ;CHECK-NEXT:   xorps .LCPI9_0(%rip), %xmm0

-  ;CHECK-NEXT:   retq

-  %1 = xor <4 x i32> <i32 -2, i32 -2, i32  3, i32  3>, %a0

-  %2 = xor <4 x i32> <i32 -1, i32 -1, i32  1, i32  1>, %a1

-  %3 = xor <4 x i32> %1, %2

-  ret <4 x i32> %3

-}

+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s
+
+define <4 x i32> @add_4i32(<4 x i32> %a0, <4 x i32> %a1) {
+  ;CHECK-LABEL:  @add_4i32
+  ;CHECK:        # BB#0:
+  ;CHECK-NEXT:   paddd %xmm1, %xmm0
+  ;CHECK-NEXT:   retq
+  %1 = add <4 x i32> %a0, <i32  1, i32 -2, i32  3, i32 -4>
+  %2 = add <4 x i32> %a1, <i32 -1, i32  2, i32 -3, i32  4>
+  %3 = add <4 x i32> %1, %2
+  ret <4 x i32> %3
+}
+
+define <4 x i32> @add_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
+  ;CHECK-LABEL:  @add_4i32_commute
+  ;CHECK:        # BB#0:
+  ;CHECK-NEXT:   paddd %xmm1, %xmm0
+  ;CHECK-NEXT:   retq
+  %1 = add <4 x i32> <i32  1, i32 -2, i32  3, i32 -4>, %a0
+  %2 = add <4 x i32> <i32 -1, i32  2, i32 -3, i32  4>, %a1
+  %3 = add <4 x i32> %1, %2
+  ret <4 x i32> %3
+}
+
+define <4 x i32> @mul_4i32(<4 x i32> %a0, <4 x i32> %a1) {
+  ;CHECK-LABEL:  @mul_4i32
+  ;CHECK:        # BB#0:
+  ;CHECK-NEXT:   pmulld %xmm1, %xmm0
+  ;CHECK-NEXT:   pmulld .LCPI2_0(%rip), %xmm0
+  ;CHECK-NEXT:   retq
+  %1 = mul <4 x i32> %a0, <i32 1, i32 2, i32 3, i32 4>
+  %2 = mul <4 x i32> %a1, <i32 4, i32 3, i32 2, i32 1>
+  %3 = mul <4 x i32> %1, %2
+  ret <4 x i32> %3
+}
+
+define <4 x i32> @mul_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
+  ;CHECK-LABEL:  @mul_4i32_commute
+  ;CHECK:        # BB#0:
+  ;CHECK-NEXT:   pmulld %xmm1, %xmm0
+  ;CHECK-NEXT:   pmulld .LCPI3_0(%rip), %xmm0
+  ;CHECK-NEXT:   retq
+  %1 = mul <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %a0
+  %2 = mul <4 x i32> <i32 4, i32 3, i32 2, i32 1>, %a1
+  %3 = mul <4 x i32> %1, %2
+  ret <4 x i32> %3
+}
+
+define <4 x i32> @and_4i32(<4 x i32> %a0, <4 x i32> %a1) {
+  ;CHECK-LABEL:  @and_4i32
+  ;CHECK:        # BB#0:
+  ;CHECK-NEXT:   andps %xmm1, %xmm0
+  ;CHECK-NEXT:   andps .LCPI4_0(%rip), %xmm0
+  ;CHECK-NEXT:   retq
+  %1 = and <4 x i32> %a0, <i32 -2, i32 -2, i32  3, i32  3>
+  %2 = and <4 x i32> %a1, <i32 -1, i32 -1, i32  1, i32  1>
+  %3 = and <4 x i32> %1, %2
+  ret <4 x i32> %3
+}
+
+define <4 x i32> @and_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
+  ;CHECK-LABEL:  @and_4i32_commute
+  ;CHECK:        # BB#0:
+  ;CHECK-NEXT:   andps %xmm1, %xmm0
+  ;CHECK-NEXT:   andps .LCPI5_0(%rip), %xmm0
+  ;CHECK-NEXT:   retq
+  %1 = and <4 x i32> <i32 -2, i32 -2, i32  3, i32  3>, %a0
+  %2 = and <4 x i32> <i32 -1, i32 -1, i32  1, i32  1>, %a1
+  %3 = and <4 x i32> %1, %2
+  ret <4 x i32> %3
+}
+
+define <4 x i32> @or_4i32(<4 x i32> %a0, <4 x i32> %a1) {
+  ;CHECK-LABEL:  @or_4i32
+  ;CHECK:        # BB#0:
+  ;CHECK-NEXT:   orps %xmm1, %xmm0
+  ;CHECK-NEXT:   orps .LCPI6_0(%rip), %xmm0
+  ;CHECK-NEXT:   retq
+  %1 = or <4 x i32> %a0, <i32 -2, i32 -2, i32  3, i32  3>
+  %2 = or <4 x i32> %a1, <i32 -1, i32 -1, i32  1, i32  1>
+  %3 = or <4 x i32> %1, %2
+  ret <4 x i32> %3
+}
+
+define <4 x i32> @or_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
+  ;CHECK-LABEL:  @or_4i32_commute
+  ;CHECK:        # BB#0:
+  ;CHECK-NEXT:   orps %xmm1, %xmm0
+  ;CHECK-NEXT:   orps .LCPI7_0(%rip), %xmm0
+  ;CHECK-NEXT:   retq
+  %1 = or <4 x i32> <i32 -2, i32 -2, i32  3, i32  3>, %a0 
+  %2 = or <4 x i32> <i32 -1, i32 -1, i32  1, i32  1>, %a1
+  %3 = or <4 x i32> %1, %2
+  ret <4 x i32> %3
+}
+
+define <4 x i32> @xor_4i32(<4 x i32> %a0, <4 x i32> %a1) {
+  ;CHECK-LABEL:  @xor_4i32
+  ;CHECK:        # BB#0:
+  ;CHECK-NEXT:   xorps %xmm1, %xmm0
+  ;CHECK-NEXT:   xorps .LCPI8_0(%rip), %xmm0
+  ;CHECK-NEXT:   retq
+  %1 = xor <4 x i32> %a0, <i32 -2, i32 -2, i32  3, i32  3>
+  %2 = xor <4 x i32> %a1, <i32 -1, i32 -1, i32  1, i32  1>
+  %3 = xor <4 x i32> %1, %2
+  ret <4 x i32> %3
+}
+
+define <4 x i32> @xor_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
+  ;CHECK-LABEL:  @xor_4i32_commute
+  ;CHECK:        # BB#0:
+  ;CHECK-NEXT:   xorps %xmm1, %xmm0
+  ;CHECK-NEXT:   xorps .LCPI9_0(%rip), %xmm0
+  ;CHECK-NEXT:   retq
+  %1 = xor <4 x i32> <i32 -2, i32 -2, i32  3, i32  3>, %a0
+  %2 = xor <4 x i32> <i32 -1, i32 -1, i32  1, i32  1>, %a1
+  %3 = xor <4 x i32> %1, %2
+  ret <4 x i32> %3
+}
diff --git a/llvm/test/CodeGen/X86/x86-32-intrcc.ll b/llvm/test/CodeGen/X86/x86-32-intrcc.ll
index 908da3d1..99d0044 100644
--- a/llvm/test/CodeGen/X86/x86-32-intrcc.ll
+++ b/llvm/test/CodeGen/X86/x86-32-intrcc.ll
@@ -1,79 +1,79 @@
-; RUN: llc -mtriple=i686-unknown-unknown < %s | FileCheck %s

-; RUN: llc -mtriple=i686-unknown-unknown -O0 < %s | FileCheck %s -check-prefix=CHECK0

-

-%struct.interrupt_frame = type { i32, i32, i32, i32, i32 }

-

-@llvm.used = appending global [3 x i8*] [i8* bitcast (void (%struct.interrupt_frame*)* @test_isr_no_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i32)* @test_isr_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i32)* @test_isr_clobbers to i8*)], section "llvm.metadata"

-

-; Spills eax, putting original esp at +4.

-; No stack adjustment if declared with no error code

-define x86_intrcc void @test_isr_no_ecode(%struct.interrupt_frame* %frame) {

-  ; CHECK-LABEL: test_isr_no_ecode:

-  ; CHECK: pushl %eax

-  ; CHECK: movl 12(%esp), %eax

-  ; CHECK: popl %eax

-  ; CHECK: iretl

-  ; CHECK0-LABEL: test_isr_no_ecode:

-  ; CHECK0: pushl %eax

-  ; CHECK0: leal 4(%esp), %eax

-  ; CHECK0: movl 8(%eax), %eax

-  ; CHECK0: popl %eax

-  ; CHECK0: iretl

-  %pflags = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %frame, i32 0, i32 2

-  %flags = load i32, i32* %pflags, align 4

-  call void asm sideeffect "", "r"(i32 %flags)

-  ret void

-}

-

-; Spills eax and ecx, putting original esp at +8. Stack is adjusted up another 4 bytes

-; before return, popping the error code.

-define x86_intrcc void @test_isr_ecode(%struct.interrupt_frame* %frame, i32 %ecode) {

-  ; CHECK-LABEL: test_isr_ecode

-  ; CHECK: pushl %ecx

-  ; CHECK: pushl %eax

-  ; CHECK: movl 8(%esp), %eax

-  ; CHECK: movl 20(%esp), %ecx

-  ; CHECK: popl %eax

-  ; CHECK: popl %ecx

-  ; CHECK: addl $4, %esp

-  ; CHECK: iretl

-  ; CHECK0-LABEL: test_isr_ecode

-  ; CHECK0: pushl %ecx

-  ; CHECK0: pushl %eax

-  ; CHECK0: movl 8(%esp), %eax

-  ; CHECK0: leal 12(%esp), %ecx

-  ; CHECK0: movl 8(%ecx), %ecx

-  ; CHECK0: popl %eax

-  ; CHECK0: popl %ecx

-  ; CHECK0: addl $4, %esp

-  ; CHECK0: iretl

-  %pflags = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %frame, i32 0, i32 2

-  %flags = load i32, i32* %pflags, align 4

-  call x86_fastcallcc void asm sideeffect "", "r,r"(i32 %flags, i32 %ecode)

-  ret void

-}

-

-; All clobbered registers must be saved

-define x86_intrcc void @test_isr_clobbers(%struct.interrupt_frame* %frame, i32 %ecode) {

-  call void asm sideeffect "", "~{eax},~{ebx},~{ebp}"()

-  ; CHECK-LABEL: test_isr_clobbers

-  ; CHECK-SSE-NEXT: pushl %ebp

-  ; CHECK-SSE-NEXT: pushl %ebx

-  ; CHECK-SSE-NEXT; pushl %eax

-  ; CHECK-SSE-NEXT: popl %eax

-  ; CHECK-SSE-NEXT: popl %ebx

-  ; CHECK-SSE-NEXT: popl %ebp

-  ; CHECK-SSE-NEXT: addl $4, %esp

-  ; CHECK-SSE-NEXT: iretl

-  ; CHECK0-LABEL: test_isr_clobbers

-  ; CHECK0-SSE-NEXT: pushl %ebp

-  ; CHECK0-SSE-NEXT: pushl %ebx

-  ; CHECK0-SSE-NEXT; pushl %eax

-  ; CHECK0-SSE-NEXT: popl %eax

-  ; CHECK0-SSE-NEXT: popl %ebx

-  ; CHECK0-SSE-NEXT: popl %ebp

-  ; CHECK0-SSE-NEXT: addl $4, %esp

-  ; CHECK0-SSE-NEXT: iretl

-  ret void

-}

-

+; RUN: llc -mtriple=i686-unknown-unknown < %s | FileCheck %s
+; RUN: llc -mtriple=i686-unknown-unknown -O0 < %s | FileCheck %s -check-prefix=CHECK0
+
+%struct.interrupt_frame = type { i32, i32, i32, i32, i32 } ; five i32 fields; test_isr_no_ecode and test_isr_ecode load field 2 (byte offset 8)
+
+@llvm.used = appending global [3 x i8*] [i8* bitcast (void (%struct.interrupt_frame*)* @test_isr_no_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i32)* @test_isr_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i32)* @test_isr_clobbers to i8*)], section "llvm.metadata" ; lists all three handlers defined in this file
+
+; No error code: only eax is spilled, so the interrupt frame starts at 4(%esp) and its
+; field 2 (byte offset 8) is read from 12(%esp). No stack adjustment is expected before iretl.
+define x86_intrcc void @test_isr_no_ecode(%struct.interrupt_frame* %frame) {
+  ; CHECK-LABEL: test_isr_no_ecode:
+  ; CHECK: pushl %eax
+  ; CHECK: movl 12(%esp), %eax
+  ; CHECK: popl %eax
+  ; CHECK: iretl
+  ; CHECK0-LABEL: test_isr_no_ecode:
+  ; CHECK0: pushl %eax
+  ; CHECK0: leal 4(%esp), %eax
+  ; CHECK0: movl 8(%eax), %eax
+  ; CHECK0: popl %eax
+  ; CHECK0: iretl
+  %pflags = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %frame, i32 0, i32 2 ; address of frame field 2
+  %flags = load i32, i32* %pflags, align 4 ; read that field
+  call void asm sideeffect "", "r"(i32 %flags) ; empty asm taking %flags as a register input, so the load has a use
+  ret void
+}
+
+; Error-code variant: ecx and eax are spilled, putting the original esp at +8 and frame
+; field 2 at 20(%esp). The stack is adjusted up by 4 before iretl, popping the error code.
+define x86_intrcc void @test_isr_ecode(%struct.interrupt_frame* %frame, i32 %ecode) {
+  ; CHECK-LABEL: test_isr_ecode
+  ; CHECK: pushl %ecx
+  ; CHECK: pushl %eax
+  ; CHECK: movl 8(%esp), %eax
+  ; CHECK: movl 20(%esp), %ecx
+  ; CHECK: popl %eax
+  ; CHECK: popl %ecx
+  ; CHECK: addl $4, %esp
+  ; CHECK: iretl
+  ; CHECK0-LABEL: test_isr_ecode
+  ; CHECK0: pushl %ecx
+  ; CHECK0: pushl %eax
+  ; CHECK0: movl 8(%esp), %eax
+  ; CHECK0: leal 12(%esp), %ecx
+  ; CHECK0: movl 8(%ecx), %ecx
+  ; CHECK0: popl %eax
+  ; CHECK0: popl %ecx
+  ; CHECK0: addl $4, %esp
+  ; CHECK0: iretl
+  %pflags = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %frame, i32 0, i32 2 ; address of frame field 2
+  %flags = load i32, i32* %pflags, align 4 ; read that field
+  call x86_fastcallcc void asm sideeffect "", "r,r"(i32 %flags, i32 %ecode) ; empty asm taking both values as register inputs, so both have a use
+  ret void
+}
+
+; All clobbered registers must be saved on entry and restored before the handler returns.
+define x86_intrcc void @test_isr_clobbers(%struct.interrupt_frame* %frame, i32 %ecode) {
+  call void asm sideeffect "", "~{eax},~{ebx},~{ebp}"() ; empty asm that only declares eax, ebx and ebp as clobbered
+  ; CHECK-LABEL: test_isr_clobbers
+  ; CHECK-SSE-NEXT: pushl %ebp
+  ; CHECK-SSE-NEXT: pushl %ebx
+  ; CHECK-SSE-NEXT: pushl %eax
+  ; CHECK-SSE-NEXT: popl %eax
+  ; CHECK-SSE-NEXT: popl %ebx
+  ; CHECK-SSE-NEXT: popl %ebp
+  ; CHECK-SSE-NEXT: addl $4, %esp
+  ; CHECK-SSE-NEXT: iretl
+  ; CHECK0-LABEL: test_isr_clobbers
+  ; CHECK0-SSE-NEXT: pushl %ebp
+  ; CHECK0-SSE-NEXT: pushl %ebx
+  ; CHECK0-SSE-NEXT: pushl %eax
+  ; CHECK0-SSE-NEXT: popl %eax
+  ; CHECK0-SSE-NEXT: popl %ebx
+  ; CHECK0-SSE-NEXT: popl %ebp
+  ; CHECK0-SSE-NEXT: addl $4, %esp
+  ; CHECK0-SSE-NEXT: iretl
+  ret void ; NOTE(review): neither RUN line enables the -SSE prefixes used above, so only the two LABEL directives are active; confirm whether that is intended
+}
+
diff --git a/llvm/test/CodeGen/X86/x86-64-intrcc.ll b/llvm/test/CodeGen/X86/x86-64-intrcc.ll
index 8f70b39..429209c 100644
--- a/llvm/test/CodeGen/X86/x86-64-intrcc.ll
+++ b/llvm/test/CodeGen/X86/x86-64-intrcc.ll
@@ -1,86 +1,86 @@
-; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s

-; RUN: llc -mtriple=x86_64-unknown-unknown -O0 < %s | FileCheck %s -check-prefix=CHECK0

-

-%struct.interrupt_frame = type { i64, i64, i64, i64, i64 }

-

-@llvm.used = appending global [3 x i8*] [i8* bitcast (void (%struct.interrupt_frame*)* @test_isr_no_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i64)* @test_isr_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i64)* @test_isr_clobbers to i8*)], section "llvm.metadata"

-

-; Spills rax, putting original esp at +8.

-; No stack adjustment if declared with no error code

-define x86_intrcc void @test_isr_no_ecode(%struct.interrupt_frame* %frame) {

-  ; CHECK-LABEL: test_isr_no_ecode:

-  ; CHECK: pushq %rax

-  ; CHECK: movq 24(%rsp), %rax

-  ; CHECK: popq %rax

-  ; CHECK: iretq

-  ; CHECK0-LABEL: test_isr_no_ecode:

-  ; CHECK0: pushq %rax

-  ; CHECK0: leaq 8(%rsp), %rax

-  ; CHECK0: movq 16(%rax), %rax

-  ; CHECK0: popq %rax

-  ; CHECK0: iretq

-  %pflags = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %frame, i32 0, i32 2

-  %flags = load i64, i64* %pflags, align 4

-  call void asm sideeffect "", "r"(i64 %flags)

-  ret void

-}

-

-; Spills rax and rcx, putting original rsp at +16. Stack is adjusted up another 8 bytes

-; before return, popping the error code.

-define x86_intrcc void @test_isr_ecode(%struct.interrupt_frame* %frame, i64 %ecode) {

-  ; CHECK-LABEL: test_isr_ecode

-  ; CHECK: pushq %rax

-  ; CHECK: pushq %rcx

-  ; CHECK: movq 16(%rsp), %rax

-  ; CHECK: movq 40(%rsp), %rcx

-  ; CHECK: popq %rcx

-  ; CHECK: popq %rax

-  ; CHECK: addq $8, %rsp

-  ; CHECK: iretq

-  ; CHECK0-LABEL: test_isr_ecode

-  ; CHECK0: pushq %rax

-  ; CHECK0: pushq %rcx

-  ; CHECK0: movq 16(%rsp), %rax

-  ; CHECK0: leaq 24(%rsp), %rcx

-  ; CHECK0: movq 16(%rcx), %rcx

-  ; CHECK0: popq %rcx

-  ; CHECK0: popq %rax

-  ; CHECK0: addq $8, %rsp

-  ; CHECK0: iretq

-  %pflags = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %frame, i32 0, i32 2

-  %flags = load i64, i64* %pflags, align 4

-  call void asm sideeffect "", "r,r"(i64 %flags, i64 %ecode)

-  ret void

-}

-

-; All clobbered registers must be saved

-define x86_intrcc void @test_isr_clobbers(%struct.interrupt_frame* %frame, i64 %ecode) {

-  call void asm sideeffect "", "~{rax},~{rbx},~{rbp},~{r11},~{xmm0}"()

-  ; CHECK-LABEL: test_isr_clobbers

-  ; CHECK-SSE-NEXT: pushq %rax

-  ; CHECK-SSE-NEXT; pushq %r11

-  ; CHECK-SSE-NEXT: pushq %rbp

-  ; CHECK-SSE-NEXT: pushq %rbx

-  ; CHECK-SSE-NEXT: movaps %xmm0

-  ; CHECK-SSE-NEXT: movaps %xmm0

-  ; CHECK-SSE-NEXT: popq %rbx

-  ; CHECK-SSE-NEXT: popq %rbp

-  ; CHECK-SSE-NEXT: popq %r11

-  ; CHECK-SSE-NEXT: popq %rax

-  ; CHECK-SSE-NEXT: addq $8, %rsp

-  ; CHECK-SSE-NEXT: iretq

-  ; CHECK0-LABEL: test_isr_clobbers

-  ; CHECK0-SSE-NEXT: pushq %rax

-  ; CHECK0-SSE-NEXT; pushq %r11

-  ; CHECK0-SSE-NEXT: pushq %rbp

-  ; CHECK0-SSE-NEXT: pushq %rbx

-  ; CHECK0-SSE-NEXT: movaps %xmm0

-  ; CHECK0-SSE-NEXT: movaps %xmm0

-  ; CHECK0-SSE-NEXT: popq %rbx

-  ; CHECK0-SSE-NEXT: popq %rbp

-  ; CHECK0-SSE-NEXT: popq %r11

-  ; CHECK0-SSE-NEXT: popq %rax

-  ; CHECK0-SSE-NEXT: addq $8, %rsp

-  ; CHECK0-SSE-NEXT: iretq

-  ret void

+; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -O0 < %s | FileCheck %s -check-prefix=CHECK0
+
+%struct.interrupt_frame = type { i64, i64, i64, i64, i64 } ; five i64 fields; test_isr_no_ecode and test_isr_ecode load field 2 (byte offset 16)
+
+@llvm.used = appending global [3 x i8*] [i8* bitcast (void (%struct.interrupt_frame*)* @test_isr_no_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i64)* @test_isr_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i64)* @test_isr_clobbers to i8*)], section "llvm.metadata" ; lists all three handlers defined in this file
+
+; No error code: only rax is spilled, so the interrupt frame starts at 8(%rsp) and its
+; field 2 (byte offset 16) is read from 24(%rsp). No stack adjustment is expected before iretq.
+define x86_intrcc void @test_isr_no_ecode(%struct.interrupt_frame* %frame) {
+  ; CHECK-LABEL: test_isr_no_ecode:
+  ; CHECK: pushq %rax
+  ; CHECK: movq 24(%rsp), %rax
+  ; CHECK: popq %rax
+  ; CHECK: iretq
+  ; CHECK0-LABEL: test_isr_no_ecode:
+  ; CHECK0: pushq %rax
+  ; CHECK0: leaq 8(%rsp), %rax
+  ; CHECK0: movq 16(%rax), %rax
+  ; CHECK0: popq %rax
+  ; CHECK0: iretq
+  %pflags = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %frame, i32 0, i32 2 ; address of frame field 2
+  %flags = load i64, i64* %pflags, align 4 ; read that field
+  call void asm sideeffect "", "r"(i64 %flags) ; empty asm taking %flags as a register input, so the load has a use
+  ret void
+}
+
+; Error-code variant: rax and rcx are spilled, putting the original rsp at +16 and frame
+; field 2 at 40(%rsp). The stack is adjusted up by 8 before iretq, popping the error code.
+define x86_intrcc void @test_isr_ecode(%struct.interrupt_frame* %frame, i64 %ecode) {
+  ; CHECK-LABEL: test_isr_ecode
+  ; CHECK: pushq %rax
+  ; CHECK: pushq %rcx
+  ; CHECK: movq 16(%rsp), %rax
+  ; CHECK: movq 40(%rsp), %rcx
+  ; CHECK: popq %rcx
+  ; CHECK: popq %rax
+  ; CHECK: addq $8, %rsp
+  ; CHECK: iretq
+  ; CHECK0-LABEL: test_isr_ecode
+  ; CHECK0: pushq %rax
+  ; CHECK0: pushq %rcx
+  ; CHECK0: movq 16(%rsp), %rax
+  ; CHECK0: leaq 24(%rsp), %rcx
+  ; CHECK0: movq 16(%rcx), %rcx
+  ; CHECK0: popq %rcx
+  ; CHECK0: popq %rax
+  ; CHECK0: addq $8, %rsp
+  ; CHECK0: iretq
+  %pflags = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %frame, i32 0, i32 2 ; address of frame field 2
+  %flags = load i64, i64* %pflags, align 4 ; read that field
+  call void asm sideeffect "", "r,r"(i64 %flags, i64 %ecode) ; empty asm taking both values as register inputs, so both have a use
+  ret void
+}
+
+; All clobbered registers must be saved on entry and restored before the handler returns.
+define x86_intrcc void @test_isr_clobbers(%struct.interrupt_frame* %frame, i64 %ecode) {
+  call void asm sideeffect "", "~{rax},~{rbx},~{rbp},~{r11},~{xmm0}"() ; empty asm that only declares rax, rbx, rbp, r11 and xmm0 as clobbered
+  ; CHECK-LABEL: test_isr_clobbers
+  ; CHECK-SSE-NEXT: pushq %rax
+  ; CHECK-SSE-NEXT: pushq %r11
+  ; CHECK-SSE-NEXT: pushq %rbp
+  ; CHECK-SSE-NEXT: pushq %rbx
+  ; CHECK-SSE-NEXT: movaps %xmm0
+  ; CHECK-SSE-NEXT: movaps %xmm0
+  ; CHECK-SSE-NEXT: popq %rbx
+  ; CHECK-SSE-NEXT: popq %rbp
+  ; CHECK-SSE-NEXT: popq %r11
+  ; CHECK-SSE-NEXT: popq %rax
+  ; CHECK-SSE-NEXT: addq $8, %rsp
+  ; CHECK-SSE-NEXT: iretq
+  ; CHECK0-LABEL: test_isr_clobbers
+  ; CHECK0-SSE-NEXT: pushq %rax
+  ; CHECK0-SSE-NEXT: pushq %r11
+  ; CHECK0-SSE-NEXT: pushq %rbp
+  ; CHECK0-SSE-NEXT: pushq %rbx
+  ; CHECK0-SSE-NEXT: movaps %xmm0
+  ; CHECK0-SSE-NEXT: movaps %xmm0
+  ; CHECK0-SSE-NEXT: popq %rbx
+  ; CHECK0-SSE-NEXT: popq %rbp
+  ; CHECK0-SSE-NEXT: popq %r11
+  ; CHECK0-SSE-NEXT: popq %rax
+  ; CHECK0-SSE-NEXT: addq $8, %rsp
+  ; CHECK0-SSE-NEXT: iretq
+  ret void ; NOTE(review): neither RUN line enables the -SSE prefixes used above, so only the two LABEL directives are active; confirm whether that is intended
 }
\ No newline at end of file