Check in LLVM r95781.
diff --git a/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll b/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll
new file mode 100644
index 0000000..2484860
--- /dev/null
+++ b/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll
@@ -0,0 +1,18 @@
+; The old instruction selector used to load all arguments to a call up in 
+; registers, then start pushing them all onto the stack.  This is bad news as
+; it makes a ton of annoying overlapping live ranges.  This code should not
+; cause spills!
+;
+; RUN: llc < %s -march=x86 -stats |& not grep spilled
+
+target datalayout = "e-p:32:32"
+
+define i32 @test(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) {
+        ret i32 0
+}
+
+define i32 @main() {
+        %X = call i32 @test( i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10 )            ; <i32> [#uses=1]
+        ret i32 %X
+}
+
diff --git a/test/CodeGen/X86/2003-08-23-DeadBlockTest.ll b/test/CodeGen/X86/2003-08-23-DeadBlockTest.ll
new file mode 100644
index 0000000..5c40eea
--- /dev/null
+++ b/test/CodeGen/X86/2003-08-23-DeadBlockTest.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86
+
+define i32 @test() {
+entry:
+        ret i32 7
+Test:           ; No predecessors!
+        %A = call i32 @test( )          ; <i32> [#uses=1]
+        %B = call i32 @test( )          ; <i32> [#uses=1]
+        %C = add i32 %A, %B             ; <i32> [#uses=1]
+        ret i32 %C
+}
+
diff --git a/test/CodeGen/X86/2003-11-03-GlobalBool.ll b/test/CodeGen/X86/2003-11-03-GlobalBool.ll
new file mode 100644
index 0000000..8b0a185
--- /dev/null
+++ b/test/CodeGen/X86/2003-11-03-GlobalBool.ll
@@ -0,0 +1,4 @@
+; RUN: llc < %s -march=x86 | \
+; RUN:   not grep {.byte\[\[:space:\]\]*true}
+
+@X = global i1 true             ; <i1*> [#uses=0]
diff --git a/test/CodeGen/X86/2004-02-12-Memcpy.ll b/test/CodeGen/X86/2004-02-12-Memcpy.ll
new file mode 100644
index 0000000..f15a1b4
--- /dev/null
+++ b/test/CodeGen/X86/2004-02-12-Memcpy.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep movs | count 1
+
+@A = global [32 x i32] zeroinitializer
+@B = global [32 x i32] zeroinitializer
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
+
+define void @main() nounwind {
+  ; dword copy
+  call void @llvm.memcpy.i32(i8* bitcast ([32 x i32]* @A to i8*),
+                           i8* bitcast ([32 x i32]* @B to i8*),
+                           i32 128, i32 4 )
+
+  ; word copy
+  call void @llvm.memcpy.i32( i8* bitcast ([32 x i32]* @A to i8*),
+                           i8* bitcast ([32 x i32]* @B to i8*),
+                           i32 128, i32 2 )
+
+  ; byte copy
+  call void @llvm.memcpy.i32( i8* bitcast ([32 x i32]* @A to i8*),
+                           i8* bitcast ([32 x i32]* @B to i8*),
+                            i32 128, i32 1 )
+
+  ret void
+}
diff --git a/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll b/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll
new file mode 100644
index 0000000..fea2b54
--- /dev/null
+++ b/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=x86 | grep {(%esp}
+; RUN: llc < %s -march=x86 | grep {pushl	%ebp} | count 1
+; RUN: llc < %s -march=x86 | grep {popl	%ebp} | count 1
+
+declare i8* @llvm.returnaddress(i32)
+
+declare i8* @llvm.frameaddress(i32)
+
+define i8* @test1() {
+        %X = call i8* @llvm.returnaddress( i32 0 )              ; <i8*> [#uses=1]
+        ret i8* %X
+}
+
+define i8* @test2() {
+        %X = call i8* @llvm.frameaddress( i32 0 )               ; <i8*> [#uses=1]
+        ret i8* %X
+}
+
diff --git a/test/CodeGen/X86/2004-02-14-InefficientStackPointer.ll b/test/CodeGen/X86/2004-02-14-InefficientStackPointer.ll
new file mode 100644
index 0000000..f986ebd
--- /dev/null
+++ b/test/CodeGen/X86/2004-02-14-InefficientStackPointer.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=x86 | grep -i ESP | not grep sub
+
+define i32 @test(i32 %X) {
+        ret i32 %X
+}
diff --git a/test/CodeGen/X86/2004-02-22-Casts.ll b/test/CodeGen/X86/2004-02-22-Casts.ll
new file mode 100644
index 0000000..dabf7d3
--- /dev/null
+++ b/test/CodeGen/X86/2004-02-22-Casts.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86
+define i1 @test1(double %X) {
+        %V = fcmp one double %X, 0.000000e+00           ; <i1> [#uses=1]
+        ret i1 %V
+}
+
+define double @test2(i64 %X) {
+        %V = uitofp i64 %X to double            ; <double> [#uses=1]
+        ret double %V
+}
+
+
diff --git a/test/CodeGen/X86/2004-03-30-Select-Max.ll b/test/CodeGen/X86/2004-03-30-Select-Max.ll
new file mode 100644
index 0000000..b6631b6
--- /dev/null
+++ b/test/CodeGen/X86/2004-03-30-Select-Max.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 | not grep {j\[lgbe\]}
+
+define i32 @max(i32 %A, i32 %B) {
+        %gt = icmp sgt i32 %A, %B               ; <i1> [#uses=1]
+        %R = select i1 %gt, i32 %A, i32 %B              ; <i32> [#uses=1]
+        ret i32 %R
+}
+
diff --git a/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll b/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll
new file mode 100644
index 0000000..c62fee1
--- /dev/null
+++ b/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll
@@ -0,0 +1,13 @@
+; Linear scan does not currently coalesce any two variables that have
+; overlapping live intervals. When two overlapping intervals have the same
+; value, they can be joined though.
+;
+; RUN: llc < %s -march=x86 -regalloc=linearscan | \
+; RUN:   not grep {mov %\[A-Z\]\\\{2,3\\\}, %\[A-Z\]\\\{2,3\\\}}
+
+define i64 @test(i64 %x) {
+entry:
+        %tmp.1 = mul i64 %x, 4294967297         ; <i64> [#uses=1]
+        ret i64 %tmp.1
+}
+
diff --git a/test/CodeGen/X86/2004-04-13-FPCMOV-Crash.ll b/test/CodeGen/X86/2004-04-13-FPCMOV-Crash.ll
new file mode 100644
index 0000000..f8ed016
--- /dev/null
+++ b/test/CodeGen/X86/2004-04-13-FPCMOV-Crash.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=x86
+
+define double @test(double %d) {
+        %X = select i1 false, double %d, double %d              ; <double> [#uses=1]
+        ret double %X
+}
+
diff --git a/test/CodeGen/X86/2004-06-10-StackifierCrash.ll b/test/CodeGen/X86/2004-06-10-StackifierCrash.ll
new file mode 100644
index 0000000..036aa6a
--- /dev/null
+++ b/test/CodeGen/X86/2004-06-10-StackifierCrash.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=x86
+
+define i1 @T(double %X) {
+        %V = fcmp oeq double %X, %X             ; <i1> [#uses=1]
+        ret i1 %V
+}
diff --git a/test/CodeGen/X86/2004-10-08-SelectSetCCFold.ll b/test/CodeGen/X86/2004-10-08-SelectSetCCFold.ll
new file mode 100644
index 0000000..db3af01
--- /dev/null
+++ b/test/CodeGen/X86/2004-10-08-SelectSetCCFold.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86
+
+define i1 @test(i1 %C, i1 %D, i32 %X, i32 %Y) {
+        %E = icmp slt i32 %X, %Y                ; <i1> [#uses=1]
+        %F = select i1 %C, i1 %D, i1 %E         ; <i1> [#uses=1]
+        ret i1 %F
+}
+
diff --git a/test/CodeGen/X86/2005-01-17-CycleInDAG.ll b/test/CodeGen/X86/2005-01-17-CycleInDAG.ll
new file mode 100644
index 0000000..32fafc6
--- /dev/null
+++ b/test/CodeGen/X86/2005-01-17-CycleInDAG.ll
@@ -0,0 +1,17 @@
+; This testcase was distilled from 132.ijpeg.  Basically we cannot fold the
+; load into the sub instruction here as it induces a cycle in the dag, which
+; is invalid code (there is no correct way to order the instruction).  Check
+; that we do not fold the load into the sub.
+
+; RUN: llc < %s -march=x86 | not grep sub.*GLOBAL
+
+@GLOBAL = external global i32           ; <i32*> [#uses=1]
+
+define i32 @test(i32* %P1, i32* %P2, i32* %P3) {
+        %L = load i32* @GLOBAL          ; <i32> [#uses=1]
+        store i32 12, i32* %P2
+        %Y = load i32* %P3              ; <i32> [#uses=1]
+        %Z = sub i32 %Y, %L             ; <i32> [#uses=1]
+        ret i32 %Z
+}
+
diff --git a/test/CodeGen/X86/2005-02-14-IllegalAssembler.ll b/test/CodeGen/X86/2005-02-14-IllegalAssembler.ll
new file mode 100644
index 0000000..30a6ac6
--- /dev/null
+++ b/test/CodeGen/X86/2005-02-14-IllegalAssembler.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=x86 | not grep 18446744073709551612
+
+@A = external global i32                ; <i32*> [#uses=1]
+@Y = global i32* getelementptr (i32* @A, i32 -1)                ; <i32**> [#uses=0]
+
diff --git a/test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll b/test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll
new file mode 100644
index 0000000..5266009
--- /dev/null
+++ b/test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -march=x86 -mcpu=generic
+; Make sure LLC doesn't crash in the stackifier due to FP PHI nodes.
+
+define void @radfg_() {
+entry:
+        br i1 false, label %no_exit.16.preheader, label %loopentry.0
+loopentry.0:            ; preds = %entry
+        ret void
+no_exit.16.preheader:           ; preds = %entry
+        br label %no_exit.16
+no_exit.16:             ; preds = %no_exit.16, %no_exit.16.preheader
+        br i1 false, label %loopexit.16.loopexit, label %no_exit.16
+loopexit.16.loopexit:           ; preds = %no_exit.16
+        br label %no_exit.18
+no_exit.18:             ; preds = %loopexit.20, %loopexit.16.loopexit
+        %tmp.882 = fadd float 0.000000e+00, 0.000000e+00         ; <float> [#uses=2]
+        br i1 false, label %loopexit.19, label %no_exit.19.preheader
+no_exit.19.preheader:           ; preds = %no_exit.18
+        ret void
+loopexit.19:            ; preds = %no_exit.18
+        br i1 false, label %loopexit.20, label %no_exit.20
+no_exit.20:             ; preds = %loopexit.21, %loopexit.19
+        %ai2.1122.tmp.3 = phi float [ %tmp.958, %loopexit.21 ], [ %tmp.882, %loopexit.19 ]              ; <float> [#uses=1]
+        %tmp.950 = fmul float %tmp.882, %ai2.1122.tmp.3          ; <float> [#uses=1]
+        %tmp.951 = fsub float 0.000000e+00, %tmp.950             ; <float> [#uses=1]
+        %tmp.958 = fadd float 0.000000e+00, 0.000000e+00         ; <float> [#uses=1]
+        br i1 false, label %loopexit.21, label %no_exit.21.preheader
+no_exit.21.preheader:           ; preds = %no_exit.20
+        ret void
+loopexit.21:            ; preds = %no_exit.20
+        br i1 false, label %loopexit.20, label %no_exit.20
+loopexit.20:            ; preds = %loopexit.21, %loopexit.19
+        %ar2.1124.tmp.2 = phi float [ 0.000000e+00, %loopexit.19 ], [ %tmp.951, %loopexit.21 ]          ; <float> [#uses=0]
+        br i1 false, label %loopexit.18.loopexit, label %no_exit.18
+loopexit.18.loopexit:           ; preds = %loopexit.20
+        ret void
+}
+
diff --git a/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll b/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll
new file mode 100644
index 0000000..d906da4
--- /dev/null
+++ b/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86 | \
+; RUN:   grep shld | count 1
+;
+; Check that the isel does not fold the shld, which already folds a load
+; and has two uses, into a store.
+
+@A = external global i32                ; <i32*> [#uses=2]
+
+define i32 @test5(i32 %B, i8 %C) {
+        %tmp.1 = load i32* @A           ; <i32> [#uses=1]
+        %shift.upgrd.1 = zext i8 %C to i32              ; <i32> [#uses=1]
+        %tmp.2 = shl i32 %tmp.1, %shift.upgrd.1         ; <i32> [#uses=1]
+        %tmp.3 = sub i8 32, %C          ; <i8> [#uses=1]
+        %shift.upgrd.2 = zext i8 %tmp.3 to i32          ; <i32> [#uses=1]
+        %tmp.4 = lshr i32 %B, %shift.upgrd.2            ; <i32> [#uses=1]
+        %tmp.5 = or i32 %tmp.4, %tmp.2          ; <i32> [#uses=2]
+        store i32 %tmp.5, i32* @A
+        ret i32 %tmp.5
+}
+
diff --git a/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll b/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll
new file mode 100644
index 0000000..dc69ef8
--- /dev/null
+++ b/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 | not grep {subl.*%esp}
+
+define i32 @f(i32 %a, i32 %b) {
+        %tmp.2 = mul i32 %a, %a         ; <i32> [#uses=1]
+        %tmp.5 = shl i32 %a, 1          ; <i32> [#uses=1]
+        %tmp.6 = mul i32 %tmp.5, %b             ; <i32> [#uses=1]
+        %tmp.10 = mul i32 %b, %b                ; <i32> [#uses=1]
+        %tmp.7 = add i32 %tmp.10, %tmp.2                ; <i32> [#uses=1]
+        %tmp.11 = add i32 %tmp.7, %tmp.6                ; <i32> [#uses=1]
+        ret i32 %tmp.11
+}
+
diff --git a/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll b/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll
new file mode 100644
index 0000000..0421896
--- /dev/null
+++ b/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86  -stats |& \
+; RUN:   grep asm-printer | grep 7
+
+define i32 @g(i32 %a, i32 %b) nounwind {
+        %tmp.1 = shl i32 %b, 1          ; <i32> [#uses=1]
+        %tmp.3 = add i32 %tmp.1, %a             ; <i32> [#uses=1]
+        %tmp.5 = mul i32 %tmp.3, %a             ; <i32> [#uses=1]
+        %tmp.8 = mul i32 %b, %b         ; <i32> [#uses=1]
+        %tmp.9 = add i32 %tmp.5, %tmp.8         ; <i32> [#uses=1]
+        ret i32 %tmp.9
+}
+
diff --git a/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll b/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll
new file mode 100644
index 0000000..3f67097
--- /dev/null
+++ b/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -march=x86 -mcpu=yonah
+; END.
+
+target datalayout = "e-p:32:32"
+target triple = "i686-apple-darwin8.6.1"
+	%struct.GLTColor4 = type { float, float, float, float }
+	%struct.GLTCoord3 = type { float, float, float }
+	%struct.__GLIContextRec = type { { %struct.anon, { [24 x [16 x float]], [24 x [16 x float]] }, %struct.GLTColor4, { float, float, float, float, %struct.GLTCoord3, float } }, { float, float, float, float, float, float, float, float, [4 x i32], [4 x i32], [4 x i32] } }
+	%struct.__GLvertex = type { %struct.GLTColor4, %struct.GLTColor4, %struct.GLTColor4, %struct.GLTColor4, %struct.GLTColor4, %struct.GLTCoord3, float, %struct.GLTColor4, float, float, float, i8, i8, i8, i8, [4 x float], [2 x i8*], i32, i32, [16 x %struct.GLTColor4] }
+	%struct.anon = type { float, float, float, float, float, float, float, float }
+
+declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8)
+
+declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>)
+
+declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>)
+
+define void @gleLLVMVecInterpolateClip() {
+entry:
+	br i1 false, label %cond_false, label %cond_false183
+cond_false:		; preds = %entry
+	br i1 false, label %cond_false183, label %cond_true69
+cond_true69:		; preds = %cond_false
+	ret void
+cond_false183:		; preds = %cond_false, %entry
+	%vuizmsk.0.1 = phi <4 x i32> [ < i32 -1, i32 -1, i32 -1, i32 0 >, %entry ], [ < i32 -1, i32 0, i32 0, i32 0 >, %cond_false ]		; <<4 x i32>> [#uses=2]
+	%tmp192 = extractelement <4 x i32> %vuizmsk.0.1, i32 2		; <i32> [#uses=1]
+	%tmp193 = extractelement <4 x i32> %vuizmsk.0.1, i32 3		; <i32> [#uses=2]
+	%tmp195 = insertelement <4 x i32> zeroinitializer, i32 %tmp192, i32 1		; <<4 x i32>> [#uses=1]
+	%tmp196 = insertelement <4 x i32> %tmp195, i32 %tmp193, i32 2		; <<4 x i32>> [#uses=1]
+	%tmp197 = insertelement <4 x i32> %tmp196, i32 %tmp193, i32 3		; <<4 x i32>> [#uses=1]
+	%tmp336 = and <4 x i32> zeroinitializer, %tmp197		; <<4 x i32>> [#uses=1]
+	%tmp337 = bitcast <4 x i32> %tmp336 to <4 x float>		; <<4 x float>> [#uses=1]
+	%tmp378 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp337, <4 x float> zeroinitializer, i8 1 )		; <<4 x float>> [#uses=1]
+	%tmp379 = bitcast <4 x float> %tmp378 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp388 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> zeroinitializer, <4 x i32> %tmp379 )		; <<4 x i32>> [#uses=1]
+	%tmp392 = bitcast <8 x i16> %tmp388 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp399 = extractelement <8 x i16> %tmp392, i32 7		; <i16> [#uses=1]
+	%tmp423 = insertelement <8 x i16> zeroinitializer, i16 %tmp399, i32 7		; <<8 x i16>> [#uses=1]
+	%tmp427 = bitcast <8 x i16> %tmp423 to <16 x i8>		; <<16 x i8>> [#uses=1]
+	%tmp428 = tail call i32 @llvm.x86.sse2.pmovmskb.128( <16 x i8> %tmp427 )		; <i32> [#uses=1]
+	%tmp432 = trunc i32 %tmp428 to i8		; <i8> [#uses=1]
+	%tmp = and i8 %tmp432, 42		; <i8> [#uses=1]
+	%tmp436 = bitcast i8 %tmp to i8		; <i8> [#uses=1]
+	%tmp446 = zext i8 %tmp436 to i32		; <i32> [#uses=1]
+	%tmp447 = shl i32 %tmp446, 24		; <i32> [#uses=1]
+	%tmp449 = or i32 0, %tmp447		; <i32> [#uses=1]
+	store i32 %tmp449, i32* null
+	ret void
+}
diff --git a/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll b/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
new file mode 100644
index 0000000..8783a11
--- /dev/null
+++ b/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin8 -relocation-model=static > %t
+; RUN: grep {movl	_last} %t | count 1
+; RUN: grep {cmpl.*_last} %t | count 1
+
+@block = external global i8*            ; <i8**> [#uses=1]
+@last = external global i32             ; <i32*> [#uses=3]
+
+define i1 @loadAndRLEsource_no_exit_2E_1_label_2E_0(i32 %tmp.21.reload, i32 %tmp.8) {
+newFuncRoot:
+        br label %label.0
+label.0.no_exit.1_crit_edge.exitStub:           ; preds = %label.0
+        ret i1 true
+codeRepl5.exitStub:             ; preds = %label.0
+        ret i1 false
+label.0:                ; preds = %newFuncRoot
+        %tmp.35 = load i32* @last               ; <i32> [#uses=1]
+        %inc.1 = add i32 %tmp.35, 1             ; <i32> [#uses=2]
+        store i32 %inc.1, i32* @last
+        %tmp.36 = load i8** @block              ; <i8*> [#uses=1]
+        %tmp.38 = getelementptr i8* %tmp.36, i32 %inc.1         ; <i8*> [#uses=1]
+        %tmp.40 = trunc i32 %tmp.21.reload to i8                ; <i8> [#uses=1]
+        store i8 %tmp.40, i8* %tmp.38
+        %tmp.910 = load i32* @last              ; <i32> [#uses=1]
+        %tmp.1111 = icmp slt i32 %tmp.910, %tmp.8               ; <i1> [#uses=1]
+        %tmp.1412 = icmp ne i32 %tmp.21.reload, 257             ; <i1> [#uses=1]
+        %tmp.1613 = and i1 %tmp.1111, %tmp.1412         ; <i1> [#uses=1]
+        br i1 %tmp.1613, label %label.0.no_exit.1_crit_edge.exitStub, label %codeRepl5.exitStub
+}
+
diff --git a/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
new file mode 100644
index 0000000..b045329
--- /dev/null
+++ b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
@@ -0,0 +1,76 @@
+; RUN: llc < %s -march=x86 -mcpu=yonah -stats |& \
+; RUN:   not grep {Number of register spills}
+; END.
+
+
+define i32 @foo(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float>* %d) {
+	%tmp44 = load <4 x float>* %a		; <<4 x float>> [#uses=9]
+	%tmp46 = load <4 x float>* %b		; <<4 x float>> [#uses=1]
+	%tmp48 = load <4 x float>* %c		; <<4 x float>> [#uses=1]
+	%tmp50 = load <4 x float>* %d		; <<4 x float>> [#uses=1]
+	%tmp51 = bitcast <4 x float> %tmp44 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp = shufflevector <4 x i32> %tmp51, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 >		; <<4 x i32>> [#uses=2]
+	%tmp52 = bitcast <4 x i32> %tmp to <4 x float>		; <<4 x float>> [#uses=1]
+	%tmp60 = xor <4 x i32> %tmp, < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 >		; <<4 x i32>> [#uses=1]
+	%tmp61 = bitcast <4 x i32> %tmp60 to <4 x float>		; <<4 x float>> [#uses=1]
+	%tmp74 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp52, <4 x float> %tmp44, i8 1 )		; <<4 x float>> [#uses=1]
+	%tmp75 = bitcast <4 x float> %tmp74 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp88 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp61, i8 1 )		; <<4 x float>> [#uses=1]
+	%tmp89 = bitcast <4 x float> %tmp88 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp98 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp75, <4 x i32> %tmp89 )		; <<4 x i32>> [#uses=1]
+	%tmp102 = bitcast <8 x i16> %tmp98 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp.upgrd.1 = shufflevector <8 x i16> %tmp102, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 >		; <<8 x i16>> [#uses=1]
+	%tmp105 = shufflevector <8 x i16> %tmp.upgrd.1, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 >		; <<8 x i16>> [#uses=1]
+	%tmp105.upgrd.2 = bitcast <8 x i16> %tmp105 to <4 x float>		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp105.upgrd.2, <4 x float>* %a
+	%tmp108 = bitcast <4 x float> %tmp46 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp109 = shufflevector <4 x i32> %tmp108, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 >		; <<4 x i32>> [#uses=2]
+	%tmp109.upgrd.3 = bitcast <4 x i32> %tmp109 to <4 x float>		; <<4 x float>> [#uses=1]
+	%tmp119 = xor <4 x i32> %tmp109, < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 >		; <<4 x i32>> [#uses=1]
+	%tmp120 = bitcast <4 x i32> %tmp119 to <4 x float>		; <<4 x float>> [#uses=1]
+	%tmp133 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp109.upgrd.3, <4 x float> %tmp44, i8 1 )		; <<4 x float>> [#uses=1]
+	%tmp134 = bitcast <4 x float> %tmp133 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp147 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp120, i8 1 )		; <<4 x float>> [#uses=1]
+	%tmp148 = bitcast <4 x float> %tmp147 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp159 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp134, <4 x i32> %tmp148 )		; <<4 x i32>> [#uses=1]
+	%tmp163 = bitcast <8 x i16> %tmp159 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp164 = shufflevector <8 x i16> %tmp163, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 >		; <<8 x i16>> [#uses=1]
+	%tmp166 = shufflevector <8 x i16> %tmp164, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 >		; <<8 x i16>> [#uses=1]
+	%tmp166.upgrd.4 = bitcast <8 x i16> %tmp166 to <4 x float>		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp166.upgrd.4, <4 x float>* %b
+	%tmp169 = bitcast <4 x float> %tmp48 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp170 = shufflevector <4 x i32> %tmp169, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 >		; <<4 x i32>> [#uses=2]
+	%tmp170.upgrd.5 = bitcast <4 x i32> %tmp170 to <4 x float>		; <<4 x float>> [#uses=1]
+	%tmp180 = xor <4 x i32> %tmp170, < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 >		; <<4 x i32>> [#uses=1]
+	%tmp181 = bitcast <4 x i32> %tmp180 to <4 x float>		; <<4 x float>> [#uses=1]
+	%tmp194 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp170.upgrd.5, <4 x float> %tmp44, i8 1 )		; <<4 x float>> [#uses=1]
+	%tmp195 = bitcast <4 x float> %tmp194 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp208 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp181, i8 1 )		; <<4 x float>> [#uses=1]
+	%tmp209 = bitcast <4 x float> %tmp208 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp220 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp195, <4 x i32> %tmp209 )		; <<4 x i32>> [#uses=1]
+	%tmp224 = bitcast <8 x i16> %tmp220 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp225 = shufflevector <8 x i16> %tmp224, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 >		; <<8 x i16>> [#uses=1]
+	%tmp227 = shufflevector <8 x i16> %tmp225, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 >		; <<8 x i16>> [#uses=1]
+	%tmp227.upgrd.6 = bitcast <8 x i16> %tmp227 to <4 x float>		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp227.upgrd.6, <4 x float>* %c
+	%tmp230 = bitcast <4 x float> %tmp50 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp231 = shufflevector <4 x i32> %tmp230, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 >		; <<4 x i32>> [#uses=2]
+	%tmp231.upgrd.7 = bitcast <4 x i32> %tmp231 to <4 x float>		; <<4 x float>> [#uses=1]
+	%tmp241 = xor <4 x i32> %tmp231, < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 >		; <<4 x i32>> [#uses=1]
+	%tmp242 = bitcast <4 x i32> %tmp241 to <4 x float>		; <<4 x float>> [#uses=1]
+	%tmp255 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp231.upgrd.7, <4 x float> %tmp44, i8 1 )		; <<4 x float>> [#uses=1]
+	%tmp256 = bitcast <4 x float> %tmp255 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp269 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp242, i8 1 )		; <<4 x float>> [#uses=1]
+	%tmp270 = bitcast <4 x float> %tmp269 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp281 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp256, <4 x i32> %tmp270 )		; <<4 x i32>> [#uses=1]
+	%tmp285 = bitcast <8 x i16> %tmp281 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp286 = shufflevector <8 x i16> %tmp285, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 >		; <<8 x i16>> [#uses=1]
+	%tmp288 = shufflevector <8 x i16> %tmp286, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 >		; <<8 x i16>> [#uses=1]
+	%tmp288.upgrd.8 = bitcast <8 x i16> %tmp288 to <4 x float>		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp288.upgrd.8, <4 x float>* %d
+	ret i32 0
+}
+
+declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8)
+
+declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>)
diff --git a/test/CodeGen/X86/2006-05-02-InstrSched1.ll b/test/CodeGen/X86/2006-05-02-InstrSched1.ll
new file mode 100644
index 0000000..7d0a6ab
--- /dev/null
+++ b/test/CodeGen/X86/2006-05-02-InstrSched1.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=x86 -relocation-model=static -stats |& \
+; RUN:   grep asm-printer | grep 14
+;
+@size20 = external global i32		; <i32*> [#uses=1]
+@in5 = external global i8*		; <i8**> [#uses=1]
+
+define i32 @compare(i8* %a, i8* %b) nounwind {
+	%tmp = bitcast i8* %a to i32*		; <i32*> [#uses=1]
+	%tmp1 = bitcast i8* %b to i32*		; <i32*> [#uses=1]
+	%tmp.upgrd.1 = load i32* @size20		; <i32> [#uses=1]
+	%tmp.upgrd.2 = load i8** @in5		; <i8*> [#uses=2]
+	%tmp3 = load i32* %tmp1		; <i32> [#uses=1]
+	%gep.upgrd.3 = zext i32 %tmp3 to i64		; <i64> [#uses=1]
+	%tmp4 = getelementptr i8* %tmp.upgrd.2, i64 %gep.upgrd.3		; <i8*> [#uses=2]
+	%tmp7 = load i32* %tmp		; <i32> [#uses=1]
+	%gep.upgrd.4 = zext i32 %tmp7 to i64		; <i64> [#uses=1]
+	%tmp8 = getelementptr i8* %tmp.upgrd.2, i64 %gep.upgrd.4		; <i8*> [#uses=2]
+	%tmp.upgrd.5 = tail call i32 @memcmp( i8* %tmp8, i8* %tmp4, i32 %tmp.upgrd.1 )		; <i32> [#uses=1]
+	ret i32 %tmp.upgrd.5
+}
+
+declare i32 @memcmp(i8*, i8*, i32)
+
diff --git a/test/CodeGen/X86/2006-05-02-InstrSched2.ll b/test/CodeGen/X86/2006-05-02-InstrSched2.ll
new file mode 100644
index 0000000..23954d7
--- /dev/null
+++ b/test/CodeGen/X86/2006-05-02-InstrSched2.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86 -stats  |& \
+; RUN:   grep asm-printer | grep 13
+
+define void @_ZN9__gnu_cxx9hashtableISt4pairIKPKciES3_NS_4hashIS3_EESt10_Select1stIS5_E5eqstrSaIiEE14find_or_insertERKS5__cond_true456.i(i8* %tmp435.i, i32* %tmp449.i.out) nounwind {
+newFuncRoot:
+	br label %cond_true456.i
+bb459.i.exitStub:		; preds = %cond_true456.i
+	store i32 %tmp449.i, i32* %tmp449.i.out
+	ret void
+cond_true456.i:		; preds = %cond_true456.i, %newFuncRoot
+	%__s441.2.4.i = phi i8* [ %tmp451.i.upgrd.1, %cond_true456.i ], [ %tmp435.i, %newFuncRoot ]		; <i8*> [#uses=2]
+	%__h.2.4.i = phi i32 [ %tmp449.i, %cond_true456.i ], [ 0, %newFuncRoot ]	; <i32> [#uses=1]
+	%tmp446.i = mul i32 %__h.2.4.i, 5		; <i32> [#uses=1]
+	%tmp.i = load i8* %__s441.2.4.i		; <i8> [#uses=1]
+	%tmp448.i = sext i8 %tmp.i to i32		; <i32> [#uses=1]
+	%tmp449.i = add i32 %tmp448.i, %tmp446.i		; <i32> [#uses=2]
+	%tmp450.i = ptrtoint i8* %__s441.2.4.i to i32		; <i32> [#uses=1]
+	%tmp451.i = add i32 %tmp450.i, 1		; <i32> [#uses=1]
+	%tmp451.i.upgrd.1 = inttoptr i32 %tmp451.i to i8*		; <i8*> [#uses=2]
+	%tmp45435.i = load i8* %tmp451.i.upgrd.1		; <i8> [#uses=1]
+	%tmp45536.i = icmp eq i8 %tmp45435.i, 0		; <i1> [#uses=1]
+	br i1 %tmp45536.i, label %bb459.i.exitStub, label %cond_true456.i
+}
+
diff --git a/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll b/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll
new file mode 100644
index 0000000..8421483
--- /dev/null
+++ b/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll
@@ -0,0 +1,25 @@
+; Coalescing from R32 to a subset R32_. Once another register coalescer bug is
+; fixed, the movb should go away as well.
+
+; RUN: llc < %s -march=x86 -relocation-model=static | \
+; RUN:   grep movl
+
+@B = external global i32		; <i32*> [#uses=2]
+@C = external global i16*		; <i16**> [#uses=2]
+
+define void @test(i32 %A) {
+	%A.upgrd.1 = trunc i32 %A to i8		; <i8> [#uses=1]
+	%tmp2 = load i32* @B		; <i32> [#uses=1]
+	%tmp3 = and i8 %A.upgrd.1, 16		; <i8> [#uses=1]
+	%shift.upgrd.2 = zext i8 %tmp3 to i32		; <i32> [#uses=1]
+	%tmp4 = shl i32 %tmp2, %shift.upgrd.2		; <i32> [#uses=1]
+	store i32 %tmp4, i32* @B
+	%tmp6 = lshr i32 %A, 3		; <i32> [#uses=1]
+	%tmp = load i16** @C		; <i16*> [#uses=1]
+	%tmp8 = ptrtoint i16* %tmp to i32		; <i32> [#uses=1]
+	%tmp9 = add i32 %tmp8, %tmp6		; <i32> [#uses=1]
+	%tmp9.upgrd.3 = inttoptr i32 %tmp9 to i16*		; <i16*> [#uses=1]
+	store i16* %tmp9.upgrd.3, i16** @C
+	ret void
+}
+
diff --git a/test/CodeGen/X86/2006-05-08-InstrSched.ll b/test/CodeGen/X86/2006-05-08-InstrSched.ll
new file mode 100644
index 0000000..d58d638
--- /dev/null
+++ b/test/CodeGen/X86/2006-05-08-InstrSched.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86 -relocation-model=static | not grep {subl.*%esp}
+
+@A = external global i16*		; <i16**> [#uses=1]
+@B = external global i32		; <i32*> [#uses=1]
+@C = external global i32		; <i32*> [#uses=2]
+
+define void @test() {
+	%tmp = load i16** @A		; <i16*> [#uses=1]
+	%tmp1 = getelementptr i16* %tmp, i32 1		; <i16*> [#uses=1]
+	%tmp.upgrd.1 = load i16* %tmp1		; <i16> [#uses=1]
+	%tmp3 = zext i16 %tmp.upgrd.1 to i32		; <i32> [#uses=1]
+	%tmp.upgrd.2 = load i32* @B		; <i32> [#uses=1]
+	%tmp4 = and i32 %tmp.upgrd.2, 16		; <i32> [#uses=1]
+	%tmp5 = load i32* @C		; <i32> [#uses=1]
+	%tmp6 = trunc i32 %tmp4 to i8		; <i8> [#uses=2]
+	%shift.upgrd.3 = zext i8 %tmp6 to i32		; <i32> [#uses=1]
+	%tmp7 = shl i32 %tmp5, %shift.upgrd.3		; <i32> [#uses=1]
+	%tmp9 = xor i8 %tmp6, 16		; <i8> [#uses=1]
+	%shift.upgrd.4 = zext i8 %tmp9 to i32		; <i32> [#uses=1]
+	%tmp11 = lshr i32 %tmp3, %shift.upgrd.4		; <i32> [#uses=1]
+	%tmp12 = or i32 %tmp11, %tmp7		; <i32> [#uses=1]
+	store i32 %tmp12, i32* @C
+	ret void
+}
+
diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll
new file mode 100644
index 0000000..bdbe713
--- /dev/null
+++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats -realign-stack=0 |&\
+; RUN:     grep {asm-printer} | grep 31
+
+target datalayout = "e-p:32:32"
+define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind {
+entry:
+	%tmp9 = icmp slt i32 %M, 5		; <i1> [#uses=1]
+	br i1 %tmp9, label %return, label %cond_true
+
+cond_true:		; preds = %cond_true, %entry
+	%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %cond_true ]		; <i32> [#uses=2]
+	%tmp. = shl i32 %indvar, 2		; <i32> [#uses=1]
+	%tmp.10 = add nsw i32 %tmp., 1		; <i32> [#uses=2]
+	%tmp31 = add nsw i32 %tmp.10, -1		; <i32> [#uses=4]
+	%tmp32 = getelementptr i32* %mpp, i32 %tmp31		; <i32*> [#uses=1]
+	%tmp34 = bitcast i32* %tmp32 to <16 x i8>*		; <i8*> [#uses=1]
+	%tmp = load <16 x i8>* %tmp34, align 1
+	%tmp42 = getelementptr i32* %tpmm, i32 %tmp31		; <i32*> [#uses=1]
+	%tmp42.upgrd.1 = bitcast i32* %tmp42 to <4 x i32>*		; <<4 x i32>*> [#uses=1]
+	%tmp46 = load <4 x i32>* %tmp42.upgrd.1		; <<4 x i32>> [#uses=1]
+	%tmp54 = bitcast <16 x i8> %tmp to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp55 = add <4 x i32> %tmp54, %tmp46		; <<4 x i32>> [#uses=2]
+	%tmp55.upgrd.2 = bitcast <4 x i32> %tmp55 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%tmp62 = getelementptr i32* %ip, i32 %tmp31		; <i32*> [#uses=1]
+	%tmp65 = bitcast i32* %tmp62 to <16 x i8>*		; <i8*> [#uses=1]
+	%tmp66 = load <16 x i8>* %tmp65, align 1
+	%tmp73 = getelementptr i32* %tpim, i32 %tmp31		; <i32*> [#uses=1]
+	%tmp73.upgrd.3 = bitcast i32* %tmp73 to <4 x i32>*		; <<4 x i32>*> [#uses=1]
+	%tmp77 = load <4 x i32>* %tmp73.upgrd.3		; <<4 x i32>> [#uses=1]
+	%tmp87 = bitcast <16 x i8> %tmp66 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp88 = add <4 x i32> %tmp87, %tmp77		; <<4 x i32>> [#uses=2]
+	%tmp88.upgrd.4 = bitcast <4 x i32> %tmp88 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%tmp99 = tail call <4 x i32> @llvm.x86.sse2.pcmpgt.d( <4 x i32> %tmp88, <4 x i32> %tmp55 )		; <<4 x i32>> [#uses=1]
+	%tmp99.upgrd.5 = bitcast <4 x i32> %tmp99 to <2 x i64>		; <<2 x i64>> [#uses=2]
+	%tmp110 = xor <2 x i64> %tmp99.upgrd.5, < i64 -1, i64 -1 >		; <<2 x i64>> [#uses=1]
+	%tmp111 = and <2 x i64> %tmp110, %tmp55.upgrd.2		; <<2 x i64>> [#uses=1]
+	%tmp121 = and <2 x i64> %tmp99.upgrd.5, %tmp88.upgrd.4		; <<2 x i64>> [#uses=1]
+	%tmp131 = or <2 x i64> %tmp121, %tmp111		; <<2 x i64>> [#uses=1]
+	%tmp137 = getelementptr i32* %mc, i32 %tmp.10		; <i32*> [#uses=1]
+	%tmp137.upgrd.7 = bitcast i32* %tmp137 to <2 x i64>*		; <<2 x i64>*> [#uses=1]
+	store <2 x i64> %tmp131, <2 x i64>* %tmp137.upgrd.7
+	%tmp147 = add nsw i32 %tmp.10, 8		; <i32> [#uses=1]
+	%tmp.upgrd.8 = icmp slt i32 %tmp147, %M		; <i1> [#uses=1]
+	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=1]
+	br i1 %tmp.upgrd.8, label %cond_true, label %return
+
+return:		; preds = %cond_true, %entry
+	ret void
+}
+
+declare <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32>, <4 x i32>)
diff --git a/test/CodeGen/X86/2006-05-17-VectorArg.ll b/test/CodeGen/X86/2006-05-17-VectorArg.ll
new file mode 100644
index 0000000..b36d61e
--- /dev/null
+++ b/test/CodeGen/X86/2006-05-17-VectorArg.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2
+
+define <4 x float> @opRSQ(<4 x float> %a) nounwind {
+entry:
+	%tmp2 = extractelement <4 x float> %a, i32 3		; <float> [#uses=2]
+	%abscond = fcmp oge float %tmp2, -0.000000e+00		; <i1> [#uses=1]
+	%abs = select i1 %abscond, float %tmp2, float 0.000000e+00		; <float> [#uses=1]
+	%tmp3 = tail call float @llvm.sqrt.f32( float %abs )		; <float> [#uses=1]
+	%tmp4 = fdiv float 1.000000e+00, %tmp3		; <float> [#uses=1]
+	%tmp11 = insertelement <4 x float> zeroinitializer, float %tmp4, i32 3		; <<4 x float>> [#uses=1]
+	ret <4 x float> %tmp11
+}
+
+declare float @llvm.sqrt.f32(float)
+
diff --git a/test/CodeGen/X86/2006-05-22-FPSetEQ.ll b/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
new file mode 100644
index 0000000..083d068
--- /dev/null
+++ b/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86 | grep setnp
+; RUN: llc < %s -march=x86 -enable-unsafe-fp-math | \
+; RUN:   not grep setnp
+
+define i32 @test(float %f) {
+	%tmp = fcmp oeq float %f, 0.000000e+00		; <i1> [#uses=1]
+	%tmp.upgrd.1 = zext i1 %tmp to i32		; <i32> [#uses=1]
+	ret i32 %tmp.upgrd.1
+}
+
diff --git a/test/CodeGen/X86/2006-05-25-CycleInDAG.ll b/test/CodeGen/X86/2006-05-25-CycleInDAG.ll
new file mode 100644
index 0000000..0288278
--- /dev/null
+++ b/test/CodeGen/X86/2006-05-25-CycleInDAG.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86
+
+define i32 @test() {
+	br i1 false, label %cond_next33, label %cond_true12
+cond_true12:		; preds = %0
+	ret i32 0
+cond_next33:		; preds = %0
+	%tmp44.i = call double @foo( double 0.000000e+00, i32 32 )		; <double> [#uses=1]
+	%tmp61.i = load i8* null		; <i8> [#uses=1]
+	%tmp61.i.upgrd.1 = zext i8 %tmp61.i to i32		; <i32> [#uses=1]
+	%tmp58.i = or i32 0, %tmp61.i.upgrd.1		; <i32> [#uses=1]
+	%tmp62.i = or i32 %tmp58.i, 0		; <i32> [#uses=1]
+	%tmp62.i.upgrd.2 = sitofp i32 %tmp62.i to double		; <double> [#uses=1]
+	%tmp64.i = fadd double %tmp62.i.upgrd.2, %tmp44.i		; <double> [#uses=1]
+	%tmp68.i = call double @foo( double %tmp64.i, i32 0 )		; <double> [#uses=0]
+	ret i32 0
+}
+
+declare double @foo(double, i32)
+
diff --git a/test/CodeGen/X86/2006-07-10-InlineAsmAConstraint.ll b/test/CodeGen/X86/2006-07-10-InlineAsmAConstraint.ll
new file mode 100644
index 0000000..4ea364d
--- /dev/null
+++ b/test/CodeGen/X86/2006-07-10-InlineAsmAConstraint.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86
+; PR825
+
+define i64 @test() {
+	%tmp.i5 = call i64 asm sideeffect "rdtsc", "=A,~{dirflag},~{fpsr},~{flags}"( )		; <i64> [#uses=1]
+	ret i64 %tmp.i5
+}
+
diff --git a/test/CodeGen/X86/2006-07-12-InlineAsmQConstraint.ll b/test/CodeGen/X86/2006-07-12-InlineAsmQConstraint.ll
new file mode 100644
index 0000000..568fbbc
--- /dev/null
+++ b/test/CodeGen/X86/2006-07-12-InlineAsmQConstraint.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86
+; PR828
+
+target datalayout = "e-p:32:32"
+target triple = "i686-pc-linux-gnu"
+
+define void @_ZN5() {
+cond_true9:
+	%tmp3.i.i = call i32 asm sideeffect "lock; cmpxchg $1,$2", "={ax},q,m,0,~{dirflag},~{fpsr},~{flags},~{memory}"( i32 0, i32* null, i32 0 )		; <i32> [#uses=0]
+	ret void
+}
+
diff --git a/test/CodeGen/X86/2006-07-19-ATTAsm.ll b/test/CodeGen/X86/2006-07-19-ATTAsm.ll
new file mode 100644
index 0000000..c8fd10f
--- /dev/null
+++ b/test/CodeGen/X86/2006-07-19-ATTAsm.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s -march=x86 -x86-asm-syntax=att
+; PR834
+; END.
+
+target datalayout = "e-p:32:32"
+target triple = "i386-unknown-freebsd6.1"
+	%llvm.dbg.anchor.type = type { i32, i32 }
+	%llvm.dbg.basictype.type = type { i32, {  }*, i8*, {  }*, i32, i64, i64, i64, i32, i32 }
+	%llvm.dbg.compile_unit.type = type { i32, {  }*, i32, i8*, i8*, i8* }
+	%llvm.dbg.global_variable.type = type { i32, {  }*, {  }*, i8*, i8 *, i8*, {  }*, i32, {  }*, i1, i1, {  }* }
+@x = global i32 0		; <i32*> [#uses=1]
+@llvm.dbg.global_variable = internal constant %llvm.dbg.global_variable.type {
+    i32 327732,
+    {  }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.global_variables to {  }*), 
+    {  }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to {  }*), 
+    i8* getelementptr ([2 x i8]* @str, i64 0, i64 0), 
+    i8* getelementptr ([2 x i8]* @str, i64 0, i64 0), 
+    i8* null, 
+    {  }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to {  }*), 
+    i32 1, 
+    {  }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to {  }*), 
+    i1 false, 
+    i1 true, 
+    {  }* bitcast (i32* @x to {  }*) }, section "llvm.metadata"		; <%llvm.dbg.global_variable.type*> [#uses=0]
+@llvm.dbg.global_variables = linkonce constant %llvm.dbg.anchor.type { i32 327680, i32 52 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
+@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type {
+    i32 327697, 
+    {  }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to {  }*), 
+    i32 4, 
+    i8* getelementptr ([10 x i8]* @str1, i64 0, i64 0), 
+    i8* getelementptr ([32 x i8]* @str2, i64 0, i64 0), 
+    i8* getelementptr ([45 x i8]* @str3, i64 0, i64 0) }, section "llvm.metadata"		; <%llvm.dbg.compile_unit.type*> [#uses=1]
+@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 327680, i32 17 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
+@str1 = internal constant [10 x i8] c"testb.cpp\00", section "llvm.metadata"		; <[10 x i8]*> [#uses=1]
+@str2 = internal constant [32 x i8] c"/Sources/Projects/DwarfTesting/\00", section "llvm.metadata"		; <[32 x i8]*> [#uses=1]
+@str3 = internal constant [45 x i8] c"4.0.1 LLVM (Apple Computer, Inc. build 5400)\00", section "llvm.metadata"		; <[45 x i8]*> [#uses=1]
+@str = internal constant [2 x i8] c"x\00", section "llvm.metadata"		; <[2 x i8]*> [#uses=1]
+@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type {
+    i32 327716, 
+    {  }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to {  }*), 
+    i8* getelementptr ([4 x i8]* @str4, i64 0, i64 0), 
+    {  }* null, 
+    i32 0, 
+    i64 32, 
+    i64 32, 
+    i64 0, 
+    i32 0, 
+    i32 5 }, section "llvm.metadata"		; <%llvm.dbg.basictype.type*> [#uses=1]
+@str4 = internal constant [4 x i8] c"int\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/2006-07-20-InlineAsm.ll b/test/CodeGen/X86/2006-07-20-InlineAsm.ll
new file mode 100644
index 0000000..cac47cd
--- /dev/null
+++ b/test/CodeGen/X86/2006-07-20-InlineAsm.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=x86
+; PR833
+
+@G = weak global i32 0		; <i32*> [#uses=3]
+
+define i32 @foo(i32 %X) {
+entry:
+	%X_addr = alloca i32		; <i32*> [#uses=3]
+	store i32 %X, i32* %X_addr
+	call void asm sideeffect "xchg{l} {$0,$1|$1,$0}", "=*m,=*r,m,1,~{dirflag},~{fpsr},~{flags}"( i32* @G, i32* %X_addr, i32* @G, i32 %X )
+	%tmp1 = load i32* %X_addr		; <i32> [#uses=1]
+	ret i32 %tmp1
+}
+
+define i32 @foo2(i32 %X) {
+entry:
+	%X_addr = alloca i32		; <i32*> [#uses=3]
+	store i32 %X, i32* %X_addr
+	call void asm sideeffect "xchg{l} {$0,$1|$1,$0}", "=*m,=*r,1,~{dirflag},~{fpsr},~{flags}"( i32* @G, i32* %X_addr, i32 %X )
+	%tmp1 = load i32* %X_addr		; <i32> [#uses=1]
+	ret i32 %tmp1
+}
+
diff --git a/test/CodeGen/X86/2006-07-28-AsmPrint-Long-As-Pointer.ll b/test/CodeGen/X86/2006-07-28-AsmPrint-Long-As-Pointer.ll
new file mode 100644
index 0000000..deae086
--- /dev/null
+++ b/test/CodeGen/X86/2006-07-28-AsmPrint-Long-As-Pointer.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=x86 | grep -- 4294967240
+; PR853
+
+@X = global i32* inttoptr (i64 -56 to i32*)		; <i32**> [#uses=0]
+
diff --git a/test/CodeGen/X86/2006-07-31-SingleRegClass.ll b/test/CodeGen/X86/2006-07-31-SingleRegClass.ll
new file mode 100644
index 0000000..3159cec
--- /dev/null
+++ b/test/CodeGen/X86/2006-07-31-SingleRegClass.ll
@@ -0,0 +1,10 @@
+; PR850
+; RUN: llc < %s -march=x86 -x86-asm-syntax=att > %t
+; RUN: grep {movl 4(%eax),%ebp} %t
+; RUN: grep {movl 0(%eax), %ebx} %t
+
+define i32 @foo(i32 %__s.i.i, i32 %tmp5.i.i, i32 %tmp6.i.i, i32 %tmp7.i.i, i32 %tmp8.i.i) {
+	%tmp9.i.i = call i32 asm sideeffect "push %ebp\0Apush %ebx\0Amovl 4($2),%ebp\0Amovl 0($2), %ebx\0Amovl $1,%eax\0Aint  $$0x80\0Apop  %ebx\0Apop %ebp", "={ax},i,0,{cx},{dx},{si},{di}"( i32 192, i32 %__s.i.i, i32 %tmp5.i.i, i32 %tmp6.i.i, i32 %tmp7.i.i, i32 %tmp8.i.i )		; <i32> [#uses=1]
+	ret i32 %tmp9.i.i
+}
+
diff --git a/test/CodeGen/X86/2006-08-07-CycleInDAG.ll b/test/CodeGen/X86/2006-08-07-CycleInDAG.ll
new file mode 100644
index 0000000..aea707e
--- /dev/null
+++ b/test/CodeGen/X86/2006-08-07-CycleInDAG.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2
+	%struct.foo = type opaque
+
+define fastcc i32 @test(%struct.foo* %v, %struct.foo* %vi) {
+	br i1 false, label %ilog2.exit, label %cond_true.i
+
+cond_true.i:		; preds = %0
+	ret i32 0
+
+ilog2.exit:		; preds = %0
+	%tmp24.i = load i32* null		; <i32> [#uses=1]
+	%tmp13.i12.i = tail call double @ldexp( double 0.000000e+00, i32 0 )		; <double> [#uses=1]
+	%tmp13.i13.i = fptrunc double %tmp13.i12.i to float		; <float> [#uses=1]
+	%tmp11.s = load i32* null		; <i32> [#uses=1]
+	%tmp11.i = bitcast i32 %tmp11.s to i32		; <i32> [#uses=1]
+	%n.i = bitcast i32 %tmp24.i to i32		; <i32> [#uses=1]
+	%tmp13.i7 = mul i32 %tmp11.i, %n.i		; <i32> [#uses=1]
+	%tmp.i8 = tail call i8* @calloc( i32 %tmp13.i7, i32 4 )		; <i8*> [#uses=0]
+	br i1 false, label %bb224.preheader.i, label %bb.i
+
+bb.i:		; preds = %ilog2.exit
+	ret i32 0
+
+bb224.preheader.i:		; preds = %ilog2.exit
+	%tmp165.i = fpext float %tmp13.i13.i to double		; <double> [#uses=0]
+	ret i32 0
+}
+
+declare i8* @calloc(i32, i32)
+
+declare double @ldexp(double, i32)
diff --git a/test/CodeGen/X86/2006-08-16-CycleInDAG.ll b/test/CodeGen/X86/2006-08-16-CycleInDAG.ll
new file mode 100644
index 0000000..5fee326
--- /dev/null
+++ b/test/CodeGen/X86/2006-08-16-CycleInDAG.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=x86
+	%struct.expr = type { %struct.rtx_def*, i32, %struct.expr*, %struct.occr*, %struct.occr*, %struct.rtx_def* }
+	%struct.hash_table = type { %struct.expr**, i32, i32, i32 }
+	%struct.occr = type { %struct.occr*, %struct.rtx_def*, i8, i8 }
+	%struct.rtx_def = type { i16, i8, i8, %struct.u }
+	%struct.u = type { [1 x i64] }
+
+define void @test() {
+	%tmp = load i32* null		; <i32> [#uses=1]
+	%tmp8 = call i32 @hash_rtx( )		; <i32> [#uses=1]
+	%tmp11 = urem i32 %tmp8, %tmp		; <i32> [#uses=1]
+	br i1 false, label %cond_next, label %return
+
+cond_next:		; preds = %0
+	%gep.upgrd.1 = zext i32 %tmp11 to i64		; <i64> [#uses=1]
+	%tmp17 = getelementptr %struct.expr** null, i64 %gep.upgrd.1		; <%struct.expr**> [#uses=0]
+	ret void
+
+return:		; preds = %0
+	ret void
+}
+
+declare i32 @hash_rtx()
diff --git a/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll b/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll
new file mode 100644
index 0000000..a19d8f7
--- /dev/null
+++ b/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86 -mcpu=i386 | \
+; RUN:    not grep {movl %eax, %edx}
+
+define i32 @foo(i32 %t, i32 %C) {
+entry:
+        br label %cond_true
+
+cond_true:              ; preds = %cond_true, %entry
+        %t_addr.0.0 = phi i32 [ %t, %entry ], [ %tmp7, %cond_true ]             ; <i32> [#uses=2]
+        %tmp7 = add i32 %t_addr.0.0, 1          ; <i32> [#uses=1]
+        %tmp = icmp sgt i32 %C, 39              ; <i1> [#uses=1]
+        br i1 %tmp, label %bb12, label %cond_true
+
+bb12:           ; preds = %cond_true
+        ret i32 %t_addr.0.0
+}
+
diff --git a/test/CodeGen/X86/2006-09-01-CycleInDAG.ll b/test/CodeGen/X86/2006-09-01-CycleInDAG.ll
new file mode 100644
index 0000000..1e890bb
--- /dev/null
+++ b/test/CodeGen/X86/2006-09-01-CycleInDAG.ll
@@ -0,0 +1,131 @@
+; RUN: llc < %s -march=x86
+target datalayout = "e-p:32:32"
+target triple = "i686-apple-darwin8"
+	%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+	%struct.VEC_edge = type { i32, i32, [1 x %struct.edge_def*] }
+	%struct.VEC_tree = type { i32, i32, [1 x %struct.tree_node*] }
+	%struct.__sFILEX = type opaque
+	%struct.__sbuf = type { i8*, i32 }
+	%struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
+	%struct._var_map = type { %struct.partition_def*, i32*, i32*, %struct.tree_node**, i32, i32, i32* }
+	%struct.basic_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.VEC_edge*, %struct.VEC_edge*, %struct.bitmap_head_def*, %struct.bitmap_head_def*, i8*, %struct.loop*, [2 x %struct.et_node*], %struct.basic_block_def*, %struct.basic_block_def*, %struct.reorder_block_def*, %struct.bb_ann_d*, i64, i32, i32, i32, i32 }
+	%struct.bb_ann_d = type { %struct.tree_node*, i8, %struct.edge_prediction* }
+	%struct.bitmap_element_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, [4 x i32] }
+	%struct.bitmap_head_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, %struct.bitmap_obstack* }
+	%struct.bitmap_iterator = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, i32 }
+	%struct.bitmap_obstack = type { %struct.bitmap_element_def*, %struct.bitmap_head_def*, %struct.obstack }
+	%struct.block_stmt_iterator = type { %struct.tree_stmt_iterator, %struct.basic_block_def* }
+	%struct.coalesce_list_d = type { %struct._var_map*, %struct.partition_pair_d**, i1 }
+	%struct.conflict_graph_def = type opaque
+	%struct.dataflow_d = type { %struct.varray_head_tag*, [2 x %struct.tree_node*] }
+	%struct.def_operand_ptr = type { %struct.tree_node** }
+	%struct.def_optype_d = type { i32, [1 x %struct.def_operand_ptr] }
+	%struct.die_struct = type opaque
+	%struct.edge_def = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.edge_def_insns, i8*, %struct.__sbuf*, i32, i32, i64, i32 }
+	%struct.edge_def_insns = type { %struct.rtx_def* }
+	%struct.edge_iterator = type { i32, %struct.VEC_edge** }
+	%struct.edge_prediction = type { %struct.edge_prediction*, %struct.edge_def*, i32, i32 }
+	%struct.eh_status = type opaque
+	%struct.elt_list = type opaque
+	%struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.__sbuf, i32, i8*, %struct.rtx_def** }
+	%struct.et_node = type opaque
+	%struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
+	%struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.varray_head_tag*, %struct.temp_slot*, i32, %struct.var_refs_queue*, i32, i32, %struct.rtvec_def*, %struct.tree_node*, i32, i32, i32, %struct.machine_function*, i32, i32, i1, i1, %struct.language_function*, %struct.rtx_def*, i32, i32, i32, i32, %struct.__sbuf, %struct.varray_head_tag*, %struct.tree_node*, i8, i8, i8 }
+	%struct.ht_identifier = type { i8*, i32, i32 }
+	%struct.initial_value_struct = type opaque
+	%struct.lang_decl = type opaque
+	%struct.lang_type = type opaque
+	%struct.language_function = type opaque
+	%struct.location_t = type { i8*, i32 }
+	%struct.loop = type opaque
+	%struct.machine_function = type { i32, i32, i8*, i32, i32 }
+	%struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (i8*, i32)*, void (i8*, %struct._obstack_chunk*)*, i8*, i8 }
+	%struct.partition_def = type { i32, [1 x %struct.partition_elem] }
+	%struct.partition_elem = type { i32, %struct.partition_elem*, i32 }
+	%struct.partition_pair_d = type { i32, i32, i32, %struct.partition_pair_d* }
+	%struct.phi_arg_d = type { %struct.tree_node*, i1 }
+	%struct.pointer_set_t = type opaque
+	%struct.ptr_info_def = type { i8, %struct.bitmap_head_def*, %struct.tree_node* }
+	%struct.real_value = type opaque
+	%struct.reg_info_def = type opaque
+	%struct.reorder_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_def*, i32, i32, i32 }
+	%struct.rtvec_def = type opaque
+	%struct.rtx_def = type opaque
+	%struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* }
+	%struct.simple_bitmap_def = type { i32, i32, i32, [1 x i64] }
+	%struct.ssa_op_iter = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.stmt_operands_d*, i1 }
+	%struct.stmt_ann_d = type { %struct.tree_ann_common_d, i8, %struct.basic_block_def*, %struct.stmt_operands_d, %struct.dataflow_d*, %struct.bitmap_head_def*, i32 }
+	%struct.stmt_operands_d = type { %struct.def_optype_d*, %struct.def_optype_d*, %struct.v_may_def_optype_d*, %struct.vuse_optype_d*, %struct.v_may_def_optype_d* }
+	%struct.temp_slot = type opaque
+	%struct.tree_ann_common_d = type { i32, i8*, %struct.tree_node* }
+	%struct.tree_ann_d = type { %struct.stmt_ann_d }
+	%struct.tree_binfo = type { %struct.tree_common, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.VEC_tree*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.VEC_tree }
+	%struct.tree_block = type { %struct.tree_common, i8, [3 x i8], %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node* }
+	%struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_ann_d*, i8, i8, i8, i8, i8 }
+	%struct.tree_complex = type { %struct.tree_common, %struct.tree_node*, %struct.tree_node* }
+	%struct.tree_decl = type { %struct.tree_common, %struct.__sbuf, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, i8, i32, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.tree_decl_u2, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* }
+	%struct.tree_decl_u1 = type { i64 }
+	%struct.tree_decl_u1_a = type { i32 }
+	%struct.tree_decl_u2 = type { %struct.function* }
+	%struct.tree_exp = type { %struct.tree_common, %struct.__sbuf*, i32, %struct.tree_node*, [1 x %struct.tree_node*] }
+	%struct.tree_identifier = type { %struct.tree_common, %struct.ht_identifier }
+	%struct.tree_int_cst = type { %struct.tree_common, %struct.tree_int_cst_lowhi }
+	%struct.tree_int_cst_lowhi = type { i64, i64 }
+	%struct.tree_list = type { %struct.tree_common, %struct.tree_node*, %struct.tree_node* }
+	%struct.tree_live_info_d = type { %struct._var_map*, %struct.bitmap_head_def*, %struct.bitmap_head_def**, i32, %struct.bitmap_head_def** }
+	%struct.tree_node = type { %struct.tree_decl }
+	%struct.tree_partition_associator_d = type { %struct.varray_head_tag*, %struct.varray_head_tag*, i32*, i32*, i32, i32, %struct._var_map* }
+	%struct.tree_phi_node = type { %struct.tree_common, %struct.tree_node*, i32, i32, i32, %struct.basic_block_def*, %struct.dataflow_d*, [1 x %struct.phi_arg_d] }
+	%struct.tree_real_cst = type { %struct.tree_common, %struct.real_value* }
+	%struct.tree_ssa_name = type { %struct.tree_common, %struct.tree_node*, i32, %struct.ptr_info_def*, %struct.tree_node*, i8* }
+	%struct.tree_statement_list = type { %struct.tree_common, %struct.tree_statement_list_node*, %struct.tree_statement_list_node* }
+	%struct.tree_statement_list_node = type { %struct.tree_statement_list_node*, %struct.tree_statement_list_node*, %struct.tree_node* }
+	%struct.tree_stmt_iterator = type { %struct.tree_statement_list_node*, %struct.tree_node* }
+	%struct.tree_string = type { %struct.tree_common, i32, [1 x i8] }
+	%struct.tree_type = type { %struct.tree_common, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i32, i16, i8, i8, i32, %struct.tree_node*, %struct.tree_node*, %struct.tree_decl_u1_a, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_type* }
+	%struct.tree_type_symtab = type { i32 }
+	%struct.tree_value_handle = type { %struct.tree_common, %struct.value_set*, i32 }
+	%struct.tree_vec = type { %struct.tree_common, i32, [1 x %struct.tree_node*] }
+	%struct.tree_vector = type { %struct.tree_common, %struct.tree_node* }
+	%struct.use_operand_ptr = type { %struct.tree_node** }
+	%struct.use_optype_d = type { i32, [1 x %struct.def_operand_ptr] }
+	%struct.v_def_use_operand_type_t = type { %struct.tree_node*, %struct.tree_node* }
+	%struct.v_may_def_optype_d = type { i32, [1 x %struct.v_def_use_operand_type_t] }
+	%struct.v_must_def_optype_d = type { i32, [1 x %struct.v_def_use_operand_type_t] }
+	%struct.value_set = type opaque
+	%struct.var_ann_d = type { %struct.tree_ann_common_d, i8, i8, %struct.tree_node*, %struct.varray_head_tag*, i32, i32, i32, %struct.tree_node*, %struct.tree_node* }
+	%struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
+	%struct.varasm_status = type opaque
+	%struct.varray_data = type { [1 x i64] }
+	%struct.varray_head_tag = type { i32, i32, i32, i8*, %struct.varray_data }
+	%struct.vuse_optype_d = type { i32, [1 x %struct.tree_node*] }
+@basic_block_info = external global %struct.varray_head_tag*		; <%struct.varray_head_tag**> [#uses=1]
+
+define void @calculate_live_on_entry_cond_true3632(%struct.varray_head_tag* %stack3023.6, i32* %tmp3629, %struct.VEC_edge*** %tmp3397.out) {
+newFuncRoot:
+	br label %cond_true3632
+
+bb3502.exitStub:		; preds = %cond_true3632
+	store %struct.VEC_edge** %tmp3397, %struct.VEC_edge*** %tmp3397.out
+	ret void
+
+cond_true3632:		; preds = %newFuncRoot
+	%tmp3378 = load i32* %tmp3629		; <i32> [#uses=1]
+	%tmp3379 = add i32 %tmp3378, -1		; <i32> [#uses=1]
+	%tmp3381 = getelementptr %struct.varray_head_tag* %stack3023.6, i32 0, i32 4		; <%struct.varray_data*> [#uses=1]
+	%tmp3382 = bitcast %struct.varray_data* %tmp3381 to [1 x i32]*		; <[1 x i32]*> [#uses=1]
+	%gep.upgrd.1 = zext i32 %tmp3379 to i64		; <i64> [#uses=1]
+	%tmp3383 = getelementptr [1 x i32]* %tmp3382, i32 0, i64 %gep.upgrd.1		; <i32*> [#uses=1]
+	%tmp3384 = load i32* %tmp3383		; <i32> [#uses=1]
+	%tmp3387 = load i32* %tmp3629		; <i32> [#uses=1]
+	%tmp3388 = add i32 %tmp3387, -1		; <i32> [#uses=1]
+	store i32 %tmp3388, i32* %tmp3629
+	%tmp3391 = load %struct.varray_head_tag** @basic_block_info		; <%struct.varray_head_tag*> [#uses=1]
+	%tmp3393 = getelementptr %struct.varray_head_tag* %tmp3391, i32 0, i32 4		; <%struct.varray_data*> [#uses=1]
+	%tmp3394 = bitcast %struct.varray_data* %tmp3393 to [1 x %struct.basic_block_def*]*		; <[1 x %struct.basic_block_def*]*> [#uses=1]
+	%tmp3395 = getelementptr [1 x %struct.basic_block_def*]* %tmp3394, i32 0, i32 %tmp3384		; <%struct.basic_block_def**> [#uses=1]
+	%tmp3396 = load %struct.basic_block_def** %tmp3395		; <%struct.basic_block_def*> [#uses=1]
+	%tmp3397 = getelementptr %struct.basic_block_def* %tmp3396, i32 0, i32 3		; <%struct.VEC_edge**> [#uses=1]
+	br label %bb3502.exitStub
+}
diff --git a/test/CodeGen/X86/2006-10-02-BoolRetCrash.ll b/test/CodeGen/X86/2006-10-02-BoolRetCrash.ll
new file mode 100644
index 0000000..795d464
--- /dev/null
+++ b/test/CodeGen/X86/2006-10-02-BoolRetCrash.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s 
+; PR933
+
+define fastcc i1 @test() {
+        ret i1 true
+}
+
diff --git a/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll b/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll
new file mode 100644
index 0000000..bf9fa57
--- /dev/null
+++ b/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 -mattr=sse | grep movaps
+; Test that the load is NOT folded into the intrinsic, which would zero the top
+; elts of the loaded vector.
+
+target datalayout = "e-p:32:32"
+target triple = "i686-apple-darwin8.7.2"
+
+define <4 x float> @test(<4 x float> %A, <4 x float>* %B) {
+        %BV = load <4 x float>* %B              ; <<4 x float>> [#uses=1]
+        %tmp28 = tail call <4 x float> @llvm.x86.sse.sub.ss( <4 x float> %A, <4 x float> %BV )       ; <<4 x float>> [#uses=1]
+        ret <4 x float> %tmp28
+}
+
+declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
+
diff --git a/test/CodeGen/X86/2006-10-09-CycleInDAG.ll b/test/CodeGen/X86/2006-10-09-CycleInDAG.ll
new file mode 100644
index 0000000..fbb14ee
--- /dev/null
+++ b/test/CodeGen/X86/2006-10-09-CycleInDAG.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86
+
+define void @_ZN13QFSFileEngine4readEPcx() {
+	%tmp201 = load i32* null		; <i32> [#uses=1]
+	%tmp201.upgrd.1 = sext i32 %tmp201 to i64		; <i64> [#uses=1]
+	%tmp202 = load i64* null		; <i64> [#uses=1]
+	%tmp203 = add i64 %tmp201.upgrd.1, %tmp202		; <i64> [#uses=1]
+	store i64 %tmp203, i64* null
+	ret void
+}
+
diff --git a/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll b/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll
new file mode 100644
index 0000000..b1f0451
--- /dev/null
+++ b/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=x86 | grep shrl
+; Bug in FindModifiedNodeSlot cause tmp14 load to become a zextload and shr 31
+; is then optimized away.
+@tree_code_type = external global [0 x i32]		; <[0 x i32]*> [#uses=1]
+
+define void @copy_if_shared_r() {
+	%tmp = load i32* null		; <i32> [#uses=1]
+	%tmp56 = and i32 %tmp, 255		; <i32> [#uses=1]
+	%gep.upgrd.1 = zext i32 %tmp56 to i64		; <i64> [#uses=1]
+	%tmp8 = getelementptr [0 x i32]* @tree_code_type, i32 0, i64 %gep.upgrd.1	; <i32*> [#uses=1]
+	%tmp9 = load i32* %tmp8		; <i32> [#uses=1]
+	%tmp10 = add i32 %tmp9, -1		; <i32> [#uses=1]
+	%tmp.upgrd.2 = icmp ugt i32 %tmp10, 2		; <i1> [#uses=1]
+	%tmp14 = load i32* null		; <i32> [#uses=1]
+	%tmp15 = lshr i32 %tmp14, 31		; <i32> [#uses=1]
+	%tmp15.upgrd.3 = trunc i32 %tmp15 to i8		; <i8> [#uses=1]
+	%tmp16 = icmp ne i8 %tmp15.upgrd.3, 0		; <i1> [#uses=1]
+	br i1 %tmp.upgrd.2, label %cond_false25, label %cond_true
+cond_true:		; preds = %0
+	br i1 %tmp16, label %cond_true17, label %cond_false
+cond_true17:		; preds = %cond_true
+	ret void
+cond_false:		; preds = %cond_true
+	ret void
+cond_false25:		; preds = %0
+	ret void
+}
+
diff --git a/test/CodeGen/X86/2006-10-12-CycleInDAG.ll b/test/CodeGen/X86/2006-10-12-CycleInDAG.ll
new file mode 100644
index 0000000..3b987ac
--- /dev/null
+++ b/test/CodeGen/X86/2006-10-12-CycleInDAG.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -march=x86
+	%struct.function = type opaque
+	%struct.lang_decl = type opaque
+	%struct.location_t = type { i8*, i32 }
+	%struct.rtx_def = type opaque
+	%struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %union.tree_ann_d*, i8, i8, i8, i8, i8 }
+	%struct.tree_decl = type { %struct.tree_common, %struct.location_t, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, i8, i8, i32, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.tree_decl_u2, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* }
+	%struct.tree_decl_u1 = type { i64 }
+	%struct.tree_decl_u2 = type { %struct.function* }
+	%struct.tree_node = type { %struct.tree_decl }
+	%union.tree_ann_d = type opaque
+
+define void @check_format_arg() {
+	br i1 false, label %cond_next196, label %bb12.preheader
+
+bb12.preheader:		; preds = %0
+	ret void
+
+cond_next196:		; preds = %0
+	br i1 false, label %cond_next330, label %cond_true304
+
+cond_true304:		; preds = %cond_next196
+	ret void
+
+cond_next330:		; preds = %cond_next196
+	br i1 false, label %cond_next472, label %bb441
+
+bb441:		; preds = %cond_next330
+	ret void
+
+cond_next472:		; preds = %cond_next330
+	%tmp490 = load %struct.tree_node** null		; <%struct.tree_node*> [#uses=1]
+	%tmp492 = getelementptr %struct.tree_node* %tmp490, i32 0, i32 0, i32 0, i32 3		; <i8*> [#uses=1]
+	%tmp492.upgrd.1 = bitcast i8* %tmp492 to i32*		; <i32*> [#uses=1]
+	%tmp493 = load i32* %tmp492.upgrd.1		; <i32> [#uses=1]
+	%tmp495 = trunc i32 %tmp493 to i8		; <i8> [#uses=1]
+	%tmp496 = icmp eq i8 %tmp495, 11		; <i1> [#uses=1]
+	%tmp496.upgrd.2 = zext i1 %tmp496 to i8		; <i8> [#uses=1]
+	store i8 %tmp496.upgrd.2, i8* null
+	ret void
+}
diff --git a/test/CodeGen/X86/2006-10-13-CycleInDAG.ll b/test/CodeGen/X86/2006-10-13-CycleInDAG.ll
new file mode 100644
index 0000000..6ed2e7b
--- /dev/null
+++ b/test/CodeGen/X86/2006-10-13-CycleInDAG.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86
+@str = external global [18 x i8]		; <[18 x i8]*> [#uses=1]
+
+define void @test() {
+bb.i:
+	%tmp.i660 = load <4 x float>* null		; <<4 x float>> [#uses=1]
+	call void (i32, ...)* @printf( i32 0, i8* getelementptr ([18 x i8]* @str, i32 0, i64 0), double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00 )
+	%tmp152.i = load <4 x i32>* null		; <<4 x i32>> [#uses=1]
+	%tmp156.i = bitcast <4 x i32> %tmp152.i to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp175.i = bitcast <4 x float> %tmp.i660 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp176.i = xor <4 x i32> %tmp156.i, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
+	%tmp177.i = and <4 x i32> %tmp176.i, %tmp175.i		; <<4 x i32>> [#uses=1]
+	%tmp190.i = or <4 x i32> %tmp177.i, zeroinitializer		; <<4 x i32>> [#uses=1]
+	%tmp191.i = bitcast <4 x i32> %tmp190.i to <4 x float>		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp191.i, <4 x float>* null
+	ret void
+}
+
+declare void @printf(i32, ...)
diff --git a/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll b/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll
new file mode 100644
index 0000000..88e8b4a
--- /dev/null
+++ b/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=x86 -asm-verbose | FileCheck %s
+
+@str = internal constant [14 x i8] c"Hello world!\0A\00"		; <[14 x i8]*> [#uses=1]
+@str.upgrd.1 = internal constant [13 x i8] c"Blah world!\0A\00"		; <[13 x i8]*> [#uses=1]
+
+define i32 @test(i32 %argc, i8** %argv) nounwind {
+entry:
+; CHECK: cmpl	$2
+; CHECK-NEXT: je
+; CHECK-NEXT: %entry
+
+	switch i32 %argc, label %UnifiedReturnBlock [
+		 i32 1, label %bb
+		 i32 2, label %bb2
+	]
+
+bb:		; preds = %entry
+	%tmp1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([14 x i8]* @str, i32 0, i64 0) )		; <i32> [#uses=0]
+	ret i32 0
+
+bb2:		; preds = %entry
+	%tmp4 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([13 x i8]* @str.upgrd.1, i32 0, i64 0) )		; <i32> [#uses=0]
+	ret i32 0
+
+UnifiedReturnBlock:		; preds = %entry
+	ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/X86/2006-11-12-CSRetCC.ll b/test/CodeGen/X86/2006-11-12-CSRetCC.ll
new file mode 100644
index 0000000..91210ea
--- /dev/null
+++ b/test/CodeGen/X86/2006-11-12-CSRetCC.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -march=x86 | grep {subl	\$4, %esp}
+
+target triple = "i686-pc-linux-gnu"
+@str = internal constant [9 x i8] c"%f+%f*i\0A\00"              ; <[9 x i8]*> [#uses=1]
+
+define i32 @main() {
+entry:
+        %retval = alloca i32, align 4           ; <i32*> [#uses=1]
+        %tmp = alloca { double, double }, align 16              ; <{ double, double }*> [#uses=4]
+        %tmp1 = alloca { double, double }, align 16             ; <{ double, double }*> [#uses=4]
+        %tmp2 = alloca { double, double }, align 16             ; <{ double, double }*> [#uses=3]
+        %pi = alloca double, align 8            ; <double*> [#uses=2]
+        %z = alloca { double, double }, align 16                ; <{ double, double }*> [#uses=4]
+        %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+        store double 0x400921FB54442D18, double* %pi
+        %tmp.upgrd.1 = load double* %pi         ; <double> [#uses=1]
+        %real = getelementptr { double, double }* %tmp1, i64 0, i32 0           ; <double*> [#uses=1]
+        store double 0.000000e+00, double* %real
+        %real3 = getelementptr { double, double }* %tmp1, i64 0, i32 1          ; <double*> [#uses=1]
+        store double %tmp.upgrd.1, double* %real3
+        %tmp.upgrd.2 = getelementptr { double, double }* %tmp, i64 0, i32 0             ; <double*> [#uses=1]
+        %tmp4 = getelementptr { double, double }* %tmp1, i64 0, i32 0           ; <double*> [#uses=1]
+        %tmp5 = load double* %tmp4              ; <double> [#uses=1]
+        store double %tmp5, double* %tmp.upgrd.2
+        %tmp6 = getelementptr { double, double }* %tmp, i64 0, i32 1            ; <double*> [#uses=1]
+        %tmp7 = getelementptr { double, double }* %tmp1, i64 0, i32 1           ; <double*> [#uses=1]
+        %tmp8 = load double* %tmp7              ; <double> [#uses=1]
+        store double %tmp8, double* %tmp6
+        %tmp.upgrd.3 = bitcast { double, double }* %tmp to { i64, i64 }*                ; <{ i64, i64 }*> [#uses=1]
+        %tmp.upgrd.4 = getelementptr { i64, i64 }* %tmp.upgrd.3, i64 0, i32 0           ; <i64*> [#uses=1]
+        %tmp.upgrd.5 = load i64* %tmp.upgrd.4           ; <i64> [#uses=1]
+        %tmp9 = bitcast { double, double }* %tmp to { i64, i64 }*               ; <{ i64, i64 }*> [#uses=1]
+        %tmp10 = getelementptr { i64, i64 }* %tmp9, i64 0, i32 1                ; <i64*> [#uses=1]
+        %tmp11 = load i64* %tmp10               ; <i64> [#uses=1]
+        call void @cexp( { double, double }* sret  %tmp2, i64 %tmp.upgrd.5, i64 %tmp11 )
+        %tmp12 = getelementptr { double, double }* %z, i64 0, i32 0             ; <double*> [#uses=1]
+        %tmp13 = getelementptr { double, double }* %tmp2, i64 0, i32 0          ; <double*> [#uses=1]
+        %tmp14 = load double* %tmp13            ; <double> [#uses=1]
+        store double %tmp14, double* %tmp12
+        %tmp15 = getelementptr { double, double }* %z, i64 0, i32 1             ; <double*> [#uses=1]
+        %tmp16 = getelementptr { double, double }* %tmp2, i64 0, i32 1          ; <double*> [#uses=1]
+        %tmp17 = load double* %tmp16            ; <double> [#uses=1]
+        store double %tmp17, double* %tmp15
+        %tmp18 = getelementptr { double, double }* %z, i64 0, i32 1             ; <double*> [#uses=1]
+        %tmp19 = load double* %tmp18            ; <double> [#uses=1]
+        %tmp20 = getelementptr { double, double }* %z, i64 0, i32 0             ; <double*> [#uses=1]
+        %tmp21 = load double* %tmp20            ; <double> [#uses=1]
+        %tmp.upgrd.6 = getelementptr [9 x i8]* @str, i32 0, i64 0               ; <i8*> [#uses=1]
+        %tmp.upgrd.7 = call i32 (i8*, ...)* @printf( i8* %tmp.upgrd.6, double %tmp21, double %tmp19 )           ; <i32> [#uses=0]
+        br label %return
+return:         ; preds = %entry
+        %retval.upgrd.8 = load i32* %retval             ; <i32> [#uses=1]
+        ret i32 %retval.upgrd.8
+}
+
+declare void @cexp({ double, double }* sret , i64, i64)
+
+declare i32 @printf(i8*, ...)
+
diff --git a/test/CodeGen/X86/2006-11-17-IllegalMove.ll b/test/CodeGen/X86/2006-11-17-IllegalMove.ll
new file mode 100644
index 0000000..e839d72
--- /dev/null
+++ b/test/CodeGen/X86/2006-11-17-IllegalMove.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=x86-64 > %t
+; RUN: grep movb %t | count 2
+; RUN: grep {movzb\[wl\]} %t
+
+
+define void @handle_vector_size_attribute() nounwind {
+entry:
+	%tmp69 = load i32* null		; <i32> [#uses=1]
+	switch i32 %tmp69, label %bb84 [
+		 i32 2, label %bb77
+		 i32 1, label %bb77
+	]
+
+bb77:		; preds = %entry, %entry
+	%tmp99 = udiv i64 0, 0		; <i64> [#uses=1]
+	%tmp = load i8* null		; <i8> [#uses=1]
+	%tmp114 = icmp eq i64 0, 0		; <i1> [#uses=1]
+	br i1 %tmp114, label %cond_true115, label %cond_next136
+
+bb84:		; preds = %entry
+	ret void
+
+cond_true115:		; preds = %bb77
+	%tmp118 = load i8* null		; <i8> [#uses=1]
+	br i1 false, label %cond_next129, label %cond_true120
+
+cond_true120:		; preds = %cond_true115
+	%tmp127 = udiv i8 %tmp, %tmp118		; <i8> [#uses=1]
+	%tmp127.upgrd.1 = zext i8 %tmp127 to i64		; <i64> [#uses=1]
+	br label %cond_next129
+
+cond_next129:		; preds = %cond_true120, %cond_true115
+	%iftmp.30.0 = phi i64 [ %tmp127.upgrd.1, %cond_true120 ], [ 0, %cond_true115 ]		; <i64> [#uses=1]
+	%tmp132 = icmp eq i64 %iftmp.30.0, %tmp99		; <i1> [#uses=1]
+	br i1 %tmp132, label %cond_false148, label %cond_next136
+
+cond_next136:		; preds = %cond_next129, %bb77
+	ret void
+
+cond_false148:		; preds = %cond_next129
+	ret void
+}
diff --git a/test/CodeGen/X86/2006-11-27-SelectLegalize.ll b/test/CodeGen/X86/2006-11-27-SelectLegalize.ll
new file mode 100644
index 0000000..ea2e6db
--- /dev/null
+++ b/test/CodeGen/X86/2006-11-27-SelectLegalize.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86 | grep test.*1
+; PR1016
+
+define i32 @test(i32 %A, i32 %B, i32 %C) {
+        %a = trunc i32 %A to i1         ; <i1> [#uses=1]
+        %D = select i1 %a, i32 %B, i32 %C               ; <i32> [#uses=1]
+        ret i32 %D
+}
+
diff --git a/test/CodeGen/X86/2006-11-28-Memcpy.ll b/test/CodeGen/X86/2006-11-28-Memcpy.ll
new file mode 100644
index 0000000..8c1573f
--- /dev/null
+++ b/test/CodeGen/X86/2006-11-28-Memcpy.ll
@@ -0,0 +1,34 @@
+; PR1022, PR1023
+; RUN: llc < %s -march=x86 | grep -- -573785174 | count 2
+; RUN: llc < %s -march=x86 | grep -E {movl	_?bytes2} | count 1
+
+@fmt = constant [4 x i8] c"%x\0A\00"            ; <[4 x i8]*> [#uses=2]
+@bytes = constant [4 x i8] c"\AA\BB\CC\DD"              ; <[4 x i8]*> [#uses=1]
+@bytes2 = global [4 x i8] c"\AA\BB\CC\DD"               ; <[4 x i8]*> [#uses=1]
+
+define i32 @test1() nounwind {
+        %y = alloca i32         ; <i32*> [#uses=2]
+        %c = bitcast i32* %y to i8*             ; <i8*> [#uses=1]
+        %z = getelementptr [4 x i8]* @bytes, i32 0, i32 0               ; <i8*> [#uses=1]
+        call void @llvm.memcpy.i32( i8* %c, i8* %z, i32 4, i32 1 )
+        %r = load i32* %y               ; <i32> [#uses=1]
+        %t = bitcast [4 x i8]* @fmt to i8*              ; <i8*> [#uses=1]
+        %tmp = call i32 (i8*, ...)* @printf( i8* %t, i32 %r )           ; <i32> [#uses=0]
+        ret i32 0
+}
+
+define void @test2() nounwind {
+        %y = alloca i32         ; <i32*> [#uses=2]
+        %c = bitcast i32* %y to i8*             ; <i8*> [#uses=1]
+        %z = getelementptr [4 x i8]* @bytes2, i32 0, i32 0              ; <i8*> [#uses=1]
+        call void @llvm.memcpy.i32( i8* %c, i8* %z, i32 4, i32 1 )
+        %r = load i32* %y               ; <i32> [#uses=1]
+        %t = bitcast [4 x i8]* @fmt to i8*              ; <i8*> [#uses=1]
+        %tmp = call i32 (i8*, ...)* @printf( i8* %t, i32 %r )           ; <i32> [#uses=0]
+        ret void
+}
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
+
+declare i32 @printf(i8*, ...)
+
diff --git a/test/CodeGen/X86/2006-12-16-InlineAsmCrash.ll b/test/CodeGen/X86/2006-12-16-InlineAsmCrash.ll
new file mode 100644
index 0000000..50a244b
--- /dev/null
+++ b/test/CodeGen/X86/2006-12-16-InlineAsmCrash.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -march=x86
+; PR1049
+target datalayout = "e-p:32:32"
+target triple = "i686-pc-linux-gnu"
+	%struct.QBasicAtomic = type { i32 }
+	%struct.QByteArray = type { %"struct.QByteArray::Data"* }
+	%"struct.QByteArray::Data" = type { %struct.QBasicAtomic, i32, i32, i8*, [1 x i8] }
+	%struct.QFactoryLoader = type { %struct.QObject }
+	%struct.QImageIOHandler = type { i32 (...)**, %struct.QImageIOHandlerPrivate* }
+	%struct.QImageIOHandlerPrivate = type opaque
+	%struct.QImageWriter = type { %struct.QImageWriterPrivate* }
+	%struct.QImageWriterPrivate = type { %struct.QByteArray, %struct.QFactoryLoader*, i1, %struct.QImageIOHandler*, i32, float, %struct.QString, %struct.QString, i32, %struct.QString, %struct.QImageWriter* }
+	%"struct.QList<QByteArray>" = type { %"struct.QList<QByteArray>::._20" }
+	%"struct.QList<QByteArray>::._20" = type { %struct.QListData }
+	%struct.QListData = type { %"struct.QListData::Data"* }
+	%"struct.QListData::Data" = type { %struct.QBasicAtomic, i32, i32, i32, i8, [1 x i8*] }
+	%struct.QObject = type { i32 (...)**, %struct.QObjectData* }
+	%struct.QObjectData = type { i32 (...)**, %struct.QObject*, %struct.QObject*, %"struct.QList<QByteArray>", i8, [3 x i8], i32, i32 }
+	%struct.QString = type { %"struct.QString::Data"* }
+	%"struct.QString::Data" = type { %struct.QBasicAtomic, i32, i32, i16*, i8, i8, [1 x i16] }
+
+define i1 @_ZNK12QImageWriter8canWriteEv() {
+	%tmp62 = load %struct.QImageWriterPrivate** null		; <%struct.QImageWriterPrivate*> [#uses=1]
+	%tmp = getelementptr %struct.QImageWriterPrivate* %tmp62, i32 0, i32 9		; <%struct.QString*> [#uses=1]
+	%tmp75 = call %struct.QString* @_ZN7QStringaSERKS_( %struct.QString* %tmp, %struct.QString* null )		; <%struct.QString*> [#uses=0]
+	call void asm sideeffect "lock\0Adecl $0\0Asetne 1", "=*m"( i32* null )
+	ret i1 false
+}
+
+declare %struct.QString* @_ZN7QStringaSERKS_(%struct.QString*, %struct.QString*)
diff --git a/test/CodeGen/X86/2006-12-19-IntelSyntax.ll b/test/CodeGen/X86/2006-12-19-IntelSyntax.ll
new file mode 100644
index 0000000..f81b303
--- /dev/null
+++ b/test/CodeGen/X86/2006-12-19-IntelSyntax.ll
@@ -0,0 +1,86 @@
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel
+; PR1061
+target datalayout = "e-p:32:32"
+target triple = "i686-pc-linux-gnu"
+
+define void @bar(i32 %n) {
+entry:
+	switch i32 %n, label %bb12 [
+		 i32 1, label %bb
+		 i32 2, label %bb6
+		 i32 4, label %bb7
+		 i32 5, label %bb8
+		 i32 6, label %bb10
+		 i32 7, label %bb1
+		 i32 8, label %bb3
+		 i32 9, label %bb4
+		 i32 10, label %bb9
+		 i32 11, label %bb2
+		 i32 12, label %bb5
+		 i32 13, label %bb11
+	]
+
+bb:		; preds = %entry
+	call void (...)* @foo1( )
+	ret void
+
+bb1:		; preds = %entry
+	call void (...)* @foo2( )
+	ret void
+
+bb2:		; preds = %entry
+	call void (...)* @foo6( )
+	ret void
+
+bb3:		; preds = %entry
+	call void (...)* @foo3( )
+	ret void
+
+bb4:		; preds = %entry
+	call void (...)* @foo4( )
+	ret void
+
+bb5:		; preds = %entry
+	call void (...)* @foo5( )
+	ret void
+
+bb6:		; preds = %entry
+	call void (...)* @foo1( )
+	ret void
+
+bb7:		; preds = %entry
+	call void (...)* @foo2( )
+	ret void
+
+bb8:		; preds = %entry
+	call void (...)* @foo6( )
+	ret void
+
+bb9:		; preds = %entry
+	call void (...)* @foo3( )
+	ret void
+
+bb10:		; preds = %entry
+	call void (...)* @foo4( )
+	ret void
+
+bb11:		; preds = %entry
+	call void (...)* @foo5( )
+	ret void
+
+bb12:		; preds = %entry
+	call void (...)* @foo6( )
+	ret void
+}
+
+declare void @foo1(...)
+
+declare void @foo2(...)
+
+declare void @foo6(...)
+
+declare void @foo3(...)
+
+declare void @foo4(...)
+
+declare void @foo5(...)
diff --git a/test/CodeGen/X86/2007-01-08-InstrSched.ll b/test/CodeGen/X86/2007-01-08-InstrSched.ll
new file mode 100644
index 0000000..317ed0a
--- /dev/null
+++ b/test/CodeGen/X86/2007-01-08-InstrSched.ll
@@ -0,0 +1,22 @@
+; PR1075
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -O3 | FileCheck %s
+
+define float @foo(float %x) nounwind {
+    %tmp1 = fmul float %x, 3.000000e+00
+    %tmp3 = fmul float %x, 5.000000e+00
+    %tmp5 = fmul float %x, 7.000000e+00
+    %tmp7 = fmul float %x, 1.100000e+01
+    %tmp10 = fadd float %tmp1, %tmp3
+    %tmp12 = fadd float %tmp10, %tmp5
+    %tmp14 = fadd float %tmp12, %tmp7
+    ret float %tmp14
+
+; CHECK:      mulss	LCPI1_3(%rip)
+; CHECK-NEXT: mulss	LCPI1_0(%rip)
+; CHECK-NEXT: mulss	LCPI1_1(%rip)
+; CHECK-NEXT: mulss	LCPI1_2(%rip)
+; CHECK-NEXT: addss
+; CHECK-NEXT: addss
+; CHECK-NEXT: addss
+; CHECK-NEXT: ret
+}
diff --git a/test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll b/test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll
new file mode 100644
index 0000000..de226a1
--- /dev/null
+++ b/test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll
@@ -0,0 +1,19 @@
+; RUN: llc %s -o - -march=x86-64 | grep {(%rdi,%rax,8)}
+; RUN: llc %s -o - -march=x86-64 | not grep {addq.*8}
+
+define void @foo(double* %y) nounwind {
+entry:
+        br label %bb
+
+bb:
+        %i = phi i64 [ 0, %entry ], [ %k, %bb ]
+        %j = getelementptr double* %y, i64 %i
+        store double 0.000000e+00, double* %j
+        %k = add i64 %i, 1
+        %n = icmp eq i64 %k, 0
+        br i1 %n, label %return, label %bb
+
+return:
+        ret void
+}
+
diff --git a/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll b/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
new file mode 100644
index 0000000..5e7c0a7
--- /dev/null
+++ b/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
@@ -0,0 +1,462 @@
+; RUN: llc < %s -march=x86-64 > %t
+; RUN: grep leaq %t
+; RUN: not grep {,%rsp)} %t
+; PR1103
+
+target datalayout = "e-p:64:64"
+@i6000 = global [128 x i64] zeroinitializer, align 16
+
+
+define void @foo(i32* %a0, i32* %a1, i32* %a2, i32* %a3, i32* %a4, i32* %a5) {
+b:
+	%r = load i32* %a0
+	%r2 = load i32* %a1
+	%r4 = load i32* %a2
+	%r6 = load i32* %a3
+	%r8 = load i32* %a4
+	%r14 = load i32* %a5
+	%rx = sext i32 %r2 to i64
+	%r9 = sext i32 %r to i64
+	%r11 = add i64 %rx, 0
+	%ras = icmp slt i64 %r11, 0
+	%r12 = select i1 %ras, i64 0, i64 %r11
+	%r16 = sext i32 %r14 to i64
+	%r17 = sext i32 %r8 to i64
+	%r18 = sub i64 %r16, 0
+	%r19 = add i64 %r18, 0
+	%r20 = icmp slt i64 %r19, 0
+	%r19h = add i64 %r18, 0
+	%r22 = select i1 %r20, i64 1, i64 %r19h
+	%r23 = mul i64 %r22, 0
+	%r23a = trunc i64 %r23 to i32
+	%r24 = shl i32 %r23a, 0
+	%r25 = add i32 %r24, 0
+	%ras2 = alloca i8, i32 %r25, align 16
+	%r28 = getelementptr i8* %ras2, i32 0
+	%r38 = shl i64 %r12, 0
+	%s2013 = add i64 %r38, 0
+	%c22012 = getelementptr i8* %ras2, i64 %s2013
+	%r42 = shl i64 %r12, 0
+	%s2011 = add i64 %r42, 16
+	%c22010 = getelementptr i8* %ras2, i64 %s2011
+	%r50 = add i64 %r16, 0
+	%r51 = icmp slt i64 %r50, 0
+	%r50sh = shl i64 %r50, 0
+	%r50j = add i64 %r50sh, 0
+	%r54 = select i1 %r51, i64 0, i64 %r50j
+	%r56 = mul i64 %r54, %r12
+	%r28s = add i64 %r56, 16
+	%c2 = getelementptr i8* %ras2, i64 %r28s
+	%r60 = sub i32 %r2, %r
+	%r61 = icmp slt i32 %r60, 0
+	br i1 %r61, label %a29b, label %b63
+a29b:
+	%r155 = sub i32 %r6, %r4
+	%r156 = icmp slt i32 %r155, 0
+	br i1 %r156, label %a109b, label %b158
+b63:
+	%r66 = sext i32 %r60 to i64
+	%r67 = add i64 %r66, 0
+	%r76 = mul i64 %r17, 0
+	%r82 = add i64 %r76, 0
+	%r84 = icmp slt i64 %r67, 0
+	br i1 %r84, label %b85, label %a25b
+b85:
+	%e641 = phi i64 [ 0, %b63 ], [ %r129, %a25b ]
+	%r137 = icmp slt i64 %e641, 0
+	br i1 %r137, label %a25b140q, label %a29b
+a25b140q:
+	br label %a25b140
+a25b:
+	%w1989 = phi i64 [ 0, %b63 ], [ %v1990, %a25b ]
+	%e642 = shl i64 %w1989, 0
+	%r129 = add i64 %e642, 0
+	%r132 = add i64 %e642, 0
+	%r134 = icmp slt i64 %r132, 0
+	%v1990 = add i64 %w1989, 0
+	br i1 %r134, label %b85, label %a25b
+a25b140:
+	%w1982 = phi i64 [ 0, %a25b140q ], [ %v1983, %a25b140 ]
+	%r145 = add i64 %r82, 0
+	%v1983 = add i64 %w1982, 0
+	%u1987 = icmp slt i64 %v1983, 0
+	br i1 %u1987, label %a29b, label %a25b140
+b158:
+	%r161 = sext i32 %r to i64
+	%r163 = sext i32 %r4 to i64
+	br label %a29b173
+a29b173:
+	%w1964 = phi i64 [ 0, %b158 ], [ %v1973, %b1606 ]
+	%b1974 = mul i64 %r163, 0
+	%b1975 = add i64 %r161, 0
+	%b1976 = mul i64 %w1964, 0
+	%b1977 = add i64 %b1976, 0
+	%s761 = bitcast i64 %b1977 to i64
+	%b1980 = mul i64 %w1964, 0
+	%s661 = add i64 %b1980, 0
+	br i1 %r61, label %a33b, label %b179
+a33b:
+	%r328 = icmp slt i32 %r14, 0
+	%r335 = or i1 %r328, %r61
+	br i1 %r335, label %a50b, label %b341
+b179:
+	%r182 = sext i32 %r60 to i64
+	%r183 = add i64 %r182, 0
+	%r187 = icmp slt i64 %r183, 0
+	br i1 %r187, label %b188, label %a30b
+b188:
+	%e653 = phi i64 [ 0, %b179 ], [ %r283, %a30b ]
+	%r291 = icmp slt i64 %e653, 0
+	br i1 %r291, label %a30b294q, label %a33b
+a30b294q:
+	br label %a30b294
+a30b:
+	%w = phi i64 [ 0, %b179 ], [ %v, %a30b ]
+	%b2 = shl i64 %w, 0
+	%r283 = add i64 %b2, 0
+	%r286 = add i64 %b2, 0
+	%r288 = icmp slt i64 %r286, 0
+	%v = add i64 %w, 0
+	br i1 %r288, label %b188, label %a30b
+a30b294:
+	%w1847 = phi i64 [ 0, %a30b294q ], [ %v1848, %a30b294 ]
+	%v1848 = add i64 %w1847, 0
+	%u = icmp slt i64 %v1848, 0
+	br i1 %u, label %a33b, label %a30b294
+a50b:
+	%r814 = add i32 %r14, 0
+	%r815 = icmp slt i32 %r814, 0
+	%r817 = or i1 %r61, %r815
+	br i1 %r817, label %a57b, label %b820
+b341:
+	%w1874 = phi i64 [ 0, %a33b ], [ %v1880, %b463 ]
+	%d753 = bitcast i64 %w1874 to i64
+	%r343 = add i64 %s661, 0
+	%r346 = add i64 %r343, 0
+	%r347 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r346
+	%r348 = load float* %r347
+	%r352 = add i64 %r343, 0
+	%r353 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r352
+	%r354 = load float* %r353
+	%r362 = load float* bitcast ([128 x i64]* @i6000 to float*)
+	%r363 = fadd float 0.000000e+00, %r362
+	%r370 = load float* bitcast ([128 x i64]* @i6000 to float*)
+	%r376 = icmp slt i64 %r16, 0
+	br i1 %r376, label %b377, label %a35b
+b377:
+	%d753p = phi i64 [ %d753, %b341 ], [ %r411, %a35b ]
+	%s761p = phi i64 [ %s761, %b341 ], [ 322, %a35b ]
+	%e784 = phi i64 [ 0, %b341 ], [ %r454, %a35b ]
+	%s794 = add i64 %d753p, 0
+	%r462 = icmp slt i64 %e784, 0
+	br i1 %r462, label %a35b465, label %b463
+a35b:
+	%w1865 = phi i64 [ 0, %b341 ], [ %v1866, %a35b ]
+	%e785 = shl i64 %w1865, 0
+	%b1877 = mul i64 %w1865, 0
+	%s795 = add i64 %b1877, 0
+	%r399 = fadd float %r354, 0.000000e+00
+	%r402 = fadd float %r370, 0.000000e+00
+	%r403 = fadd float %r348, 0.000000e+00
+	%r411 = add i64 %s795, 0
+	%r431 = fadd float %r362, 0.000000e+00
+	%r454 = add i64 %e785, 0
+	%r457 = add i64 %e785, 0
+	%r459 = icmp slt i64 %r457, 0
+	%v1866 = add i64 %w1865, 0
+	br i1 %r459, label %b377, label %a35b
+b463:
+	%r506 = add i64 %d753, 0
+	%r511 = sext i32 %r60 to i64
+	%r512 = add i64 %r511, 0
+	%r513 = icmp slt i64 %r506, 0
+	%v1880 = add i64 %w1874, 0
+	br i1 %r513, label %b341, label %b514
+a35b465:
+	%r469 = add i64 %s794, 0
+	br label %b463
+b514:
+	%r525 = mul i64 %r17, 0
+	%r533 = add i64 %r525, 0
+	br label %b535
+b535:
+	%w1855 = phi i64 [ 0, %b514 ], [ %v1856, %b712 ]
+	%s923 = phi i64 [ 0, %b514 ], [ %r799, %b712 ]
+	%s933 = phi i64 [ %r533, %b514 ], [ %r795, %b712 ]
+	%r538 = add i64 %w1855, 0
+	%r539 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r538
+	%r540 = load float* %r539
+	%r551 = load float* bitcast ([128 x i64]* @i6000 to float*)
+	%r562 = sub i64 %s933, 0
+	%r564 = icmp slt i64 %r512, 0
+	br i1 %r564, label %b565, label %a45b
+b565:
+	%e944 = phi i64 [ 0, %b535 ], [ %r703, %a45b ]
+	%r711 = icmp slt i64 %e944, 0
+	br i1 %r711, label %a45b714, label %b712
+a45b:
+	%w1852 = phi i64 [ 0, %b535 ], [ %v1853, %a45b ]
+	%e945 = shl i64 %w1852, 0
+	%r609 = add i64 %r562, 0
+	%r703 = add i64 %e945, 0
+	%r706 = add i64 %e945, 0
+	%r708 = icmp slt i64 %r706, 0
+	%v1853 = add i64 %w1852, 0
+	br i1 %r708, label %b565, label %a45b
+b712:
+	%r795 = add i64 %rx, 0
+	%r799 = add i64 %s923, 0
+	%r802 = add i64 %w1855, 0
+	%r807 = icmp slt i64 %r802, 0
+	%v1856 = add i64 %w1855, 0
+	br i1 %r807, label %b535, label %a50b
+a45b714:
+	%r717 = add i64 %e944, 0
+	%r720 = add i64 %r717, 0
+	%r721 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r720
+	%r722 = load float* %r721
+	%r726 = add i64 %r717, 0
+	%r727 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r726
+	%r728 = load float* %r727
+	%r732 = add i64 %r717, 0
+	%r733 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r732
+	%r734 = load float* %r733
+	%r738 = add i64 %r717, 0
+	%r739 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r738
+	%r740 = load float* %r739
+	%r744 = add i64 %r717, 0
+	%r745 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r744
+	%r746 = load float* %r745
+	%r750 = add i64 %r717, 0
+	%r751 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r750
+	%r752 = load float* %r751
+	%r753 = fadd float %r752, %r746
+	%r754 = fadd float %r728, %r722
+	%r755 = fadd float %r734, %r754
+	%r756 = fadd float %r755, %r740
+	%r757 = fadd float %r753, %r756
+	%r759 = fadd float %r757, %r540
+	%r770 = add i64 %r717, 0
+	%r771 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r770
+	%r772 = load float* %r771
+	%r776 = add i64 %r717, 0
+	%r777 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r776
+	%r778 = load float* %r777
+	%r781 = fadd float %r363, %r772
+	%r782 = fadd float %r781, %r778
+	%r783 = fadd float %r551, %r782
+	br label %b712
+a57b:
+	br i1 %r335, label %a66b, label %b1086
+b820:
+	%r823 = sext i32 %r2 to i64
+	%r834 = sext i32 %r8 to i64
+	%r844 = add i64 %r16, 0
+	%r846 = sext i32 %r60 to i64
+	%r847 = add i64 %r846, 0
+	%r851 = load float* bitcast ([128 x i64]* @i6000 to float*)
+	%r856 = sub i64 %rx, 0
+	br label %b858
+b858:
+	%w1891 = phi i64 [ 0, %b820 ], [ %v1892, %b1016 ]
+	%s1193 = phi i64 [ 0, %b820 ], [ %r1068, %b1016 ]
+	%b1894 = mul i64 %r834, 0
+	%b1896 = shl i64 %r823, 0
+	%b1902 = mul i64 %w1891, 0
+	%s1173 = add i64 %b1902, 0
+	%r859 = add i64 %r856, 0
+	%r862 = add i64 %w1891, 0
+	%r863 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r862
+	%r864 = load float* %r863
+	%r868 = add i64 %w1891, 0
+	%r869 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r868
+	%r870 = load float* %r869
+	%r873 = sub i64 %r859, 0
+	%r876 = sub i64 %s1173, 0
+	%r878 = icmp slt i64 %r847, 0
+	br i1 %r878, label %b879, label %a53b
+b879:
+	%e1204 = phi i64 [ 0, %b858 ], [ %r1007, %a53b ]
+	%r1015 = icmp slt i64 %e1204, 0
+	br i1 %r1015, label %a53b1019q, label %b1016
+a53b1019q:
+	%b1888 = sub i64 %r846, 0
+	%b1889 = add i64 %b1888, 0
+	br label %a53b1019
+a53b:
+	%w1881 = phi i64 [ 0, %b858 ], [ %v1882, %a53b ]
+	%e1205 = shl i64 %w1881, 0
+	%r1007 = add i64 %e1205, 0
+	%r1010 = add i64 %e1205, 0
+	%r1012 = icmp slt i64 %r1010, 0
+	%v1882 = add i64 %w1881, 0
+	br i1 %r1012, label %b879, label %a53b
+b1016:
+	%r1068 = add i64 %s1193, 0
+	%r1071 = add i64 %w1891, 0
+	%r1073 = icmp slt i64 %r1071, %r844
+	%v1892 = add i64 %w1891, 0
+	br i1 %r1073, label %b858, label %a57b
+a53b1019:
+	%w1885 = phi i64 [ 0, %a53b1019q ], [ %v1886, %a53b1019 ]
+	%r1022 = add i64 %r876, 0
+	%r1024 = bitcast i8* %c2 to float*
+	%r1025 = add i64 %r1022, 0
+	%r1026 = getelementptr float* %r1024, i64 %r1025
+	%r1027 = load float* %r1026
+	%r1032 = add i64 %r873, 0
+	%r1033 = add i64 %r1032, 0
+	%r1034 = getelementptr float* %r1024, i64 %r1033
+	%r1035 = load float* %r1034
+	%r1037 = bitcast i8* %c22010 to float*
+	%r1040 = getelementptr float* %r1037, i64 %r1025
+	%r1044 = fadd float %r864, %r1035
+	%r1046 = fadd float %r870, %r1027
+	%r1047 = fadd float %r1044, %r1046
+	%r1048 = fadd float %r851, %r1047
+	%v1886 = add i64 %w1885, 0
+	%u1890 = icmp slt i64 %v1886, %b1889
+	br i1 %u1890, label %b1016, label %a53b1019
+a66b:
+	br i1 %r817, label %a93b, label %b1321
+b1086:
+	%r1089 = sext i32 %r2 to i64
+	%r1090 = add i64 %rx, 0
+	%r1096 = mul i64 %r9, 0
+	%r1101 = sext i32 %r8 to i64
+	%r1104 = add i64 %r1096, 0
+	%r1108 = sub i64 %r1104, 0
+	%r1110 = sext i32 %r60 to i64
+	%r1111 = add i64 %r1110, 0
+	%r1113 = sext i32 %r14 to i64
+	%r1114 = add i64 %r16, 0
+	br label %b1117
+b1117:
+	%w1915 = phi i64 [ 0, %b1086 ], [ %v1957, %b1263 ]
+	%d1353 = bitcast i64 %w1915 to i64
+	%r1120 = add i64 %s661, 0
+	%r1121 = add i64 %r1120, 0
+	%r1122 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r1121
+	%r1123 = load float* %r1122
+	%r1132 = bitcast i8* %c22012 to float*
+	%r1134 = getelementptr float* %r1132, i64 %w1915
+	%r1135 = load float* %r1134
+	%r1136 = fadd float %r1123, %r1135
+	%r1138 = icmp slt i64 %r1114, 0
+	br i1 %r1138, label %b1139, label %a63b
+b1139:
+	%e1364 = phi i64 [ 0, %b1117 ], [ %r1254, %a63b ]
+	%p1998 = phi i64 [ %s761, %b1117 ], [ %r1216, %a63b ]
+	%r1108p = phi i64 [ %r1108, %b1117 ], [ %r1219, %a63b ]
+	%p2004 = phi i64 [ %d1353, %b1117 ], [ %r1090, %a63b ]
+	%s1374 = phi i64 [ 0, %b1117 ], [ %r1251, %a63b ]
+	%s1384 = add i64 %r1108p, 0
+	%s1394 = add i64 %p1998, 0
+	%r1262 = icmp slt i64 %e1364, %r1114
+	br i1 %r1262, label %a63b1266q, label %b1263
+a63b1266q:
+	%b1947 = sub i64 %r1113, 0
+	%b1948 = add i64 %b1947, 0
+	br label %a63b1266
+a63b:
+	%w1904 = phi i64 [ 0, %b1117 ], [ %v1905, %a63b ]
+	%s1375 = phi i64 [ 0, %b1117 ], [ %r1251, %a63b ]
+	%b1906 = add i64 %r1089, 0
+	%b1907 = mul i64 %r1101, 0
+	%b1929 = mul i64 %w1904, 0
+	%s1395 = add i64 %b1929, 0
+	%e1365 = shl i64 %w1904, 0
+	%r1163 = add i64 %r1090, 0
+	%r1167 = add i64 %s1375, 0
+	%r1191 = add i64 %r1163, 0
+	%r1195 = add i64 %r1167, 0
+	%r1216 = add i64 %s1395, 0
+	%r1219 = add i64 %r1191, 0
+	%r1223 = add i64 %r1195, 0
+	%r1251 = add i64 %r1223, 0
+	%r1254 = add i64 %e1365, 0
+	%r1257 = add i64 %e1365, 0
+	%r1259 = icmp slt i64 %r1257, %r1114
+	%v1905 = add i64 %w1904, 0
+	br i1 %r1259, label %b1139, label %a63b
+b1263:
+	%r1306 = add i64 %d1353, 0
+	%r1308 = icmp slt i64 %r1306, %r1111
+	%v1957 = add i64 %w1915, 0
+	br i1 %r1308, label %b1117, label %a66b
+a63b1266:
+	%w1944 = phi i64 [ 0, %a63b1266q ], [ %v1945, %a63b1266 ]
+	%s1377 = phi i64 [ %s1374, %a63b1266q ], [ %r1297, %a63b1266 ]
+	%r1282 = fadd float %r1136, 0.000000e+00
+	%r1297 = add i64 %s1377, 0
+	%v1945 = add i64 %w1944, 0
+	%u1949 = icmp slt i64 %v1945, %b1948
+	br i1 %u1949, label %b1263, label %a63b1266
+a93b:
+	br i1 %r61, label %b1606, label %a97b
+b1321:
+	%r1331 = mul i64 %r17, 0
+	%r1339 = add i64 %r1331, 0
+	br label %b1342
+b1342:
+	%w1960 = phi i64 [ 0, %b1321 ], [ %v1961, %b1582 ]
+	%s1523 = phi i64 [ %r1339, %b1321 ], [ %r1587, %b1582 ]
+	%s1563 = phi i64 [ 0, %b1321 ], [ %r1591, %b1582 ]
+	%d1533 = bitcast i64 %w1960 to i64
+	%b1968 = mul i64 %w1960, 0
+	%s1543 = add i64 %b1968, 0
+	%r1345 = add i64 %s1523, 0
+	%r1348 = sub i64 %r1345, 0
+	%r1352 = add i64 %s1523, 0
+	%r1355 = sub i64 %r1352, 0
+	%r1370 = add i64 %d1533, 0
+	%r1371 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r1370
+	%r1372 = load float* %r1371
+	br label %a74b
+a74b:
+	%w1958 = phi i64 [ 0, %b1342 ], [ %v1959, %a74b ]
+	%r1379 = add i64 %s1543, 0
+	%r1403 = add i64 %r1355, 0
+	%r1422 = add i64 %r1348, 0
+	%r1526 = fadd float %r1372, 0.000000e+00
+	%r1573 = add i64 %w1958, 0
+	%r1581 = icmp slt i64 %r1573, 0
+	%v1959 = add i64 %w1958, 0
+	br i1 %r1581, label %a74b, label %b1582
+b1582:
+	%r1587 = add i64 %rx, 0
+	%r1591 = add i64 %s1563, 0
+	%r1596 = add i64 %d1533, 0
+	%r1601 = icmp slt i64 %r1596, 0
+	%v1961 = add i64 %w1960, 0
+	br i1 %r1601, label %b1342, label %a93b
+b1606:
+	%r1833 = add i64 %w1964, 0
+	%r1840 = icmp slt i64 %r1833, 0
+	%v1973 = add i64 %w1964, 0
+	br i1 %r1840, label %a29b173, label %a109b
+a97b:
+	%w1970 = phi i64 [ 0, %a93b ], [ %v1971, %a97b ]
+	%r1613 = add i64 %w1964, 0
+	%r1614 = mul i64 %r1613, 0
+	%r1622 = add i64 %r1614, 0
+	%r1754 = bitcast i8* %r28 to float*
+	%r1756 = getelementptr float* %r1754, i64 %w1970
+	%r1757 = load float* %r1756
+	%r1761 = add i64 %r1622, 0
+	%r1762 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r1761
+	%r1763 = load float* %r1762
+	%r1767 = add i64 %r1622, 0
+	%r1768 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r1767
+	%r1772 = fadd float %r1763, 0.000000e+00
+	%r1773 = fadd float %r1772, 0.000000e+00
+	%r1809 = fadd float %r1757, 0.000000e+00
+	%r1810 = fadd float %r1773, %r1809
+	store float %r1810, float* %r1768
+	%r1818 = add i64 %w1970, 0
+	%r1826 = icmp slt i64 %r1818, 0
+	%v1971 = add i64 %w1970, 0
+	br i1 %r1826, label %a97b, label %b1606
+a109b:
+	ret void
+}
diff --git a/test/CodeGen/X86/2007-01-29-InlineAsm-ir.ll b/test/CodeGen/X86/2007-01-29-InlineAsm-ir.ll
new file mode 100644
index 0000000..e83e2e5
--- /dev/null
+++ b/test/CodeGen/X86/2007-01-29-InlineAsm-ir.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=x86
+; Test 'ri' constraint.
+
+define void @run_init_process() {
+          %tmp = call i32 asm sideeffect "push %ebx ; movl $2,%ebx ; int $$0x80 ; pop %ebx", "={ax},0,ri,{cx},{dx},~{dirflag},~{fpsr},~{flags},~{memory}"( i32 11, i32 0, i32 0, i32 0 )          
+          unreachable
+  }
diff --git a/test/CodeGen/X86/2007-02-04-OrAddrMode.ll b/test/CodeGen/X86/2007-02-04-OrAddrMode.ll
new file mode 100644
index 0000000..10bbe74
--- /dev/null
+++ b/test/CodeGen/X86/2007-02-04-OrAddrMode.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=x86 | grep {orl	\$1, %eax}
+; RUN: llc < %s -march=x86 | grep {leal	3(,%eax,8)}
+
+;; This example can't fold the or into an LEA.
+define i32 @test(float ** %tmp2, i32 %tmp12) nounwind {
+	%tmp3 = load float** %tmp2
+	%tmp132 = shl i32 %tmp12, 2		; <i32> [#uses=1]
+	%tmp4 = bitcast float* %tmp3 to i8*		; <i8*> [#uses=1]
+	%ctg2 = getelementptr i8* %tmp4, i32 %tmp132		; <i8*> [#uses=1]
+	%tmp6 = ptrtoint i8* %ctg2 to i32		; <i32> [#uses=1]
+	%tmp14 = or i32 %tmp6, 1		; <i32> [#uses=1]
+	ret i32 %tmp14
+}
+
+
+;; This can!
+define i32 @test2(i32 %a, i32 %b) nounwind {
+	%c = shl i32 %a, 3
+	%d = or i32 %c, 3
+	ret i32 %d
+}
diff --git a/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll b/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll
new file mode 100644
index 0000000..954c95d
--- /dev/null
+++ b/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu -relocation-model=pic
+; PR1027
+
+	%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
+	%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
+@stderr = external global %struct._IO_FILE*
+
+define void @__eprintf(i8* %string, i8* %expression, i32 %line, i8* %filename) {
+	%tmp = load %struct._IO_FILE** @stderr
+	%tmp5 = tail call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf( %struct._IO_FILE* %tmp, i8* %string, i8* %expression, i32 %line, i8* %filename )
+	%tmp6 = load %struct._IO_FILE** @stderr
+	%tmp7 = tail call i32 @fflush( %struct._IO_FILE* %tmp6 )
+	tail call void @abort( )
+	unreachable
+}
+
+declare i32 @fprintf(%struct._IO_FILE*, i8*, ...)
+
+declare i32 @fflush(%struct._IO_FILE*)
+
+declare void @abort()
diff --git a/test/CodeGen/X86/2007-02-23-DAGCombine-Miscompile.ll b/test/CodeGen/X86/2007-02-23-DAGCombine-Miscompile.ll
new file mode 100644
index 0000000..a8f0e57
--- /dev/null
+++ b/test/CodeGen/X86/2007-02-23-DAGCombine-Miscompile.ll
@@ -0,0 +1,13 @@
+; PR1219
+; RUN: llc < %s -march=x86 | grep {movl	\$1, %eax}
+
+define i32 @test(i1 %X) {
+old_entry1:
+        %hvar2 = zext i1 %X to i32
+	%C = icmp sgt i32 %hvar2, -1
+	br i1 %C, label %cond_true15, label %cond_true
+cond_true15:
+        ret i32 1
+cond_true:
+        ret i32 2
+}
diff --git a/test/CodeGen/X86/2007-02-25-FastCCStack.ll b/test/CodeGen/X86/2007-02-25-FastCCStack.ll
new file mode 100644
index 0000000..2e2b56d
--- /dev/null
+++ b/test/CodeGen/X86/2007-02-25-FastCCStack.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=x86 -mcpu=pentium3
+
+define internal fastcc double @ggc_rlimit_bound(double %limit) {
+    ret double %limit
+}
diff --git a/test/CodeGen/X86/2007-03-01-SpillerCrash.ll b/test/CodeGen/X86/2007-03-01-SpillerCrash.ll
new file mode 100644
index 0000000..112d1ab
--- /dev/null
+++ b/test/CodeGen/X86/2007-03-01-SpillerCrash.ll
@@ -0,0 +1,86 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin8 -mattr=+sse2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin8 -mattr=+sse2 | not grep movhlps
+
+define void @test() nounwind {
+test.exit:
+	fmul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:0 [#uses=4]
+	load <4 x float>* null		; <<4 x float>>:1 [#uses=1]
+	shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 >		; <<4 x float>>:2 [#uses=1]
+	fmul <4 x float> %0, %2		; <<4 x float>>:3 [#uses=1]
+	fsub <4 x float> zeroinitializer, %3		; <<4 x float>>:4 [#uses=1]
+	fmul <4 x float> %4, zeroinitializer		; <<4 x float>>:5 [#uses=2]
+	bitcast <4 x float> zeroinitializer to <4 x i32>		; <<4 x i32>>:6 [#uses=1]
+	and <4 x i32> %6, < i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647 >		; <<4 x i32>>:7 [#uses=1]
+	bitcast <4 x i32> %7 to <4 x float>		; <<4 x float>>:8 [#uses=2]
+	extractelement <4 x float> %8, i32 0		; <float>:9 [#uses=1]
+	extractelement <4 x float> %8, i32 1		; <float>:10 [#uses=2]
+	br i1 false, label %11, label %19
+
+; <label>:11		; preds = %test.exit
+	br i1 false, label %17, label %12
+
+; <label>:12		; preds = %11
+	br i1 false, label %19, label %13
+
+; <label>:13		; preds = %12
+	fsub float -0.000000e+00, 0.000000e+00		; <float>:14 [#uses=1]
+	%tmp207 = extractelement <4 x float> zeroinitializer, i32 0		; <float> [#uses=1]
+	%tmp208 = extractelement <4 x float> zeroinitializer, i32 2		; <float> [#uses=1]
+	fsub float -0.000000e+00, %tmp208		; <float>:15 [#uses=1]
+	%tmp155 = extractelement <4 x float> zeroinitializer, i32 0		; <float> [#uses=1]
+	%tmp156 = extractelement <4 x float> zeroinitializer, i32 2		; <float> [#uses=1]
+	fsub float -0.000000e+00, %tmp156		; <float>:16 [#uses=1]
+	br label %19
+
+; <label>:17		; preds = %11
+	br i1 false, label %19, label %18
+
+; <label>:18		; preds = %17
+	br label %19
+
+; <label>:19		; preds = %18, %17, %13, %12, %test.exit
+	phi i32 [ 5, %18 ], [ 3, %13 ], [ 1, %test.exit ], [ 2, %12 ], [ 4, %17 ]		; <i32>:20 [#uses=0]
+	phi float [ 0.000000e+00, %18 ], [ %16, %13 ], [ 0.000000e+00, %test.exit ], [ 0.000000e+00, %12 ], [ 0.000000e+00, %17 ]		; <float>:21 [#uses=1]
+	phi float [ 0.000000e+00, %18 ], [ %tmp155, %13 ], [ 0.000000e+00, %test.exit ], [ 0.000000e+00, %12 ], [ 0.000000e+00, %17 ]		; <float>:22 [#uses=1]
+	phi float [ 0.000000e+00, %18 ], [ %15, %13 ], [ 0.000000e+00, %test.exit ], [ 0.000000e+00, %12 ], [ 0.000000e+00, %17 ]		; <float>:23 [#uses=1]
+	phi float [ 0.000000e+00, %18 ], [ %tmp207, %13 ], [ 0.000000e+00, %test.exit ], [ 0.000000e+00, %12 ], [ 0.000000e+00, %17 ]		; <float>:24 [#uses=1]
+	phi float [ 0.000000e+00, %18 ], [ %10, %13 ], [ %9, %test.exit ], [ %10, %12 ], [ 0.000000e+00, %17 ]		; <float>:25 [#uses=2]
+	phi float [ 0.000000e+00, %18 ], [ %14, %13 ], [ 0.000000e+00, %test.exit ], [ 0.000000e+00, %12 ], [ 0.000000e+00, %17 ]		; <float>:26 [#uses=1]
+	phi float [ 0.000000e+00, %18 ], [ 0.000000e+00, %13 ], [ 0.000000e+00, %test.exit ], [ 0.000000e+00, %12 ], [ 0.000000e+00, %17 ]		; <float>:27 [#uses=1]
+	insertelement <4 x float> undef, float %27, i32 0		; <<4 x float>>:28 [#uses=1]
+	insertelement <4 x float> %28, float %26, i32 1		; <<4 x float>>:29 [#uses=0]
+	insertelement <4 x float> undef, float %24, i32 0		; <<4 x float>>:30 [#uses=1]
+	insertelement <4 x float> %30, float %23, i32 1		; <<4 x float>>:31 [#uses=1]
+	insertelement <4 x float> %31, float %25, i32 2		; <<4 x float>>:32 [#uses=1]
+	insertelement <4 x float> %32, float %25, i32 3		; <<4 x float>>:33 [#uses=1]
+	fdiv <4 x float> %33, zeroinitializer		; <<4 x float>>:34 [#uses=1]
+	fmul <4 x float> %34, < float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01 >		; <<4 x float>>:35 [#uses=1]
+	insertelement <4 x float> undef, float %22, i32 0		; <<4 x float>>:36 [#uses=1]
+	insertelement <4 x float> %36, float %21, i32 1		; <<4 x float>>:37 [#uses=0]
+	br i1 false, label %foo.exit, label %38
+
+; <label>:38		; preds = %19
+	extractelement <4 x float> %0, i32 0		; <float>:39 [#uses=1]
+	fcmp ogt float %39, 0.000000e+00		; <i1>:40 [#uses=1]
+	extractelement <4 x float> %0, i32 2		; <float>:41 [#uses=1]
+	extractelement <4 x float> %0, i32 1		; <float>:42 [#uses=1]
+	fsub float -0.000000e+00, %42		; <float>:43 [#uses=2]
+	%tmp189 = extractelement <4 x float> %5, i32 2		; <float> [#uses=1]
+	br i1 %40, label %44, label %46
+
+; <label>:44		; preds = %38
+	fsub float -0.000000e+00, %tmp189		; <float>:45 [#uses=0]
+	br label %foo.exit
+
+; <label>:46		; preds = %38
+	%tmp192 = extractelement <4 x float> %5, i32 1		; <float> [#uses=1]
+	fsub float -0.000000e+00, %tmp192		; <float>:47 [#uses=1]
+	br label %foo.exit
+
+foo.exit:		; preds = %46, %44, %19
+	phi float [ 0.000000e+00, %44 ], [ %47, %46 ], [ 0.000000e+00, %19 ]		; <float>:48 [#uses=0]
+	phi float [ %43, %44 ], [ %43, %46 ], [ 0.000000e+00, %19 ]		; <float>:49 [#uses=0]
+	phi float [ 0.000000e+00, %44 ], [ %41, %46 ], [ 0.000000e+00, %19 ]		; <float>:50 [#uses=0]
+	shufflevector <4 x float> %35, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>>:51 [#uses=0]
+	unreachable
+}
diff --git a/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll b/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
new file mode 100644
index 0000000..4cac9b4
--- /dev/null
+++ b/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -march=x86 -mtriple=i686-darwin | \
+; RUN:   grep push | count 3
+
+define void @foo(i8** %buf, i32 %size, i32 %col, i8* %p) {
+entry:
+	icmp sgt i32 %size, 0		; <i1>:0 [#uses=1]
+	br i1 %0, label %bb.preheader, label %return
+
+bb.preheader:		; preds = %entry
+	%tmp5.sum72 = add i32 %col, 7		; <i32> [#uses=1]
+	%tmp5.sum71 = add i32 %col, 5		; <i32> [#uses=1]
+	%tmp5.sum70 = add i32 %col, 3		; <i32> [#uses=1]
+	%tmp5.sum69 = add i32 %col, 2		; <i32> [#uses=1]
+	%tmp5.sum68 = add i32 %col, 1		; <i32> [#uses=1]
+	%tmp5.sum66 = add i32 %col, 4		; <i32> [#uses=1]
+	%tmp5.sum = add i32 %col, 6		; <i32> [#uses=1]
+	br label %bb
+
+bb:		; preds = %bb, %bb.preheader
+	%i.073.0 = phi i32 [ 0, %bb.preheader ], [ %indvar.next, %bb ]		; <i32> [#uses=3]
+	%p_addr.076.0.rec = mul i32 %i.073.0, 9		; <i32> [#uses=9]
+	%p_addr.076.0 = getelementptr i8* %p, i32 %p_addr.076.0.rec		; <i8*> [#uses=1]
+	%tmp2 = getelementptr i8** %buf, i32 %i.073.0		; <i8**> [#uses=1]
+	%tmp3 = load i8** %tmp2		; <i8*> [#uses=8]
+	%tmp5 = getelementptr i8* %tmp3, i32 %col		; <i8*> [#uses=1]
+	%tmp7 = load i8* %p_addr.076.0		; <i8> [#uses=1]
+	store i8 %tmp7, i8* %tmp5
+	%p_addr.076.0.sum93 = add i32 %p_addr.076.0.rec, 1		; <i32> [#uses=1]
+	%tmp11 = getelementptr i8* %p, i32 %p_addr.076.0.sum93		; <i8*> [#uses=1]
+	%tmp13 = load i8* %tmp11		; <i8> [#uses=1]
+	%tmp15 = getelementptr i8* %tmp3, i32 %tmp5.sum72		; <i8*> [#uses=1]
+	store i8 %tmp13, i8* %tmp15
+	%p_addr.076.0.sum92 = add i32 %p_addr.076.0.rec, 2		; <i32> [#uses=1]
+	%tmp17 = getelementptr i8* %p, i32 %p_addr.076.0.sum92		; <i8*> [#uses=1]
+	%tmp19 = load i8* %tmp17		; <i8> [#uses=1]
+	%tmp21 = getelementptr i8* %tmp3, i32 %tmp5.sum71		; <i8*> [#uses=1]
+	store i8 %tmp19, i8* %tmp21
+	%p_addr.076.0.sum91 = add i32 %p_addr.076.0.rec, 3		; <i32> [#uses=1]
+	%tmp23 = getelementptr i8* %p, i32 %p_addr.076.0.sum91		; <i8*> [#uses=1]
+	%tmp25 = load i8* %tmp23		; <i8> [#uses=1]
+	%tmp27 = getelementptr i8* %tmp3, i32 %tmp5.sum70		; <i8*> [#uses=1]
+	store i8 %tmp25, i8* %tmp27
+	%p_addr.076.0.sum90 = add i32 %p_addr.076.0.rec, 4		; <i32> [#uses=1]
+	%tmp29 = getelementptr i8* %p, i32 %p_addr.076.0.sum90		; <i8*> [#uses=1]
+	%tmp31 = load i8* %tmp29		; <i8> [#uses=1]
+	%tmp33 = getelementptr i8* %tmp3, i32 %tmp5.sum69		; <i8*> [#uses=2]
+	store i8 %tmp31, i8* %tmp33
+	%p_addr.076.0.sum89 = add i32 %p_addr.076.0.rec, 5		; <i32> [#uses=1]
+	%tmp35 = getelementptr i8* %p, i32 %p_addr.076.0.sum89		; <i8*> [#uses=1]
+	%tmp37 = load i8* %tmp35		; <i8> [#uses=1]
+	%tmp39 = getelementptr i8* %tmp3, i32 %tmp5.sum68		; <i8*> [#uses=1]
+	store i8 %tmp37, i8* %tmp39
+	%p_addr.076.0.sum88 = add i32 %p_addr.076.0.rec, 6		; <i32> [#uses=1]
+	%tmp41 = getelementptr i8* %p, i32 %p_addr.076.0.sum88		; <i8*> [#uses=1]
+	%tmp43 = load i8* %tmp41		; <i8> [#uses=1]
+	store i8 %tmp43, i8* %tmp33
+	%p_addr.076.0.sum87 = add i32 %p_addr.076.0.rec, 7		; <i32> [#uses=1]
+	%tmp47 = getelementptr i8* %p, i32 %p_addr.076.0.sum87		; <i8*> [#uses=1]
+	%tmp49 = load i8* %tmp47		; <i8> [#uses=1]
+	%tmp51 = getelementptr i8* %tmp3, i32 %tmp5.sum66		; <i8*> [#uses=1]
+	store i8 %tmp49, i8* %tmp51
+	%p_addr.076.0.sum = add i32 %p_addr.076.0.rec, 8		; <i32> [#uses=1]
+	%tmp53 = getelementptr i8* %p, i32 %p_addr.076.0.sum		; <i8*> [#uses=1]
+	%tmp55 = load i8* %tmp53		; <i8> [#uses=1]
+	%tmp57 = getelementptr i8* %tmp3, i32 %tmp5.sum		; <i8*> [#uses=1]
+	store i8 %tmp55, i8* %tmp57
+	%indvar.next = add i32 %i.073.0, 1		; <i32> [#uses=2]
+	icmp eq i32 %indvar.next, %size		; <i1>:1 [#uses=1]
+	br i1 %1, label %return, label %bb
+
+return:		; preds = %bb, %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/2007-03-16-InlineAsm.ll b/test/CodeGen/X86/2007-03-16-InlineAsm.ll
new file mode 100644
index 0000000..9580726
--- /dev/null
+++ b/test/CodeGen/X86/2007-03-16-InlineAsm.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=x86
+
+; ModuleID = 'a.bc'
+
+define i32 @foo(i32 %A, i32 %B) {
+entry:
+	%A_addr = alloca i32		; <i32*> [#uses=2]
+	%B_addr = alloca i32		; <i32*> [#uses=1]
+	%retval = alloca i32, align 4		; <i32*> [#uses=2]
+	%tmp = alloca i32, align 4		; <i32*> [#uses=2]
+	%ret = alloca i32, align 4		; <i32*> [#uses=2]
+	"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	store i32 %A, i32* %A_addr
+	store i32 %B, i32* %B_addr
+	%tmp1 = load i32* %A_addr		; <i32> [#uses=1]
+	%tmp2 = call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"( i32 7, i32 %tmp1 )		; <i32> [#uses=1]
+	store i32 %tmp2, i32* %ret
+	%tmp3 = load i32* %ret		; <i32> [#uses=1]
+	store i32 %tmp3, i32* %tmp
+	%tmp4 = load i32* %tmp		; <i32> [#uses=1]
+	store i32 %tmp4, i32* %retval
+	br label %return
+
+return:		; preds = %entry
+	%retval5 = load i32* %retval		; <i32> [#uses=1]
+	ret i32 %retval5
+}
diff --git a/test/CodeGen/X86/2007-03-18-LiveIntervalAssert.ll b/test/CodeGen/X86/2007-03-18-LiveIntervalAssert.ll
new file mode 100644
index 0000000..70936fb
--- /dev/null
+++ b/test/CodeGen/X86/2007-03-18-LiveIntervalAssert.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=x86
+; PR1259
+
+define void @test() {
+        %tmp2 = call i32 asm "...", "=r,~{dirflag},~{fpsr},~{flags},~{dx},~{cx},~{ax}"( )
+        unreachable
+}
diff --git a/test/CodeGen/X86/2007-03-24-InlineAsmMultiRegConstraint.ll b/test/CodeGen/X86/2007-03-24-InlineAsmMultiRegConstraint.ll
new file mode 100644
index 0000000..44d68dd
--- /dev/null
+++ b/test/CodeGen/X86/2007-03-24-InlineAsmMultiRegConstraint.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86
+
+define i32 @test(i16 %tmp40414244) {
+  %tmp48 = call i32 asm sideeffect "inl ${1:w}, $0", "={ax},N{dx},~{dirflag},~{fpsr},~{flags}"( i16 %tmp40414244 )
+  ret i32 %tmp48
+}
+
+define i32 @test2(i16 %tmp40414244) {
+  %tmp48 = call i32 asm sideeffect "inl ${1:w}, $0", "={ax},N{dx},~{dirflag},~{fpsr},~{flags}"( i16 14 )
+  ret i32 %tmp48
+}
diff --git a/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll b/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll
new file mode 100644
index 0000000..3312e01
--- /dev/null
+++ b/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86 | grep {mov %gs:72, %eax}
+target datalayout = "e-p:32:32"
+target triple = "i686-apple-darwin9"
+
+define void @test() {
+	%tmp1 = tail call i32* asm sideeffect "mov %gs:${1:P}, $0", "=r,i,~{dirflag},~{fpsr},~{flags}"( i32 72 )		; <%struct._pthread*> [#uses=1]
+	ret void
+}
+
+
diff --git a/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll b/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll
new file mode 100644
index 0000000..c1b1ad1
--- /dev/null
+++ b/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mcpu=yonah -march=x86 | \
+; RUN:   grep {cmpltsd %xmm0, %xmm0}
+target datalayout = "e-p:32:32"
+target triple = "i686-apple-darwin9"
+
+
+define void @acoshf() {
+	%tmp19 = tail call <2 x double> asm sideeffect "pcmpeqd $0, $0 \0A\09 cmpltsd $0, $0", "=x,0,~{dirflag},~{fpsr},~{flags}"( <2 x double> zeroinitializer )		; <<2 x double>> [#uses=0]
+	ret void
+}
+
diff --git a/test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll b/test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll
new file mode 100644
index 0000000..30453d5
--- /dev/null
+++ b/test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86 | grep {psrlw \$8, %xmm0}
+target datalayout = "e-p:32:32"
+target triple = "i686-apple-darwin9"
+
+define void @test() {
+        tail call void asm sideeffect "psrlw $0, %xmm0", "X,~{dirflag},~{fpsr},~{flags}"( i32 8 )
+        ret void
+}
+
diff --git a/test/CodeGen/X86/2007-03-26-CoalescerBug.ll b/test/CodeGen/X86/2007-03-26-CoalescerBug.ll
new file mode 100644
index 0000000..9676f14
--- /dev/null
+++ b/test/CodeGen/X86/2007-03-26-CoalescerBug.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s -march=x86
+
+@data = external global [339 x i64]
+
+define void @foo(...) {
+bb1:
+	%t43 = load i64* getelementptr ([339 x i64]* @data, i32 0, i64 212), align 4
+	br i1 false, label %bb80, label %bb6
+bb6:
+	br i1 false, label %bb38, label %bb265
+bb265:
+	ret void
+bb38:
+	br i1 false, label %bb80, label %bb49
+bb80:
+	br i1 false, label %bb146, label %bb268
+bb49:
+	ret void
+bb113:
+	ret void
+bb268:
+	%t1062 = shl i64 %t43, 3
+	%t1066 = shl i64 0, 3
+	br label %bb85
+bb85:
+	%t1025 = phi i64 [ 0, %bb268 ], [ %t102.0, %bb234 ]
+	%t1028 = phi i64 [ 0, %bb268 ], [ %t1066, %bb234 ]
+	%t1031 = phi i64 [ 0, %bb268 ], [ %t103.0, %bb234 ]
+	%t1034 = phi i64 [ 0, %bb268 ], [ %t1066, %bb234 ]
+	%t102.0 = add i64 %t1028, %t1025
+	%t103.0 = add i64 %t1034, %t1031
+	br label %bb86
+bb86:
+	%t108.0 = phi i64 [ %t102.0, %bb85 ], [ %t1139, %bb248 ]
+	%t110.0 = phi i64 [ %t103.0, %bb85 ], [ %t1142, %bb248 ]
+	br label %bb193
+bb193:
+	%t1081 = add i64 %t110.0, -8
+	%t1087 = add i64 %t108.0, -8
+	br i1 false, label %bb193, label %bb248
+bb248:
+	%t1139 = add i64 %t108.0, %t1062
+	%t1142 = add i64 %t110.0, %t1062
+	br i1 false, label %bb86, label %bb234
+bb234:
+	br i1 false, label %bb85, label %bb113
+bb146:
+	ret void
+}
diff --git a/test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll b/test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll
new file mode 100644
index 0000000..9f09e88
--- /dev/null
+++ b/test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s
+; PR1314
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "x86_64-unknown-linux-gnu"
+	%struct.CycleCount = type { i64, i64 }
+	%struct.bc_struct = type { i32, i32, i32, i32, %struct.bc_struct*, i8*, i8* }
+@_programStartTime = external global %struct.CycleCount		; <%struct.CycleCount*> [#uses=1]
+
+define fastcc i32 @bc_divide(%struct.bc_struct* %n1, %struct.bc_struct* %n2, %struct.bc_struct** %quot, i32 %scale) nounwind {
+entry:
+	%tmp7.i46 = tail call i64 asm sideeffect ".byte 0x0f,0x31", "={dx},=*{ax},~{dirflag},~{fpsr},~{flags}"( i64* getelementptr (%struct.CycleCount* @_programStartTime, i32 0, i32 1) )		; <i64> [#uses=0]
+	%tmp221 = sdiv i32 10, 0		; <i32> [#uses=1]
+	tail call fastcc void @_one_mult( i8* null, i32 0, i32 %tmp221, i8* null )
+	ret i32 0
+}
+
+declare fastcc void @_one_mult(i8*, i32, i32, i8*)
diff --git a/test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll b/test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll
new file mode 100644
index 0000000..f48c132
--- /dev/null
+++ b/test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=x86 -mcpu=yonah
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-apple-darwin8"
+
+define void @test(<4 x float> %tmp42i) {
+	%tmp42 = call <4 x float> asm "movss $1, $0", "=x,m,~{dirflag},~{fpsr},~{flags}"( float* null )		; <<4 x float>> [#uses=1]
+	%tmp49 = shufflevector <4 x float> %tmp42, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>> [#uses=1]
+	br label %bb
+
+bb:		; preds = %bb, %cond_true10
+	%tmp52 = bitcast <4 x float> %tmp49 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp53 = call <4 x i32> @llvm.x86.sse2.psll.d( <4 x i32> %tmp52, <4 x i32> < i32 8, i32 undef, i32 undef, i32 undef > )		; <<4 x i32>> [#uses=1]
+	%tmp105 = bitcast <4 x i32> %tmp53 to <4 x float>		; <<4 x float>> [#uses=1]
+	%tmp108 = fsub <4 x float> zeroinitializer, %tmp105		; <<4 x float>> [#uses=0]
+	br label %bb
+
+return:		; preds = %entry
+	ret void
+}
+
+declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>)
diff --git a/test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll b/test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll
new file mode 100644
index 0000000..4604f46
--- /dev/null
+++ b/test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -mtriple=i686-apple-darwin -relocation-model=pic --disable-fp-elim
+
+	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+	%struct.__sFILEX = type opaque
+	%struct.__sbuf = type { i8*, i32 }
+	%struct.partition_def = type { i32, [1 x %struct.partition_elem] }
+	%struct.partition_elem = type { i32, %struct.partition_elem*, i32 }
+
+define void @partition_print(%struct.partition_def* %part) {
+entry:
+	br i1 false, label %bb.preheader, label %bb99
+
+bb.preheader:		; preds = %entry
+	br i1 false, label %cond_true, label %cond_next90
+
+cond_true:		; preds = %bb.preheader
+	br i1 false, label %bb32, label %bb87.critedge
+
+bb32:		; preds = %bb32, %cond_true
+	%i.2115.0 = phi i32 [ 0, %cond_true ], [ %indvar.next127, %bb32 ]		; <i32> [#uses=1]
+	%c.2112.0 = phi i32 [ 0, %cond_true ], [ %tmp49, %bb32 ]		; <i32> [#uses=1]
+	%tmp43 = getelementptr %struct.partition_def* %part, i32 0, i32 1, i32 %c.2112.0, i32 1		; <%struct.partition_elem**> [#uses=1]
+	%tmp44 = load %struct.partition_elem** %tmp43		; <%struct.partition_elem*> [#uses=1]
+	%tmp4445 = ptrtoint %struct.partition_elem* %tmp44 to i32		; <i32> [#uses=1]
+	%tmp48 = sub i32 %tmp4445, 0		; <i32> [#uses=1]
+	%tmp49 = sdiv i32 %tmp48, 12		; <i32> [#uses=1]
+	%indvar.next127 = add i32 %i.2115.0, 1		; <i32> [#uses=2]
+	%exitcond128 = icmp eq i32 %indvar.next127, 0		; <i1> [#uses=1]
+	br i1 %exitcond128, label %bb58, label %bb32
+
+bb58:		; preds = %bb32
+	ret void
+
+bb87.critedge:		; preds = %cond_true
+	ret void
+
+cond_next90:		; preds = %bb.preheader
+	ret void
+
+bb99:		; preds = %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/2007-04-24-Huge-Stack.ll b/test/CodeGen/X86/2007-04-24-Huge-Stack.ll
new file mode 100644
index 0000000..7528129
--- /dev/null
+++ b/test/CodeGen/X86/2007-04-24-Huge-Stack.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86-64 | not grep 4294967112
+; PR1348
+
+	%struct.md5_ctx = type { i32, i32, i32, i32, [2 x i32], i32, [128 x i8], [4294967288 x i8] }
+
+define i8* @md5_buffer(i8* %buffer, i64 %len, i8* %resblock) {
+entry:
+	%ctx = alloca %struct.md5_ctx, align 16		; <%struct.md5_ctx*> [#uses=3]
+	call void @md5_init_ctx( %struct.md5_ctx* %ctx )
+	call void @md5_process_bytes( i8* %buffer, i64 %len, %struct.md5_ctx* %ctx )
+	%tmp4 = call i8* @md5_finish_ctx( %struct.md5_ctx* %ctx, i8* %resblock )		; <i8*> [#uses=1]
+	ret i8* %tmp4
+}
+
+declare void @md5_init_ctx(%struct.md5_ctx*)
+
+declare i8* @md5_finish_ctx(%struct.md5_ctx*, i8*)
+
+declare void @md5_process_bytes(i8*, i64, %struct.md5_ctx*)
diff --git a/test/CodeGen/X86/2007-04-24-VectorCrash.ll b/test/CodeGen/X86/2007-04-24-VectorCrash.ll
new file mode 100644
index 0000000..e38992d
--- /dev/null
+++ b/test/CodeGen/X86/2007-04-24-VectorCrash.ll
@@ -0,0 +1,63 @@
+; RUN: llc < %s -mcpu=yonah
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-apple-darwin8"
+
+declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>)
+
+define void @test(float* %P) {
+entry:
+	or <4 x i32> zeroinitializer, and (<4 x i32> bitcast (<4 x float> shufflevector (<4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer) to <4 x i32>), <4 x i32> < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 >)		; <<4 x i32>>:0 [#uses=1]
+	bitcast <4 x i32> %0 to <4 x float>		; <<4 x float>>:1 [#uses=1]
+	fsub <4 x float> %1, zeroinitializer		; <<4 x float>>:2 [#uses=1]
+	fsub <4 x float> shufflevector (<4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer), %2		; <<4 x float>>:3 [#uses=1]
+	shufflevector <4 x float> zeroinitializer, <4 x float> %3, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:4 [#uses=1]
+	shufflevector <4 x float> zeroinitializer, <4 x float> %4, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:5 [#uses=1]
+	shufflevector <4 x float> zeroinitializer, <4 x float> %5, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >		; <<4 x float>>:6 [#uses=1]
+	shufflevector <4 x float> %6, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >		; <<4 x float>>:7 [#uses=1]
+	shufflevector <4 x float> %7, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >		; <<4 x float>>:8 [#uses=1]
+	shufflevector <4 x float> zeroinitializer, <4 x float> %8, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >		; <<4 x float>>:9 [#uses=1]
+	shufflevector <4 x float> zeroinitializer, <4 x float> %9, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >		; <<4 x float>>:10 [#uses=1]
+	shufflevector <4 x float> zeroinitializer, <4 x float> %10, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:11 [#uses=1]
+	shufflevector <4 x float> zeroinitializer, <4 x float> %11, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:12 [#uses=1]
+	shufflevector <4 x float> %12, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >		; <<4 x float>>:13 [#uses=1]
+	shufflevector <4 x float> zeroinitializer, <4 x float> %13, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >		; <<4 x float>>:14 [#uses=1]
+	shufflevector <4 x float> zeroinitializer, <4 x float> %14, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >		; <<4 x float>>:15 [#uses=1]
+	shufflevector <4 x float> %15, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >		; <<4 x float>>:16 [#uses=1]
+	shufflevector <4 x float> zeroinitializer, <4 x float> %16, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >		; <<4 x float>>:17 [#uses=1]
+	shufflevector <4 x float> %17, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >		; <<4 x float>>:18 [#uses=1]
+	shufflevector <4 x float> %18, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >		; <<4 x float>>:19 [#uses=1]
+	shufflevector <4 x float> zeroinitializer, <4 x float> %19, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >		; <<4 x float>>:20 [#uses=1]
+	shufflevector <4 x float> %20, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >		; <<4 x float>>:21 [#uses=1]
+	shufflevector <4 x float> %21, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >		; <<4 x float>>:22 [#uses=1]
+	fmul <4 x float> %22, zeroinitializer		; <<4 x float>>:23 [#uses=1]
+	shufflevector <4 x float> %23, <4 x float> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 >		; <<4 x float>>:24 [#uses=1]
+	call <4 x float> @llvm.x86.sse.add.ss( <4 x float> zeroinitializer, <4 x float> %24 )		; <<4 x float>>:25 [#uses=1]
+	shufflevector <4 x float> %25, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:26 [#uses=1]
+	shufflevector <4 x float> %26, <4 x float> zeroinitializer, <4 x i32> zeroinitializer		; <<4 x float>>:27 [#uses=1]
+	shufflevector <4 x float> %27, <4 x float> zeroinitializer, <4 x i32> < i32 4, i32 1, i32 6, i32 7 >		; <<4 x float>>:28 [#uses=1]
+	fmul <4 x float> zeroinitializer, %28		; <<4 x float>>:29 [#uses=1]
+	fadd <4 x float> %29, zeroinitializer		; <<4 x float>>:30 [#uses=1]
+	fmul <4 x float> zeroinitializer, %30		; <<4 x float>>:31 [#uses=1]
+	shufflevector <4 x float> zeroinitializer, <4 x float> %31, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:32 [#uses=1]
+	fmul <4 x float> zeroinitializer, %32		; <<4 x float>>:33 [#uses=1]
+	shufflevector <4 x float> %33, <4 x float> zeroinitializer, <4 x i32> zeroinitializer		; <<4 x float>>:34 [#uses=1]
+	fmul <4 x float> zeroinitializer, %34		; <<4 x float>>:35 [#uses=1]
+	shufflevector <4 x float> zeroinitializer, <4 x float> %35, <4 x i32> < i32 0, i32 1, i32 6, i32 7 >		; <<4 x float>>:36 [#uses=1]
+	shufflevector <4 x float> zeroinitializer, <4 x float> %36, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:37 [#uses=1]
+	shufflevector <4 x float> zeroinitializer, <4 x float> %37, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:38 [#uses=1]
+	shufflevector <4 x float> zeroinitializer, <4 x float> %38, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:39 [#uses=1]
+	shufflevector <4 x float> zeroinitializer, <4 x float> %39, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:40 [#uses=1]
+	shufflevector <4 x float> zeroinitializer, <4 x float> %40, <4 x i32> < i32 4, i32 1, i32 6, i32 7 >		; <<4 x float>>:41 [#uses=1]
+	shufflevector <4 x float> zeroinitializer, <4 x float> %41, <4 x i32> < i32 4, i32 1, i32 6, i32 7 >		; <<4 x float>>:42 [#uses=1]
+	shufflevector <4 x float> zeroinitializer, <4 x float> %42, <4 x i32> < i32 4, i32 1, i32 6, i32 7 >		; <<4 x float>>:43 [#uses=1]
+	shufflevector <4 x float> zeroinitializer, <4 x float> %43, <4 x i32> < i32 4, i32 1, i32 6, i32 7 >		; <<4 x float>>:44 [#uses=1]
+	shufflevector <4 x float> zeroinitializer, <4 x float> %44, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:45 [#uses=1]
+	shufflevector <4 x float> zeroinitializer, <4 x float> %45, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:46 [#uses=1]
+	shufflevector <4 x float> zeroinitializer, <4 x float> %46, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:47 [#uses=1]
+	shufflevector <4 x float> zeroinitializer, <4 x float> %47, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:48 [#uses=1]
+	shufflevector <4 x float> %48, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >		; <<4 x float>>:49 [#uses=1]
+	fadd <4 x float> %49, zeroinitializer		; <<4 x float>>:50 [#uses=1]
+	%tmp5845 = extractelement <4 x float> %50, i32 2		; <float> [#uses=1]
+	store float %tmp5845, float* %P
+	ret void
+}
diff --git a/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll b/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
new file mode 100644
index 0000000..113d0eb
--- /dev/null
+++ b/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -o - -march=x86 -mattr=+mmx | grep paddq | count 2
+; RUN: llc < %s -o - -march=x86 -mattr=+mmx | grep movq | count 2
+
+define <1 x i64> @unsigned_add3(<1 x i64>* %a, <1 x i64>* %b, i32 %count) {
+entry:
+	%tmp2942 = icmp eq i32 %count, 0		; <i1> [#uses=1]
+	br i1 %tmp2942, label %bb31, label %bb26
+
+bb26:		; preds = %bb26, %entry
+	%i.037.0 = phi i32 [ 0, %entry ], [ %tmp25, %bb26 ]		; <i32> [#uses=3]
+	%sum.035.0 = phi <1 x i64> [ zeroinitializer, %entry ], [ %tmp22, %bb26 ]		; <<1 x i64>> [#uses=1]
+	%tmp13 = getelementptr <1 x i64>* %b, i32 %i.037.0		; <<1 x i64>*> [#uses=1]
+	%tmp14 = load <1 x i64>* %tmp13		; <<1 x i64>> [#uses=1]
+	%tmp18 = getelementptr <1 x i64>* %a, i32 %i.037.0		; <<1 x i64>*> [#uses=1]
+	%tmp19 = load <1 x i64>* %tmp18		; <<1 x i64>> [#uses=1]
+	%tmp21 = add <1 x i64> %tmp19, %tmp14		; <<1 x i64>> [#uses=1]
+	%tmp22 = add <1 x i64> %tmp21, %sum.035.0		; <<1 x i64>> [#uses=2]
+	%tmp25 = add i32 %i.037.0, 1		; <i32> [#uses=2]
+	%tmp29 = icmp ult i32 %tmp25, %count		; <i1> [#uses=1]
+	br i1 %tmp29, label %bb26, label %bb31
+
+bb31:		; preds = %bb26, %entry
+	%sum.035.1 = phi <1 x i64> [ zeroinitializer, %entry ], [ %tmp22, %bb26 ]		; <<1 x i64>> [#uses=1]
+	ret <1 x i64> %sum.035.1
+}
diff --git a/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll b/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll
new file mode 100644
index 0000000..85a2ecc
--- /dev/null
+++ b/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s | not grep {bsrl.*10}
+; PR1356
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-apple-darwin8"
+
+define i32 @main() {
+entry:
+        %tmp4 = tail call i32 asm "bsrl  $1, $0", "=r,ro,~{dirflag},~{fpsr},~{flags},~{cc}"( i32 10 )           ; <i32> [#uses=1]
+        ret i32 %tmp4
+}
+
diff --git a/test/CodeGen/X86/2007-05-05-VecCastExpand.ll b/test/CodeGen/X86/2007-05-05-VecCastExpand.ll
new file mode 100644
index 0000000..e58b193
--- /dev/null
+++ b/test/CodeGen/X86/2007-05-05-VecCastExpand.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=x86 -mcpu=i386 -mattr=+sse
+; PR1371
+
+@str = external global [18 x i8]		; <[18 x i8]*> [#uses=1]
+
+define void @test() {
+bb.i:
+	%tmp.i660 = load <4 x float>* null		; <<4 x float>> [#uses=1]
+	call void (i32, ...)* @printf( i32 0, i8* getelementptr ([18 x i8]* @str, i32 0, i64 0), double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00 )
+	%tmp152.i = load <4 x i32>* null		; <<4 x i32>> [#uses=1]
+	%tmp156.i = bitcast <4 x i32> %tmp152.i to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp175.i = bitcast <4 x float> %tmp.i660 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp176.i = xor <4 x i32> %tmp156.i, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
+	%tmp177.i = and <4 x i32> %tmp176.i, %tmp175.i		; <<4 x i32>> [#uses=1]
+	%tmp190.i = or <4 x i32> %tmp177.i, zeroinitializer		; <<4 x i32>> [#uses=1]
+	%tmp191.i = bitcast <4 x i32> %tmp190.i to <4 x float>		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp191.i, <4 x float>* null
+	ret void
+}
+
+declare void @printf(i32, ...)
diff --git a/test/CodeGen/X86/2007-05-07-InvokeSRet.ll b/test/CodeGen/X86/2007-05-07-InvokeSRet.ll
new file mode 100644
index 0000000..a3ff2f6
--- /dev/null
+++ b/test/CodeGen/X86/2007-05-07-InvokeSRet.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -enable-eh -disable-fp-elim | not grep {addl .12, %esp}
+; PR1398
+
+	%struct.S = type { i32, i32 }
+
+declare void @invokee(%struct.S* sret )
+
+define void @invoker(%struct.S* %name.0.0) {
+entry:
+	invoke void @invokee( %struct.S* %name.0.0 sret  )
+			to label %return unwind label %return
+
+return:		; preds = %entry, %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll b/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll
new file mode 100644
index 0000000..8ef2538
--- /dev/null
+++ b/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=x86-64
+
+	%struct.XDesc = type <{ i32, %struct.OpaqueXDataStorageType** }>
+	%struct.OpaqueXDataStorageType = type opaque
+
+declare i16 @GetParamDesc(%struct.XDesc*, i32, i32, %struct.XDesc*) signext 
+
+declare void @r_raise(i64, i8*, ...)
+
+define i64 @app_send_event(i64 %self, i64 %event_class, i64 %event_id, i64 %params, i64 %need_retval) {
+entry:
+	br i1 false, label %cond_true109, label %bb83.preheader
+
+bb83.preheader:		; preds = %entry
+	ret i64 0
+
+cond_true109:		; preds = %entry
+	br i1 false, label %cond_next164, label %cond_true239
+
+cond_next164:		; preds = %cond_true109
+	%tmp176 = call i16 @GetParamDesc( %struct.XDesc* null, i32 1701999219, i32 1413830740, %struct.XDesc* null ) signext 		; <i16> [#uses=0]
+	call void (i64, i8*, ...)* @r_raise( i64 0, i8* null )
+	unreachable
+
+cond_true239:		; preds = %cond_true109
+	ret i64 0
+}
diff --git a/test/CodeGen/X86/2007-05-15-maskmovq.ll b/test/CodeGen/X86/2007-05-15-maskmovq.ll
new file mode 100644
index 0000000..2093b8f
--- /dev/null
+++ b/test/CodeGen/X86/2007-05-15-maskmovq.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mcpu=yonah
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-apple-darwin8"
+
+define void @test(<1 x i64> %c64, <1 x i64> %mask1, i8* %P) {
+entry:
+	%tmp4 = bitcast <1 x i64> %mask1 to <8 x i8>		; <<8 x i8>> [#uses=1]
+	%tmp6 = bitcast <1 x i64> %c64 to <8 x i8>		; <<8 x i8>> [#uses=1]
+	tail call void @llvm.x86.mmx.maskmovq( <8 x i8> %tmp6, <8 x i8> %tmp4, i8* %P )
+	ret void
+}
+
+declare void @llvm.x86.mmx.maskmovq(<8 x i8>, <8 x i8>, i8*)
diff --git a/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll b/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll
new file mode 100644
index 0000000..b27ef83
--- /dev/null
+++ b/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep punpckhwd
+
+declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)
+
+declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>)
+
+define fastcc void @test(i32* %src, i32 %sbpr, i32* %dst, i32 %dbpr, i32 %w, i32 %h, i32 %dstalpha, i32 %mask) {
+	%tmp633 = shufflevector <8 x i16> zeroinitializer, <8 x i16> undef, <8 x i32> < i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7 >
+	%tmp715 = mul <8 x i16> zeroinitializer, %tmp633
+	%tmp776 = bitcast <8 x i16> %tmp715 to <4 x i32>
+	%tmp777 = add <4 x i32> %tmp776, shufflevector (<4 x i32> < i32 65537, i32 0, i32 0, i32 0 >, <4 x i32> < i32 65537, i32 0, i32 0, i32 0 >, <4 x i32> zeroinitializer)
+	%tmp805 = add <4 x i32> %tmp777, zeroinitializer
+	%tmp832 = bitcast <4 x i32> %tmp805 to <8 x i16>
+	%tmp838 = tail call <8 x i16> @llvm.x86.sse2.psrl.w( <8 x i16> %tmp832, <8 x i16> < i16 8, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef > )
+	%tmp1020 = tail call <16 x i8> @llvm.x86.sse2.packuswb.128( <8 x i16> zeroinitializer, <8 x i16> %tmp838 )
+	%tmp1030 = bitcast <16 x i8> %tmp1020 to <4 x i32>
+	%tmp1033 = add <4 x i32> zeroinitializer, %tmp1030
+	%tmp1048 = bitcast <4 x i32> %tmp1033 to <2 x i64>
+	%tmp1049 = or <2 x i64> %tmp1048, zeroinitializer
+	store <2 x i64> %tmp1049, <2 x i64>* null
+	ret void
+}
diff --git a/test/CodeGen/X86/2007-06-04-X86-64-CtorAsmBugs.ll b/test/CodeGen/X86/2007-06-04-X86-64-CtorAsmBugs.ll
new file mode 100644
index 0000000..321e116
--- /dev/null
+++ b/test/CodeGen/X86/2007-06-04-X86-64-CtorAsmBugs.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep GOTPCREL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep ".align.*3"
+
+	%struct.A = type { [1024 x i8] }
+@_ZN1A1aE = global %struct.A zeroinitializer, align 32		; <%struct.A*> [#uses=1]
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [ { i32, void ()* } { i32 65535, void ()* @_GLOBAL__I__ZN1A1aE } ]		; <[1 x { i32, void ()* }]*> [#uses=0]
+
+define internal void @_GLOBAL__I__ZN1A1aE() section "__TEXT,__StaticInit,regular,pure_instructions" {
+entry:
+	br label %bb.i
+
+bb.i:		; preds = %bb.i, %entry
+	%i.1.i1.0 = phi i32 [ 0, %entry ], [ %indvar.next, %bb.i ]		; <i32> [#uses=2]
+	%tmp1012.i = sext i32 %i.1.i1.0 to i64		; <i64> [#uses=1]
+	%tmp13.i = getelementptr %struct.A* @_ZN1A1aE, i32 0, i32 0, i64 %tmp1012.i		; <i8*> [#uses=1]
+	store i8 0, i8* %tmp13.i
+	%indvar.next = add i32 %i.1.i1.0, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next, 1024		; <i1> [#uses=1]
+	br i1 %exitcond, label %_Z41__static_initialization_and_destruction_0ii.exit, label %bb.i
+
+_Z41__static_initialization_and_destruction_0ii.exit:		; preds = %bb.i
+	ret void
+}
+
+define i32 @main(i32 %argc, i8** %argv) {
+entry:
+	ret i32 0
+}
diff --git a/test/CodeGen/X86/2007-06-04-tailmerge4.ll b/test/CodeGen/X86/2007-06-04-tailmerge4.ll
new file mode 100644
index 0000000..baf2377
--- /dev/null
+++ b/test/CodeGen/X86/2007-06-04-tailmerge4.ll
@@ -0,0 +1,454 @@
+; RUN: llc < %s -enable-eh -asm-verbose | grep invcont131
+; PR 1496:  tail merge was incorrectly removing this block
+
+; ModuleID = 'report.1.bc'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-pc-linux-gnu"
+  %struct.ALLOC = type { %struct.string___XUB, [2 x i8] }
+  %struct.RETURN = type { i32, i32, i32, i64 }
+  %struct.ada__streams__root_stream_type = type { %struct.ada__tags__dispatch_table* }
+  %struct.ada__tags__dispatch_table = type { [1 x i8*] }
+  %struct.ada__text_io__text_afcb = type { %struct.system__file_control_block__afcb, i32, i32, i32, i32, i32, %struct.ada__text_io__text_afcb*, i8, i8 }
+  %struct.string___XUB = type { i32, i32 }
+  %struct.string___XUP = type { i8*, %struct.string___XUB* }
+  %struct.system__file_control_block__afcb = type { %struct.ada__streams__root_stream_type, i32, %struct.string___XUP, i32, %struct.string___XUP, i8, i8, i8, i8, i8, i8, i8, %struct.system__file_control_block__afcb*, %struct.system__file_control_block__afcb* }
+  %struct.system__secondary_stack__mark_id = type { i8*, i32 }
+  %struct.wide_string___XUP = type { i16*, %struct.string___XUB* }
+@report_E = global i8 0   ; <i8*> [#uses=0]
+@report__test_status = internal global i8 1   ; <i8*> [#uses=8]
+@report__test_name = internal global [15 x i8] zeroinitializer    ; <[15 x i8]*> [#uses=10]
+@report__test_name_len = internal global i32 0    ; <i32*> [#uses=15]
+@.str = internal constant [12 x i8] c"report.adb\00\00"   ; <[12 x i8]*> [#uses=1]
+@C.26.599 = internal constant %struct.string___XUB { i32 1, i32 1 }   ; <%struct.string___XUB*> [#uses=1]
+@.str1 = internal constant [1 x i8] c":"    ; <[1 x i8]*> [#uses=1]
+@.str2 = internal constant [1 x i8] c" "    ; <[1 x i8]*> [#uses=1]
+@.str3 = internal constant [1 x i8] c"-"    ; <[1 x i8]*> [#uses=1]
+@.str5 = internal constant [10 x i8] c"0123456789"    ; <[10 x i8]*> [#uses=12]
+@C.59.855 = internal constant %struct.string___XUB { i32 1, i32 0 }   ; <%struct.string___XUB*> [#uses=1]
+@C.69.876 = internal constant %struct.string___XUB { i32 1, i32 3 }   ; <%struct.string___XUB*> [#uses=1]
+@C.70.879 = internal constant %struct.string___XUB { i32 1, i32 6 }   ; <%struct.string___XUB*> [#uses=1]
+@C.81.900 = internal constant %struct.string___XUB { i32 1, i32 5 }   ; <%struct.string___XUB*> [#uses=1]
+@.str6 = internal constant [0 x i8] zeroinitializer   ; <[0 x i8]*> [#uses=1]
+@.str7 = internal constant [3 x i8] c"2.5"    ; <[3 x i8]*> [#uses=1]
+@.str8 = internal constant [6 x i8] c"ACATS "   ; <[6 x i8]*> [#uses=1]
+@.str9 = internal constant [5 x i8] c",.,. "    ; <[5 x i8]*> [#uses=1]
+@.str10 = internal constant [1 x i8] c"."   ; <[1 x i8]*> [#uses=1]
+@.str11 = internal constant [5 x i8] c"---- "   ; <[5 x i8]*> [#uses=1]
+@.str12 = internal constant [5 x i8] c"   - "   ; <[5 x i8]*> [#uses=1]
+@.str13 = internal constant [5 x i8] c"   * "   ; <[5 x i8]*> [#uses=1]
+@.str14 = internal constant [5 x i8] c"   + "   ; <[5 x i8]*> [#uses=1]
+@.str15 = internal constant [5 x i8] c"   ! "   ; <[5 x i8]*> [#uses=1]
+@C.209.1380 = internal constant %struct.string___XUB { i32 1, i32 37 }    ; <%struct.string___XUB*> [#uses=1]
+@.str16 = internal constant [37 x i8] c" PASSED ============================."    ; <[37 x i8]*> [#uses=1]
+@.str17 = internal constant [5 x i8] c"==== "   ; <[5 x i8]*> [#uses=1]
+@.str18 = internal constant [37 x i8] c" NOT-APPLICABLE ++++++++++++++++++++."    ; <[37 x i8]*> [#uses=1]
+@.str19 = internal constant [5 x i8] c"++++ "   ; <[5 x i8]*> [#uses=1]
+@.str20 = internal constant [37 x i8] c" TENTATIVELY PASSED !!!!!!!!!!!!!!!!."    ; <[37 x i8]*> [#uses=1]
+@.str21 = internal constant [5 x i8] c"!!!! "   ; <[5 x i8]*> [#uses=1]
+@.str22 = internal constant [37 x i8] c" SEE '!' COMMENTS FOR SPECIAL NOTES!!"    ; <[37 x i8]*> [#uses=1]
+@.str23 = internal constant [37 x i8] c" FAILED ****************************."    ; <[37 x i8]*> [#uses=1]
+@.str24 = internal constant [5 x i8] c"**** "   ; <[5 x i8]*> [#uses=1]
+@__gnat_others_value = external constant i32    ; <i32*> [#uses=2]
+@system__soft_links__abort_undefer = external global void ()*   ; <void ()**> [#uses=1]
+@C.320.1854 = internal constant %struct.string___XUB { i32 2, i32 6 }   ; <%struct.string___XUB*> [#uses=1]
+
+declare void @report__put_msg(i64 %msg.0.0)
+
+declare void @__gnat_rcheck_05(i8*, i32)
+
+declare void @__gnat_rcheck_12(i8*, i32)
+
+declare %struct.ada__text_io__text_afcb* @ada__text_io__standard_output()
+
+declare void @ada__text_io__set_col(%struct.ada__text_io__text_afcb*, i32)
+
+declare void @ada__text_io__put_line(%struct.ada__text_io__text_afcb*, i64)
+
+declare void @report__time_stamp(%struct.string___XUP* sret  %agg.result)
+
+declare i64 @ada__calendar__clock()
+
+declare void @ada__calendar__split(%struct.RETURN* sret , i64)
+
+declare void @system__string_ops_concat_5__str_concat_5(%struct.string___XUP* sret , i64, i64, i64, i64, i64)
+
+declare void @system__string_ops_concat_3__str_concat_3(%struct.string___XUP* sret , i64, i64, i64)
+
+declare i8* @system__secondary_stack__ss_allocate(i32)
+
+declare void @report__test(i64 %name.0.0, i64 %descr.0.0)
+
+declare void @system__secondary_stack__ss_mark(%struct.system__secondary_stack__mark_id* sret )
+
+declare i8* @llvm.eh.exception()
+
+declare i32 @llvm.eh.selector(i8*, i8*, ...)
+
+declare i32 @llvm.eh.typeid.for(i8*)
+
+declare i32 @__gnat_eh_personality(...)
+
+declare i32 @_Unwind_Resume(...)
+
+declare void @__gnat_rcheck_07(i8*, i32)
+
+declare void @system__secondary_stack__ss_release(i64)
+
+declare void @report__comment(i64 %descr.0.0)
+
+declare void @report__failed(i64 %descr.0.0)
+
+declare void @report__not_applicable(i64 %descr.0.0)
+
+declare void @report__special_action(i64 %descr.0.0)
+
+define void @report__result() {
+entry:
+  %tmp = alloca %struct.system__secondary_stack__mark_id, align 8   ; <%struct.system__secondary_stack__mark_id*> [#uses=3]
+  %A.210 = alloca %struct.string___XUB, align 8   ; <%struct.string___XUB*> [#uses=3]
+  %tmp5 = alloca %struct.string___XUP, align 8    ; <%struct.string___XUP*> [#uses=3]
+  %A.229 = alloca %struct.string___XUB, align 8   ; <%struct.string___XUB*> [#uses=3]
+  %tmp10 = alloca %struct.string___XUP, align 8   ; <%struct.string___XUP*> [#uses=3]
+  %A.248 = alloca %struct.string___XUB, align 8   ; <%struct.string___XUB*> [#uses=3]
+  %tmp15 = alloca %struct.string___XUP, align 8   ; <%struct.string___XUP*> [#uses=3]
+  %A.270 = alloca %struct.string___XUB, align 8   ; <%struct.string___XUB*> [#uses=3]
+  %tmp20 = alloca %struct.string___XUP, align 8   ; <%struct.string___XUP*> [#uses=3]
+  %A.284 = alloca %struct.string___XUB, align 8   ; <%struct.string___XUB*> [#uses=3]
+  %tmp25 = alloca %struct.string___XUP, align 8   ; <%struct.string___XUP*> [#uses=3]
+  call void @system__secondary_stack__ss_mark( %struct.system__secondary_stack__mark_id* %tmp sret  )
+  %tmp28 = getelementptr %struct.system__secondary_stack__mark_id* %tmp, i32 0, i32 0   ; <i8**> [#uses=1]
+  %tmp29 = load i8** %tmp28   ; <i8*> [#uses=2]
+  %tmp31 = getelementptr %struct.system__secondary_stack__mark_id* %tmp, i32 0, i32 1   ; <i32*> [#uses=1]
+  %tmp32 = load i32* %tmp31   ; <i32> [#uses=2]
+  %tmp33 = load i8* @report__test_status    ; <i8> [#uses=1]
+  switch i8 %tmp33, label %bb483 [
+     i8 0, label %bb
+     i8 2, label %bb143
+     i8 3, label %bb261
+  ]
+
+bb:   ; preds = %entry
+  %tmp34 = load i32* @report__test_name_len   ; <i32> [#uses=4]
+  %tmp35 = icmp sgt i32 %tmp34, 0   ; <i1> [#uses=2]
+  %tmp40 = icmp sgt i32 %tmp34, 15    ; <i1> [#uses=1]
+  %bothcond139 = and i1 %tmp35, %tmp40    ; <i1> [#uses=1]
+  br i1 %bothcond139, label %cond_true43, label %cond_next44
+
+cond_true43:    ; preds = %bb
+  invoke void @__gnat_rcheck_12( i8* getelementptr ([12 x i8]* @.str, i32 0, i32 0), i32 212 )
+      to label %UnifiedUnreachableBlock unwind label %unwind
+
+unwind:   ; preds = %invcont589, %cond_next567, %bb555, %cond_true497, %invcont249, %cond_next227, %bb215, %cond_true157, %invcont131, %cond_next109, %bb97, %cond_true43
+  %eh_ptr = call i8* @llvm.eh.exception( )    ; <i8*> [#uses=1]
+  br label %cleanup717
+
+cond_next44:    ; preds = %bb
+  %tmp72 = getelementptr %struct.string___XUB* %A.210, i32 0, i32 0   ; <i32*> [#uses=1]
+  store i32 1, i32* %tmp72
+  %tmp73 = getelementptr %struct.string___XUB* %A.210, i32 0, i32 1   ; <i32*> [#uses=1]
+  store i32 %tmp34, i32* %tmp73
+  br i1 %tmp35, label %cond_true80, label %cond_next109
+
+cond_true80:    ; preds = %cond_next44
+  %tmp45.off = add i32 %tmp34, -1   ; <i32> [#uses=1]
+  %bothcond = icmp ugt i32 %tmp45.off, 14   ; <i1> [#uses=1]
+  br i1 %bothcond, label %bb97, label %cond_next109
+
+bb97:   ; preds = %cond_true80
+  invoke void @__gnat_rcheck_05( i8* getelementptr ([12 x i8]* @.str, i32 0, i32 0), i32 212 )
+      to label %UnifiedUnreachableBlock unwind label %unwind
+
+cond_next109:   ; preds = %cond_true80, %cond_next44
+  %A.210128 = ptrtoint %struct.string___XUB* %A.210 to i32    ; <i32> [#uses=1]
+  %A.210128129 = zext i32 %A.210128 to i64    ; <i64> [#uses=1]
+  %A.210128129130 = shl i64 %A.210128129, 32    ; <i64> [#uses=1]
+  %A.210128129130.ins = or i64 %A.210128129130, zext (i32 ptrtoint ([15 x i8]* @report__test_name to i32) to i64)   ; <i64> [#uses=1]
+  invoke void @system__string_ops_concat_3__str_concat_3( %struct.string___XUP* %tmp5 sret , i64 or (i64 zext (i32 ptrtoint ([5 x i8]* @.str17 to i32) to i64), i64 shl (i64 zext (i32 ptrtoint (%struct.string___XUB* @C.81.900 to i32) to i64), i64 32)), i64 %A.210128129130.ins, i64 or (i64 zext (i32 ptrtoint ([37 x i8]* @.str16 to i32) to i64), i64 shl (i64 zext (i32 ptrtoint (%struct.string___XUB* @C.209.1380 to i32) to i64), i64 32)) )
+      to label %invcont131 unwind label %unwind
+
+invcont131:   ; preds = %cond_next109
+  %tmp133 = getelementptr %struct.string___XUP* %tmp5, i32 0, i32 0   ; <i8**> [#uses=1]
+  %tmp134 = load i8** %tmp133   ; <i8*> [#uses=1]
+  %tmp134120 = ptrtoint i8* %tmp134 to i32    ; <i32> [#uses=1]
+  %tmp134120121 = zext i32 %tmp134120 to i64    ; <i64> [#uses=1]
+  %tmp136 = getelementptr %struct.string___XUP* %tmp5, i32 0, i32 1   ; <%struct.string___XUB**> [#uses=1]
+  %tmp137 = load %struct.string___XUB** %tmp136   ; <%struct.string___XUB*> [#uses=1]
+  %tmp137116 = ptrtoint %struct.string___XUB* %tmp137 to i32    ; <i32> [#uses=1]
+  %tmp137116117 = zext i32 %tmp137116 to i64    ; <i64> [#uses=1]
+  %tmp137116117118 = shl i64 %tmp137116117, 32    ; <i64> [#uses=1]
+  %tmp137116117118.ins = or i64 %tmp137116117118, %tmp134120121   ; <i64> [#uses=1]
+  invoke fastcc void @report__put_msg( i64 %tmp137116117118.ins )
+      to label %cond_next618 unwind label %unwind
+
+bb143:    ; preds = %entry
+  %tmp144 = load i32* @report__test_name_len    ; <i32> [#uses=4]
+  %tmp147 = icmp sgt i32 %tmp144, 0   ; <i1> [#uses=2]
+  %tmp154 = icmp sgt i32 %tmp144, 15    ; <i1> [#uses=1]
+  %bothcond140 = and i1 %tmp147, %tmp154    ; <i1> [#uses=1]
+  br i1 %bothcond140, label %cond_true157, label %cond_next160
+
+cond_true157:   ; preds = %bb143
+  invoke void @__gnat_rcheck_12( i8* getelementptr ([12 x i8]* @.str, i32 0, i32 0), i32 215 )
+      to label %UnifiedUnreachableBlock unwind label %unwind
+
+cond_next160:   ; preds = %bb143
+  %tmp189 = getelementptr %struct.string___XUB* %A.229, i32 0, i32 0    ; <i32*> [#uses=1]
+  store i32 1, i32* %tmp189
+  %tmp190 = getelementptr %struct.string___XUB* %A.229, i32 0, i32 1    ; <i32*> [#uses=1]
+  store i32 %tmp144, i32* %tmp190
+  br i1 %tmp147, label %cond_true197, label %cond_next227
+
+cond_true197:   ; preds = %cond_next160
+  %tmp161.off = add i32 %tmp144, -1   ; <i32> [#uses=1]
+  %bothcond1 = icmp ugt i32 %tmp161.off, 14   ; <i1> [#uses=1]
+  br i1 %bothcond1, label %bb215, label %cond_next227
+
+bb215:    ; preds = %cond_true197
+  invoke void @__gnat_rcheck_05( i8* getelementptr ([12 x i8]* @.str, i32 0, i32 0), i32 215 )
+      to label %UnifiedUnreachableBlock unwind label %unwind
+
+cond_next227:   ; preds = %cond_true197, %cond_next160
+  %A.229105 = ptrtoint %struct.string___XUB* %A.229 to i32    ; <i32> [#uses=1]
+  %A.229105106 = zext i32 %A.229105 to i64    ; <i64> [#uses=1]
+  %A.229105106107 = shl i64 %A.229105106, 32    ; <i64> [#uses=1]
+  %A.229105106107.ins = or i64 %A.229105106107, zext (i32 ptrtoint ([15 x i8]* @report__test_name to i32) to i64)   ; <i64> [#uses=1]
+  invoke void @system__string_ops_concat_3__str_concat_3( %struct.string___XUP* %tmp10 sret , i64 or (i64 zext (i32 ptrtoint ([5 x i8]* @.str19 to i32) to i64), i64 shl (i64 zext (i32 ptrtoint (%struct.string___XUB* @C.81.900 to i32) to i64), i64 32)), i64 %A.229105106107.ins, i64 or (i64 zext (i32 ptrtoint ([37 x i8]* @.str18 to i32) to i64), i64 shl (i64 zext (i32 ptrtoint (%struct.string___XUB* @C.209.1380 to i32) to i64), i64 32)) )
+      to label %invcont249 unwind label %unwind
+
+invcont249:   ; preds = %cond_next227
+  %tmp251 = getelementptr %struct.string___XUP* %tmp10, i32 0, i32 0    ; <i8**> [#uses=1]
+  %tmp252 = load i8** %tmp251   ; <i8*> [#uses=1]
+  %tmp25297 = ptrtoint i8* %tmp252 to i32   ; <i32> [#uses=1]
+  %tmp2529798 = zext i32 %tmp25297 to i64   ; <i64> [#uses=1]
+  %tmp254 = getelementptr %struct.string___XUP* %tmp10, i32 0, i32 1    ; <%struct.string___XUB**> [#uses=1]
+  %tmp255 = load %struct.string___XUB** %tmp254   ; <%struct.string___XUB*> [#uses=1]
+  %tmp25593 = ptrtoint %struct.string___XUB* %tmp255 to i32   ; <i32> [#uses=1]
+  %tmp2559394 = zext i32 %tmp25593 to i64   ; <i64> [#uses=1]
+  %tmp255939495 = shl i64 %tmp2559394, 32   ; <i64> [#uses=1]
+  %tmp255939495.ins = or i64 %tmp255939495, %tmp2529798   ; <i64> [#uses=1]
+  invoke fastcc void @report__put_msg( i64 %tmp255939495.ins )
+      to label %cond_next618 unwind label %unwind
+
+bb261:    ; preds = %entry
+  %tmp262 = call i8* @llvm.stacksave( )   ; <i8*> [#uses=2]
+  %tmp263 = load i32* @report__test_name_len    ; <i32> [#uses=4]
+  %tmp266 = icmp sgt i32 %tmp263, 0   ; <i1> [#uses=2]
+  %tmp273 = icmp sgt i32 %tmp263, 15    ; <i1> [#uses=1]
+  %bothcond141 = and i1 %tmp266, %tmp273    ; <i1> [#uses=1]
+  br i1 %bothcond141, label %cond_true276, label %cond_next281
+
+cond_true276:   ; preds = %bb261
+  invoke void @__gnat_rcheck_12( i8* getelementptr ([12 x i8]* @.str, i32 0, i32 0), i32 218 )
+      to label %UnifiedUnreachableBlock unwind label %unwind277
+
+unwind277:    ; preds = %invcont467, %cond_next442, %invcont370, %cond_next348, %bb336, %cond_true276
+  %eh_ptr278 = call i8* @llvm.eh.exception( )   ; <i8*> [#uses=1]
+  call void @llvm.stackrestore( i8* %tmp262 )
+  br label %cleanup717
+
+cond_next281:   ; preds = %bb261
+  %tmp310 = getelementptr %struct.string___XUB* %A.248, i32 0, i32 0    ; <i32*> [#uses=1]
+  store i32 1, i32* %tmp310
+  %tmp311 = getelementptr %struct.string___XUB* %A.248, i32 0, i32 1    ; <i32*> [#uses=1]
+  store i32 %tmp263, i32* %tmp311
+  br i1 %tmp266, label %cond_true318, label %cond_next348
+
+cond_true318:   ; preds = %cond_next281
+  %tmp282.off = add i32 %tmp263, -1   ; <i32> [#uses=1]
+  %bothcond2 = icmp ugt i32 %tmp282.off, 14   ; <i1> [#uses=1]
+  br i1 %bothcond2, label %bb336, label %cond_next348
+
+bb336:    ; preds = %cond_true318
+  invoke void @__gnat_rcheck_05( i8* getelementptr ([12 x i8]* @.str, i32 0, i32 0), i32 218 )
+      to label %UnifiedUnreachableBlock unwind label %unwind277
+
+cond_next348:   ; preds = %cond_true318, %cond_next281
+  %A.24882 = ptrtoint %struct.string___XUB* %A.248 to i32   ; <i32> [#uses=1]
+  %A.2488283 = zext i32 %A.24882 to i64   ; <i64> [#uses=1]
+  %A.248828384 = shl i64 %A.2488283, 32   ; <i64> [#uses=1]
+  %A.248828384.ins = or i64 %A.248828384, zext (i32 ptrtoint ([15 x i8]* @report__test_name to i32) to i64)   ; <i64> [#uses=1]
+  invoke void @system__string_ops_concat_3__str_concat_3( %struct.string___XUP* %tmp15 sret , i64 or (i64 zext (i32 ptrtoint ([5 x i8]* @.str21 to i32) to i64), i64 shl (i64 zext (i32 ptrtoint (%struct.string___XUB* @C.81.900 to i32) to i64), i64 32)), i64 %A.248828384.ins, i64 or (i64 zext (i32 ptrtoint ([37 x i8]* @.str20 to i32) to i64), i64 shl (i64 zext (i32 ptrtoint (%struct.string___XUB* @C.209.1380 to i32) to i64), i64 32)) )
+      to label %invcont370 unwind label %unwind277
+
+invcont370:   ; preds = %cond_next348
+  %tmp372 = getelementptr %struct.string___XUP* %tmp15, i32 0, i32 0    ; <i8**> [#uses=1]
+  %tmp373 = load i8** %tmp372   ; <i8*> [#uses=1]
+  %tmp37374 = ptrtoint i8* %tmp373 to i32   ; <i32> [#uses=1]
+  %tmp3737475 = zext i32 %tmp37374 to i64   ; <i64> [#uses=1]
+  %tmp375 = getelementptr %struct.string___XUP* %tmp15, i32 0, i32 1    ; <%struct.string___XUB**> [#uses=1]
+  %tmp376 = load %struct.string___XUB** %tmp375   ; <%struct.string___XUB*> [#uses=1]
+  %tmp37670 = ptrtoint %struct.string___XUB* %tmp376 to i32   ; <i32> [#uses=1]
+  %tmp3767071 = zext i32 %tmp37670 to i64   ; <i64> [#uses=1]
+  %tmp376707172 = shl i64 %tmp3767071, 32   ; <i64> [#uses=1]
+  %tmp376707172.ins = or i64 %tmp376707172, %tmp3737475   ; <i64> [#uses=1]
+  invoke fastcc void @report__put_msg( i64 %tmp376707172.ins )
+      to label %invcont381 unwind label %unwind277
+
+invcont381:   ; preds = %invcont370
+  %tmp382 = load i32* @report__test_name_len    ; <i32> [#uses=6]
+  %tmp415 = icmp sgt i32 %tmp382, -1    ; <i1> [#uses=1]
+  %max416 = select i1 %tmp415, i32 %tmp382, i32 0   ; <i32> [#uses=1]
+  %tmp417 = alloca i8, i32 %max416    ; <i8*> [#uses=3]
+  %tmp423 = icmp sgt i32 %tmp382, 0   ; <i1> [#uses=1]
+  br i1 %tmp423, label %bb427, label %cond_next442
+
+bb427:    ; preds = %invcont381
+  store i8 32, i8* %tmp417
+  %tmp434 = icmp eq i32 %tmp382, 1    ; <i1> [#uses=1]
+  br i1 %tmp434, label %cond_next442, label %cond_next438.preheader
+
+cond_next438.preheader:   ; preds = %bb427
+  %tmp. = add i32 %tmp382, -1   ; <i32> [#uses=1]
+  br label %cond_next438
+
+cond_next438:   ; preds = %cond_next438, %cond_next438.preheader
+  %indvar = phi i32 [ 0, %cond_next438.preheader ], [ %J130b.513.5, %cond_next438 ]   ; <i32> [#uses=1]
+  %J130b.513.5 = add i32 %indvar, 1   ; <i32> [#uses=3]
+  %tmp43118 = getelementptr i8* %tmp417, i32 %J130b.513.5   ; <i8*> [#uses=1]
+  store i8 32, i8* %tmp43118
+  %exitcond = icmp eq i32 %J130b.513.5, %tmp.   ; <i1> [#uses=1]
+  br i1 %exitcond, label %cond_next442, label %cond_next438
+
+cond_next442:   ; preds = %cond_next438, %bb427, %invcont381
+  %tmp448 = getelementptr %struct.string___XUB* %A.270, i32 0, i32 0    ; <i32*> [#uses=1]
+  store i32 1, i32* %tmp448
+  %tmp449 = getelementptr %struct.string___XUB* %A.270, i32 0, i32 1    ; <i32*> [#uses=1]
+  store i32 %tmp382, i32* %tmp449
+  %tmp41762 = ptrtoint i8* %tmp417 to i32   ; <i32> [#uses=1]
+  %tmp4176263 = zext i32 %tmp41762 to i64   ; <i64> [#uses=1]
+  %A.27058 = ptrtoint %struct.string___XUB* %A.270 to i32   ; <i32> [#uses=1]
+  %A.2705859 = zext i32 %A.27058 to i64   ; <i64> [#uses=1]
+  %A.270585960 = shl i64 %A.2705859, 32   ; <i64> [#uses=1]
+  %A.270585960.ins = or i64 %tmp4176263, %A.270585960   ; <i64> [#uses=1]
+  invoke void @system__string_ops_concat_3__str_concat_3( %struct.string___XUP* %tmp20 sret , i64 or (i64 zext (i32 ptrtoint ([5 x i8]* @.str21 to i32) to i64), i64 shl (i64 zext (i32 ptrtoint (%struct.string___XUB* @C.81.900 to i32) to i64), i64 32)), i64 %A.270585960.ins, i64 or (i64 zext (i32 ptrtoint ([37 x i8]* @.str22 to i32) to i64), i64 shl (i64 zext (i32 ptrtoint (%struct.string___XUB* @C.209.1380 to i32) to i64), i64 32)) )
+      to label %invcont467 unwind label %unwind277
+
+invcont467:   ; preds = %cond_next442
+  %tmp469 = getelementptr %struct.string___XUP* %tmp20, i32 0, i32 0    ; <i8**> [#uses=1]
+  %tmp470 = load i8** %tmp469   ; <i8*> [#uses=1]
+  %tmp47050 = ptrtoint i8* %tmp470 to i32   ; <i32> [#uses=1]
+  %tmp4705051 = zext i32 %tmp47050 to i64   ; <i64> [#uses=1]
+  %tmp472 = getelementptr %struct.string___XUP* %tmp20, i32 0, i32 1    ; <%struct.string___XUB**> [#uses=1]
+  %tmp473 = load %struct.string___XUB** %tmp472   ; <%struct.string___XUB*> [#uses=1]
+  %tmp47346 = ptrtoint %struct.string___XUB* %tmp473 to i32   ; <i32> [#uses=1]
+  %tmp4734647 = zext i32 %tmp47346 to i64   ; <i64> [#uses=1]
+  %tmp473464748 = shl i64 %tmp4734647, 32   ; <i64> [#uses=1]
+  %tmp473464748.ins = or i64 %tmp473464748, %tmp4705051   ; <i64> [#uses=1]
+  invoke fastcc void @report__put_msg( i64 %tmp473464748.ins )
+      to label %cleanup unwind label %unwind277
+
+cleanup:    ; preds = %invcont467
+  call void @llvm.stackrestore( i8* %tmp262 )
+  br label %cond_next618
+
+bb483:    ; preds = %entry
+  %tmp484 = load i32* @report__test_name_len    ; <i32> [#uses=4]
+  %tmp487 = icmp sgt i32 %tmp484, 0   ; <i1> [#uses=2]
+  %tmp494 = icmp sgt i32 %tmp484, 15    ; <i1> [#uses=1]
+  %bothcond142 = and i1 %tmp487, %tmp494    ; <i1> [#uses=1]
+  br i1 %bothcond142, label %cond_true497, label %cond_next500
+
+cond_true497:   ; preds = %bb483
+  invoke void @__gnat_rcheck_12( i8* getelementptr ([12 x i8]* @.str, i32 0, i32 0), i32 223 )
+      to label %UnifiedUnreachableBlock unwind label %unwind
+
+cond_next500:   ; preds = %bb483
+  %tmp529 = getelementptr %struct.string___XUB* %A.284, i32 0, i32 0    ; <i32*> [#uses=1]
+  store i32 1, i32* %tmp529
+  %tmp530 = getelementptr %struct.string___XUB* %A.284, i32 0, i32 1    ; <i32*> [#uses=1]
+  store i32 %tmp484, i32* %tmp530
+  br i1 %tmp487, label %cond_true537, label %cond_next567
+
+cond_true537:   ; preds = %cond_next500
+  %tmp501.off = add i32 %tmp484, -1   ; <i32> [#uses=1]
+  %bothcond3 = icmp ugt i32 %tmp501.off, 14   ; <i1> [#uses=1]
+  br i1 %bothcond3, label %bb555, label %cond_next567
+
+bb555:    ; preds = %cond_true537
+  invoke void @__gnat_rcheck_05( i8* getelementptr ([12 x i8]* @.str, i32 0, i32 0), i32 223 )
+      to label %UnifiedUnreachableBlock unwind label %unwind
+
+cond_next567:   ; preds = %cond_true537, %cond_next500
+  %A.28435 = ptrtoint %struct.string___XUB* %A.284 to i32   ; <i32> [#uses=1]
+  %A.2843536 = zext i32 %A.28435 to i64   ; <i64> [#uses=1]
+  %A.284353637 = shl i64 %A.2843536, 32   ; <i64> [#uses=1]
+  %A.284353637.ins = or i64 %A.284353637, zext (i32 ptrtoint ([15 x i8]* @report__test_name to i32) to i64)   ; <i64> [#uses=1]
+  invoke void @system__string_ops_concat_3__str_concat_3( %struct.string___XUP* %tmp25 sret , i64 or (i64 zext (i32 ptrtoint ([5 x i8]* @.str24 to i32) to i64), i64 shl (i64 zext (i32 ptrtoint (%struct.string___XUB* @C.81.900 to i32) to i64), i64 32)), i64 %A.284353637.ins, i64 or (i64 zext (i32 ptrtoint ([37 x i8]* @.str23 to i32) to i64), i64 shl (i64 zext (i32 ptrtoint (%struct.string___XUB* @C.209.1380 to i32) to i64), i64 32)) )
+      to label %invcont589 unwind label %unwind
+
+invcont589:   ; preds = %cond_next567
+  %tmp591 = getelementptr %struct.string___XUP* %tmp25, i32 0, i32 0    ; <i8**> [#uses=1]
+  %tmp592 = load i8** %tmp591   ; <i8*> [#uses=1]
+  %tmp59228 = ptrtoint i8* %tmp592 to i32   ; <i32> [#uses=1]
+  %tmp5922829 = zext i32 %tmp59228 to i64   ; <i64> [#uses=1]
+  %tmp594 = getelementptr %struct.string___XUP* %tmp25, i32 0, i32 1    ; <%struct.string___XUB**> [#uses=1]
+  %tmp595 = load %struct.string___XUB** %tmp594   ; <%struct.string___XUB*> [#uses=1]
+  %tmp59524 = ptrtoint %struct.string___XUB* %tmp595 to i32   ; <i32> [#uses=1]
+  %tmp5952425 = zext i32 %tmp59524 to i64   ; <i64> [#uses=1]
+  %tmp595242526 = shl i64 %tmp5952425, 32   ; <i64> [#uses=1]
+  %tmp595242526.ins = or i64 %tmp595242526, %tmp5922829   ; <i64> [#uses=1]
+  invoke fastcc void @report__put_msg( i64 %tmp595242526.ins )
+      to label %cond_next618 unwind label %unwind
+
+cond_next618:   ; preds = %invcont589, %cleanup, %invcont249, %invcont131
+  store i8 1, i8* @report__test_status
+  store i32 7, i32* @report__test_name_len
+  store i8 78, i8* getelementptr ([15 x i8]* @report__test_name, i32 0, i32 0)
+  store i8 79, i8* getelementptr ([15 x i8]* @report__test_name, i32 0, i32 1)
+  store i8 95, i8* getelementptr ([15 x i8]* @report__test_name, i32 0, i32 2)
+  store i8 78, i8* getelementptr ([15 x i8]* @report__test_name, i32 0, i32 3)
+  store i8 65, i8* getelementptr ([15 x i8]* @report__test_name, i32 0, i32 4)
+  store i8 77, i8* getelementptr ([15 x i8]* @report__test_name, i32 0, i32 5)
+  store i8 69, i8* getelementptr ([15 x i8]* @report__test_name, i32 0, i32 6)
+  %CHAIN.310.0.0.0.val5.i = ptrtoint i8* %tmp29 to i32    ; <i32> [#uses=1]
+  %CHAIN.310.0.0.0.val56.i = zext i32 %CHAIN.310.0.0.0.val5.i to i64    ; <i64> [#uses=1]
+  %CHAIN.310.0.0.1.val2.i = zext i32 %tmp32 to i64    ; <i64> [#uses=1]
+  %CHAIN.310.0.0.1.val23.i = shl i64 %CHAIN.310.0.0.1.val2.i, 32    ; <i64> [#uses=1]
+  %CHAIN.310.0.0.1.val23.ins.i = or i64 %CHAIN.310.0.0.1.val23.i, %CHAIN.310.0.0.0.val56.i    ; <i64> [#uses=1]
+  call void @system__secondary_stack__ss_release( i64 %CHAIN.310.0.0.1.val23.ins.i )
+  ret void
+
+cleanup717:   ; preds = %unwind277, %unwind
+  %eh_exception.0 = phi i8* [ %eh_ptr278, %unwind277 ], [ %eh_ptr, %unwind ]    ; <i8*> [#uses=1]
+  %CHAIN.310.0.0.0.val5.i8 = ptrtoint i8* %tmp29 to i32   ; <i32> [#uses=1]
+  %CHAIN.310.0.0.0.val56.i9 = zext i32 %CHAIN.310.0.0.0.val5.i8 to i64    ; <i64> [#uses=1]
+  %CHAIN.310.0.0.1.val2.i10 = zext i32 %tmp32 to i64    ; <i64> [#uses=1]
+  %CHAIN.310.0.0.1.val23.i11 = shl i64 %CHAIN.310.0.0.1.val2.i10, 32    ; <i64> [#uses=1]
+  %CHAIN.310.0.0.1.val23.ins.i12 = or i64 %CHAIN.310.0.0.1.val23.i11, %CHAIN.310.0.0.0.val56.i9   ; <i64> [#uses=1]
+  call void @system__secondary_stack__ss_release( i64 %CHAIN.310.0.0.1.val23.ins.i12 )
+  call i32 (...)* @_Unwind_Resume( i8* %eh_exception.0 )    ; <i32>:0 [#uses=0]
+  unreachable
+
+UnifiedUnreachableBlock:    ; preds = %bb555, %cond_true497, %bb336, %cond_true276, %bb215, %cond_true157, %bb97, %cond_true43
+  unreachable
+}
+
+declare i8* @llvm.stacksave()
+
+declare void @llvm.stackrestore(i8*)
+
+declare i32 @report__ident_int(i32 %x)
+
+declare i8 @report__equal(i32 %x, i32 %y)
+
+declare i8 @report__ident_char(i8 zeroext  %x)
+
+declare i16 @report__ident_wide_char(i16 zeroext  %x)
+
+declare i8 @report__ident_bool(i8 %x)
+
+declare void @report__ident_str(%struct.string___XUP* sret  %agg.result, i64 %x.0.0)
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
+
+declare void @report__ident_wide_str(%struct.wide_string___XUP* sret  %agg.result, i64 %x.0.0)
+
+declare void @__gnat_begin_handler(i8*)
+
+declare void @__gnat_end_handler(i8*)
+
+declare void @report__legal_file_name(%struct.string___XUP* sret  %agg.result, i32 %x, i64 %nam.0.0)
+
+declare void @__gnat_rcheck_06(i8*, i32)
+
+declare void @system__string_ops__str_concat_cs(%struct.string___XUP* sret , i8 zeroext , i64)
diff --git a/test/CodeGen/X86/2007-06-05-LSR-Dominator.ll b/test/CodeGen/X86/2007-06-05-LSR-Dominator.ll
new file mode 100644
index 0000000..36a97ef
--- /dev/null
+++ b/test/CodeGen/X86/2007-06-05-LSR-Dominator.ll
@@ -0,0 +1,129 @@
+; PR1495
+; RUN: llc < %s -march=x86
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-pc-linux-gnu"
+	%struct.AVRational = type { i32, i32 }
+	%struct.FFTComplex = type { float, float }
+	%struct.FFTContext = type { i32, i32, i16*, %struct.FFTComplex*, %struct.FFTComplex*, void (%struct.FFTContext*, %struct.FFTComplex*)*, void (%struct.MDCTContext*, float*, float*, float*)* }
+	%struct.MDCTContext = type { i32, i32, float*, float*, %struct.FFTContext }
+	%struct.Minima = type { i32, i32, i32, i32 }
+	%struct.codebook_t = type { i32, i8*, i32*, i32, float, float, i32, i32, i32*, float*, float* }
+	%struct.floor_class_t = type { i32, i32, i32, i32* }
+	%struct.floor_t = type { i32, i32*, i32, %struct.floor_class_t*, i32, i32, i32, %struct.Minima* }
+	%struct.mapping_t = type { i32, i32*, i32*, i32*, i32, i32*, i32* }
+	%struct.residue_t = type { i32, i32, i32, i32, i32, i32, [8 x i8]*, [2 x float]* }
+	%struct.venc_context_t = type { i32, i32, [2 x i32], [2 x %struct.MDCTContext], [2 x float*], i32, float*, float*, float*, float*, float, i32, %struct.codebook_t*, i32, %struct.floor_t*, i32, %struct.residue_t*, i32, %struct.mapping_t*, i32, %struct.AVRational* }
+
+define fastcc i32 @put_main_header(%struct.venc_context_t* %venc, i8** %out) {
+entry:
+	br i1 false, label %bb1820, label %bb288.bb148_crit_edge
+
+bb288.bb148_crit_edge:		; preds = %entry
+	ret i32 0
+
+cond_next1712:		; preds = %bb1820.bb1680_crit_edge
+	ret i32 0
+
+bb1817:		; preds = %bb1820.bb1680_crit_edge
+	br label %bb1820
+
+bb1820:		; preds = %bb1817, %entry
+	%pb.1.50 = phi i32 [ %tmp1693, %bb1817 ], [ 8, %entry ]		; <i32> [#uses=3]
+	br i1 false, label %bb2093, label %bb1820.bb1680_crit_edge
+
+bb1820.bb1680_crit_edge:		; preds = %bb1820
+	%tmp1693 = add i32 %pb.1.50, 8		; <i32> [#uses=2]
+	%tmp1702 = icmp slt i32 %tmp1693, 0		; <i1> [#uses=1]
+	br i1 %tmp1702, label %cond_next1712, label %bb1817
+
+bb2093:		; preds = %bb1820
+	%tmp2102 = add i32 %pb.1.50, 65		; <i32> [#uses=0]
+	%tmp2236 = add i32 %pb.1.50, 72		; <i32> [#uses=1]
+	%tmp2237 = sdiv i32 %tmp2236, 8		; <i32> [#uses=2]
+	br i1 false, label %bb2543, label %bb2536.bb2396_crit_edge
+
+bb2536.bb2396_crit_edge:		; preds = %bb2093
+	ret i32 0
+
+bb2543:		; preds = %bb2093
+	br i1 false, label %cond_next2576, label %bb2690
+
+cond_next2576:		; preds = %bb2543
+	ret i32 0
+
+bb2682:		; preds = %bb2690
+	ret i32 0
+
+bb2690:		; preds = %bb2543
+	br i1 false, label %bb2682, label %bb2698
+
+bb2698:		; preds = %bb2690
+	br i1 false, label %cond_next2726, label %bb2831
+
+cond_next2726:		; preds = %bb2698
+	ret i32 0
+
+bb2831:		; preds = %bb2698
+	br i1 false, label %cond_next2859, label %bb2964
+
+cond_next2859:		; preds = %bb2831
+	br i1 false, label %bb2943, label %cond_true2866
+
+cond_true2866:		; preds = %cond_next2859
+	br i1 false, label %cond_true2874, label %cond_false2897
+
+cond_true2874:		; preds = %cond_true2866
+	ret i32 0
+
+cond_false2897:		; preds = %cond_true2866
+	ret i32 0
+
+bb2943:		; preds = %cond_next2859
+	ret i32 0
+
+bb2964:		; preds = %bb2831
+	br i1 false, label %cond_next2997, label %bb4589
+
+cond_next2997:		; preds = %bb2964
+	ret i32 0
+
+bb3103:		; preds = %bb4589
+	ret i32 0
+
+bb4589:		; preds = %bb2964
+	br i1 false, label %bb3103, label %bb4597
+
+bb4597:		; preds = %bb4589
+	br i1 false, label %cond_next4630, label %bb4744
+
+cond_next4630:		; preds = %bb4597
+	br i1 false, label %bb4744, label %cond_true4724
+
+cond_true4724:		; preds = %cond_next4630
+	br i1 false, label %bb4736, label %bb7531
+
+bb4736:		; preds = %cond_true4724
+	ret i32 0
+
+bb4744:		; preds = %cond_next4630, %bb4597
+	ret i32 0
+
+bb7531:		; preds = %cond_true4724
+	%v_addr.023.0.i6 = add i32 %tmp2237, -255		; <i32> [#uses=1]
+	br label %bb.i14
+
+bb.i14:		; preds = %bb.i14, %bb7531
+	%n.021.0.i8 = phi i32 [ 0, %bb7531 ], [ %indvar.next, %bb.i14 ]		; <i32> [#uses=2]
+	%tmp..i9 = mul i32 %n.021.0.i8, -255		; <i32> [#uses=1]
+	%tmp5.i11 = add i32 %v_addr.023.0.i6, %tmp..i9		; <i32> [#uses=1]
+	%tmp10.i12 = icmp ugt i32 %tmp5.i11, 254		; <i1> [#uses=1]
+	%indvar.next = add i32 %n.021.0.i8, 1		; <i32> [#uses=1]
+	br i1 %tmp10.i12, label %bb.i14, label %bb12.loopexit.i18
+
+bb12.loopexit.i18:		; preds = %bb.i14
+	call void @llvm.memcpy.i32( i8* null, i8* null, i32 %tmp2237, i32 1 )
+	ret i32 0
+}
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
diff --git a/test/CodeGen/X86/2007-06-14-branchfold.ll b/test/CodeGen/X86/2007-06-14-branchfold.ll
new file mode 100644
index 0000000..2680b15
--- /dev/null
+++ b/test/CodeGen/X86/2007-06-14-branchfold.ll
@@ -0,0 +1,133 @@
+; RUN: llc < %s -march=x86 -mcpu=i686 | not grep jmp
+; check that branch folding understands FP_REG_KILL is not a branch
+
+target triple = "i686-pc-linux-gnu"
+  %struct.FRAME.c34003a = type { float, float }
+@report_E = global i8 0   ; <i8*> [#uses=0]
+
+define void @main() {
+entry:
+  %FRAME.31 = alloca %struct.FRAME.c34003a, align 8   ; <%struct.FRAME.c34003a*> [#uses=4]
+  %tmp20 = call i32 @report__ident_int( i32 -50 )   ; <i32> [#uses=1]
+  %tmp2021 = sitofp i32 %tmp20 to float   ; <float> [#uses=5]
+  %tmp23 = fcmp ult float %tmp2021, 0xC7EFFFFFE0000000    ; <i1> [#uses=1]
+  %tmp26 = fcmp ugt float %tmp2021, 0x47EFFFFFE0000000    ; <i1> [#uses=1]
+  %bothcond = or i1 %tmp23, %tmp26    ; <i1> [#uses=1]
+  br i1 %bothcond, label %bb, label %bb30
+
+bb:   ; preds = %entry
+  unwind
+
+bb30:   ; preds = %entry
+  %tmp35 = call i32 @report__ident_int( i32 50 )    ; <i32> [#uses=1]
+  %tmp3536 = sitofp i32 %tmp35 to float   ; <float> [#uses=4]
+  %tmp38 = fcmp ult float %tmp3536, 0xC7EFFFFFE0000000    ; <i1> [#uses=1]
+  %tmp44 = fcmp ugt float %tmp3536, 0x47EFFFFFE0000000    ; <i1> [#uses=1]
+  %bothcond226 = or i1 %tmp38, %tmp44   ; <i1> [#uses=1]
+  br i1 %bothcond226, label %bb47, label %bb49
+
+bb47:   ; preds = %bb30
+  unwind
+
+bb49:   ; preds = %bb30
+  %tmp60 = fcmp ult float %tmp3536, %tmp2021    ; <i1> [#uses=1]
+  %tmp60.not = xor i1 %tmp60, true    ; <i1> [#uses=1]
+  %tmp65 = fcmp olt float %tmp2021, 0xC7EFFFFFE0000000    ; <i1> [#uses=1]
+  %bothcond227 = and i1 %tmp65, %tmp60.not    ; <i1> [#uses=1]
+  br i1 %bothcond227, label %cond_true68, label %cond_next70
+
+cond_true68:    ; preds = %bb49
+  unwind
+
+cond_next70:    ; preds = %bb49
+  %tmp71 = call i32 @report__ident_int( i32 -30 )   ; <i32> [#uses=1]
+  %tmp7172 = sitofp i32 %tmp71 to float   ; <float> [#uses=3]
+  %tmp74 = fcmp ult float %tmp7172, 0xC7EFFFFFE0000000    ; <i1> [#uses=1]
+  %tmp80 = fcmp ugt float %tmp7172, 0x47EFFFFFE0000000    ; <i1> [#uses=1]
+  %bothcond228 = or i1 %tmp74, %tmp80   ; <i1> [#uses=1]
+  br i1 %bothcond228, label %bb83, label %bb85
+
+bb83:   ; preds = %cond_next70
+  unwind
+
+bb85:   ; preds = %cond_next70
+  %tmp90 = getelementptr %struct.FRAME.c34003a* %FRAME.31, i32 0, i32 1   ; <float*> [#uses=3]
+  store float %tmp7172, float* %tmp90
+  %tmp92 = call i32 @report__ident_int( i32 30 )    ; <i32> [#uses=1]
+  %tmp9293 = sitofp i32 %tmp92 to float   ; <float> [#uses=7]
+  %tmp95 = fcmp ult float %tmp9293, 0xC7EFFFFFE0000000    ; <i1> [#uses=1]
+  %tmp101 = fcmp ugt float %tmp9293, 0x47EFFFFFE0000000   ; <i1> [#uses=1]
+  %bothcond229 = or i1 %tmp95, %tmp101    ; <i1> [#uses=1]
+  br i1 %bothcond229, label %bb104, label %bb106
+
+bb104:    ; preds = %bb85
+  unwind
+
+bb106:    ; preds = %bb85
+  %tmp111 = getelementptr %struct.FRAME.c34003a* %FRAME.31, i32 0, i32 0    ; <float*> [#uses=2]
+  store float %tmp9293, float* %tmp111
+  %tmp123 = load float* %tmp90    ; <float> [#uses=4]
+  %tmp125 = fcmp ult float %tmp9293, %tmp123    ; <i1> [#uses=1]
+  br i1 %tmp125, label %cond_next147, label %cond_true128
+
+cond_true128:   ; preds = %bb106
+  %tmp133 = fcmp olt float %tmp123, %tmp2021    ; <i1> [#uses=1]
+  %tmp142 = fcmp ogt float %tmp9293, %tmp3536   ; <i1> [#uses=1]
+  %bothcond230 = or i1 %tmp133, %tmp142   ; <i1> [#uses=1]
+  br i1 %bothcond230, label %bb145, label %cond_next147
+
+bb145:    ; preds = %cond_true128
+  unwind
+
+cond_next147:   ; preds = %cond_true128, %bb106
+  %tmp157 = fcmp ugt float %tmp123, -3.000000e+01   ; <i1> [#uses=1]
+  %tmp165 = fcmp ult float %tmp9293, -3.000000e+01    ; <i1> [#uses=1]
+  %bothcond231 = or i1 %tmp157, %tmp165   ; <i1> [#uses=1]
+  br i1 %bothcond231, label %bb168, label %bb169
+
+bb168:    ; preds = %cond_next147
+  unwind
+
+bb169:    ; preds = %cond_next147
+  %tmp176 = fcmp ugt float %tmp123, 3.000000e+01    ; <i1> [#uses=1]
+  %tmp184 = fcmp ult float %tmp9293, 3.000000e+01   ; <i1> [#uses=1]
+  %bothcond232 = or i1 %tmp176, %tmp184   ; <i1> [#uses=1]
+  br i1 %bothcond232, label %bb187, label %bb188
+
+bb187:    ; preds = %bb169
+  unwind
+
+bb188:    ; preds = %bb169
+  %tmp192 = call fastcc float @c34003a__ident.154( %struct.FRAME.c34003a* %FRAME.31, float 3.000000e+01 )   ; <float> [#uses=2]
+  %tmp194 = load float* %tmp90    ; <float> [#uses=1]
+  %tmp196 = fcmp ugt float %tmp194, 0.000000e+00    ; <i1> [#uses=1]
+  br i1 %tmp196, label %bb207, label %cond_next200
+
+cond_next200:   ; preds = %bb188
+  %tmp202 = load float* %tmp111   ; <float> [#uses=1]
+  %tmp204 = fcmp ult float %tmp202, 0.000000e+00    ; <i1> [#uses=1]
+  br i1 %tmp204, label %bb207, label %bb208
+
+bb207:    ; preds = %cond_next200, %bb188
+  unwind
+
+bb208:    ; preds = %cond_next200
+  %tmp212 = call fastcc float @c34003a__ident.154( %struct.FRAME.c34003a* %FRAME.31, float 0.000000e+00 )   ; <float> [#uses=1]
+  %tmp214 = fcmp oge float %tmp212, %tmp192   ; <i1> [#uses=1]
+  %tmp217 = fcmp oge float %tmp192, 1.000000e+02    ; <i1> [#uses=1]
+  %tmp221 = or i1 %tmp214, %tmp217    ; <i1> [#uses=1]
+  br i1 %tmp221, label %cond_true224, label %UnifiedReturnBlock
+
+cond_true224:   ; preds = %bb208
+  call void @abort( ) noreturn
+  ret void
+
+UnifiedReturnBlock:   ; preds = %bb208
+  ret void
+}
+
+declare fastcc float @c34003a__ident.154(%struct.FRAME.c34003a* %CHAIN.32, float %x) 
+
+declare i32 @report__ident_int(i32 %x)
+
+declare void @abort() noreturn
diff --git a/test/CodeGen/X86/2007-06-15-IntToMMX.ll b/test/CodeGen/X86/2007-06-15-IntToMMX.ll
new file mode 100644
index 0000000..6128d8b
--- /dev/null
+++ b/test/CodeGen/X86/2007-06-15-IntToMMX.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep paddusw
+@R = external global <1 x i64>          ; <<1 x i64>*> [#uses=1]
+
+define void @foo(<1 x i64> %A, <1 x i64> %B) {
+entry:
+        %tmp4 = bitcast <1 x i64> %B to <4 x i16>               ; <<4 x i16>> [#uses=1]
+        %tmp6 = bitcast <1 x i64> %A to <4 x i16>               ; <<4 x i16>> [#uses=1]
+        %tmp7 = tail call <4 x i16> @llvm.x86.mmx.paddus.w( <4 x i16> %tmp6, <4 x i16> %tmp4 )   ; <<4 x i16>> [#uses=1]
+        %tmp8 = bitcast <4 x i16> %tmp7 to <1 x i64>            ; <<1 x i64>> [#uses=1]
+        store <1 x i64> %tmp8, <1 x i64>* @R
+        tail call void @llvm.x86.mmx.emms( )
+        ret void
+}
+
+declare <4 x i16> @llvm.x86.mmx.paddus.w(<4 x i16>, <4 x i16>)
+
+declare void @llvm.x86.mmx.emms()
diff --git a/test/CodeGen/X86/2007-06-28-X86-64-isel.ll b/test/CodeGen/X86/2007-06-28-X86-64-isel.ll
new file mode 100644
index 0000000..9d42c49
--- /dev/null
+++ b/test/CodeGen/X86/2007-06-28-X86-64-isel.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse2
+
+define void @test() {
+	%tmp1 = call <8 x i16> @llvm.x86.sse2.pmins.w( <8 x i16> zeroinitializer, <8 x i16> bitcast (<4 x i32> < i32 7, i32 7, i32 7, i32 7 > to <8 x i16>) )
+	%tmp2 = bitcast <8 x i16> %tmp1 to <4 x i32>
+	br i1 false, label %bb1, label %bb2
+
+bb2:
+	%tmp38007.i = extractelement <4 x i32> %tmp2, i32 3
+	ret void
+
+bb1:
+	ret void
+}
+
+declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>)
diff --git a/test/CodeGen/X86/2007-06-29-DAGCombinerBug.ll b/test/CodeGen/X86/2007-06-29-DAGCombinerBug.ll
new file mode 100644
index 0000000..d2d6388
--- /dev/null
+++ b/test/CodeGen/X86/2007-06-29-DAGCombinerBug.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2
+
+define void @test() {
+entry:
+	br i1 false, label %bb13944.preheader, label %cond_true418
+
+cond_true418:		; preds = %entry
+	ret void
+
+bb13944.preheader:		; preds = %entry
+	br i1 false, label %bb3517, label %bb13968.preheader
+
+bb3517:		; preds = %bb13944.preheader
+	br i1 false, label %cond_false7408, label %cond_next11422
+
+cond_false7408:		; preds = %bb3517
+	switch i32 0, label %cond_false10578 [
+		 i32 7, label %cond_next11422
+		 i32 6, label %cond_true7828
+		 i32 1, label %cond_true10095
+		 i32 3, label %cond_true10095
+		 i32 5, label %cond_true10176
+		 i32 24, label %cond_true10176
+	]
+
+cond_true7828:		; preds = %cond_false7408
+	br i1 false, label %cond_next8191, label %cond_true8045
+
+cond_true8045:		; preds = %cond_true7828
+	ret void
+
+cond_next8191:		; preds = %cond_true7828
+	%tmp8234 = sub <4 x i32> < i32 939524096, i32 939524096, i32 939524096, i32 939524096 >, zeroinitializer		; <<4 x i32>> [#uses=0]
+	ret void
+
+cond_true10095:		; preds = %cond_false7408, %cond_false7408
+	ret void
+
+cond_true10176:		; preds = %cond_false7408, %cond_false7408
+	ret void
+
+cond_false10578:		; preds = %cond_false7408
+	ret void
+
+cond_next11422:		; preds = %cond_false7408, %bb3517
+	ret void
+
+bb13968.preheader:		; preds = %bb13944.preheader
+	ret void
+}
diff --git a/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll b/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll
new file mode 100644
index 0000000..dc11eec
--- /dev/null
+++ b/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2
+
+define void @test(<4 x float>* %arg) {
+	%tmp89 = getelementptr <4 x float>* %arg, i64 3
+	%tmp1144 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, zeroinitializer
+	store <4 x float> %tmp1144, <4 x float>* null
+	%tmp1149 = load <4 x float>* %tmp89
+	%tmp1150 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp1149
+	store <4 x float> %tmp1150, <4 x float>* %tmp89
+	ret void
+}
diff --git a/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll b/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
new file mode 100644
index 0000000..2c513f1
--- /dev/null
+++ b/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {movd	%rsi, %mm0}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {movd	%rdi, %mm1}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {paddusw	%mm0, %mm1}
+
+@R = external global <1 x i64>		; <<1 x i64>*> [#uses=1]
+
+define void @foo(<1 x i64> %A, <1 x i64> %B) nounwind {
+entry:
+	%tmp4 = bitcast <1 x i64> %B to <4 x i16>		; <<4 x i16>> [#uses=1]
+	%tmp6 = bitcast <1 x i64> %A to <4 x i16>		; <<4 x i16>> [#uses=1]
+	%tmp7 = tail call <4 x i16> @llvm.x86.mmx.paddus.w( <4 x i16> %tmp6, <4 x i16> %tmp4 )		; <<4 x i16>> [#uses=1]
+	%tmp8 = bitcast <4 x i16> %tmp7 to <1 x i64>		; <<1 x i64>> [#uses=1]
+	store <1 x i64> %tmp8, <1 x i64>* @R
+	tail call void @llvm.x86.mmx.emms( )
+	ret void
+}
+
+declare <4 x i16> @llvm.x86.mmx.paddus.w(<4 x i16>, <4 x i16>)
+
+declare void @llvm.x86.mmx.emms()
diff --git a/test/CodeGen/X86/2007-07-10-StackerAssert.ll b/test/CodeGen/X86/2007-07-10-StackerAssert.ll
new file mode 100644
index 0000000..d611677
--- /dev/null
+++ b/test/CodeGen/X86/2007-07-10-StackerAssert.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -mcpu=athlon -relocation-model=pic
+; PR1545
+
+@.str97 = external constant [56 x i8]		; <[56 x i8]*> [#uses=1]
+
+declare void @PR_LogPrint(i8*, ...)
+
+define i32 @_ZN13nsPrintEngine19SetupToPrintContentEP16nsIDeviceContextP12nsIDOMWindow() {
+entry:
+	br i1 false, label %cond_true122, label %cond_next453
+
+cond_true122:		; preds = %entry
+	br i1 false, label %bb164, label %cond_true136
+
+cond_true136:		; preds = %cond_true122
+	ret i32 0
+
+bb164:		; preds = %cond_true122
+	br i1 false, label %bb383, label %cond_true354
+
+cond_true354:		; preds = %bb164
+	ret i32 0
+
+bb383:		; preds = %bb164
+	%tmp408 = load float* null		; <float> [#uses=2]
+	br i1 false, label %cond_true425, label %cond_next443
+
+cond_true425:		; preds = %bb383
+	%tmp430 = load float* null		; <float> [#uses=1]
+	%tmp432 = fsub float %tmp430, %tmp408		; <float> [#uses=1]
+	%tmp432433 = fpext float %tmp432 to double		; <double> [#uses=1]
+	%tmp434435 = fpext float %tmp408 to double		; <double> [#uses=1]
+	call void (i8*, ...)* @PR_LogPrint( i8* getelementptr ([56 x i8]* @.str97, i32 0, i32 0), double 0.000000e+00, double %tmp434435, double %tmp432433 )
+	ret i32 0
+
+cond_next443:		; preds = %bb383
+	ret i32 0
+
+cond_next453:		; preds = %entry
+	ret i32 0
+}
diff --git a/test/CodeGen/X86/2007-07-18-Vector-Extract.ll b/test/CodeGen/X86/2007-07-18-Vector-Extract.ll
new file mode 100644
index 0000000..8625b27
--- /dev/null
+++ b/test/CodeGen/X86/2007-07-18-Vector-Extract.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse | grep {movq	(%rdi), %rax}
+; RUN: llc < %s -march=x86-64 -mattr=+sse | grep {movq	8(%rdi), %rax}
+define i64 @foo_0(<2 x i64>* %val) {
+entry:
+        %val12 = getelementptr <2 x i64>* %val, i32 0, i32 0            ; <i64*> [#uses=1]
+        %tmp7 = load i64* %val12                ; <i64> [#uses=1]
+        ret i64 %tmp7
+}
+
+define i64 @foo_1(<2 x i64>* %val) {
+entry:
+        %tmp2.gep = getelementptr <2 x i64>* %val, i32 0, i32 1         ; <i64*> [#uses=1]
+        %tmp4 = load i64* %tmp2.gep             ; <i64> [#uses=1]
+        ret i64 %tmp4
+}
diff --git a/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll b/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll
new file mode 100644
index 0000000..3cd8052
--- /dev/null
+++ b/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 | not grep movl
+
+define i8 @t(i8 zeroext  %x, i8 zeroext  %y) zeroext  {
+	%tmp2 = add i8 %x, 2
+	%tmp4 = add i8 %y, -2
+	%tmp5 = mul i8 %tmp4, %tmp2
+	ret i8 %tmp5
+}
diff --git a/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll b/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
new file mode 100644
index 0000000..7768f36
--- /dev/null
+++ b/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
@@ -0,0 +1,235 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep "movb   %ah, %r"
+
+	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, [4 x i8], i64 }
+	%struct.PyBoolScalarObject = type { i64, %struct._typeobject*, i8 }
+	%struct.PyBufferProcs = type { i64 (%struct.PyObject*, i64, i8**)*, i64 (%struct.PyObject*, i64, i8**)*, i64 (%struct.PyObject*, i64*)*, i64 (%struct.PyObject*, i64, i8**)* }
+	%struct.PyGetSetDef = type { i8*, %struct.PyObject* (%struct.PyObject*, i8*)*, i32 (%struct.PyObject*, %struct.PyObject*, i8*)*, i8*, i8* }
+	%struct.PyMappingMethods = type { i64 (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, i32 (%struct.PyObject*, %struct.PyObject*, %struct.PyObject*)* }
+	%struct.PyMemberDef = type opaque
+	%struct.PyMethodDef = type { i8*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, i32, i8* }
+	%struct.PyNumberMethods = type { %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*)*, i32 (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, i32 (%struct.PyObject**, %struct.PyObject**)*, %struct.PyObject* (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* 
(%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*)* }
+	%struct.PyObject = type { i64, %struct._typeobject* }
+	%struct.PySequenceMethods = type { i64 (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, i64)*, %struct.PyObject* (%struct.PyObject*, i64)*, %struct.PyObject* (%struct.PyObject*, i64, i64)*, i32 (%struct.PyObject*, i64, %struct.PyObject*)*, i32 (%struct.PyObject*, i64, i64, %struct.PyObject*)*, i32 (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, i64)* }
+	%struct.PyTupleObject = type { i64, %struct._typeobject*, i64, [1 x %struct.PyObject*] }
+	%struct.__sFILEX = type opaque
+	%struct.__sbuf = type { i8*, i32 }
+	%struct._typeobject = type { i64, %struct._typeobject*, i64, i8*, i64, i64, void (%struct.PyObject*)*, i32 (%struct.PyObject*, %struct.FILE*, i32)*, %struct.PyObject* (%struct.PyObject*, i8*)*, i32 (%struct.PyObject*, i8*, %struct.PyObject*)*, i32 (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*)*, %struct.PyNumberMethods*, %struct.PySequenceMethods*, %struct.PyMappingMethods*, i64 (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, i32 (%struct.PyObject*, %struct.PyObject*, %struct.PyObject*)*, %struct.PyBufferProcs*, i64, i8*, i32 (%struct.PyObject*, i32 (%struct.PyObject*, i8*)*, i8*)*, i32 (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*, i32)*, i64, %struct.PyObject* (%struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*)*, %struct.PyMethodDef*, %struct.PyMemberDef*, %struct.PyGetSetDef*, %struct._typeobject*, %struct.PyObject*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*, %struct.PyObject*)*, i32 (%struct.PyObject*, %struct.PyObject*, %struct.PyObject*)*, i64, i32 (%struct.PyObject*, %struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct._typeobject*, i64)*, %struct.PyObject* (%struct._typeobject*, %struct.PyObject*, %struct.PyObject*)*, void (i8*)*, i32 (%struct.PyObject*)*, %struct.PyObject*, %struct.PyObject*, %struct.PyObject*, %struct.PyObject*, %struct.PyObject*, void (%struct.PyObject*)* }
+@PyArray_API = external global i8**		; <i8***> [#uses=4]
+@PyUFunc_API = external global i8**		; <i8***> [#uses=4]
+@.str5 = external constant [14 x i8]		; <[14 x i8]*> [#uses=1]
+
+define %struct.PyObject* @ubyte_divmod(%struct.PyObject* %a, %struct.PyObject* %b) {
+entry:
+	%arg1 = alloca i8, align 1		; <i8*> [#uses=3]
+	%arg2 = alloca i8, align 1		; <i8*> [#uses=3]
+	%first = alloca i32, align 4		; <i32*> [#uses=2]
+	%bufsize = alloca i32, align 4		; <i32*> [#uses=1]
+	%errmask = alloca i32, align 4		; <i32*> [#uses=2]
+	%errobj = alloca %struct.PyObject*, align 8		; <%struct.PyObject**> [#uses=2]
+	%tmp3.i = call fastcc i32 @_ubyte_convert_to_ctype( %struct.PyObject* %a, i8* %arg1 )		; <i32> [#uses=2]
+	%tmp5.i = icmp slt i32 %tmp3.i, 0		; <i1> [#uses=1]
+	br i1 %tmp5.i, label %_ubyte_convert2_to_ctypes.exit, label %cond_next.i
+
+cond_next.i:		; preds = %entry
+	%tmp11.i = call fastcc i32 @_ubyte_convert_to_ctype( %struct.PyObject* %b, i8* %arg2 )		; <i32> [#uses=2]
+	%tmp13.i = icmp slt i32 %tmp11.i, 0		; <i1> [#uses=1]
+	%retval.i = select i1 %tmp13.i, i32 %tmp11.i, i32 0		; <i32> [#uses=1]
+	switch i32 %retval.i, label %bb35 [
+		 i32 -2, label %bb17
+		 i32 -1, label %bb4
+	]
+
+_ubyte_convert2_to_ctypes.exit:		; preds = %entry
+	switch i32 %tmp3.i, label %bb35 [
+		 i32 -2, label %bb17
+		 i32 -1, label %bb4
+	]
+
+bb4:		; preds = %_ubyte_convert2_to_ctypes.exit, %cond_next.i
+	%tmp5 = load i8*** @PyArray_API, align 8		; <i8**> [#uses=1]
+	%tmp6 = getelementptr i8** %tmp5, i64 2		; <i8**> [#uses=1]
+	%tmp7 = load i8** %tmp6		; <i8*> [#uses=1]
+	%tmp78 = bitcast i8* %tmp7 to %struct._typeobject*		; <%struct._typeobject*> [#uses=1]
+	%tmp9 = getelementptr %struct._typeobject* %tmp78, i32 0, i32 12		; <%struct.PyNumberMethods**> [#uses=1]
+	%tmp10 = load %struct.PyNumberMethods** %tmp9		; <%struct.PyNumberMethods*> [#uses=1]
+	%tmp11 = getelementptr %struct.PyNumberMethods* %tmp10, i32 0, i32 5		; <%struct.PyObject* (%struct.PyObject*, %struct.PyObject*)**> [#uses=1]
+	%tmp12 = load %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)** %tmp11		; <%struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*> [#uses=1]
+	%tmp15 = call %struct.PyObject* %tmp12( %struct.PyObject* %a, %struct.PyObject* %b )		; <%struct.PyObject*> [#uses=1]
+	ret %struct.PyObject* %tmp15
+
+bb17:		; preds = %_ubyte_convert2_to_ctypes.exit, %cond_next.i
+	%tmp18 = call %struct.PyObject* @PyErr_Occurred( )		; <%struct.PyObject*> [#uses=1]
+	%tmp19 = icmp eq %struct.PyObject* %tmp18, null		; <i1> [#uses=1]
+	br i1 %tmp19, label %cond_next, label %UnifiedReturnBlock
+
+cond_next:		; preds = %bb17
+	%tmp22 = load i8*** @PyArray_API, align 8		; <i8**> [#uses=1]
+	%tmp23 = getelementptr i8** %tmp22, i64 10		; <i8**> [#uses=1]
+	%tmp24 = load i8** %tmp23		; <i8*> [#uses=1]
+	%tmp2425 = bitcast i8* %tmp24 to %struct._typeobject*		; <%struct._typeobject*> [#uses=1]
+	%tmp26 = getelementptr %struct._typeobject* %tmp2425, i32 0, i32 12		; <%struct.PyNumberMethods**> [#uses=1]
+	%tmp27 = load %struct.PyNumberMethods** %tmp26		; <%struct.PyNumberMethods*> [#uses=1]
+	%tmp28 = getelementptr %struct.PyNumberMethods* %tmp27, i32 0, i32 5		; <%struct.PyObject* (%struct.PyObject*, %struct.PyObject*)**> [#uses=1]
+	%tmp29 = load %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)** %tmp28		; <%struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*> [#uses=1]
+	%tmp32 = call %struct.PyObject* %tmp29( %struct.PyObject* %a, %struct.PyObject* %b )		; <%struct.PyObject*> [#uses=1]
+	ret %struct.PyObject* %tmp32
+
+bb35:		; preds = %_ubyte_convert2_to_ctypes.exit, %cond_next.i
+	%tmp36 = load i8*** @PyUFunc_API, align 8		; <i8**> [#uses=1]
+	%tmp37 = getelementptr i8** %tmp36, i64 27		; <i8**> [#uses=1]
+	%tmp38 = load i8** %tmp37		; <i8*> [#uses=1]
+	%tmp3839 = bitcast i8* %tmp38 to void ()*		; <void ()*> [#uses=1]
+	call void %tmp3839( )
+	%tmp40 = load i8* %arg2, align 1		; <i8> [#uses=4]
+	%tmp1.i = icmp eq i8 %tmp40, 0		; <i1> [#uses=2]
+	br i1 %tmp1.i, label %cond_true.i, label %cond_false.i
+
+cond_true.i:		; preds = %bb35
+	%tmp3.i196 = call i32 @feraiseexcept( i32 4 )		; <i32> [#uses=0]
+	%tmp46207 = load i8* %arg2, align 1		; <i8> [#uses=3]
+	%tmp48208 = load i8* %arg1, align 1		; <i8> [#uses=2]
+	%tmp1.i197210 = icmp eq i8 %tmp48208, 0		; <i1> [#uses=1]
+	%tmp4.i212 = icmp eq i8 %tmp46207, 0		; <i1> [#uses=1]
+	%tmp7.i198213 = or i1 %tmp1.i197210, %tmp4.i212		; <i1> [#uses=1]
+	br i1 %tmp7.i198213, label %cond_true.i200, label %cond_next17.i
+
+cond_false.i:		; preds = %bb35
+	%tmp42 = load i8* %arg1, align 1		; <i8> [#uses=3]
+	%tmp7.i = udiv i8 %tmp42, %tmp40		; <i8> [#uses=2]
+	%tmp1.i197 = icmp eq i8 %tmp42, 0		; <i1> [#uses=1]
+	%tmp7.i198 = or i1 %tmp1.i197, %tmp1.i		; <i1> [#uses=1]
+	br i1 %tmp7.i198, label %cond_true.i200, label %cond_next17.i
+
+cond_true.i200:		; preds = %cond_false.i, %cond_true.i
+	%out.0 = phi i8 [ 0, %cond_true.i ], [ %tmp7.i, %cond_false.i ]		; <i8> [#uses=2]
+	%tmp46202.0 = phi i8 [ %tmp46207, %cond_true.i ], [ %tmp40, %cond_false.i ]		; <i8> [#uses=1]
+	%tmp11.i199 = icmp eq i8 %tmp46202.0, 0		; <i1> [#uses=1]
+	br i1 %tmp11.i199, label %cond_true14.i, label %ubyte_ctype_remainder.exit
+
+cond_true14.i:		; preds = %cond_true.i200
+	%tmp15.i = call i32 @feraiseexcept( i32 4 )		; <i32> [#uses=0]
+	br label %ubyte_ctype_remainder.exit
+
+cond_next17.i:		; preds = %cond_false.i, %cond_true.i
+	%out.1 = phi i8 [ 0, %cond_true.i ], [ %tmp7.i, %cond_false.i ]		; <i8> [#uses=1]
+	%tmp46202.1 = phi i8 [ %tmp46207, %cond_true.i ], [ %tmp40, %cond_false.i ]		; <i8> [#uses=1]
+	%tmp48205.1 = phi i8 [ %tmp48208, %cond_true.i ], [ %tmp42, %cond_false.i ]		; <i8> [#uses=1]
+	%tmp20.i = urem i8 %tmp48205.1, %tmp46202.1		; <i8> [#uses=1]
+	br label %ubyte_ctype_remainder.exit
+
+ubyte_ctype_remainder.exit:		; preds = %cond_next17.i, %cond_true14.i, %cond_true.i200
+	%out2.0 = phi i8 [ %tmp20.i, %cond_next17.i ], [ 0, %cond_true14.i ], [ 0, %cond_true.i200 ]		; <i8> [#uses=1]
+	%out.2 = phi i8 [ %out.1, %cond_next17.i ], [ %out.0, %cond_true14.i ], [ %out.0, %cond_true.i200 ]		; <i8> [#uses=1]
+	%tmp52 = load i8*** @PyUFunc_API, align 8		; <i8**> [#uses=1]
+	%tmp53 = getelementptr i8** %tmp52, i64 28		; <i8**> [#uses=1]
+	%tmp54 = load i8** %tmp53		; <i8*> [#uses=1]
+	%tmp5455 = bitcast i8* %tmp54 to i32 ()*		; <i32 ()*> [#uses=1]
+	%tmp56 = call i32 %tmp5455( )		; <i32> [#uses=2]
+	%tmp58 = icmp eq i32 %tmp56, 0		; <i1> [#uses=1]
+	br i1 %tmp58, label %cond_next89, label %cond_true61
+
+cond_true61:		; preds = %ubyte_ctype_remainder.exit
+	%tmp62 = load i8*** @PyUFunc_API, align 8		; <i8**> [#uses=1]
+	%tmp63 = getelementptr i8** %tmp62, i64 25		; <i8**> [#uses=1]
+	%tmp64 = load i8** %tmp63		; <i8*> [#uses=1]
+	%tmp6465 = bitcast i8* %tmp64 to i32 (i8*, i32*, i32*, %struct.PyObject**)*		; <i32 (i8*, i32*, i32*, %struct.PyObject**)*> [#uses=1]
+	%tmp67 = call i32 %tmp6465( i8* getelementptr ([14 x i8]* @.str5, i32 0, i64 0), i32* %bufsize, i32* %errmask, %struct.PyObject** %errobj )		; <i32> [#uses=1]
+	%tmp68 = icmp slt i32 %tmp67, 0		; <i1> [#uses=1]
+	br i1 %tmp68, label %UnifiedReturnBlock, label %cond_next73
+
+cond_next73:		; preds = %cond_true61
+	store i32 1, i32* %first, align 4
+	%tmp74 = load i8*** @PyUFunc_API, align 8		; <i8**> [#uses=1]
+	%tmp75 = getelementptr i8** %tmp74, i64 29		; <i8**> [#uses=1]
+	%tmp76 = load i8** %tmp75		; <i8*> [#uses=1]
+	%tmp7677 = bitcast i8* %tmp76 to i32 (i32, %struct.PyObject*, i32, i32*)*		; <i32 (i32, %struct.PyObject*, i32, i32*)*> [#uses=1]
+	%tmp79 = load %struct.PyObject** %errobj, align 8		; <%struct.PyObject*> [#uses=1]
+	%tmp80 = load i32* %errmask, align 4		; <i32> [#uses=1]
+	%tmp82 = call i32 %tmp7677( i32 %tmp80, %struct.PyObject* %tmp79, i32 %tmp56, i32* %first )		; <i32> [#uses=1]
+	%tmp83 = icmp eq i32 %tmp82, 0		; <i1> [#uses=1]
+	br i1 %tmp83, label %cond_next89, label %UnifiedReturnBlock
+
+cond_next89:		; preds = %cond_next73, %ubyte_ctype_remainder.exit
+	%tmp90 = call %struct.PyObject* @PyTuple_New( i64 2 )		; <%struct.PyObject*> [#uses=9]
+	%tmp92 = icmp eq %struct.PyObject* %tmp90, null		; <i1> [#uses=1]
+	br i1 %tmp92, label %UnifiedReturnBlock, label %cond_next97
+
+cond_next97:		; preds = %cond_next89
+	%tmp98 = load i8*** @PyArray_API, align 8		; <i8**> [#uses=1]
+	%tmp99 = getelementptr i8** %tmp98, i64 25		; <i8**> [#uses=1]
+	%tmp100 = load i8** %tmp99		; <i8*> [#uses=1]
+	%tmp100101 = bitcast i8* %tmp100 to %struct._typeobject*		; <%struct._typeobject*> [#uses=2]
+	%tmp102 = getelementptr %struct._typeobject* %tmp100101, i32 0, i32 38		; <%struct.PyObject* (%struct._typeobject*, i64)**> [#uses=1]
+	%tmp103 = load %struct.PyObject* (%struct._typeobject*, i64)** %tmp102		; <%struct.PyObject* (%struct._typeobject*, i64)*> [#uses=1]
+	%tmp108 = call %struct.PyObject* %tmp103( %struct._typeobject* %tmp100101, i64 0 )		; <%struct.PyObject*> [#uses=3]
+	%tmp110 = icmp eq %struct.PyObject* %tmp108, null		; <i1> [#uses=1]
+	br i1 %tmp110, label %cond_true113, label %cond_next135
+
+cond_true113:		; preds = %cond_next97
+	%tmp115 = getelementptr %struct.PyObject* %tmp90, i32 0, i32 0		; <i64*> [#uses=2]
+	%tmp116 = load i64* %tmp115		; <i64> [#uses=1]
+	%tmp117 = add i64 %tmp116, -1		; <i64> [#uses=2]
+	store i64 %tmp117, i64* %tmp115
+	%tmp123 = icmp eq i64 %tmp117, 0		; <i1> [#uses=1]
+	br i1 %tmp123, label %cond_true126, label %UnifiedReturnBlock
+
+cond_true126:		; preds = %cond_true113
+	%tmp128 = getelementptr %struct.PyObject* %tmp90, i32 0, i32 1		; <%struct._typeobject**> [#uses=1]
+	%tmp129 = load %struct._typeobject** %tmp128		; <%struct._typeobject*> [#uses=1]
+	%tmp130 = getelementptr %struct._typeobject* %tmp129, i32 0, i32 6		; <void (%struct.PyObject*)**> [#uses=1]
+	%tmp131 = load void (%struct.PyObject*)** %tmp130		; <void (%struct.PyObject*)*> [#uses=1]
+	call void %tmp131( %struct.PyObject* %tmp90 )
+	ret %struct.PyObject* null
+
+cond_next135:		; preds = %cond_next97
+	%tmp136137 = bitcast %struct.PyObject* %tmp108 to %struct.PyBoolScalarObject*		; <%struct.PyBoolScalarObject*> [#uses=1]
+	%tmp139 = getelementptr %struct.PyBoolScalarObject* %tmp136137, i32 0, i32 2		; <i8*> [#uses=1]
+	store i8 %out.2, i8* %tmp139
+	%tmp140141 = bitcast %struct.PyObject* %tmp90 to %struct.PyTupleObject*		; <%struct.PyTupleObject*> [#uses=2]
+	%tmp143 = getelementptr %struct.PyTupleObject* %tmp140141, i32 0, i32 3, i64 0		; <%struct.PyObject**> [#uses=1]
+	store %struct.PyObject* %tmp108, %struct.PyObject** %tmp143
+	%tmp145 = load i8*** @PyArray_API, align 8		; <i8**> [#uses=1]
+	%tmp146 = getelementptr i8** %tmp145, i64 25		; <i8**> [#uses=1]
+	%tmp147 = load i8** %tmp146		; <i8*> [#uses=1]
+	%tmp147148 = bitcast i8* %tmp147 to %struct._typeobject*		; <%struct._typeobject*> [#uses=2]
+	%tmp149 = getelementptr %struct._typeobject* %tmp147148, i32 0, i32 38		; <%struct.PyObject* (%struct._typeobject*, i64)**> [#uses=1]
+	%tmp150 = load %struct.PyObject* (%struct._typeobject*, i64)** %tmp149		; <%struct.PyObject* (%struct._typeobject*, i64)*> [#uses=1]
+	%tmp155 = call %struct.PyObject* %tmp150( %struct._typeobject* %tmp147148, i64 0 )		; <%struct.PyObject*> [#uses=3]
+	%tmp157 = icmp eq %struct.PyObject* %tmp155, null		; <i1> [#uses=1]
+	br i1 %tmp157, label %cond_true160, label %cond_next182
+
+cond_true160:		; preds = %cond_next135
+	%tmp162 = getelementptr %struct.PyObject* %tmp90, i32 0, i32 0		; <i64*> [#uses=2]
+	%tmp163 = load i64* %tmp162		; <i64> [#uses=1]
+	%tmp164 = add i64 %tmp163, -1		; <i64> [#uses=2]
+	store i64 %tmp164, i64* %tmp162
+	%tmp170 = icmp eq i64 %tmp164, 0		; <i1> [#uses=1]
+	br i1 %tmp170, label %cond_true173, label %UnifiedReturnBlock
+
+cond_true173:		; preds = %cond_true160
+	%tmp175 = getelementptr %struct.PyObject* %tmp90, i32 0, i32 1		; <%struct._typeobject**> [#uses=1]
+	%tmp176 = load %struct._typeobject** %tmp175		; <%struct._typeobject*> [#uses=1]
+	%tmp177 = getelementptr %struct._typeobject* %tmp176, i32 0, i32 6		; <void (%struct.PyObject*)**> [#uses=1]
+	%tmp178 = load void (%struct.PyObject*)** %tmp177		; <void (%struct.PyObject*)*> [#uses=1]
+	call void %tmp178( %struct.PyObject* %tmp90 )
+	ret %struct.PyObject* null
+
+cond_next182:		; preds = %cond_next135
+	%tmp183184 = bitcast %struct.PyObject* %tmp155 to %struct.PyBoolScalarObject*		; <%struct.PyBoolScalarObject*> [#uses=1]
+	%tmp186 = getelementptr %struct.PyBoolScalarObject* %tmp183184, i32 0, i32 2		; <i8*> [#uses=1]
+	store i8 %out2.0, i8* %tmp186
+	%tmp190 = getelementptr %struct.PyTupleObject* %tmp140141, i32 0, i32 3, i64 1		; <%struct.PyObject**> [#uses=1]
+	store %struct.PyObject* %tmp155, %struct.PyObject** %tmp190
+	ret %struct.PyObject* %tmp90
+
+UnifiedReturnBlock:		; preds = %cond_true160, %cond_true113, %cond_next89, %cond_next73, %cond_true61, %bb17
+	ret %struct.PyObject* null
+}
+
+declare i32 @feraiseexcept(i32)
+
+declare fastcc i32 @_ubyte_convert_to_ctype(%struct.PyObject*, i8*)
+
+declare %struct.PyObject* @PyErr_Occurred()
+
+declare %struct.PyObject* @PyTuple_New(i64)
diff --git a/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll b/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll
new file mode 100644
index 0000000..e93092f
--- /dev/null
+++ b/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86 | grep {movsbl}
+
+@X = global i32 0               ; <i32*> [#uses=1]
+
+define i8 @_Z3fooi(i32 %x) signext  {
+entry:
+        store i32 %x, i32* @X, align 4
+        %retval67 = trunc i32 %x to i8          ; <i8> [#uses=1]
+        ret i8 %retval67
+}
diff --git a/test/CodeGen/X86/2007-08-13-AppendingLinkage.ll b/test/CodeGen/X86/2007-08-13-AppendingLinkage.ll
new file mode 100644
index 0000000..c90a85f
--- /dev/null
+++ b/test/CodeGen/X86/2007-08-13-AppendingLinkage.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 | not grep drectve
+; PR1607
+
+%hlvm_programs_element = type { i8*, i32 (i32, i8**)* }
+@hlvm_programs = appending constant [1 x %hlvm_programs_element]
+zeroinitializer
+
+define %hlvm_programs_element* @hlvm_get_programs() {
+entry:
+  ret %hlvm_programs_element* getelementptr([1 x %hlvm_programs_element]*  
+                                            @hlvm_programs, i32 0, i32 0)
+}
diff --git a/test/CodeGen/X86/2007-09-05-InvalidAsm.ll b/test/CodeGen/X86/2007-09-05-InvalidAsm.ll
new file mode 100644
index 0000000..5acb051
--- /dev/null
+++ b/test/CodeGen/X86/2007-09-05-InvalidAsm.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -x86-asm-syntax=intel | not grep {lea\[\[:space:\]\]R}
+
+	%struct.AGenericCall = type { %struct.AGenericManager*, %struct.ComponentParameters*, i32* }
+	%struct.AGenericManager = type <{ i8 }>
+	%struct.ComponentInstanceRecord = type opaque
+	%struct.ComponentParameters = type { [1 x i64] }
+
+define i32 @_ZN12AGenericCall10MapIDPtrAtEsRP23ComponentInstanceRecord(%struct.AGenericCall* %this, i16 signext  %param, %struct.ComponentInstanceRecord** %instance) {
+entry:
+	%tmp4 = icmp slt i16 %param, 0		; <i1> [#uses=1]
+	br i1 %tmp4, label %cond_true, label %cond_next
+
+cond_true:		; preds = %entry
+	%tmp1415 = shl i16 %param, 3		; <i16> [#uses=1]
+	%tmp17 = getelementptr %struct.AGenericCall* %this, i32 0, i32 1		; <%struct.ComponentParameters**> [#uses=1]
+	%tmp18 = load %struct.ComponentParameters** %tmp17, align 8		; <%struct.ComponentParameters*> [#uses=1]
+	%tmp1920 = bitcast %struct.ComponentParameters* %tmp18 to i8*		; <i8*> [#uses=1]
+	%tmp212223 = sext i16 %tmp1415 to i64		; <i64> [#uses=1]
+	%tmp24 = getelementptr i8* %tmp1920, i64 %tmp212223		; <i8*> [#uses=1]
+	%tmp2425 = bitcast i8* %tmp24 to i64*		; <i64*> [#uses=1]
+	%tmp28 = load i64* %tmp2425, align 8		; <i64> [#uses=1]
+	%tmp2829 = inttoptr i64 %tmp28 to i32*		; <i32*> [#uses=1]
+	%tmp31 = getelementptr %struct.AGenericCall* %this, i32 0, i32 2		; <i32**> [#uses=1]
+	store i32* %tmp2829, i32** %tmp31, align 8
+	br label %cond_next
+
+cond_next:		; preds = %cond_true, %entry
+	%tmp4243 = shl i16 %param, 3		; <i16> [#uses=1]
+	%tmp46 = getelementptr %struct.AGenericCall* %this, i32 0, i32 1		; <%struct.ComponentParameters**> [#uses=1]
+	%tmp47 = load %struct.ComponentParameters** %tmp46, align 8		; <%struct.ComponentParameters*> [#uses=1]
+	%tmp4849 = bitcast %struct.ComponentParameters* %tmp47 to i8*		; <i8*> [#uses=1]
+	%tmp505152 = sext i16 %tmp4243 to i64		; <i64> [#uses=1]
+	%tmp53 = getelementptr i8* %tmp4849, i64 %tmp505152		; <i8*> [#uses=1]
+	%tmp5354 = bitcast i8* %tmp53 to i64*		; <i64*> [#uses=1]
+	%tmp58 = load i64* %tmp5354, align 8		; <i64> [#uses=1]
+	%tmp59 = icmp eq i64 %tmp58, 0		; <i1> [#uses=1]
+	br i1 %tmp59, label %UnifiedReturnBlock, label %cond_true63
+
+cond_true63:		; preds = %cond_next
+	%tmp65 = getelementptr %struct.AGenericCall* %this, i32 0, i32 0		; <%struct.AGenericManager**> [#uses=1]
+	%tmp66 = load %struct.AGenericManager** %tmp65, align 8		; <%struct.AGenericManager*> [#uses=1]
+	%tmp69 = tail call i32 @_ZN15AGenericManager24DefaultComponentInstanceERP23ComponentInstanceRecord( %struct.AGenericManager* %tmp66, %struct.ComponentInstanceRecord** %instance )		; <i32> [#uses=1]
+	ret i32 %tmp69
+
+UnifiedReturnBlock:		; preds = %cond_next
+	ret i32 undef
+}
+
+declare i32 @_ZN15AGenericManager24DefaultComponentInstanceERP23ComponentInstanceRecord(%struct.AGenericManager*, %struct.ComponentInstanceRecord**)
diff --git a/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll b/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll
new file mode 100644
index 0000000..c5d2a46
--- /dev/null
+++ b/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll
@@ -0,0 +1,4 @@
+; RUN: llc < %s -march=x86 | grep weak | count 2
+@__gthrw_pthread_once = alias weak i32 (i32*, void ()*)* @pthread_once		; <i32 (i32*, void ()*)*> [#uses=0]
+
+declare extern_weak i32 @pthread_once(i32*, void ()*)
diff --git a/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll b/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll
new file mode 100644
index 0000000..56ee2a3
--- /dev/null
+++ b/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll
@@ -0,0 +1,65 @@
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin -enable-eh  | grep {isNullOrNil].eh"} | count 2
+
+	%struct.NSString = type {  }
+	%struct._objc__method_prototype_list = type opaque
+	%struct._objc_category = type { i8*, i8*, %struct._objc_method_list*, %struct._objc_method_list*, %struct._objc_protocol**, i32, %struct._prop_list_t* }
+	%struct._objc_method = type { %struct.objc_selector*, i8*, i8* }
+	%struct._objc_method_list = type opaque
+	%struct._objc_module = type { i32, i32, i8*, %struct._objc_symtab* }
+	%struct._objc_protocol = type { %struct._objc_protocol_extension*, i8*, %struct._objc_protocol**, %struct._objc__method_prototype_list*, %struct._objc__method_prototype_list* }
+	%struct._objc_protocol_extension = type opaque
+	%struct._objc_symtab = type { i32, %struct.objc_selector**, i16, i16, [1 x i8*] }
+	%struct._prop_list_t = type opaque
+	%struct.anon = type { %struct._objc__method_prototype_list*, i32, [1 x %struct._objc_method] }
+	%struct.objc_selector = type opaque
+@"\01L_OBJC_SYMBOLS" = internal global { i32, i32, i16, i16, [1 x %struct._objc_category*] } {
+    i32 0, 
+    i32 0, 
+    i16 0, 
+    i16 1, 
+    [1 x %struct._objc_category*] [ %struct._objc_category* bitcast ({ i8*, i8*, %struct._objc_method_list*, i32, i32, i32, i32 }* @"\01L_OBJC_CATEGORY_NSString_local" to %struct._objc_category*) ] }, section "__OBJC,__symbols,regular,no_dead_strip"		; <{ i32, i32, i16, i16, [1 x %struct._objc_category*] }*> [#uses=2]
+@"\01L_OBJC_CATEGORY_INSTANCE_METHODS_NSString_local" = internal global { i32, i32, [1 x %struct._objc_method] } {
+    i32 0, 
+    i32 1, 
+    [1 x %struct._objc_method] [ %struct._objc_method {
+        %struct.objc_selector* bitcast ([12 x i8]* @"\01L_OBJC_METH_VAR_NAME_0" to %struct.objc_selector*), 
+        i8* getelementptr ([7 x i8]* @"\01L_OBJC_METH_VAR_TYPE_0", i32 0, i32 0), 
+        i8* bitcast (i8 (%struct.NSString*, %struct.objc_selector*) signext * @"-[NSString(local) isNullOrNil]" to i8*) } ] }, section "__OBJC,__cat_inst_meth,regular,no_dead_strip"		; <{ i32, i32, [1 x %struct._objc_method] }*> [#uses=3]
+@"\01L_OBJC_CATEGORY_NSString_local" = internal global { i8*, i8*, %struct._objc_method_list*, i32, i32, i32, i32 } {
+    i8* getelementptr ([6 x i8]* @"\01L_OBJC_CLASS_NAME_0", i32 0, i32 0), 
+    i8* getelementptr ([9 x i8]* @"\01L_OBJC_CLASS_NAME_1", i32 0, i32 0), 
+    %struct._objc_method_list* bitcast ({ i32, i32, [1 x %struct._objc_method] }* @"\01L_OBJC_CATEGORY_INSTANCE_METHODS_NSString_local" to %struct._objc_method_list*), 
+    i32 0, 
+    i32 0, 
+    i32 28, 
+    i32 0 }, section "__OBJC,__category,regular,no_dead_strip"		; <{ i8*, i8*, %struct._objc_method_list*, i32, i32, i32, i32 }*> [#uses=2]
+@"\01L_OBJC_IMAGE_INFO" = internal constant [2 x i32] zeroinitializer, section "__OBJC,__image_info,regular"		; <[2 x i32]*> [#uses=1]
+@"\01L_OBJC_MODULES" = internal global %struct._objc_module {
+    i32 7, 
+    i32 16, 
+    i8* getelementptr ([1 x i8]* @"\01L_OBJC_CLASS_NAME_2", i32 0, i32 0), 
+    %struct._objc_symtab* bitcast ({ i32, i32, i16, i16, [1 x %struct._objc_category*] }* @"\01L_OBJC_SYMBOLS" to %struct._objc_symtab*) }, section "__OBJC,__module_info,regular,no_dead_strip"		; <%struct._objc_module*> [#uses=1]
+@"\01.objc_class_ref_NSString" = internal global i8* @"\01.objc_class_name_NSString"		; <i8**> [#uses=0]
+@"\01.objc_class_name_NSString" = external global i8		; <i8*> [#uses=1]
+@"\01.objc_category_name_NSString_local" = constant i32 0		; <i32*> [#uses=1]
+@"\01L_OBJC_CLASS_NAME_2" = internal global [1 x i8] zeroinitializer, section "__TEXT,__cstring,cstring_literals"		; <[1 x i8]*> [#uses=2]
+@"\01L_OBJC_CLASS_NAME_1" = internal global [9 x i8] c"NSString\00", section "__TEXT,__cstring,cstring_literals"		; <[9 x i8]*> [#uses=2]
+@"\01L_OBJC_CLASS_NAME_0" = internal global [6 x i8] c"local\00", section "__TEXT,__cstring,cstring_literals"		; <[6 x i8]*> [#uses=2]
+@"\01L_OBJC_METH_VAR_NAME_0" = internal global [12 x i8] c"isNullOrNil\00", section "__TEXT,__cstring,cstring_literals"		; <[12 x i8]*> [#uses=3]
+@"\01L_OBJC_METH_VAR_TYPE_0" = internal global [7 x i8] c"c8@0:4\00", section "__TEXT,__cstring,cstring_literals"		; <[7 x i8]*> [#uses=2]
+@llvm.used = appending global [11 x i8*] [ i8* bitcast ({ i32, i32, i16, i16, [1 x %struct._objc_category*] }* @"\01L_OBJC_SYMBOLS" to i8*), i8* bitcast ({ i32, i32, [1 x %struct._objc_method] }* @"\01L_OBJC_CATEGORY_INSTANCE_METHODS_NSString_local" to i8*), i8* bitcast ({ i8*, i8*, %struct._objc_method_list*, i32, i32, i32, i32 }* @"\01L_OBJC_CATEGORY_NSString_local" to i8*), i8* bitcast ([2 x i32]* @"\01L_OBJC_IMAGE_INFO" to i8*), i8* bitcast (%struct._objc_module* @"\01L_OBJC_MODULES" to i8*), i8* bitcast (i32* @"\01.objc_category_name_NSString_local" to i8*), i8* getelementptr ([1 x i8]* @"\01L_OBJC_CLASS_NAME_2", i32 0, i32 0), i8* getelementptr ([9 x i8]* @"\01L_OBJC_CLASS_NAME_1", i32 0, i32 0), i8* getelementptr ([6 x i8]* @"\01L_OBJC_CLASS_NAME_0", i32 0, i32 0), i8* getelementptr ([12 x i8]* @"\01L_OBJC_METH_VAR_NAME_0", i32 0, i32 0), i8* getelementptr ([7 x i8]* @"\01L_OBJC_METH_VAR_TYPE_0", i32 0, i32 0) ], section "llvm.metadata"		; <[11 x i8*]*> [#uses=0]
+
+define internal i8 @"-[NSString(local) isNullOrNil]"(%struct.NSString* %self, %struct.objc_selector* %_cmd) signext  {
+entry:
+	%self_addr = alloca %struct.NSString*		; <%struct.NSString**> [#uses=1]
+	%_cmd_addr = alloca %struct.objc_selector*		; <%struct.objc_selector**> [#uses=1]
+	%retval = alloca i8, align 1		; <i8*> [#uses=1]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	store %struct.NSString* %self, %struct.NSString** %self_addr
+	store %struct.objc_selector* %_cmd, %struct.objc_selector** %_cmd_addr
+	br label %return
+
+return:		; preds = %entry
+	%retval1 = load i8* %retval		; <i8> [#uses=1]
+	ret i8 %retval1
+}
diff --git a/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll b/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll
new file mode 100644
index 0000000..0ae1897
--- /dev/null
+++ b/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep -- -86
+
+define i16 @f(<4 x float>* %tmp116117.i1061.i) nounwind {
+entry:
+	alloca [4 x <4 x float>]		; <[4 x <4 x float>]*>:0 [#uses=167]
+	alloca [4 x <4 x float>]		; <[4 x <4 x float>]*>:1 [#uses=170]
+	alloca [4 x <4 x i32>]		; <[4 x <4 x i32>]*>:2 [#uses=12]
+	%.sub6235.i = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0		; <<4 x float>*> [#uses=76]
+	%.sub.i = getelementptr [4 x <4 x float>]* %1, i32 0, i32 0		; <<4 x float>*> [#uses=59]
+
+	%tmp124.i1062.i = getelementptr <4 x float>* %tmp116117.i1061.i, i32 63		; <<4 x float>*> [#uses=1]
+	%tmp125.i1063.i = load <4 x float>* %tmp124.i1062.i		; <<4 x float>> [#uses=5]
+	%tmp828.i1077.i = shufflevector <4 x float> %tmp125.i1063.i, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >		; <<4 x float>> [#uses=4]
+	%tmp704.i1085.i = load <4 x float>* %.sub6235.i		; <<4 x float>> [#uses=1]
+	%tmp712.i1086.i = call <4 x float> @llvm.x86.sse.max.ps( <4 x float> %tmp704.i1085.i, <4 x float> %tmp828.i1077.i )		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp712.i1086.i, <4 x float>* %.sub.i
+
+	%tmp2587.i1145.gep.i = getelementptr [4 x <4 x float>]* %1, i32 0, i32 0, i32 2		; <float*> [#uses=1]
+	%tmp5334.i = load float* %tmp2587.i1145.gep.i		; <float> [#uses=5]
+	%tmp2723.i1170.i = insertelement <4 x float> undef, float %tmp5334.i, i32 2		; <<4 x float>> [#uses=5]
+	store <4 x float> %tmp2723.i1170.i, <4 x float>* %.sub6235.i
+
+	%tmp1406.i1367.i = shufflevector <4 x float> %tmp2723.i1170.i, <4 x float> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 >		; <<4 x float>> [#uses=1]
+	%tmp84.i1413.i = load <4 x float>* %.sub6235.i		; <<4 x float>> [#uses=1]
+	%tmp89.i1415.i = fmul <4 x float> %tmp84.i1413.i, %tmp1406.i1367.i		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp89.i1415.i, <4 x float>* %.sub.i
+        ret i16 0
+}
+
+declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>)
diff --git a/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll b/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll
new file mode 100644
index 0000000..4d69715
--- /dev/null
+++ b/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin8"
+
+define x86_fp80 @foo(x86_fp80 %x) nounwind{
+entry:
+	%tmp2 = call x86_fp80 @llvm.sqrt.f80( x86_fp80 %x )
+	ret x86_fp80 %tmp2
+        
+; CHECK: foo:
+; CHECK: fldt 4(%esp)
+; CHECK-NEXT: fsqrt
+; CHECK-NEXT: ret
+}
+
+declare x86_fp80 @llvm.sqrt.f80(x86_fp80)
+
+define x86_fp80 @bar(x86_fp80 %x) nounwind {
+entry:
+	%tmp2 = call x86_fp80 @llvm.powi.f80( x86_fp80 %x, i32 3 )
+	ret x86_fp80 %tmp2
+; CHECK: bar:
+; CHECK: fldt 4(%esp)
+; CHECK-NEXT: fld	%st(0)
+; CHECK-NEXT: fmul	%st(1)
+; CHECK-NEXT: fmulp	%st(1)
+; CHECK-NEXT: ret
+}
+
+declare x86_fp80 @llvm.powi.f80(x86_fp80, i32)
diff --git a/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll b/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll
new file mode 100644
index 0000000..6fc8ec9
--- /dev/null
+++ b/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86 | not grep pushf
+
+	%struct.gl_texture_image = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8* }
+	%struct.gl_texture_object = type { i32, i32, i32, float, [4 x i32], i32, i32, i32, i32, i32, float, [11 x %struct.gl_texture_image*], [1024 x i8], i32, i32, i32, i8, i8*, i8, void (%struct.gl_texture_object*, i32, float*, float*, float*, float*, i8*, i8*, i8*, i8*)*, %struct.gl_texture_object* }
+
+define fastcc void @sample_3d_linear(%struct.gl_texture_object* %tObj, %struct.gl_texture_image* %img, float %s, float %t, float %r, i8* %red, i8* %green, i8* %blue, i8* %alpha) {
+entry:
+	%tmp15 = load i32* null, align 4		; <i32> [#uses=1]
+	%tmp16 = icmp eq i32 %tmp15, 10497		; <i1> [#uses=1]
+	%tmp2152 = call float @floorf( float 0.000000e+00 )		; <float> [#uses=0]
+	br i1 %tmp16, label %cond_true, label %cond_false
+
+cond_true:		; preds = %entry
+	ret void
+
+cond_false:		; preds = %entry
+	ret void
+}
+
+declare float @floorf(float)
diff --git a/test/CodeGen/X86/2007-10-05-3AddrConvert.ll b/test/CodeGen/X86/2007-10-05-3AddrConvert.ll
new file mode 100644
index 0000000..67323e8
--- /dev/null
+++ b/test/CodeGen/X86/2007-10-05-3AddrConvert.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -march=x86 | grep lea
+
+	%struct.anon = type { [3 x double], double, %struct.node*, [64 x %struct.bnode*], [64 x %struct.bnode*] }
+	%struct.bnode = type { i16, double, [3 x double], i32, i32, [3 x double], [3 x double], [3 x double], double, %struct.bnode*, %struct.bnode* }
+	%struct.node = type { i16, double, [3 x double], i32, i32 }
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
+entry:
+	%0 = malloc %struct.anon		; <%struct.anon*> [#uses=2]
+	%1 = getelementptr %struct.anon* %0, i32 0, i32 2		; <%struct.node**> [#uses=1]
+	br label %bb14.i
+
+bb14.i:		; preds = %bb14.i, %entry
+	%i8.0.reg2mem.0.i = phi i32 [ 0, %entry ], [ %2, %bb14.i ]		; <i32> [#uses=1]
+	%2 = add i32 %i8.0.reg2mem.0.i, 1		; <i32> [#uses=2]
+	%exitcond74.i = icmp eq i32 %2, 32		; <i1> [#uses=1]
+	br i1 %exitcond74.i, label %bb32.i, label %bb14.i
+
+bb32.i:		; preds = %bb32.i, %bb14.i
+	%tmp.0.reg2mem.0.i = phi i32 [ %indvar.next63.i, %bb32.i ], [ 0, %bb14.i ]		; <i32> [#uses=1]
+	%indvar.next63.i = add i32 %tmp.0.reg2mem.0.i, 1		; <i32> [#uses=2]
+	%exitcond64.i = icmp eq i32 %indvar.next63.i, 64		; <i1> [#uses=1]
+	br i1 %exitcond64.i, label %bb47.loopexit.i, label %bb32.i
+
+bb.i.i:		; preds = %bb47.loopexit.i
+	unreachable
+
+stepsystem.exit.i:		; preds = %bb47.loopexit.i
+	store %struct.node* null, %struct.node** %1, align 4
+	br label %bb.i6.i
+
+bb.i6.i:		; preds = %bb.i6.i, %stepsystem.exit.i
+	%tmp.0.i.i = add i32 0, -1		; <i32> [#uses=1]
+	%3 = icmp slt i32 %tmp.0.i.i, 0		; <i1> [#uses=1]
+	br i1 %3, label %bb107.i.i, label %bb.i6.i
+
+bb107.i.i:		; preds = %bb107.i.i, %bb.i6.i
+	%q_addr.0.i.i.in = phi %struct.bnode** [ null, %bb107.i.i ], [ %4, %bb.i6.i ]		; <%struct.bnode**> [#uses=1]
+	%q_addr.0.i.i = load %struct.bnode** %q_addr.0.i.i.in		; <%struct.bnode*> [#uses=0]
+	br label %bb107.i.i
+
+bb47.loopexit.i:		; preds = %bb32.i
+	%4 = getelementptr %struct.anon* %0, i32 0, i32 4, i32 0		; <%struct.bnode**> [#uses=1]
+	%5 = icmp eq %struct.node* null, null		; <i1> [#uses=1]
+	br i1 %5, label %stepsystem.exit.i, label %bb.i.i
+}
diff --git a/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll b/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
new file mode 100644
index 0000000..fc11347
--- /dev/null
+++ b/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -march=x86 | not grep movb
+
+define i16 @f(i32* %bp, i32* %ss) signext  {
+entry:
+	br label %cond_next127
+
+cond_next127:		; preds = %cond_next391, %entry
+	%v.1 = phi i32 [ undef, %entry ], [ %tmp411, %cond_next391 ]		; <i32> [#uses=1]
+	%tmp149 = mul i32 0, %v.1		; <i32> [#uses=0]
+	%tmp254 = and i32 0, 15		; <i32> [#uses=1]
+	%tmp256 = and i32 0, 15		; <i32> [#uses=2]
+	br i1 false, label %cond_true267, label %cond_next391
+
+cond_true267:		; preds = %cond_next127
+	ret i16 0
+
+cond_next391:		; preds = %cond_next127
+	%tmp393 = load i32* %ss, align 4		; <i32> [#uses=1]
+	%tmp395 = load i32* %bp, align 4		; <i32> [#uses=2]
+	%tmp396 = shl i32 %tmp393, %tmp395		; <i32> [#uses=2]
+	%tmp398 = sub i32 32, %tmp256		; <i32> [#uses=2]
+	%tmp399 = lshr i32 %tmp396, %tmp398		; <i32> [#uses=1]
+	%tmp405 = lshr i32 %tmp396, 31		; <i32> [#uses=1]
+	%tmp406 = add i32 %tmp405, -1		; <i32> [#uses=1]
+	%tmp409 = lshr i32 %tmp406, %tmp398		; <i32> [#uses=1]
+	%tmp411 = sub i32 %tmp399, %tmp409		; <i32> [#uses=1]
+	%tmp422445 = add i32 %tmp254, 0		; <i32> [#uses=1]
+	%tmp426447 = add i32 %tmp395, %tmp256		; <i32> [#uses=1]
+	store i32 %tmp426447, i32* %bp, align 4
+	%tmp429448 = icmp ult i32 %tmp422445, 63		; <i1> [#uses=1]
+	br i1 %tmp429448, label %cond_next127, label %UnifiedReturnBlock
+
+UnifiedReturnBlock:		; preds = %cond_next391
+	ret i16 0
+}
diff --git a/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll b/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll
new file mode 100644
index 0000000..ea1bbc4
--- /dev/null
+++ b/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep addss | not grep esp
+
+define fastcc void @fht(float* %fz, i16 signext  %n) {
+entry:
+	br i1 true, label %bb171.preheader, label %bb431
+
+bb171.preheader:		; preds = %entry
+	%tmp176 = fadd float 0.000000e+00, 1.000000e+00		; <float> [#uses=2]
+	%gi.1 = getelementptr float* %fz, i32 0		; <float*> [#uses=2]
+	%tmp240 = load float* %gi.1, align 4		; <float> [#uses=1]
+	%tmp242 = fsub float %tmp240, 0.000000e+00		; <float> [#uses=2]
+	%tmp251 = getelementptr float* %fz, i32 0		; <float*> [#uses=1]
+	%tmp252 = load float* %tmp251, align 4		; <float> [#uses=1]
+	%tmp258 = getelementptr float* %fz, i32 0		; <float*> [#uses=2]
+	%tmp259 = load float* %tmp258, align 4		; <float> [#uses=2]
+	%tmp261 = fmul float %tmp259, %tmp176		; <float> [#uses=1]
+	%tmp262 = fsub float 0.000000e+00, %tmp261		; <float> [#uses=2]
+	%tmp269 = fmul float %tmp252, %tmp176		; <float> [#uses=1]
+	%tmp276 = fmul float %tmp259, 0.000000e+00		; <float> [#uses=1]
+	%tmp277 = fadd float %tmp269, %tmp276		; <float> [#uses=2]
+	%tmp281 = getelementptr float* %fz, i32 0		; <float*> [#uses=1]
+	%tmp282 = load float* %tmp281, align 4		; <float> [#uses=2]
+	%tmp284 = fsub float %tmp282, %tmp277		; <float> [#uses=1]
+	%tmp291 = fadd float %tmp282, %tmp277		; <float> [#uses=1]
+	%tmp298 = fsub float 0.000000e+00, %tmp262		; <float> [#uses=1]
+	%tmp305 = fadd float 0.000000e+00, %tmp262		; <float> [#uses=1]
+	%tmp315 = fmul float 0.000000e+00, %tmp291		; <float> [#uses=1]
+	%tmp318 = fmul float 0.000000e+00, %tmp298		; <float> [#uses=1]
+	%tmp319 = fadd float %tmp315, %tmp318		; <float> [#uses=1]
+	%tmp329 = fadd float 0.000000e+00, %tmp319		; <float> [#uses=1]
+	store float %tmp329, float* null, align 4
+	%tmp336 = fsub float %tmp242, 0.000000e+00		; <float> [#uses=1]
+	store float %tmp336, float* %tmp258, align 4
+	%tmp343 = fadd float %tmp242, 0.000000e+00		; <float> [#uses=1]
+	store float %tmp343, float* null, align 4
+	%tmp355 = fmul float 0.000000e+00, %tmp305		; <float> [#uses=1]
+	%tmp358 = fmul float 0.000000e+00, %tmp284		; <float> [#uses=1]
+	%tmp359 = fadd float %tmp355, %tmp358		; <float> [#uses=1]
+	%tmp369 = fadd float 0.000000e+00, %tmp359		; <float> [#uses=1]
+	store float %tmp369, float* %gi.1, align 4
+	ret void
+
+bb431:		; preds = %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll b/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll
new file mode 100644
index 0000000..a3872ad
--- /dev/null
+++ b/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -march=x86 | grep sarl | not grep esp
+
+define i16 @t(i16* %qmatrix, i16* %dct, i16* %acBaseTable, i16* %acExtTable, i16 signext  %acBaseRes, i16 signext  %acMaskRes, i16 signext  %acExtRes, i32* %bitptr, i32* %source, i32 %markerPrefix, i8** %byteptr, i32 %scale, i32 %round, i32 %bits) signext  {
+entry:
+	br label %cond_next127
+
+cond_next127:		; preds = %cond_next391, %entry
+	%tmp151 = add i32 0, %round		; <i32> [#uses=1]
+	%tmp153 = ashr i32 %tmp151, %scale		; <i32> [#uses=1]
+	%tmp158 = xor i32 0, %tmp153		; <i32> [#uses=1]
+	%tmp160 = or i32 %tmp158, 0		; <i32> [#uses=1]
+	%tmp180181 = sext i16 0 to i32		; <i32> [#uses=1]
+	%tmp183 = add i32 %tmp160, 1		; <i32> [#uses=1]
+	br i1 false, label %cond_true188, label %cond_next245
+
+cond_true188:		; preds = %cond_next127
+	ret i16 0
+
+cond_next245:		; preds = %cond_next127
+	%tmp253444 = lshr i32 %tmp180181, 4		; <i32> [#uses=1]
+	%tmp254 = and i32 %tmp253444, 15		; <i32> [#uses=1]
+	br i1 false, label %cond_true267, label %cond_next391
+
+cond_true267:		; preds = %cond_next245
+	%tmp269 = load i8** %byteptr, align 4		; <i8*> [#uses=3]
+	%tmp270 = load i8* %tmp269, align 1		; <i8> [#uses=1]
+	%tmp270271 = zext i8 %tmp270 to i32		; <i32> [#uses=1]
+	%tmp272 = getelementptr i8* %tmp269, i32 1		; <i8*> [#uses=2]
+	store i8* %tmp272, i8** %byteptr, align 4
+	%tmp276 = load i8* %tmp272, align 1		; <i8> [#uses=1]
+	%tmp278 = getelementptr i8* %tmp269, i32 2		; <i8*> [#uses=1]
+	store i8* %tmp278, i8** %byteptr, align 4
+	%tmp286 = icmp eq i32 %tmp270271, %markerPrefix		; <i1> [#uses=1]
+	%cond = icmp eq i8 %tmp276, 0		; <i1> [#uses=1]
+	%bothcond = and i1 %tmp286, %cond		; <i1> [#uses=1]
+	br i1 %bothcond, label %cond_true294, label %cond_next327
+
+cond_true294:		; preds = %cond_true267
+	ret i16 0
+
+cond_next327:		; preds = %cond_true267
+	br i1 false, label %cond_true343, label %cond_next391
+
+cond_true343:		; preds = %cond_next327
+	%tmp345 = load i8** %byteptr, align 4		; <i8*> [#uses=1]
+	store i8* null, i8** %byteptr, align 4
+	store i8* %tmp345, i8** %byteptr, align 4
+	br label %cond_next391
+
+cond_next391:		; preds = %cond_true343, %cond_next327, %cond_next245
+	%tmp422445 = add i32 %tmp254, %tmp183		; <i32> [#uses=1]
+	%tmp429448 = icmp ult i32 %tmp422445, 63		; <i1> [#uses=1]
+	br i1 %tmp429448, label %cond_next127, label %UnifiedReturnBlock
+
+UnifiedReturnBlock:		; preds = %cond_next391
+	ret i16 0
+}
diff --git a/test/CodeGen/X86/2007-10-14-CoalescerCrash.ll b/test/CodeGen/X86/2007-10-14-CoalescerCrash.ll
new file mode 100644
index 0000000..8a55935
--- /dev/null
+++ b/test/CodeGen/X86/2007-10-14-CoalescerCrash.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin
+
+        %struct._Unwind_Context = type {  }
+
+define i32 @execute_stack_op(i8* %op_ptr, i8* %op_end, %struct._Unwind_Context* %context, i64 %initial) {
+entry:
+        br i1 false, label %bb, label %return
+
+bb:             ; preds = %bb31, %entry
+        br i1 false, label %bb6, label %bb31
+
+bb6:            ; preds = %bb
+        %tmp10 = load i64* null, align 8                ; <i64> [#uses=1]
+        %tmp16 = load i64* null, align 8                ; <i64> [#uses=1]
+        br i1 false, label %bb23, label %bb31
+
+bb23:           ; preds = %bb6
+        %tmp2526.cast = and i64 %tmp16, 4294967295              ; <i64> [#uses=1]
+        %tmp27 = ashr i64 %tmp10, %tmp2526.cast         ; <i64> [#uses=1]
+        br label %bb31
+
+bb31:           ; preds = %bb23, %bb6, %bb
+        %result.0 = phi i64 [ %tmp27, %bb23 ], [ 0, %bb ], [ 0, %bb6 ]          ; <i64> [#uses=0]
+        br i1 false, label %bb, label %return
+
+return:         ; preds = %bb31, %entry
+        ret i32 undef
+}
diff --git a/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll b/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll
new file mode 100644
index 0000000..1e4ae84
--- /dev/null
+++ b/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll
@@ -0,0 +1,400 @@
+; RUN: llc < %s -mtriple=x86_64-linux-gnu
+; PR1729
+
+	%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+	%struct.VEC_edge = type { i32, i32, [1 x %struct.edge_def*] }
+	%struct.VEC_tree = type { i32, i32, [1 x %struct.tree_node*] }
+	%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
+	%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
+	%struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
+	%struct.addr_diff_vec_flags = type <{ i8, i8, i8, i8 }>
+	%struct.alloc_pool_def = type { i8*, i64, i64, %struct.alloc_pool_list_def*, i64, i64, i64, %struct.alloc_pool_list_def*, i64, i64 }
+	%struct.alloc_pool_list_def = type { %struct.alloc_pool_list_def* }
+	%struct.basic_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.VEC_edge*, %struct.VEC_edge*, %struct.bitmap_head_def*, %struct.bitmap_head_def*, i8*, %struct.loop*, [2 x %struct.et_node*], %struct.basic_block_def*, %struct.basic_block_def*, %struct.reorder_block_def*, %struct.bb_ann_d*, i64, i32, i32, i32, i32 }
+	%struct.bb_ann_d = type opaque
+	%struct.bitmap_element_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, [2 x i64] }
+	%struct.bitmap_head_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, %struct.bitmap_obstack* }
+	%struct.bitmap_obstack = type { %struct.bitmap_element_def*, %struct.bitmap_head_def*, %struct.obstack }
+	%struct.cselib_val_struct = type opaque
+	%struct.dataflow_d = type opaque
+	%struct.die_struct = type opaque
+	%struct.edge_def = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.edge_def_insns, i8*, %struct.location_t*, i32, i32, i64, i32 }
+	%struct.edge_def_insns = type { %struct.rtx_def* }
+	%struct.edge_iterator = type { i32, %struct.VEC_edge** }
+	%struct.eh_status = type opaque
+	%struct.elt_list = type opaque
+	%struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.location_t, i32, i8*, %struct.rtx_def** }
+	%struct.et_node = type opaque
+	%struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
+	%struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.varray_head_tag*, %struct.temp_slot*, i32, %struct.var_refs_queue*, i32, i32, %struct.rtvec_def*, %struct.tree_node*, i32, i32, i32, %struct.machine_function*, i32, i32, i8, i8, %struct.language_function*, %struct.rtx_def*, i32, i32, i32, i32, %struct.location_t, %struct.varray_head_tag*, %struct.tree_node*, %struct.tree_node*, i8, i8, i8 }
+	%struct.ht_identifier = type { i8*, i32, i32 }
+	%struct.initial_value_struct = type opaque
+	%struct.lang_decl = type opaque
+	%struct.lang_type = type opaque
+	%struct.language_function = type opaque
+	%struct.location_t = type { i8*, i32 }
+	%struct.loop = type opaque
+	%struct.machine_function = type { %struct.stack_local_entry*, i8*, %struct.rtx_def*, i32, i32, i32, i32, i32 }
+	%struct.mem_attrs = type { i64, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, i32 }
+	%struct.obstack = type { i64, %struct._obstack_chunk*, i8*, i8*, i8*, i64, i32, %struct._obstack_chunk* (i8*, i64)*, void (i8*, %struct._obstack_chunk*)*, i8*, i8 }
+	%struct.phi_arg_d = type { %struct.tree_node*, i8 }
+	%struct.ptr_info_def = type opaque
+	%struct.real_value = type opaque
+	%struct.reg_attrs = type { %struct.tree_node*, i64 }
+	%struct.reg_info_def = type { i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+	%struct.reorder_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_def*, i32, i32, i32 }
+	%struct.rtunion = type { i8* }
+	%struct.rtvec_def = type { i32, [1 x %struct.rtx_def*] }
+	%struct.rtx_def = type { i16, i8, i8, %struct.u }
+	%struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* }
+	%struct.simple_bitmap_def = type { i32, i32, i32, [1 x i64] }
+	%struct.stack_local_entry = type opaque
+	%struct.temp_slot = type opaque
+	%struct.tree_binfo = type { %struct.tree_common, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.VEC_tree*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.VEC_tree }
+	%struct.tree_block = type { %struct.tree_common, i32, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node* }
+	%struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %union.tree_ann_d*, i8, i8, i8, i8, i8 }
+	%struct.tree_complex = type { %struct.tree_common, %struct.tree_node*, %struct.tree_node* }
+	%struct.tree_decl = type { %struct.tree_common, %struct.location_t, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, i8, i8, i32, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.tree_decl_u2, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* }
+	%struct.tree_decl_u1 = type { i64 }
+	%struct.tree_decl_u1_a = type <{ i32 }>
+	%struct.tree_decl_u2 = type { %struct.function* }
+	%struct.tree_exp = type { %struct.tree_common, %struct.location_t*, i32, %struct.tree_node*, [1 x %struct.tree_node*] }
+	%struct.tree_identifier = type { %struct.tree_common, %struct.ht_identifier }
+	%struct.tree_int_cst = type { %struct.tree_common, %struct.tree_int_cst_lowhi }
+	%struct.tree_int_cst_lowhi = type { i64, i64 }
+	%struct.tree_list = type { %struct.tree_common, %struct.tree_node*, %struct.tree_node* }
+	%struct.tree_node = type { %struct.tree_decl }
+	%struct.tree_phi_node = type { %struct.tree_common, %struct.tree_node*, i32, i32, i32, %struct.basic_block_def*, %struct.dataflow_d*, [1 x %struct.phi_arg_d] }
+	%struct.tree_real_cst = type { %struct.tree_common, %struct.real_value* }
+	%struct.tree_ssa_name = type { %struct.tree_common, %struct.tree_node*, i32, %struct.ptr_info_def*, %struct.tree_node*, i8* }
+	%struct.tree_statement_list = type { %struct.tree_common, %struct.tree_statement_list_node*, %struct.tree_statement_list_node* }
+	%struct.tree_statement_list_node = type { %struct.tree_statement_list_node*, %struct.tree_statement_list_node*, %struct.tree_node* }
+	%struct.tree_string = type { %struct.tree_common, i32, [1 x i8] }
+	%struct.tree_type = type { %struct.tree_common, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i32, i16, i8, i8, i32, %struct.tree_node*, %struct.tree_node*, %struct.rtunion, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_type* }
+	%struct.tree_type_symtab = type { i8* }
+	%struct.tree_value_handle = type { %struct.tree_common, %struct.value_set*, i32 }
+	%struct.tree_vec = type { %struct.tree_common, i32, [1 x %struct.tree_node*] }
+	%struct.tree_vector = type { %struct.tree_common, %struct.tree_node* }
+	%struct.u = type { [1 x %struct.rtunion] }
+	%struct.value_set = type opaque
+	%struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
+	%struct.varasm_status = type opaque
+	%struct.varray_data = type { [1 x i64] }
+	%struct.varray_head_tag = type { i64, i64, i32, i8*, %struct.varray_data }
+	%union.tree_ann_d = type opaque
+@first_edge_aux_obj = external global i8*		; <i8**> [#uses=0]
+@first_block_aux_obj = external global i8*		; <i8**> [#uses=0]
+@n_edges = external global i32		; <i32*> [#uses=0]
+@ENTRY_BLOCK_PTR = external global %struct.basic_block_def*		; <%struct.basic_block_def**> [#uses=0]
+@EXIT_BLOCK_PTR = external global %struct.basic_block_def*		; <%struct.basic_block_def**> [#uses=0]
+@n_basic_blocks = external global i32		; <i32*> [#uses=0]
+@.str = external constant [9 x i8]		; <[9 x i8]*> [#uses=0]
+@rbi_pool = external global %struct.alloc_pool_def*		; <%struct.alloc_pool_def**> [#uses=0]
+@__FUNCTION__.19643 = external constant [18 x i8]		; <[18 x i8]*> [#uses=0]
+@.str1 = external constant [20 x i8]		; <[20 x i8]*> [#uses=0]
+@__FUNCTION__.19670 = external constant [15 x i8]		; <[15 x i8]*> [#uses=0]
+@basic_block_info = external global %struct.varray_head_tag*		; <%struct.varray_head_tag**> [#uses=0]
+@last_basic_block = external global i32		; <i32*> [#uses=0]
+@__FUNCTION__.19696 = external constant [14 x i8]		; <[14 x i8]*> [#uses=0]
+@__FUNCTION__.20191 = external constant [20 x i8]		; <[20 x i8]*> [#uses=0]
+@block_aux_obstack = external global %struct.obstack		; <%struct.obstack*> [#uses=0]
+@__FUNCTION__.20301 = external constant [20 x i8]		; <[20 x i8]*> [#uses=0]
+@__FUNCTION__.20316 = external constant [19 x i8]		; <[19 x i8]*> [#uses=0]
+@edge_aux_obstack = external global %struct.obstack		; <%struct.obstack*> [#uses=0]
+@stderr = external global %struct._IO_FILE*		; <%struct._IO_FILE**> [#uses=0]
+@__FUNCTION__.20463 = external constant [11 x i8]		; <[11 x i8]*> [#uses=0]
+@.str2 = external constant [7 x i8]		; <[7 x i8]*> [#uses=0]
+@.str3 = external constant [6 x i8]		; <[6 x i8]*> [#uses=0]
+@.str4 = external constant [4 x i8]		; <[4 x i8]*> [#uses=0]
+@.str5 = external constant [11 x i8]		; <[11 x i8]*> [#uses=0]
+@.str6 = external constant [8 x i8]		; <[8 x i8]*> [#uses=0]
+@.str7 = external constant [4 x i8]		; <[4 x i8]*> [#uses=0]
+@bitnames.20157 = external constant [13 x i8*]		; <[13 x i8*]*> [#uses=0]
+@.str8 = external constant [9 x i8]		; <[9 x i8]*> [#uses=0]
+@.str9 = external constant [3 x i8]		; <[3 x i8]*> [#uses=0]
+@.str10 = external constant [7 x i8]		; <[7 x i8]*> [#uses=0]
+@.str11 = external constant [3 x i8]		; <[3 x i8]*> [#uses=0]
+@.str12 = external constant [5 x i8]		; <[5 x i8]*> [#uses=0]
+@.str13 = external constant [9 x i8]		; <[9 x i8]*> [#uses=0]
+@.str14 = external constant [13 x i8]		; <[13 x i8]*> [#uses=0]
+@.str15 = external constant [12 x i8]		; <[12 x i8]*> [#uses=0]
+@.str16 = external constant [8 x i8]		; <[8 x i8]*> [#uses=0]
+@.str17 = external constant [10 x i8]		; <[10 x i8]*> [#uses=0]
+@.str18 = external constant [5 x i8]		; <[5 x i8]*> [#uses=0]
+@.str19 = external constant [6 x i8]		; <[6 x i8]*> [#uses=0]
+@.str20 = external constant [5 x i8]		; <[5 x i8]*> [#uses=0]
+@.str21 = external constant [3 x i8]		; <[3 x i8]*> [#uses=0]
+@.str22 = external constant [3 x i8]		; <[3 x i8]*> [#uses=0]
+@__FUNCTION__.19709 = external constant [20 x i8]		; <[20 x i8]*> [#uses=0]
+@.str23 = external constant [5 x i8]		; <[5 x i8]*> [#uses=0]
+@.str24 = external constant [10 x i8]		; <[10 x i8]*> [#uses=0]
+@__FUNCTION__.19813 = external constant [19 x i8]		; <[19 x i8]*> [#uses=0]
+@.str25 = external constant [7 x i8]		; <[7 x i8]*> [#uses=0]
+@.str26 = external constant [6 x i8]		; <[6 x i8]*> [#uses=0]
+@initialized.20241.b = external global i1		; <i1*> [#uses=0]
+@__FUNCTION__.20244 = external constant [21 x i8]		; <[21 x i8]*> [#uses=0]
+@__FUNCTION__.19601 = external constant [12 x i8]		; <[12 x i8]*> [#uses=0]
+@__FUNCTION__.14571 = external constant [8 x i8]		; <[8 x i8]*> [#uses=0]
+@__FUNCTION__.14535 = external constant [13 x i8]		; <[13 x i8]*> [#uses=0]
+@.str27 = external constant [28 x i8]		; <[28 x i8]*> [#uses=0]
+@__FUNCTION__.14589 = external constant [8 x i8]		; <[8 x i8]*> [#uses=0]
+@__FUNCTION__.19792 = external constant [12 x i8]		; <[12 x i8]*> [#uses=0]
+@__FUNCTION__.19851 = external constant [19 x i8]		; <[19 x i8]*> [#uses=0]
+@profile_status = external global i32		; <i32*> [#uses=0]
+@.str29 = external constant [46 x i8]		; <[46 x i8]*> [#uses=0]
+@.str30 = external constant [49 x i8]		; <[49 x i8]*> [#uses=0]
+@.str31 = external constant [54 x i8]		; <[54 x i8]*> [#uses=0]
+@.str32 = external constant [49 x i8]		; <[49 x i8]*> [#uses=1]
+@__FUNCTION__.19948 = external constant [15 x i8]		; <[15 x i8]*> [#uses=0]
+@reg_n_info = external global %struct.varray_head_tag*		; <%struct.varray_head_tag**> [#uses=0]
+@reload_completed = external global i32		; <i32*> [#uses=0]
+@.str33 = external constant [15 x i8]		; <[15 x i8]*> [#uses=0]
+@.str34 = external constant [43 x i8]		; <[43 x i8]*> [#uses=0]
+@.str35 = external constant [13 x i8]		; <[13 x i8]*> [#uses=0]
+@.str36 = external constant [1 x i8]		; <[1 x i8]*> [#uses=0]
+@.str37 = external constant [2 x i8]		; <[2 x i8]*> [#uses=0]
+@.str38 = external constant [16 x i8]		; <[16 x i8]*> [#uses=0]
+@cfun = external global %struct.function*		; <%struct.function**> [#uses=0]
+@.str39 = external constant [14 x i8]		; <[14 x i8]*> [#uses=0]
+@.str40 = external constant [11 x i8]		; <[11 x i8]*> [#uses=0]
+@.str41 = external constant [20 x i8]		; <[20 x i8]*> [#uses=0]
+@.str42 = external constant [17 x i8]		; <[17 x i8]*> [#uses=0]
+@.str43 = external constant [19 x i8]		; <[19 x i8]*> [#uses=0]
+@mode_size = external global [48 x i8]		; <[48 x i8]*> [#uses=0]
+@target_flags = external global i32		; <i32*> [#uses=0]
+@.str44 = external constant [11 x i8]		; <[11 x i8]*> [#uses=0]
+@reg_class_names = external global [0 x i8*]		; <[0 x i8*]*> [#uses=0]
+@.str45 = external constant [10 x i8]		; <[10 x i8]*> [#uses=0]
+@.str46 = external constant [13 x i8]		; <[13 x i8]*> [#uses=0]
+@.str47 = external constant [19 x i8]		; <[19 x i8]*> [#uses=0]
+@.str48 = external constant [12 x i8]		; <[12 x i8]*> [#uses=0]
+@.str49 = external constant [10 x i8]		; <[10 x i8]*> [#uses=0]
+@.str50 = external constant [3 x i8]		; <[3 x i8]*> [#uses=0]
+@.str51 = external constant [29 x i8]		; <[29 x i8]*> [#uses=0]
+@.str52 = external constant [17 x i8]		; <[17 x i8]*> [#uses=0]
+@.str53 = external constant [19 x i8]		; <[19 x i8]*> [#uses=0]
+@.str54 = external constant [22 x i8]		; <[22 x i8]*> [#uses=0]
+@.str55 = external constant [10 x i8]		; <[10 x i8]*> [#uses=0]
+@.str56 = external constant [12 x i8]		; <[12 x i8]*> [#uses=0]
+@.str57 = external constant [26 x i8]		; <[26 x i8]*> [#uses=0]
+@.str58 = external constant [15 x i8]		; <[15 x i8]*> [#uses=0]
+@.str59 = external constant [14 x i8]		; <[14 x i8]*> [#uses=0]
+@.str60 = external constant [26 x i8]		; <[26 x i8]*> [#uses=0]
+@.str61 = external constant [24 x i8]		; <[24 x i8]*> [#uses=0]
+@initialized.20366.b = external global i1		; <i1*> [#uses=0]
+@__FUNCTION__.20369 = external constant [20 x i8]		; <[20 x i8]*> [#uses=0]
+@__FUNCTION__.20442 = external constant [19 x i8]		; <[19 x i8]*> [#uses=0]
+@bb_bitnames.20476 = external constant [6 x i8*]		; <[6 x i8*]*> [#uses=0]
+@.str62 = external constant [6 x i8]		; <[6 x i8]*> [#uses=0]
+@.str63 = external constant [4 x i8]		; <[4 x i8]*> [#uses=0]
+@.str64 = external constant [10 x i8]		; <[10 x i8]*> [#uses=0]
+@.str65 = external constant [8 x i8]		; <[8 x i8]*> [#uses=0]
+@.str66 = external constant [17 x i8]		; <[17 x i8]*> [#uses=0]
+@.str67 = external constant [11 x i8]		; <[11 x i8]*> [#uses=0]
+@.str68 = external constant [15 x i8]		; <[15 x i8]*> [#uses=0]
+@.str69 = external constant [3 x i8]		; <[3 x i8]*> [#uses=0]
+@.str70 = external constant [3 x i8]		; <[3 x i8]*> [#uses=0]
+@__FUNCTION__.20520 = external constant [32 x i8]		; <[32 x i8]*> [#uses=0]
+@dump_file = external global %struct._IO_FILE*		; <%struct._IO_FILE**> [#uses=0]
+@.str71 = external constant [86 x i8]		; <[86 x i8]*> [#uses=0]
+@.str72 = external constant [94 x i8]		; <[94 x i8]*> [#uses=0]
+@reg_obstack = external global %struct.bitmap_obstack		; <%struct.bitmap_obstack*> [#uses=0]
+
+declare void @init_flow()
+
+declare i8* @ggc_alloc_cleared_stat(i64)
+
+declare fastcc void @free_edge(%struct.edge_def*)
+
+declare void @ggc_free(i8*)
+
+declare %struct.basic_block_def* @alloc_block()
+
+declare void @alloc_rbi_pool()
+
+declare %struct.alloc_pool_def* @create_alloc_pool(i8*, i64, i64)
+
+declare void @free_rbi_pool()
+
+declare void @free_alloc_pool(%struct.alloc_pool_def*)
+
+declare void @initialize_bb_rbi(%struct.basic_block_def*)
+
+declare void @fancy_abort(i8*, i32, i8*)
+
+declare i8* @pool_alloc(%struct.alloc_pool_def*)
+
+declare void @llvm.memset.i64(i8*, i8, i64, i32)
+
+declare void @link_block(%struct.basic_block_def*, %struct.basic_block_def*)
+
+declare void @unlink_block(%struct.basic_block_def*)
+
+declare void @compact_blocks()
+
+declare void @varray_check_failed(%struct.varray_head_tag*, i64, i8*, i32, i8*)
+
+declare void @expunge_block(%struct.basic_block_def*)
+
+declare void @clear_bb_flags()
+
+declare void @alloc_aux_for_block(%struct.basic_block_def*, i32)
+
+declare void @_obstack_newchunk(%struct.obstack*, i32)
+
+declare void @clear_aux_for_blocks()
+
+declare void @free_aux_for_blocks()
+
+declare void @obstack_free(%struct.obstack*, i8*)
+
+declare void @alloc_aux_for_edge(%struct.edge_def*, i32)
+
+declare void @debug_bb(%struct.basic_block_def*)
+
+declare void @dump_bb(%struct.basic_block_def*, %struct._IO_FILE*, i32)
+
+declare %struct.basic_block_def* @debug_bb_n(i32)
+
+declare void @dump_edge_info(%struct._IO_FILE*, %struct.edge_def*, i32)
+
+declare i32 @fputs_unlocked(i8* noalias , %struct._IO_FILE* noalias )
+
+declare i32 @fprintf(%struct._IO_FILE* noalias , i8* noalias , ...)
+
+declare i64 @fwrite(i8*, i64, i64, i8*)
+
+declare i32 @__overflow(%struct._IO_FILE*, i32)
+
+declare %struct.edge_def* @unchecked_make_edge(%struct.basic_block_def*, %struct.basic_block_def*, i32)
+
+declare i8* @vec_gc_p_reserve(i8*, i32)
+
+declare void @vec_assert_fail(i8*, i8*, i8*, i32, i8*)
+
+declare void @execute_on_growing_pred(%struct.edge_def*)
+
+declare %struct.edge_def* @make_edge(%struct.basic_block_def*, %struct.basic_block_def*, i32)
+
+declare %struct.edge_def* @find_edge(%struct.basic_block_def*, %struct.basic_block_def*)
+
+declare %struct.edge_def* @make_single_succ_edge(%struct.basic_block_def*, %struct.basic_block_def*, i32)
+
+declare %struct.edge_def* @cached_make_edge(%struct.simple_bitmap_def**, %struct.basic_block_def*, %struct.basic_block_def*, i32)
+
+declare void @redirect_edge_succ(%struct.edge_def*, %struct.basic_block_def*)
+
+declare void @execute_on_shrinking_pred(%struct.edge_def*)
+
+declare void @alloc_aux_for_blocks(i32)
+
+declare i8* @xmalloc(i64)
+
+declare i32 @_obstack_begin(%struct.obstack*, i32, i32, i8* (i64)*, void (i8*)*)
+
+declare void @free(i8*)
+
+declare void @clear_edges()
+
+declare void @remove_edge(%struct.edge_def*)
+
+declare %struct.edge_def* @redirect_edge_succ_nodup(%struct.edge_def*, %struct.basic_block_def*)
+
+declare void @redirect_edge_pred(%struct.edge_def*, %struct.basic_block_def*)
+
+define void @check_bb_profile(%struct.basic_block_def* %bb, %struct._IO_FILE* %file) {
+entry:
+	br i1 false, label %cond_false759.preheader, label %cond_false149.preheader
+
+cond_false149.preheader:		; preds = %entry
+	ret void
+
+cond_false759.preheader:		; preds = %entry
+	br i1 false, label %cond_next873, label %cond_true794
+
+bb644:		; preds = %cond_next873
+	ret void
+
+cond_true794:		; preds = %cond_false759.preheader
+	ret void
+
+cond_next873:		; preds = %cond_false759.preheader
+	br i1 false, label %bb882, label %bb644
+
+bb882:		; preds = %cond_next873
+	br i1 false, label %cond_true893, label %cond_next901
+
+cond_true893:		; preds = %bb882
+	br label %cond_false1036
+
+cond_next901:		; preds = %bb882
+	ret void
+
+bb929:		; preds = %cond_next1150
+	%tmp934 = add i64 0, %lsum.11225.0		; <i64> [#uses=1]
+	br i1 false, label %cond_next979, label %cond_true974
+
+cond_true974:		; preds = %bb929
+	ret void
+
+cond_next979:		; preds = %bb929
+	br label %cond_false1036
+
+cond_false1036:		; preds = %cond_next979, %cond_true893
+	%lsum.11225.0 = phi i64 [ 0, %cond_true893 ], [ %tmp934, %cond_next979 ]		; <i64> [#uses=2]
+	br i1 false, label %cond_next1056, label %cond_true1051
+
+cond_true1051:		; preds = %cond_false1036
+	ret void
+
+cond_next1056:		; preds = %cond_false1036
+	br i1 false, label %cond_next1150, label %cond_true1071
+
+cond_true1071:		; preds = %cond_next1056
+	ret void
+
+cond_next1150:		; preds = %cond_next1056
+	%tmp1156 = icmp eq %struct.edge_def* null, null		; <i1> [#uses=1]
+	br i1 %tmp1156, label %bb1159, label %bb929
+
+bb1159:		; preds = %cond_next1150
+	br i1 false, label %cond_true1169, label %UnifiedReturnBlock
+
+cond_true1169:		; preds = %bb1159
+	%tmp11741175 = trunc i64 %lsum.11225.0 to i32		; <i32> [#uses=1]
+	%tmp1178 = tail call i32 (%struct._IO_FILE* noalias , i8* noalias , ...)* @fprintf( %struct._IO_FILE* %file noalias , i8* getelementptr ([49 x i8]* @.str32, i32 0, i64 0) noalias , i32 %tmp11741175, i32 0 )		; <i32> [#uses=0]
+	ret void
+
+UnifiedReturnBlock:		; preds = %bb1159
+	ret void
+}
+
+declare void @dump_flow_info(%struct._IO_FILE*)
+
+declare i32 @max_reg_num()
+
+declare void @rtl_check_failed_flag(i8*, %struct.rtx_def*, i8*, i32, i8*)
+
+declare i32 @reg_preferred_class(i32)
+
+declare i32 @reg_alternate_class(i32)
+
+declare i8 @maybe_hot_bb_p(%struct.basic_block_def*) zeroext 
+
+declare i8 @probably_never_executed_bb_p(%struct.basic_block_def*) zeroext 
+
+declare void @dump_regset(%struct.bitmap_head_def*, %struct._IO_FILE*)
+
+declare void @debug_flow_info()
+
+declare void @alloc_aux_for_edges(i32)
+
+declare void @clear_aux_for_edges()
+
+declare void @free_aux_for_edges()
+
+declare void @brief_dump_cfg(%struct._IO_FILE*)
+
+declare i32 @fputc(i32, i8*)
+
+declare void @update_bb_profile_for_threading(%struct.basic_block_def*, i32, i64, %struct.edge_def*)
diff --git a/test/CodeGen/X86/2007-10-16-CoalescerCrash.ll b/test/CodeGen/X86/2007-10-16-CoalescerCrash.ll
new file mode 100644
index 0000000..fbcac50
--- /dev/null
+++ b/test/CodeGen/X86/2007-10-16-CoalescerCrash.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin
+
+define i64 @__ashldi3(i64 %u, i64 %b) {
+entry:
+        br i1 false, label %UnifiedReturnBlock, label %cond_next
+
+cond_next:              ; preds = %entry
+        %tmp9 = sub i64 32, %b          ; <i64> [#uses=2]
+        %tmp11 = icmp slt i64 %tmp9, 1          ; <i1> [#uses=1]
+        %tmp2180 = trunc i64 %u to i32          ; <i32> [#uses=2]
+        %tmp2223 = trunc i64 %tmp9 to i32               ; <i32> [#uses=2]
+        br i1 %tmp11, label %cond_true14, label %cond_false
+
+cond_true14:            ; preds = %cond_next
+        %tmp24 = sub i32 0, %tmp2223            ; <i32> [#uses=1]
+        %tmp25 = shl i32 %tmp2180, %tmp24               ; <i32> [#uses=1]
+        %tmp2569 = zext i32 %tmp25 to i64               ; <i64> [#uses=1]
+        %tmp256970 = shl i64 %tmp2569, 32               ; <i64> [#uses=1]
+        ret i64 %tmp256970
+
+cond_false:             ; preds = %cond_next
+        %tmp35 = lshr i32 %tmp2180, %tmp2223            ; <i32> [#uses=1]
+        %tmp54 = or i32 %tmp35, 0               ; <i32> [#uses=1]
+        %tmp5464 = zext i32 %tmp54 to i64               ; <i64> [#uses=1]
+        %tmp546465 = shl i64 %tmp5464, 32               ; <i64> [#uses=1]
+        %tmp546465.ins = or i64 %tmp546465, 0           ; <i64> [#uses=1]
+        ret i64 %tmp546465.ins
+
+UnifiedReturnBlock:
+        ret i64 %u
+}
diff --git a/test/CodeGen/X86/2007-10-16-IllegalAsm.ll b/test/CodeGen/X86/2007-10-16-IllegalAsm.ll
new file mode 100644
index 0000000..6d0cb47
--- /dev/null
+++ b/test/CodeGen/X86/2007-10-16-IllegalAsm.ll
@@ -0,0 +1,272 @@
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | grep movb | not grep x
+; PR1734
+
+	%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+	%struct.eh_status = type opaque
+	%struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.location_t, i32, i8*, %struct.rtx_def** }
+	%struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
+	%struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.varray_head_tag*, %struct.temp_slot*, i32, %struct.var_refs_queue*, i32, i32, %struct.rtvec_def*, %struct.tree_node*, i32, i32, i32, %struct.machine_function*, i32, i32, i8, i8, %struct.language_function*, %struct.rtx_def*, i32, i32, i32, i32, %struct.location_t, %struct.varray_head_tag*, %struct.tree_node*, %struct.tree_node*, i8, i8, i8 }
+	%struct.initial_value_struct = type opaque
+	%struct.lang_decl = type opaque
+	%struct.lang_type = type opaque
+	%struct.language_function = type opaque
+	%struct.location_t = type { i8*, i32 }
+	%struct.machine_function = type { %struct.stack_local_entry*, i8*, %struct.rtx_def*, i32, i32, i32, i32, i32 }
+	%struct.rtunion = type { i8* }
+	%struct.rtvec_def = type { i32, [1 x %struct.rtx_def*] }
+	%struct.rtx_def = type { i16, i8, i8, %struct.u }
+	%struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* }
+	%struct.stack_local_entry = type opaque
+	%struct.temp_slot = type opaque
+	%struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %union.tree_ann_d*, i8, i8, i8, i8, i8 }
+	%struct.tree_decl = type { %struct.tree_common, %struct.location_t, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, i8, i8, i32, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.tree_decl_u2, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* }
+	%struct.tree_decl_u1 = type { i64 }
+	%struct.tree_decl_u2 = type { %struct.function* }
+	%struct.tree_node = type { %struct.tree_decl }
+	%struct.tree_type = type { %struct.tree_common, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i32, i16, i8, i8, i32, %struct.tree_node*, %struct.tree_node*, %struct.rtunion, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_type* }
+	%struct.u = type { [1 x %struct.rtunion] }
+	%struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
+	%struct.varasm_status = type opaque
+	%struct.varray_data = type { [1 x i64] }
+	%struct.varray_head_tag = type { i64, i64, i32, i8*, %struct.varray_data }
+	%union.tree_ann_d = type opaque
+@.str = external constant [28 x i8]		; <[28 x i8]*> [#uses=1]
+@tree_code_type = external constant [0 x i32]		; <[0 x i32]*> [#uses=5]
+@global_trees = external global [47 x %struct.tree_node*]		; <[47 x %struct.tree_node*]*> [#uses=1]
+@mode_size = external global [48 x i8]		; <[48 x i8]*> [#uses=1]
+@__FUNCTION__.22683 = external constant [12 x i8]		; <[12 x i8]*> [#uses=1]
+
+define void @layout_type(%struct.tree_node* %type) {
+entry:
+	%tmp15 = icmp eq %struct.tree_node* %type, null		; <i1> [#uses=1]
+	br i1 %tmp15, label %cond_true, label %cond_false
+
+cond_true:		; preds = %entry
+	tail call void @fancy_abort( i8* getelementptr ([28 x i8]* @.str, i32 0, i64 0), i32 1713, i8* getelementptr ([12 x i8]* @__FUNCTION__.22683, i32 0, i32 0) )
+	unreachable
+
+cond_false:		; preds = %entry
+	%tmp19 = load %struct.tree_node** getelementptr ([47 x %struct.tree_node*]* @global_trees, i32 0, i64 0), align 8		; <%struct.tree_node*> [#uses=1]
+	%tmp21 = icmp eq %struct.tree_node* %tmp19, %type		; <i1> [#uses=1]
+	br i1 %tmp21, label %UnifiedReturnBlock, label %cond_next25
+
+cond_next25:		; preds = %cond_false
+	%tmp30 = getelementptr %struct.tree_node* %type, i32 0, i32 0, i32 0, i32 3		; <i8*> [#uses=1]
+	%tmp3031 = bitcast i8* %tmp30 to i32*		; <i32*> [#uses=6]
+	%tmp32 = load i32* %tmp3031, align 8		; <i32> [#uses=3]
+	%tmp3435 = trunc i32 %tmp32 to i8		; <i8> [#uses=3]
+	%tmp34353637 = zext i8 %tmp3435 to i64		; <i64> [#uses=1]
+	%tmp38 = getelementptr [0 x i32]* @tree_code_type, i32 0, i64 %tmp34353637		; <i32*> [#uses=1]
+	%tmp39 = load i32* %tmp38, align 4		; <i32> [#uses=1]
+	%tmp40 = icmp eq i32 %tmp39, 2		; <i1> [#uses=4]
+	br i1 %tmp40, label %cond_next46, label %cond_true43
+
+cond_true43:		; preds = %cond_next25
+	tail call void @tree_class_check_failed( %struct.tree_node* %type, i32 2, i8* getelementptr ([28 x i8]* @.str, i32 0, i64 0), i32 1719, i8* getelementptr ([12 x i8]* @__FUNCTION__.22683, i32 0, i32 0) )
+	unreachable
+
+cond_next46:		; preds = %cond_next25
+	%tmp4950 = bitcast %struct.tree_node* %type to %struct.tree_type*		; <%struct.tree_type*> [#uses=2]
+	%tmp51 = getelementptr %struct.tree_type* %tmp4950, i32 0, i32 2		; <%struct.tree_node**> [#uses=2]
+	%tmp52 = load %struct.tree_node** %tmp51, align 8		; <%struct.tree_node*> [#uses=1]
+	%tmp53 = icmp eq %struct.tree_node* %tmp52, null		; <i1> [#uses=1]
+	br i1 %tmp53, label %cond_next57, label %UnifiedReturnBlock
+
+cond_next57:		; preds = %cond_next46
+	%tmp65 = and i32 %tmp32, 255		; <i32> [#uses=1]
+	switch i32 %tmp65, label %UnifiedReturnBlock [
+		 i32 6, label %bb140
+		 i32 7, label %bb69
+		 i32 8, label %bb140
+		 i32 13, label %bb478
+		 i32 23, label %bb
+	]
+
+bb:		; preds = %cond_next57
+	tail call void @fancy_abort( i8* getelementptr ([28 x i8]* @.str, i32 0, i64 0), i32 1727, i8* getelementptr ([12 x i8]* @__FUNCTION__.22683, i32 0, i32 0) )
+	unreachable
+
+bb69:		; preds = %cond_next57
+	br i1 %tmp40, label %cond_next91, label %cond_true88
+
+cond_true88:		; preds = %bb69
+	tail call void @tree_class_check_failed( %struct.tree_node* %type, i32 2, i8* getelementptr ([28 x i8]* @.str, i32 0, i64 0), i32 1730, i8* getelementptr ([12 x i8]* @__FUNCTION__.22683, i32 0, i32 0) )
+	unreachable
+
+cond_next91:		; preds = %bb69
+	%tmp96 = getelementptr %struct.tree_node* %type, i32 0, i32 0, i32 8		; <i8*> [#uses=1]
+	%tmp9697 = bitcast i8* %tmp96 to i32*		; <i32*> [#uses=2]
+	%tmp98 = load i32* %tmp9697, align 8		; <i32> [#uses=2]
+	%tmp100101552 = and i32 %tmp98, 511		; <i32> [#uses=1]
+	%tmp102 = icmp eq i32 %tmp100101552, 0		; <i1> [#uses=1]
+	br i1 %tmp102, label %cond_true105, label %bb140
+
+cond_true105:		; preds = %cond_next91
+	br i1 %tmp40, label %cond_next127, label %cond_true124
+
+cond_true124:		; preds = %cond_true105
+	tail call void @tree_class_check_failed( %struct.tree_node* %type, i32 2, i8* getelementptr ([28 x i8]* @.str, i32 0, i64 0), i32 1731, i8* getelementptr ([12 x i8]* @__FUNCTION__.22683, i32 0, i32 0) )
+	unreachable
+
+cond_next127:		; preds = %cond_true105
+	%tmp136 = or i32 %tmp98, 1		; <i32> [#uses=1]
+	%tmp137 = and i32 %tmp136, -511		; <i32> [#uses=1]
+	store i32 %tmp137, i32* %tmp9697, align 8
+	br label %bb140
+
+bb140:		; preds = %cond_next127, %cond_next91, %cond_next57, %cond_next57
+	switch i8 %tmp3435, label %cond_true202 [
+		 i8 6, label %cond_next208
+		 i8 9, label %cond_next208
+		 i8 7, label %cond_next208
+		 i8 8, label %cond_next208
+		 i8 10, label %cond_next208
+	]
+
+cond_true202:		; preds = %bb140
+	tail call void (%struct.tree_node*, i8*, i32, i8*, ...)* @tree_check_failed( %struct.tree_node* %type, i8* getelementptr ([28 x i8]* @.str, i32 0, i64 0), i32 1738, i8* getelementptr ([12 x i8]* @__FUNCTION__.22683, i32 0, i32 0), i32 9, i32 6, i32 7, i32 8, i32 10, i32 0 )
+	unreachable
+
+cond_next208:		; preds = %bb140, %bb140, %bb140, %bb140, %bb140
+	%tmp213 = getelementptr %struct.tree_type* %tmp4950, i32 0, i32 14		; <%struct.tree_node**> [#uses=1]
+	%tmp214 = load %struct.tree_node** %tmp213, align 8		; <%struct.tree_node*> [#uses=2]
+	%tmp217 = getelementptr %struct.tree_node* %tmp214, i32 0, i32 0, i32 0, i32 3		; <i8*> [#uses=1]
+	%tmp217218 = bitcast i8* %tmp217 to i32*		; <i32*> [#uses=1]
+	%tmp219 = load i32* %tmp217218, align 8		; <i32> [#uses=1]
+	%tmp221222 = trunc i32 %tmp219 to i8		; <i8> [#uses=1]
+	%tmp223 = icmp eq i8 %tmp221222, 24		; <i1> [#uses=1]
+	br i1 %tmp223, label %cond_true226, label %cond_next340
+
+cond_true226:		; preds = %cond_next208
+	switch i8 %tmp3435, label %cond_true288 [
+		 i8 6, label %cond_next294
+		 i8 9, label %cond_next294
+		 i8 7, label %cond_next294
+		 i8 8, label %cond_next294
+		 i8 10, label %cond_next294
+	]
+
+cond_true288:		; preds = %cond_true226
+	tail call void (%struct.tree_node*, i8*, i32, i8*, ...)* @tree_check_failed( %struct.tree_node* %type, i8* getelementptr ([28 x i8]* @.str, i32 0, i64 0), i32 1739, i8* getelementptr ([12 x i8]* @__FUNCTION__.22683, i32 0, i32 0), i32 9, i32 6, i32 7, i32 8, i32 10, i32 0 )
+	unreachable
+
+cond_next294:		; preds = %cond_true226, %cond_true226, %cond_true226, %cond_true226, %cond_true226
+	%tmp301 = tail call i32 @tree_int_cst_sgn( %struct.tree_node* %tmp214 )		; <i32> [#uses=1]
+	%tmp302 = icmp sgt i32 %tmp301, -1		; <i1> [#uses=1]
+	br i1 %tmp302, label %cond_true305, label %cond_next340
+
+cond_true305:		; preds = %cond_next294
+	%tmp313 = load i32* %tmp3031, align 8		; <i32> [#uses=2]
+	%tmp315316 = trunc i32 %tmp313 to i8		; <i8> [#uses=1]
+	%tmp315316317318 = zext i8 %tmp315316 to i64		; <i64> [#uses=1]
+	%tmp319 = getelementptr [0 x i32]* @tree_code_type, i32 0, i64 %tmp315316317318		; <i32*> [#uses=1]
+	%tmp320 = load i32* %tmp319, align 4		; <i32> [#uses=1]
+	%tmp321 = icmp eq i32 %tmp320, 2		; <i1> [#uses=1]
+	br i1 %tmp321, label %cond_next327, label %cond_true324
+
+cond_true324:		; preds = %cond_true305
+	tail call void @tree_class_check_failed( %struct.tree_node* %type, i32 2, i8* getelementptr ([28 x i8]* @.str, i32 0, i64 0), i32 1740, i8* getelementptr ([12 x i8]* @__FUNCTION__.22683, i32 0, i32 0) )
+	unreachable
+
+cond_next327:		; preds = %cond_true305
+	%tmp338 = or i32 %tmp313, 8192		; <i32> [#uses=1]
+	store i32 %tmp338, i32* %tmp3031, align 8
+	br label %cond_next340
+
+cond_next340:		; preds = %cond_next327, %cond_next294, %cond_next208
+	%tmp348 = load i32* %tmp3031, align 8		; <i32> [#uses=1]
+	%tmp350351 = trunc i32 %tmp348 to i8		; <i8> [#uses=1]
+	%tmp350351352353 = zext i8 %tmp350351 to i64		; <i64> [#uses=1]
+	%tmp354 = getelementptr [0 x i32]* @tree_code_type, i32 0, i64 %tmp350351352353		; <i32*> [#uses=1]
+	%tmp355 = load i32* %tmp354, align 4		; <i32> [#uses=1]
+	%tmp356 = icmp eq i32 %tmp355, 2		; <i1> [#uses=1]
+	br i1 %tmp356, label %cond_next385, label %cond_true359
+
+cond_true359:		; preds = %cond_next340
+	tail call void @tree_class_check_failed( %struct.tree_node* %type, i32 2, i8* getelementptr ([28 x i8]* @.str, i32 0, i64 0), i32 1742, i8* getelementptr ([12 x i8]* @__FUNCTION__.22683, i32 0, i32 0) )
+	unreachable
+
+cond_next385:		; preds = %cond_next340
+	%tmp390 = getelementptr %struct.tree_node* %type, i32 0, i32 0, i32 8		; <i8*> [#uses=1]
+	%tmp390391 = bitcast i8* %tmp390 to i32*		; <i32*> [#uses=3]
+	%tmp392 = load i32* %tmp390391, align 8		; <i32> [#uses=1]
+	%tmp394 = and i32 %tmp392, 511		; <i32> [#uses=1]
+	%tmp397 = tail call i32 @smallest_mode_for_size( i32 %tmp394, i32 2 )		; <i32> [#uses=1]
+	%tmp404 = load i32* %tmp390391, align 8		; <i32> [#uses=1]
+	%tmp397398405 = shl i32 %tmp397, 9		; <i32> [#uses=1]
+	%tmp407 = and i32 %tmp397398405, 65024		; <i32> [#uses=1]
+	%tmp408 = and i32 %tmp404, -65025		; <i32> [#uses=1]
+	%tmp409 = or i32 %tmp408, %tmp407		; <i32> [#uses=2]
+	store i32 %tmp409, i32* %tmp390391, align 8
+	%tmp417 = load i32* %tmp3031, align 8		; <i32> [#uses=1]
+	%tmp419420 = trunc i32 %tmp417 to i8		; <i8> [#uses=1]
+	%tmp419420421422 = zext i8 %tmp419420 to i64		; <i64> [#uses=1]
+	%tmp423 = getelementptr [0 x i32]* @tree_code_type, i32 0, i64 %tmp419420421422		; <i32*> [#uses=1]
+	%tmp424 = load i32* %tmp423, align 4		; <i32> [#uses=1]
+	%tmp425 = icmp eq i32 %tmp424, 2		; <i1> [#uses=1]
+	br i1 %tmp425, label %cond_next454, label %cond_true428
+
+cond_true428:		; preds = %cond_next385
+	tail call void @tree_class_check_failed( %struct.tree_node* %type, i32 2, i8* getelementptr ([28 x i8]* @.str, i32 0, i64 0), i32 1744, i8* getelementptr ([12 x i8]* @__FUNCTION__.22683, i32 0, i32 0) )
+	unreachable
+
+cond_next454:		; preds = %cond_next385
+	lshr i32 %tmp409, 9		; <i32>:0 [#uses=1]
+	trunc i32 %0 to i8		; <i8>:1 [#uses=1]
+	%tmp463464 = and i8 %1, 127		; <i8> [#uses=1]
+	%tmp463464465466 = zext i8 %tmp463464 to i64		; <i64> [#uses=1]
+	%tmp467 = getelementptr [48 x i8]* @mode_size, i32 0, i64 %tmp463464465466		; <i8*> [#uses=1]
+	%tmp468 = load i8* %tmp467, align 1		; <i8> [#uses=1]
+	%tmp468469553 = zext i8 %tmp468 to i16		; <i16> [#uses=1]
+	%tmp470471 = shl i16 %tmp468469553, 3		; <i16> [#uses=1]
+	%tmp470471472 = zext i16 %tmp470471 to i64		; <i64> [#uses=1]
+	%tmp473 = tail call %struct.tree_node* @size_int_kind( i64 %tmp470471472, i32 2 )		; <%struct.tree_node*> [#uses=1]
+	store %struct.tree_node* %tmp473, %struct.tree_node** %tmp51, align 8
+	ret void
+
+bb478:		; preds = %cond_next57
+	br i1 %tmp40, label %cond_next500, label %cond_true497
+
+cond_true497:		; preds = %bb478
+	tail call void @tree_class_check_failed( %struct.tree_node* %type, i32 2, i8* getelementptr ([28 x i8]* @.str, i32 0, i64 0), i32 1755, i8* getelementptr ([12 x i8]* @__FUNCTION__.22683, i32 0, i32 0) )
+	unreachable
+
+cond_next500:		; preds = %bb478
+	%tmp506 = getelementptr %struct.tree_node* %type, i32 0, i32 0, i32 0, i32 1		; <%struct.tree_node**> [#uses=1]
+	%tmp507 = load %struct.tree_node** %tmp506, align 8		; <%struct.tree_node*> [#uses=2]
+	%tmp511 = getelementptr %struct.tree_node* %tmp507, i32 0, i32 0, i32 0, i32 3		; <i8*> [#uses=1]
+	%tmp511512 = bitcast i8* %tmp511 to i32*		; <i32*> [#uses=1]
+	%tmp513 = load i32* %tmp511512, align 8		; <i32> [#uses=2]
+	%tmp515516 = trunc i32 %tmp513 to i8		; <i8> [#uses=1]
+	%tmp515516517518 = zext i8 %tmp515516 to i64		; <i64> [#uses=1]
+	%tmp519 = getelementptr [0 x i32]* @tree_code_type, i32 0, i64 %tmp515516517518		; <i32*> [#uses=1]
+	%tmp520 = load i32* %tmp519, align 4		; <i32> [#uses=1]
+	%tmp521 = icmp eq i32 %tmp520, 2		; <i1> [#uses=1]
+	br i1 %tmp521, label %cond_next527, label %cond_true524
+
+cond_true524:		; preds = %cond_next500
+	tail call void @tree_class_check_failed( %struct.tree_node* %tmp507, i32 2, i8* getelementptr ([28 x i8]* @.str, i32 0, i64 0), i32 1755, i8* getelementptr ([12 x i8]* @__FUNCTION__.22683, i32 0, i32 0) )
+	unreachable
+
+cond_next527:		; preds = %cond_next500
+	%tmp545 = and i32 %tmp513, 8192		; <i32> [#uses=1]
+	%tmp547 = and i32 %tmp32, -8193		; <i32> [#uses=1]
+	%tmp548 = or i32 %tmp547, %tmp545		; <i32> [#uses=1]
+	store i32 %tmp548, i32* %tmp3031, align 8
+	ret void
+
+UnifiedReturnBlock:		; preds = %cond_next57, %cond_next46, %cond_false
+	ret void
+}
+
+declare void @fancy_abort(i8*, i32, i8*)
+
+declare void @tree_class_check_failed(%struct.tree_node*, i32, i8*, i32, i8*)
+
+declare i32 @smallest_mode_for_size(i32, i32)
+
+declare %struct.tree_node* @size_int_kind(i64, i32)
+
+declare void @tree_check_failed(%struct.tree_node*, i8*, i32, i8*, ...)
+
+declare i32 @tree_int_cst_sgn(%struct.tree_node*)
diff --git a/test/CodeGen/X86/2007-10-16-fp80_select.ll b/test/CodeGen/X86/2007-10-16-fp80_select.ll
new file mode 100644
index 0000000..3f9845c
--- /dev/null
+++ b/test/CodeGen/X86/2007-10-16-fp80_select.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin9"
+        %struct.wxPoint2DInt = type { i32, i32 }
+
+define x86_fp80 @_ZNK12wxPoint2DInt14GetVectorAngleEv(%struct.wxPoint2DInt* %this) {
+entry:
+        br i1 false, label %cond_true, label %UnifiedReturnBlock
+
+cond_true:              ; preds = %entry
+        %tmp8 = load i32* null, align 4         ; <i32> [#uses=1]
+        %tmp9 = icmp sgt i32 %tmp8, -1          ; <i1> [#uses=1]
+        %retval = select i1 %tmp9, x86_fp80 0xK4005B400000000000000, x86_fp80 0xK40078700000000000000           ; <x86_fp80> [#uses=1]
+        ret x86_fp80 %retval
+
+UnifiedReturnBlock:             ; preds = %entry
+        ret x86_fp80 0xK4005B400000000000000
+}
diff --git a/test/CodeGen/X86/2007-10-17-IllegalAsm.ll b/test/CodeGen/X86/2007-10-17-IllegalAsm.ll
new file mode 100644
index 0000000..c0bb55e
--- /dev/null
+++ b/test/CodeGen/X86/2007-10-17-IllegalAsm.ll
@@ -0,0 +1,87 @@
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | grep addb | not grep x
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | grep cmpb | not grep x
+; PR1734
+
+target triple = "x86_64-unknown-linux-gnu"
+	%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+	%struct.eh_status = type opaque
+	%struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.location_t, i32, i8*, %struct.rtx_def** }
+	%struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
+	%struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.varray_head_tag*, %struct.temp_slot*, i32, %struct.var_refs_queue*, i32, i32, %struct.rtvec_def*, %struct.tree_node*, i32, i32, i32, %struct.machine_function*, i32, i32, i8, i8, %struct.language_function*, %struct.rtx_def*, i32, i32, i32, i32, %struct.location_t, %struct.varray_head_tag*, %struct.tree_node*, %struct.tree_node*, i8, i8, i8 }
+	%struct.initial_value_struct = type opaque
+	%struct.lang_decl = type opaque
+	%struct.language_function = type opaque
+	%struct.location_t = type { i8*, i32 }
+	%struct.machine_function = type { %struct.stack_local_entry*, i8*, %struct.rtx_def*, i32, i32, i32, i32, i32 }
+	%struct.rtunion = type { i8* }
+	%struct.rtvec_def = type { i32, [1 x %struct.rtx_def*] }
+	%struct.rtx_def = type { i16, i8, i8, %struct.u }
+	%struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* }
+	%struct.stack_local_entry = type opaque
+	%struct.temp_slot = type opaque
+	%struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %union.tree_ann_d*, i8, i8, i8, i8, i8 }
+	%struct.tree_decl = type { %struct.tree_common, %struct.location_t, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, i8, i8, i32, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.tree_decl_u2, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* }
+	%struct.tree_decl_u1 = type { i64 }
+	%struct.tree_decl_u2 = type { %struct.function* }
+	%struct.tree_node = type { %struct.tree_decl }
+	%struct.u = type { [1 x %struct.rtunion] }
+	%struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
+	%struct.varasm_status = type opaque
+	%struct.varray_data = type { [1 x i64] }
+	%struct.varray_head_tag = type { i64, i64, i32, i8*, %struct.varray_data }
+	%union.tree_ann_d = type opaque
+
+define void @layout_type(%struct.tree_node* %type) {
+entry:
+	%tmp32 = load i32* null, align 8		; <i32> [#uses=3]
+	%tmp3435 = trunc i32 %tmp32 to i8		; <i8> [#uses=1]
+	%tmp53 = icmp eq %struct.tree_node* null, null		; <i1> [#uses=1]
+	br i1 %tmp53, label %cond_next57, label %UnifiedReturnBlock
+
+cond_next57:		; preds = %entry
+	%tmp65 = and i32 %tmp32, 255		; <i32> [#uses=1]
+	switch i32 %tmp65, label %UnifiedReturnBlock [
+		 i32 6, label %bb140
+		 i32 7, label %bb140
+		 i32 8, label %bb140
+		 i32 13, label %bb478
+	]
+
+bb140:		; preds = %cond_next57, %cond_next57, %cond_next57
+	%tmp219 = load i32* null, align 8		; <i32> [#uses=1]
+	%tmp221222 = trunc i32 %tmp219 to i8		; <i8> [#uses=1]
+	%tmp223 = icmp eq i8 %tmp221222, 24		; <i1> [#uses=1]
+	br i1 %tmp223, label %cond_true226, label %cond_next340
+
+cond_true226:		; preds = %bb140
+	switch i8 %tmp3435, label %cond_true288 [
+		 i8 6, label %cond_next340
+		 i8 9, label %cond_next340
+		 i8 7, label %cond_next340
+		 i8 8, label %cond_next340
+		 i8 10, label %cond_next340
+	]
+
+cond_true288:		; preds = %cond_true226
+	unreachable
+
+cond_next340:		; preds = %cond_true226, %cond_true226, %cond_true226, %cond_true226, %cond_true226, %bb140
+	ret void
+
+bb478:		; preds = %cond_next57
+	br i1 false, label %cond_next500, label %cond_true497
+
+cond_true497:		; preds = %bb478
+	unreachable
+
+cond_next500:		; preds = %bb478
+	%tmp513 = load i32* null, align 8		; <i32> [#uses=1]
+	%tmp545 = and i32 %tmp513, 8192		; <i32> [#uses=1]
+	%tmp547 = and i32 %tmp32, -8193		; <i32> [#uses=1]
+	%tmp548 = or i32 %tmp547, %tmp545		; <i32> [#uses=1]
+	store i32 %tmp548, i32* null, align 8
+	ret void
+
+UnifiedReturnBlock:		; preds = %cond_next57, %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll b/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll
new file mode 100644
index 0000000..600bd1f
--- /dev/null
+++ b/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | grep inc | not grep PTR
+
+define i16 @t(i32* %bitptr, i32* %source, i8** %byteptr, i32 %scale, i32 %round) signext  {
+entry:
+	br label %bb
+
+bb:		; preds = %cond_next391, %entry
+	%cnt.0 = phi i32 [ 0, %entry ], [ %tmp422445, %cond_next391 ]		; <i32> [#uses=1]
+	%v.1 = phi i32 [ undef, %entry ], [ %tmp411, %cond_next391 ]		; <i32> [#uses=0]
+	br i1 false, label %cond_true, label %cond_next127
+
+cond_true:		; preds = %bb
+	store i8* null, i8** %byteptr, align 4
+	store i8* null, i8** %byteptr, align 4
+	br label %cond_next127
+
+cond_next127:		; preds = %cond_true, %bb
+	%tmp151 = add i32 0, %round		; <i32> [#uses=1]
+	%tmp153 = ashr i32 %tmp151, %scale		; <i32> [#uses=2]
+	%tmp154155 = trunc i32 %tmp153 to i16		; <i16> [#uses=1]
+	%tmp154155156 = sext i16 %tmp154155 to i32		; <i32> [#uses=1]
+	%tmp158 = xor i32 %tmp154155156, %tmp153		; <i32> [#uses=1]
+	%tmp160 = or i32 %tmp158, %cnt.0		; <i32> [#uses=1]
+	%tmp171 = load i32* %bitptr, align 4		; <i32> [#uses=1]
+	%tmp180181 = sext i16 0 to i32		; <i32> [#uses=3]
+	%tmp183 = add i32 %tmp160, 1		; <i32> [#uses=1]
+	br i1 false, label %cond_true188, label %cond_next245
+
+cond_true188:		; preds = %cond_next127
+	ret i16 0
+
+cond_next245:		; preds = %cond_next127
+	%tmp249 = ashr i32 %tmp180181, 8		; <i32> [#uses=1]
+	%tmp250 = add i32 %tmp171, %tmp249		; <i32> [#uses=1]
+	%tmp253444 = lshr i32 %tmp180181, 4		; <i32> [#uses=1]
+	%tmp254 = and i32 %tmp253444, 15		; <i32> [#uses=1]
+	%tmp256 = and i32 %tmp180181, 15		; <i32> [#uses=2]
+	%tmp264 = icmp ugt i32 %tmp250, 15		; <i1> [#uses=1]
+	br i1 %tmp264, label %cond_true267, label %cond_next391
+
+cond_true267:		; preds = %cond_next245
+	store i8* null, i8** %byteptr, align 4
+	store i8* null, i8** %byteptr, align 4
+	br i1 false, label %cond_true289, label %cond_next327
+
+cond_true289:		; preds = %cond_true267
+	ret i16 0
+
+cond_next327:		; preds = %cond_true267
+	br i1 false, label %cond_true343, label %cond_next385
+
+cond_true343:		; preds = %cond_next327
+	%tmp345 = load i8** %byteptr, align 4		; <i8*> [#uses=1]
+	store i8* null, i8** %byteptr, align 4
+	br i1 false, label %cond_next385, label %cond_true352
+
+cond_true352:		; preds = %cond_true343
+	store i8* %tmp345, i8** %byteptr, align 4
+	br i1 false, label %cond_true364, label %cond_next385
+
+cond_true364:		; preds = %cond_true352
+	ret i16 0
+
+cond_next385:		; preds = %cond_true352, %cond_true343, %cond_next327
+	br label %cond_next391
+
+cond_next391:		; preds = %cond_next385, %cond_next245
+	%tmp393 = load i32* %source, align 4		; <i32> [#uses=1]
+	%tmp395 = load i32* %bitptr, align 4		; <i32> [#uses=2]
+	%tmp396 = shl i32 %tmp393, %tmp395		; <i32> [#uses=1]
+	%tmp398 = sub i32 32, %tmp256		; <i32> [#uses=1]
+	%tmp405 = lshr i32 %tmp396, 31		; <i32> [#uses=1]
+	%tmp406 = add i32 %tmp405, -1		; <i32> [#uses=1]
+	%tmp409 = lshr i32 %tmp406, %tmp398		; <i32> [#uses=1]
+	%tmp411 = sub i32 0, %tmp409		; <i32> [#uses=1]
+	%tmp422445 = add i32 %tmp254, %tmp183		; <i32> [#uses=2]
+	%tmp426447 = add i32 %tmp395, %tmp256		; <i32> [#uses=1]
+	store i32 %tmp426447, i32* %bitptr, align 4
+	%tmp429448 = icmp ult i32 %tmp422445, 63		; <i1> [#uses=1]
+	br i1 %tmp429448, label %bb, label %UnifiedReturnBlock
+
+UnifiedReturnBlock:		; preds = %cond_next391
+	ret i16 0
+}
diff --git a/test/CodeGen/X86/2007-10-28-inlineasm-q-modifier.ll b/test/CodeGen/X86/2007-10-28-inlineasm-q-modifier.ll
new file mode 100644
index 0000000..984094d
--- /dev/null
+++ b/test/CodeGen/X86/2007-10-28-inlineasm-q-modifier.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s
+; PR1748
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @kernel_init(i8* %unused) {
+entry:
+	call void asm sideeffect "foo ${0:q}", "=*imr"( i64* null )
+	ret i32 0
+}
+
diff --git a/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll b/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll
new file mode 100644
index 0000000..86d3bbf
--- /dev/null
+++ b/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86 | grep mov | count 1
+
+define i16 @t() signext  {
+entry:
+	%tmp180 = load i16* null, align 2		; <i16> [#uses=3]
+	%tmp180181 = sext i16 %tmp180 to i32		; <i32> [#uses=1]
+	%tmp185 = icmp slt i16 %tmp180, 0		; <i1> [#uses=1]
+	br i1 %tmp185, label %cond_true188, label %cond_next245
+
+cond_true188:		; preds = %entry
+	%tmp195196 = trunc i16 %tmp180 to i8		; <i8> [#uses=0]
+	ret i16 0
+
+cond_next245:		; preds = %entry
+	%tmp256 = and i32 %tmp180181, 15		; <i32> [#uses=0]
+	ret i16 0
+}
diff --git a/test/CodeGen/X86/2007-10-30-LSRCrash.ll b/test/CodeGen/X86/2007-10-30-LSRCrash.ll
new file mode 100644
index 0000000..42db98b
--- /dev/null
+++ b/test/CodeGen/X86/2007-10-30-LSRCrash.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -march=x86
+
+define i32 @unique(i8* %full, i32 %p, i32 %len, i32 %mode, i32 %verbos, i32 %flags) {
+entry:
+	br i1 false, label %cond_true15, label %cond_next107
+
+cond_true15:		; preds = %entry
+	br i1 false, label %bb98.preheader, label %bb
+
+bb:		; preds = %cond_true15
+	ret i32 0
+
+bb98.preheader:		; preds = %cond_true15
+	br i1 false, label %bb103, label %bb69.outer
+
+bb76.split:		; preds = %bb69.outer.split.split, %bb69.us208
+	br i1 false, label %bb103, label %bb69.outer
+
+bb69.outer:		; preds = %bb76.split, %bb98.preheader
+	%from.0.reg2mem.0.ph.rec = phi i32 [ %tmp75.rec, %bb76.split ], [ 0, %bb98.preheader ]		; <i32> [#uses=1]
+	%tmp75.rec = add i32 %from.0.reg2mem.0.ph.rec, 1		; <i32> [#uses=2]
+	%tmp75 = getelementptr i8* null, i32 %tmp75.rec		; <i8*> [#uses=6]
+	br i1 false, label %bb69.us208, label %bb69.outer.split.split
+
+bb69.us208:		; preds = %bb69.outer
+	switch i32 0, label %bb76.split [
+		 i32 47, label %bb89
+		 i32 58, label %bb89
+		 i32 92, label %bb89
+	]
+
+bb69.outer.split.split:		; preds = %bb69.outer
+	switch i8 0, label %bb76.split [
+		 i8 47, label %bb89
+		 i8 58, label %bb89
+		 i8 92, label %bb89
+	]
+
+bb89:		; preds = %bb69.outer.split.split, %bb69.outer.split.split, %bb69.outer.split.split, %bb69.us208, %bb69.us208, %bb69.us208
+	%tmp75.lcssa189 = phi i8* [ %tmp75, %bb69.us208 ], [ %tmp75, %bb69.us208 ], [ %tmp75, %bb69.us208 ], [ %tmp75, %bb69.outer.split.split ], [ %tmp75, %bb69.outer.split.split ], [ %tmp75, %bb69.outer.split.split ]		; <i8*> [#uses=0]
+	ret i32 0
+
+bb103:		; preds = %bb76.split, %bb98.preheader
+	ret i32 0
+
+cond_next107:		; preds = %entry
+	ret i32 0
+}
diff --git a/test/CodeGen/X86/2007-10-31-extractelement-i64.ll b/test/CodeGen/X86/2007-10-31-extractelement-i64.ll
new file mode 100644
index 0000000..1b8e67d
--- /dev/null
+++ b/test/CodeGen/X86/2007-10-31-extractelement-i64.ll
@@ -0,0 +1,82 @@
+; RUN: llc < %s -march=x86 -mattr=sse2
+; ModuleID = 'yyy.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin8"
+
+define <1 x i64> @a(<2 x i64> %__A) {
+entry:
+	%__A_addr = alloca <2 x i64>		; <<2 x i64>*> [#uses=2]
+	%retval = alloca <1 x i64>, align 8		; <<1 x i64>*> [#uses=3]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	store <2 x i64> %__A, <2 x i64>* %__A_addr
+	%tmp = load <2 x i64>* %__A_addr, align 16		; <<2 x i64>> [#uses=1]
+	%tmp1 = bitcast <2 x i64> %tmp to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%tmp2 = extractelement <2 x i64> %tmp1, i32 0		; <i64> [#uses=1]
+	%tmp3 = bitcast i64 %tmp2 to <1 x i64>		; <<1 x i64>> [#uses=1]
+	store <1 x i64> %tmp3, <1 x i64>* %retval, align 8
+	%tmp4 = load <1 x i64>* %retval, align 8		; <<1 x i64>> [#uses=0]
+	br label %return
+
+return:		; preds = %entry
+	%retval5 = load <1 x i64>* %retval		; <<1 x i64>> [#uses=1]
+	ret <1 x i64> %retval5
+}
+
+define <1 x i64> @b(<2 x i64> %__A) {
+entry:
+	%__A_addr = alloca <2 x i64>		; <<2 x i64>*> [#uses=2]
+	%retval = alloca <1 x i64>, align 8		; <<1 x i64>*> [#uses=3]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	store <2 x i64> %__A, <2 x i64>* %__A_addr
+	%tmp = load <2 x i64>* %__A_addr, align 16		; <<2 x i64>> [#uses=1]
+	%tmp1 = bitcast <2 x i64> %tmp to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%tmp2 = extractelement <2 x i64> %tmp1, i32 1		; <i64> [#uses=1]
+	%tmp3 = bitcast i64 %tmp2 to <1 x i64>		; <<1 x i64>> [#uses=1]
+	store <1 x i64> %tmp3, <1 x i64>* %retval, align 8
+	%tmp4 = load <1 x i64>* %retval, align 8		; <<1 x i64>> [#uses=0]
+	br label %return
+
+return:		; preds = %entry
+	%retval5 = load <1 x i64>* %retval		; <<1 x i64>> [#uses=1]
+	ret <1 x i64> %retval5
+}
+
+define i64 @c(<2 x i64> %__A) {
+entry:
+	%__A_addr = alloca <2 x i64>		; <<2 x i64>*> [#uses=2]
+	%retval = alloca i64, align 8		; <i64*> [#uses=2]
+	%tmp = alloca i64, align 8		; <i64*> [#uses=2]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	store <2 x i64> %__A, <2 x i64>* %__A_addr
+	%tmp1 = load <2 x i64>* %__A_addr, align 16		; <<2 x i64>> [#uses=1]
+	%tmp2 = bitcast <2 x i64> %tmp1 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%tmp3 = extractelement <2 x i64> %tmp2, i32 0		; <i64> [#uses=1]
+	store i64 %tmp3, i64* %tmp, align 8
+	%tmp4 = load i64* %tmp, align 8		; <i64> [#uses=1]
+	store i64 %tmp4, i64* %retval, align 8
+	br label %return
+
+return:		; preds = %entry
+	%retval5 = load i64* %retval		; <i64> [#uses=1]
+	ret i64 %retval5
+}
+
+define i64 @d(<2 x i64> %__A) {
+entry:
+	%__A_addr = alloca <2 x i64>		; <<2 x i64>*> [#uses=2]
+	%retval = alloca i64, align 8		; <i64*> [#uses=2]
+	%tmp = alloca i64, align 8		; <i64*> [#uses=2]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	store <2 x i64> %__A, <2 x i64>* %__A_addr
+	%tmp1 = load <2 x i64>* %__A_addr, align 16		; <<2 x i64>> [#uses=1]
+	%tmp2 = bitcast <2 x i64> %tmp1 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%tmp3 = extractelement <2 x i64> %tmp2, i32 1		; <i64> [#uses=1]
+	store i64 %tmp3, i64* %tmp, align 8
+	%tmp4 = load i64* %tmp, align 8		; <i64> [#uses=1]
+	store i64 %tmp4, i64* %retval, align 8
+	br label %return
+
+return:		; preds = %entry
+	%retval5 = load i64* %retval		; <i64> [#uses=1]
+	ret i64 %retval5
+}
diff --git a/test/CodeGen/X86/2007-11-01-ISelCrash.ll b/test/CodeGen/X86/2007-11-01-ISelCrash.ll
new file mode 100644
index 0000000..019c6a8
--- /dev/null
+++ b/test/CodeGen/X86/2007-11-01-ISelCrash.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86
+
+        %"struct.K::JL" = type <{ i8 }>
+        %struct.jv = type { i64 }
+
+declare fastcc i64 @f(i32, %"struct.K::JL"*, i8*, i8*, %struct.jv*)
+
+define void @t(%"struct.K::JL"* %obj, i8* %name, i8* %sig, %struct.jv* %args) {
+entry:
+        %tmp5 = tail call fastcc i64 @f( i32 1, %"struct.K::JL"* %obj, i8* %name, i8* %sig, %struct.jv* %args )         ; <i64> [#uses=0]
+        ret void
+}
diff --git a/test/CodeGen/X86/2007-11-02-BadAsm.ll b/test/CodeGen/X86/2007-11-02-BadAsm.ll
new file mode 100644
index 0000000..4e11cda
--- /dev/null
+++ b/test/CodeGen/X86/2007-11-02-BadAsm.ll
@@ -0,0 +1,144 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movl | not grep rax
+
+	%struct.color_sample = type { i64 }
+	%struct.gs_matrix = type { float, i64, float, i64, float, i64, float, i64, float, i64, float, i64 }
+	%struct.ref = type { %struct.color_sample, i16, i16 }
+	%struct.status = type { %struct.gs_matrix, i8*, i32, i32, i8*, i32, i32, i32, i32, i32, i32, i32 }
+
+define i32 @ztype1imagepath(%struct.ref* %op) {
+entry:
+	br i1 false, label %cond_next, label %UnifiedReturnBlock
+
+cond_next:		; preds = %entry
+	br i1 false, label %cond_next68, label %UnifiedReturnBlock
+
+cond_next68:		; preds = %cond_next
+	%tmp5.i.i = malloc i8, i32 0		; <i8*> [#uses=2]
+	br i1 false, label %bb81.outer.i, label %xit.i
+
+bb81.outer.i:		; preds = %bb87.i, %cond_next68
+	%tmp67.i = add i32 0, 1		; <i32> [#uses=1]
+	br label %bb81.i
+
+bb61.i:		; preds = %bb81.i
+	%tmp71.i = getelementptr i8* %tmp5.i.i, i64 0		; <i8*> [#uses=1]
+	%tmp72.i = load i8* %tmp71.i, align 1		; <i8> [#uses=1]
+	%tmp73.i = icmp eq i8 %tmp72.i, 0		; <i1> [#uses=1]
+	br i1 %tmp73.i, label %bb81.i, label %xit.i
+
+bb81.i:		; preds = %bb61.i, %bb81.outer.i
+	br i1 false, label %bb87.i, label %bb61.i
+
+bb87.i:		; preds = %bb81.i
+	br i1 false, label %bb81.outer.i, label %xit.i
+
+xit.i:		; preds = %bb87.i, %bb61.i, %cond_next68
+	%lsbx.0.reg2mem.1.i = phi i32 [ 0, %cond_next68 ], [ 0, %bb61.i ], [ %tmp67.i, %bb87.i ]		; <i32> [#uses=1]
+	%tmp6162.i.i = fptrunc double 0.000000e+00 to float		; <float> [#uses=1]
+	%tmp67.i15.i = fptrunc double 0.000000e+00 to float		; <float> [#uses=1]
+	%tmp24.i27.i = icmp eq i64 0, 0		; <i1> [#uses=1]
+	br i1 %tmp24.i27.i, label %cond_next.i79.i, label %cond_true.i34.i
+
+cond_true.i34.i:		; preds = %xit.i
+	ret i32 0
+
+cond_next.i79.i:		; preds = %xit.i
+	%phitmp167.i = fptosi double 0.000000e+00 to i64		; <i64> [#uses=1]
+	%tmp142143.i = fpext float %tmp6162.i.i to double		; <double> [#uses=1]
+	%tmp2.i139.i = fadd double %tmp142143.i, 5.000000e-01		; <double> [#uses=1]
+	%tmp23.i140.i = fptosi double %tmp2.i139.i to i64		; <i64> [#uses=1]
+	br i1 false, label %cond_true.i143.i, label %round_coord.exit148.i
+
+cond_true.i143.i:		; preds = %cond_next.i79.i
+	%tmp8.i142.i = icmp sgt i64 %tmp23.i140.i, -32768		; <i1> [#uses=1]
+	br i1 %tmp8.i142.i, label %cond_true11.i145.i, label %round_coord.exit148.i
+
+cond_true11.i145.i:		; preds = %cond_true.i143.i
+	ret i32 0
+
+round_coord.exit148.i:		; preds = %cond_true.i143.i, %cond_next.i79.i
+	%tmp144149.i = phi i32 [ 32767, %cond_next.i79.i ], [ -32767, %cond_true.i143.i ]		; <i32> [#uses=1]
+	store i32 %tmp144149.i, i32* null, align 8
+	%tmp147148.i = fpext float %tmp67.i15.i to double		; <double> [#uses=1]
+	%tmp2.i128.i = fadd double %tmp147148.i, 5.000000e-01		; <double> [#uses=1]
+	%tmp23.i129.i = fptosi double %tmp2.i128.i to i64		; <i64> [#uses=2]
+	%tmp5.i130.i = icmp slt i64 %tmp23.i129.i, 32768		; <i1> [#uses=1]
+	br i1 %tmp5.i130.i, label %cond_true.i132.i, label %round_coord.exit137.i
+
+cond_true.i132.i:		; preds = %round_coord.exit148.i
+	%tmp8.i131.i = icmp sgt i64 %tmp23.i129.i, -32768		; <i1> [#uses=1]
+	br i1 %tmp8.i131.i, label %cond_true11.i134.i, label %round_coord.exit137.i
+
+cond_true11.i134.i:		; preds = %cond_true.i132.i
+	br label %round_coord.exit137.i
+
+round_coord.exit137.i:		; preds = %cond_true11.i134.i, %cond_true.i132.i, %round_coord.exit148.i
+	%tmp149138.i = phi i32 [ 0, %cond_true11.i134.i ], [ 32767, %round_coord.exit148.i ], [ -32767, %cond_true.i132.i ]		; <i32> [#uses=1]
+	br i1 false, label %cond_true.i121.i, label %round_coord.exit126.i
+
+cond_true.i121.i:		; preds = %round_coord.exit137.i
+	br i1 false, label %cond_true11.i123.i, label %round_coord.exit126.i
+
+cond_true11.i123.i:		; preds = %cond_true.i121.i
+	br label %round_coord.exit126.i
+
+round_coord.exit126.i:		; preds = %cond_true11.i123.i, %cond_true.i121.i, %round_coord.exit137.i
+	%tmp153127.i = phi i32 [ 0, %cond_true11.i123.i ], [ 32767, %round_coord.exit137.i ], [ -32767, %cond_true.i121.i ]		; <i32> [#uses=1]
+	br i1 false, label %cond_true.i110.i, label %round_coord.exit115.i
+
+cond_true.i110.i:		; preds = %round_coord.exit126.i
+	br i1 false, label %cond_true11.i112.i, label %round_coord.exit115.i
+
+cond_true11.i112.i:		; preds = %cond_true.i110.i
+	br label %round_coord.exit115.i
+
+round_coord.exit115.i:		; preds = %cond_true11.i112.i, %cond_true.i110.i, %round_coord.exit126.i
+	%tmp157116.i = phi i32 [ 0, %cond_true11.i112.i ], [ 32767, %round_coord.exit126.i ], [ -32767, %cond_true.i110.i ]		; <i32> [#uses=2]
+	br i1 false, label %cond_true.i99.i, label %round_coord.exit104.i
+
+cond_true.i99.i:		; preds = %round_coord.exit115.i
+	br i1 false, label %cond_true11.i101.i, label %round_coord.exit104.i
+
+cond_true11.i101.i:		; preds = %cond_true.i99.i
+	%tmp1213.i100.i = trunc i64 %phitmp167.i to i32		; <i32> [#uses=1]
+	br label %cond_next172.i
+
+round_coord.exit104.i:		; preds = %cond_true.i99.i, %round_coord.exit115.i
+	%UnifiedRetVal.i102.i = phi i32 [ 32767, %round_coord.exit115.i ], [ -32767, %cond_true.i99.i ]		; <i32> [#uses=1]
+	%tmp164.i = call fastcc i32 @put_int( %struct.status* null, i32 %tmp157116.i )		; <i32> [#uses=0]
+	br label %cond_next172.i
+
+cond_next172.i:		; preds = %round_coord.exit104.i, %cond_true11.i101.i
+	%tmp161105.reg2mem.0.i = phi i32 [ %tmp1213.i100.i, %cond_true11.i101.i ], [ %UnifiedRetVal.i102.i, %round_coord.exit104.i ]		; <i32> [#uses=1]
+	%tmp174.i = icmp eq i32 %tmp153127.i, 0		; <i1> [#uses=1]
+	%bothcond.i = and i1 false, %tmp174.i		; <i1> [#uses=1]
+	%tmp235.i = call fastcc i32 @put_int( %struct.status* null, i32 %tmp149138.i )		; <i32> [#uses=0]
+	%tmp245.i = load i8** null, align 8		; <i8*> [#uses=2]
+	%tmp246.i = getelementptr i8* %tmp245.i, i64 1		; <i8*> [#uses=1]
+	br i1 %bothcond.i, label %cond_next254.i, label %bb259.i
+
+cond_next254.i:		; preds = %cond_next172.i
+	store i8 13, i8* %tmp245.i, align 1
+	br label %bb259.i
+
+bb259.i:		; preds = %cond_next254.i, %cond_next172.i
+	%storemerge.i = phi i8* [ %tmp246.i, %cond_next254.i ], [ null, %cond_next172.i ]		; <i8*> [#uses=0]
+	%tmp261.i = shl i32 %lsbx.0.reg2mem.1.i, 2		; <i32> [#uses=1]
+	store i32 %tmp261.i, i32* null, align 8
+	%tmp270.i = add i32 0, %tmp157116.i		; <i32> [#uses=1]
+	store i32 %tmp270.i, i32* null, align 8
+	%tmp275.i = add i32 0, %tmp161105.reg2mem.0.i		; <i32> [#uses=0]
+	br i1 false, label %trace_cells.exit.i, label %bb.preheader.i.i
+
+bb.preheader.i.i:		; preds = %bb259.i
+	ret i32 0
+
+trace_cells.exit.i:		; preds = %bb259.i
+	free i8* %tmp5.i.i
+	ret i32 0
+
+UnifiedReturnBlock:		; preds = %cond_next, %entry
+	ret i32 -20
+}
+
+declare fastcc i32 @put_int(%struct.status*, i32)
diff --git a/test/CodeGen/X86/2007-11-03-x86-64-q-constraint.ll b/test/CodeGen/X86/2007-11-03-x86-64-q-constraint.ll
new file mode 100644
index 0000000..27ec826
--- /dev/null
+++ b/test/CodeGen/X86/2007-11-03-x86-64-q-constraint.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s
+; PR1763
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @yield() {
+        %tmp9 = call i64 asm sideeffect "xchgb ${0:b},$1", "=q,*m,0,~{dirflag},~{fpsr},~{flags},~{memory}"( i64* null, i64 0 )   ; <i64>
+        ret void
+}
diff --git a/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll b/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll
new file mode 100644
index 0000000..4045618
--- /dev/null
+++ b/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu
+; PR1766
+
+        %struct.dentry = type { %struct.dentry_operations* }
+        %struct.dentry_operations = type { i32 (%struct.dentry*, %struct.qstr*)* }
+        %struct.qstr = type { i32, i32, i8* }
+
+define %struct.dentry* @d_hash_and_lookup(%struct.dentry* %dir, %struct.qstr* %name) {
+entry:
+        br i1 false, label %bb37, label %bb
+
+bb:             ; preds = %bb, %entry
+        %name8.0.reg2mem.0.rec = phi i64 [ %indvar.next, %bb ], [ 0, %entry ]           ; <i64> [#uses=1]
+        %hash.0.reg2mem.0 = phi i64 [ %tmp27, %bb ], [ 0, %entry ]              ; <i64> [#uses=1]
+        %tmp13 = load i8* null, align 1         ; <i8> [#uses=1]
+        %tmp1314 = zext i8 %tmp13 to i64                ; <i64> [#uses=1]
+        %tmp25 = lshr i64 %tmp1314, 4           ; <i64> [#uses=1]
+        %tmp22 = add i64 %tmp25, %hash.0.reg2mem.0              ; <i64> [#uses=1]
+        %tmp26 = add i64 %tmp22, 0              ; <i64> [#uses=1]
+        %tmp27 = mul i64 %tmp26, 11             ; <i64> [#uses=2]
+        %indvar.next = add i64 %name8.0.reg2mem.0.rec, 1                ; <i64> [#uses=2]
+        %exitcond = icmp eq i64 %indvar.next, 0         ; <i1> [#uses=1]
+        br i1 %exitcond, label %bb37.loopexit, label %bb
+
+bb37.loopexit:          ; preds = %bb
+        %phitmp = trunc i64 %tmp27 to i32               ; <i32> [#uses=1]
+        br label %bb37
+
+bb37:           ; preds = %bb37.loopexit, %entry
+        %hash.0.reg2mem.1 = phi i32 [ %phitmp, %bb37.loopexit ], [ 0, %entry ]          ; <i32> [#uses=1]
+        store i32 %hash.0.reg2mem.1, i32* null, align 8
+        %tmp75 = tail call i32 null( %struct.dentry* %dir, %struct.qstr* %name )                ; <i32> [#uses=0]
+        %tmp84 = tail call i32 (...)* @d_lookup( %struct.dentry* %dir, %struct.qstr* %name )            ; <i32> [#uses=0]
+        ret %struct.dentry* null
+}
+
+declare i32 @d_lookup(...)
diff --git a/test/CodeGen/X86/2007-11-04-LiveVariablesBug.ll b/test/CodeGen/X86/2007-11-04-LiveVariablesBug.ll
new file mode 100644
index 0000000..6b871aa
--- /dev/null
+++ b/test/CodeGen/X86/2007-11-04-LiveVariablesBug.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu
+; PR1767
+
+define void @xor_sse_2(i64 %bytes, i64* %p1, i64* %p2) {
+entry:
+        %p2_addr = alloca i64*          ; <i64**> [#uses=2]
+        %lines = alloca i32             ; <i32*> [#uses=2]
+        store i64* %p2, i64** %p2_addr, align 8
+        %tmp1 = lshr i64 %bytes, 8              ; <i64> [#uses=1]
+        %tmp12 = trunc i64 %tmp1 to i32         ; <i32> [#uses=2]
+        store i32 %tmp12, i32* %lines, align 4
+        %tmp6 = call i64* asm sideeffect "foo",
+"=r,=*r,=*r,r,0,1,2,~{dirflag},~{fpsr},~{flags},~{memory}"( i64** %p2_addr,
+i32* %lines, i64 256, i64* %p1, i64* %p2, i32 %tmp12 )              ; <i64*> [#uses=0]
+        ret void
+}
diff --git a/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll b/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
new file mode 100644
index 0000000..8e586a7
--- /dev/null
+++ b/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -relocation-model=static | grep {foo _str$}
+; PR1761
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin8"
+@str = internal constant [12 x i8] c"init/main.c\00"		; <[12 x i8]*> [#uses=1]
+
+define i32 @unknown_bootoption() {
+entry:
+	tail call void asm sideeffect "foo ${0:c}\0A", "i,~{dirflag},~{fpsr},~{flags}"( i8* getelementptr ([12 x i8]* @str, i32 0, i64 0) )
+	ret i32 undef
+}
diff --git a/test/CodeGen/X86/2007-11-06-InstrSched.ll b/test/CodeGen/X86/2007-11-06-InstrSched.ll
new file mode 100644
index 0000000..f6db0d0
--- /dev/null
+++ b/test/CodeGen/X86/2007-11-06-InstrSched.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep lea
+
+define float @foo(i32* %x, float* %y, i32 %c) nounwind {
+entry:
+	%tmp2132 = icmp eq i32 %c, 0		; <i1> [#uses=1]
+	br i1 %tmp2132, label %bb23, label %bb18
+
+bb18:		; preds = %bb18, %entry
+	%i.0.reg2mem.0 = phi i32 [ 0, %entry ], [ %tmp17, %bb18 ]		; <i32> [#uses=3]
+	%res.0.reg2mem.0 = phi float [ 0.000000e+00, %entry ], [ %tmp14, %bb18 ]		; <float> [#uses=1]
+	%tmp3 = getelementptr i32* %x, i32 %i.0.reg2mem.0		; <i32*> [#uses=1]
+	%tmp4 = load i32* %tmp3, align 4		; <i32> [#uses=1]
+	%tmp45 = sitofp i32 %tmp4 to float		; <float> [#uses=1]
+	%tmp8 = getelementptr float* %y, i32 %i.0.reg2mem.0		; <float*> [#uses=1]
+	%tmp9 = load float* %tmp8, align 4		; <float> [#uses=1]
+	%tmp11 = fmul float %tmp9, %tmp45		; <float> [#uses=1]
+	%tmp14 = fadd float %tmp11, %res.0.reg2mem.0		; <float> [#uses=2]
+	%tmp17 = add i32 %i.0.reg2mem.0, 1		; <i32> [#uses=2]
+	%tmp21 = icmp ult i32 %tmp17, %c		; <i1> [#uses=1]
+	br i1 %tmp21, label %bb18, label %bb23
+
+bb23:		; preds = %bb18, %entry
+	%res.0.reg2mem.1 = phi float [ 0.000000e+00, %entry ], [ %tmp14, %bb18 ]		; <float> [#uses=1]
+	ret float %res.0.reg2mem.1
+}
diff --git a/test/CodeGen/X86/2007-11-07-MulBy4.ll b/test/CodeGen/X86/2007-11-07-MulBy4.ll
new file mode 100644
index 0000000..d5b630b
--- /dev/null
+++ b/test/CodeGen/X86/2007-11-07-MulBy4.ll
@@ -0,0 +1,129 @@
+; RUN: llc < %s -march=x86 | not grep imul
+
+	%struct.eebb = type { %struct.eebb*, i16* }
+	%struct.hf = type { %struct.hf*, i16*, i8*, i32, i32, %struct.eebb*, i32, i32, i8*, i8*, i8*, i8*, i16*, i8*, i16*, %struct.ri, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [30 x i32], %struct.eebb, i32, i8* }
+	%struct.foo_data = type { i32, i32, i32, i32*, i32, i32, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i16*, i16*, i16*, i16*, i32, i32, i32, %struct.ri*, i8*, %struct.hf* }
+	%struct.ri = type { %struct.ri*, i32, i8*, i16*, i32*, i32 }
+
+define fastcc i32 @foo(i16* %eptr, i8* %ecode, %struct.foo_data* %md, i32 %ims) {
+entry:
+	%tmp36 = load i32* null, align 4		; <i32> [#uses=1]
+	%tmp37 = icmp ult i32 0, %tmp36		; <i1> [#uses=1]
+	br i1 %tmp37, label %cond_next79, label %cond_true
+
+cond_true:		; preds = %entry
+	ret i32 0
+
+cond_next79:		; preds = %entry
+	%tmp85 = load i32* null, align 4		; <i32> [#uses=1]
+	%tmp86 = icmp ult i32 0, %tmp85		; <i1> [#uses=1]
+	br i1 %tmp86, label %cond_next130, label %cond_true89
+
+cond_true89:		; preds = %cond_next79
+	ret i32 0
+
+cond_next130:		; preds = %cond_next79
+	%tmp173 = icmp eq i32 0, 0		; <i1> [#uses=1]
+	br i1 %tmp173, label %cond_next201, label %cond_true176
+
+cond_true176:		; preds = %cond_next130
+	ret i32 0
+
+cond_next201:		; preds = %cond_next130
+	switch i32 0, label %bb19955 [
+		 i32 0, label %bb1266
+		 i32 1, label %bb5018
+		 i32 2, label %bb5075
+		 i32 3, label %cond_true5534
+		 i32 4, label %cond_true5534
+		 i32 5, label %bb6039
+		 i32 6, label %bb6181
+		 i32 7, label %bb6323
+		 i32 8, label %bb6463
+		 i32 9, label %bb6605
+		 i32 10, label %bb6746
+		 i32 11, label %cond_next5871
+		 i32 16, label %bb5452
+		 i32 17, label %bb5395
+		 i32 19, label %bb4883
+		 i32 20, label %bb5136
+		 i32 23, label %bb12899
+		 i32 64, label %bb2162
+		 i32 69, label %bb1447
+		 i32 70, label %bb1737
+		 i32 71, label %bb1447
+		 i32 72, label %bb1737
+		 i32 73, label %cond_true1984
+		 i32 75, label %bb740
+		 i32 80, label %bb552
+	]
+
+bb552:		; preds = %cond_next201
+	ret i32 0
+
+bb740:		; preds = %cond_next201
+	ret i32 0
+
+bb1266:		; preds = %cond_next201
+	ret i32 0
+
+bb1447:		; preds = %cond_next201, %cond_next201
+	ret i32 0
+
+bb1737:		; preds = %cond_next201, %cond_next201
+	ret i32 0
+
+cond_true1984:		; preds = %cond_next201
+	ret i32 0
+
+bb2162:		; preds = %cond_next201
+	ret i32 0
+
+bb4883:		; preds = %cond_next201
+	ret i32 0
+
+bb5018:		; preds = %cond_next201
+	ret i32 0
+
+bb5075:		; preds = %cond_next201
+	ret i32 0
+
+bb5136:		; preds = %cond_next201
+	ret i32 0
+
+bb5395:		; preds = %cond_next201
+	ret i32 0
+
+bb5452:		; preds = %cond_next201
+	ret i32 0
+
+cond_true5534:		; preds = %cond_next201, %cond_next201
+	ret i32 0
+
+cond_next5871:		; preds = %cond_next201
+	ret i32 0
+
+bb6039:		; preds = %cond_next201
+	ret i32 0
+
+bb6181:		; preds = %cond_next201
+	ret i32 0
+
+bb6323:		; preds = %cond_next201
+	ret i32 0
+
+bb6463:		; preds = %cond_next201
+	ret i32 0
+
+bb6605:		; preds = %cond_next201
+	ret i32 0
+
+bb6746:		; preds = %cond_next201
+	ret i32 0
+
+bb12899:		; preds = %cond_next201
+	ret i32 0
+
+bb19955:		; preds = %cond_next201
+	ret i32 0
+}
diff --git a/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll b/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll
new file mode 100644
index 0000000..9c004f9
--- /dev/null
+++ b/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s -march=x86 -x86-asm-syntax=att | grep movl | count 2
+; RUN: llc < %s -march=x86 -x86-asm-syntax=att | not grep movb
+
+	%struct.double_int = type { i64, i64 }
+	%struct.tree_common = type <{ i8, [3 x i8] }>
+	%struct.tree_int_cst = type { %struct.tree_common, %struct.double_int }
+	%struct.tree_node = type { %struct.tree_int_cst }
+@tree_code_type = external constant [0 x i32]		; <[0 x i32]*> [#uses=1]
+
+define i32 @simple_cst_equal(%struct.tree_node* %t1, %struct.tree_node* %t2) nounwind {
+entry:
+	%tmp2526 = bitcast %struct.tree_node* %t1 to i32*		; <i32*> [#uses=1]
+	br i1 false, label %UnifiedReturnBlock, label %bb21
+
+bb21:		; preds = %entry
+	%tmp27 = load i32* %tmp2526, align 4		; <i32> [#uses=1]
+	%tmp29 = and i32 %tmp27, 255		; <i32> [#uses=3]
+	%tmp2930 = trunc i32 %tmp29 to i8		; <i8> [#uses=1]
+	%tmp37 = load i32* null, align 4		; <i32> [#uses=1]
+	%tmp39 = and i32 %tmp37, 255		; <i32> [#uses=2]
+	%tmp3940 = trunc i32 %tmp39 to i8		; <i8> [#uses=1]
+	%tmp43 = add i32 %tmp29, -3		; <i32> [#uses=1]
+	%tmp44 = icmp ult i32 %tmp43, 3		; <i1> [#uses=1]
+	br i1 %tmp44, label %bb47.split, label %bb76
+
+bb47.split:		; preds = %bb21
+	ret i32 0
+
+bb76:		; preds = %bb21
+	br i1 false, label %bb82, label %bb146.split
+
+bb82:		; preds = %bb76
+	%tmp94 = getelementptr [0 x i32]* @tree_code_type, i32 0, i32 %tmp39		; <i32*> [#uses=1]
+	%tmp95 = load i32* %tmp94, align 4		; <i32> [#uses=1]
+	%tmp9596 = trunc i32 %tmp95 to i8		; <i8> [#uses=1]
+	%tmp98 = add i8 %tmp9596, -4		; <i8> [#uses=1]
+	%tmp99 = icmp ugt i8 %tmp98, 5		; <i1> [#uses=1]
+	br i1 %tmp99, label %bb102, label %bb106
+
+bb102:		; preds = %bb82
+	ret i32 0
+
+bb106:		; preds = %bb82
+	ret i32 0
+
+bb146.split:		; preds = %bb76
+	%tmp149 = icmp eq i8 %tmp2930, %tmp3940		; <i1> [#uses=1]
+	br i1 %tmp149, label %bb153, label %UnifiedReturnBlock
+
+bb153:		; preds = %bb146.split
+	switch i32 %tmp29, label %UnifiedReturnBlock [
+		 i32 0, label %bb155
+		 i32 1, label %bb187
+	]
+
+bb155:		; preds = %bb153
+	ret i32 0
+
+bb187:		; preds = %bb153
+	%tmp198 = icmp eq %struct.tree_node* %t1, %t2		; <i1> [#uses=1]
+	br i1 %tmp198, label %bb201, label %UnifiedReturnBlock
+
+bb201:		; preds = %bb187
+	ret i32 0
+
+UnifiedReturnBlock:		; preds = %bb187, %bb153, %bb146.split, %entry
+	ret i32 0
+}
diff --git a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
new file mode 100644
index 0000000..721d4c9
--- /dev/null
+++ b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
@@ -0,0 +1,86 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+; Increment in loop bb.i28.i adjusted to 2, to prevent loop reversal from
+; kicking in.
+
+declare fastcc void @rdft(i32, i32, double*, i32*, double*)
+
+define fastcc void @mp_sqrt(i32 %n, i32 %radix, i32* %in, i32* %out, i32* %tmp1, i32* %tmp2, i32 %nfft, double* %tmp1fft, double* %tmp2fft, i32* %ip, double* %w) nounwind {
+entry:
+	br label %bb.i5
+
+bb.i5:		; preds = %bb.i5, %entry
+	%nfft_init.0.i = phi i32 [ 1, %entry ], [ %tmp7.i3, %bb.i5 ]		; <i32> [#uses=1]
+	%foo = phi i1 [1, %entry], [0, %bb.i5]
+	%tmp7.i3 = shl i32 %nfft_init.0.i, 1		; <i32> [#uses=2]
+	br i1 %foo, label %bb.i5, label %mp_unexp_mp2d.exit.i
+
+mp_unexp_mp2d.exit.i:		; preds = %bb.i5
+	br i1 %foo, label %cond_next.i, label %cond_true.i
+
+cond_true.i:		; preds = %mp_unexp_mp2d.exit.i
+	ret void
+
+cond_next.i:		; preds = %mp_unexp_mp2d.exit.i
+	%tmp22.i = sdiv i32 0, 2		; <i32> [#uses=2]
+	br i1 %foo, label %cond_true29.i, label %cond_next36.i
+
+cond_true29.i:		; preds = %cond_next.i
+	ret void
+
+cond_next36.i:		; preds = %cond_next.i
+	store i32 %tmp22.i, i32* null, align 4
+	%tmp8.i14.i = select i1 %foo, i32 1, i32 0		; <i32> [#uses=1]
+	br label %bb.i28.i
+
+bb.i28.i:		; preds = %bb.i28.i, %cond_next36.i
+; CHECK: %bb.i28.i
+; CHECK: addl $2
+; CHECK: addl $2
+	%j.0.reg2mem.0.i16.i = phi i32 [ 0, %cond_next36.i ], [ %indvar.next39.i, %bb.i28.i ]		; <i32> [#uses=2]
+	%din_addr.1.reg2mem.0.i17.i = phi double [ 0.000000e+00, %cond_next36.i ], [ %tmp16.i25.i, %bb.i28.i ]		; <double> [#uses=1]
+	%tmp1.i18.i = fptosi double %din_addr.1.reg2mem.0.i17.i to i32		; <i32> [#uses=2]
+	%tmp4.i19.i = icmp slt i32 %tmp1.i18.i, %radix		; <i1> [#uses=1]
+	%x.0.i21.i = select i1 %tmp4.i19.i, i32 %tmp1.i18.i, i32 0		; <i32> [#uses=1]
+	%tmp41.sum.i = add i32 %j.0.reg2mem.0.i16.i, 2		; <i32> [#uses=0]
+	%tmp1213.i23.i = sitofp i32 %x.0.i21.i to double		; <double> [#uses=1]
+	%tmp15.i24.i = fsub double 0.000000e+00, %tmp1213.i23.i		; <double> [#uses=1]
+	%tmp16.i25.i = fmul double 0.000000e+00, %tmp15.i24.i		; <double> [#uses=1]
+	%indvar.next39.i = add i32 %j.0.reg2mem.0.i16.i, 2		; <i32> [#uses=2]
+	%exitcond40.i = icmp eq i32 %indvar.next39.i, %tmp8.i14.i		; <i1> [#uses=1]
+	br i1 %exitcond40.i, label %mp_unexp_d2mp.exit29.i, label %bb.i28.i
+
+mp_unexp_d2mp.exit29.i:		; preds = %bb.i28.i
+	%tmp46.i = sub i32 0, %tmp22.i		; <i32> [#uses=1]
+	store i32 %tmp46.i, i32* null, align 4
+	br i1 %exitcond40.i, label %bb.i.i, label %mp_sqrt_init.exit
+
+bb.i.i:		; preds = %bb.i.i, %mp_unexp_d2mp.exit29.i
+	br label %bb.i.i
+
+mp_sqrt_init.exit:		; preds = %mp_unexp_d2mp.exit29.i
+	tail call fastcc void @mp_mul_csqu( i32 0, double* %tmp1fft )
+	tail call fastcc void @rdft( i32 0, i32 -1, double* null, i32* %ip, double* %w )
+	tail call fastcc void @mp_mul_d2i( i32 0, i32 %radix, i32 0, double* %tmp1fft, i32* %tmp2 )
+	br i1 %exitcond40.i, label %cond_false.i, label %cond_true36.i
+
+cond_true36.i:		; preds = %mp_sqrt_init.exit
+	ret void
+
+cond_false.i:		; preds = %mp_sqrt_init.exit
+	tail call fastcc void @mp_round( i32 0, i32 %radix, i32 0, i32* %out )
+	tail call fastcc void @mp_add( i32 0, i32 %radix, i32* %tmp1, i32* %tmp2, i32* %tmp1 )
+	tail call fastcc void @mp_sub( i32 0, i32 %radix, i32* %in, i32* %tmp2, i32* %tmp2 )
+	tail call fastcc void @mp_round( i32 0, i32 %radix, i32 0, i32* %tmp1 )
+	tail call fastcc void @mp_mul_d2i( i32 0, i32 %radix, i32 %tmp7.i3, double* %tmp2fft, i32* %tmp2 )
+	ret void
+}
+
+declare fastcc void @mp_add(i32, i32, i32*, i32*, i32*)
+
+declare fastcc void @mp_sub(i32, i32, i32*, i32*, i32*)
+
+declare fastcc void @mp_round(i32, i32, i32, i32*)
+
+declare fastcc void @mp_mul_csqu(i32, double*)
+
+declare fastcc void @mp_mul_d2i(i32, i32, i32, double*, i32*)
diff --git a/test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll b/test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll
new file mode 100644
index 0000000..ca995cc
--- /dev/null
+++ b/test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll
@@ -0,0 +1,680 @@
+; RUN: llc < %s -mtriple=i686-apple-darwin | not grep IMPLICIT_DEF
+
+	%struct.__sbuf = type { i8*, i32 }
+	%struct.ggBRDF = type { i32 (...)** }		; vptr-only struct (single i32 (...)** field)
+	%"struct.ggBST<ggMaterial>" = type { %"struct.ggBSTNode<ggMaterial>"*, i32 }
+	%"struct.ggBST<ggRasterSurfaceTexture>" = type { %"struct.ggBSTNode<ggRasterSurfaceTexture>"*, i32 }
+	%"struct.ggBST<ggSolidTexture>" = type { %"struct.ggBSTNode<ggSolidTexture>"*, i32 }
+	%"struct.ggBST<ggSpectrum>" = type { %"struct.ggBSTNode<ggSpectrum>"*, i32 }
+	%"struct.ggBST<mrObjectRecord>" = type { %"struct.ggBSTNode<mrObjectRecord>"*, i32 }
+	%"struct.ggBSTNode<ggMaterial>" = type { %"struct.ggBSTNode<ggMaterial>"*, %"struct.ggBSTNode<ggMaterial>"*, %struct.ggString, %struct.ggMaterial* }		; left/right child pointers, key string, payload — TODO confirm field roles
+	%"struct.ggBSTNode<ggRasterSurfaceTexture>" = type { %"struct.ggBSTNode<ggRasterSurfaceTexture>"*, %"struct.ggBSTNode<ggRasterSurfaceTexture>"*, %struct.ggString, %struct.ggRasterSurfaceTexture* }
+	%"struct.ggBSTNode<ggSolidTexture>" = type { %"struct.ggBSTNode<ggSolidTexture>"*, %"struct.ggBSTNode<ggSolidTexture>"*, %struct.ggString, %struct.ggBRDF* }
+	%"struct.ggBSTNode<ggSpectrum>" = type { %"struct.ggBSTNode<ggSpectrum>"*, %"struct.ggBSTNode<ggSpectrum>"*, %struct.ggString, %struct.ggSpectrum* }
+	%"struct.ggBSTNode<mrObjectRecord>" = type { %"struct.ggBSTNode<mrObjectRecord>"*, %"struct.ggBSTNode<mrObjectRecord>"*, %struct.ggString, %struct.mrObjectRecord* }
+	%"struct.ggDictionary<ggMaterial>" = type { %"struct.ggBST<ggMaterial>" }
+	%"struct.ggDictionary<ggRasterSurfaceTexture>" = type { %"struct.ggBST<ggRasterSurfaceTexture>" }
+	%"struct.ggDictionary<ggSolidTexture>" = type { %"struct.ggBST<ggSolidTexture>" }
+	%"struct.ggDictionary<ggSpectrum>" = type { %"struct.ggBST<ggSpectrum>" }
+	%"struct.ggDictionary<mrObjectRecord>" = type { %"struct.ggBST<mrObjectRecord>" }
+	%struct.ggHAffineMatrix3 = type { %struct.ggHMatrix3 }
+	%struct.ggHBoxMatrix3 = type { %struct.ggHAffineMatrix3 }
+	%struct.ggHMatrix3 = type { [4 x [4 x double]] }		; 4x4 matrix of doubles
+	%struct.ggMaterial = type { i32 (...)**, %struct.ggBRDF* }
+	%struct.ggPoint3 = type { [3 x double] }
+	%"struct.ggRGBPixel<char>" = type { [3 x i8], i8 }
+	%"struct.ggRaster<ggRGBPixel<unsigned char> >" = type { i32, i32, %"struct.ggRGBPixel<char>"* }
+	%struct.ggRasterSurfaceTexture = type { %"struct.ggRaster<ggRGBPixel<unsigned char> >"* }
+	%struct.ggSolidNoise3 = type { i32, [256 x %struct.ggPoint3], [256 x i32] }
+	%struct.ggSpectrum = type { [8 x float] }
+	%struct.ggString = type { %"struct.ggString::StringRep"* }
+	%"struct.ggString::StringRep" = type { i32, i32, [1 x i8] }		; presumably a counted/shared string rep with trailing chars — TODO confirm
+	%"struct.ggTrain<mrPixelRenderer*>" = type { %struct.ggBRDF**, i32, i32 }
+	%struct.mrObjectRecord = type { %struct.ggHBoxMatrix3, %struct.ggHBoxMatrix3, %struct.mrSurfaceList, %struct.ggMaterial*, i32, %struct.ggRasterSurfaceTexture*, %struct.ggBRDF*, i32, i32 }
+	%struct.mrScene = type { %struct.ggSpectrum, %struct.ggSpectrum, %struct.ggBRDF*, %struct.ggBRDF*, %struct.ggBRDF*, i32, double, %"struct.ggDictionary<mrObjectRecord>", %"struct.ggDictionary<ggRasterSurfaceTexture>", %"struct.ggDictionary<ggSolidTexture>", %"struct.ggDictionary<ggSpectrum>", %"struct.ggDictionary<ggMaterial>" }
+	%struct.mrSurfaceList = type { %struct.ggBRDF, %"struct.ggTrain<mrPixelRenderer*>" }
+	%"struct.std::__codecvt_abstract_base<char,char,__mbstate_t>" = type { %"struct.std::locale::facet" }
+	%"struct.std::basic_ios<char,std::char_traits<char> >" = type { %"struct.std::ios_base", %"struct.std::basic_ostream<char,std::char_traits<char> >"*, i8, i8, %"struct.std::basic_streambuf<char,std::char_traits<char> >"*, %"struct.std::ctype<char>"*, %"struct.std::__codecvt_abstract_base<char,char,__mbstate_t>"*, %"struct.std::__codecvt_abstract_base<char,char,__mbstate_t>"* }
+	%"struct.std::basic_istream<char,std::char_traits<char> >" = type { i32 (...)**, i32, %"struct.std::basic_ios<char,std::char_traits<char> >" }
+	%"struct.std::basic_ostream<char,std::char_traits<char> >" = type { i32 (...)**, %"struct.std::basic_ios<char,std::char_traits<char> >" }
+	%"struct.std::basic_streambuf<char,std::char_traits<char> >" = type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %"struct.std::locale" }
+	%"struct.std::ctype<char>" = type { %"struct.std::locale::facet", i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }
+	%"struct.std::ios_base" = type { i32 (...)**, i32, i32, i32, i32, i32, %"struct.std::ios_base::_Callback_list"*, %struct.__sbuf, [8 x %struct.__sbuf], i32, %struct.__sbuf*, %"struct.std::locale" }
+	%"struct.std::ios_base::_Callback_list" = type { %"struct.std::ios_base::_Callback_list"*, void (i32, %"struct.std::ios_base"*, i32)*, i32, i32 }
+	%"struct.std::locale" = type { %"struct.std::locale::_Impl"* }
+	%"struct.std::locale::_Impl" = type { i32, %"struct.std::locale::facet"**, i32, %"struct.std::locale::facet"**, i8** }
+	%"struct.std::locale::facet" = type { i32 (...)**, i32 }
+@.str80 = external constant [7 x i8]		; <[7 x i8]*> [#uses=1] — compared against via strcmp in bb2106
+@.str81 = external constant [11 x i8]		; <[11 x i8]*> [#uses=1] — compared against via strcmp in bb2136
+
+define fastcc void @_ZN7mrScene4ReadERSi(%struct.mrScene* %this, %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces) {
+entry:
+	%tmp6.i.i8288 = invoke i8* @_Znam( i32 12 )
+			to label %_ZN8ggStringC1Ei.exit unwind label %lpad		; <i8*> [#uses=0]
+
+_ZN8ggStringC1Ei.exit:		; preds = %entry
+	%tmp6.i.i8995 = invoke i8* @_Znam( i32 12 )
+			to label %_ZN8ggStringC1Ei.exit96 unwind label %lpad3825		; <i8*> [#uses=0]
+
+_ZN8ggStringC1Ei.exit96:		; preds = %_ZN8ggStringC1Ei.exit
+	%tmp6.i.i97103 = invoke i8* @_Znam( i32 12 )
+			to label %_ZN8ggStringC1Ei.exit104 unwind label %lpad3829		; <i8*> [#uses=0]
+
+_ZN8ggStringC1Ei.exit104:		; preds = %_ZN8ggStringC1Ei.exit96
+	%tmp6.i.i105111 = invoke i8* @_Znam( i32 12 )
+			to label %_ZN8ggStringC1Ei.exit112 unwind label %lpad3833		; <i8*> [#uses=0]
+
+_ZN8ggStringC1Ei.exit112:		; preds = %_ZN8ggStringC1Ei.exit104
+	%tmp6.i.i122128 = invoke i8* @_Znam( i32 12 )
+			to label %_ZN8ggStringC1Ei.exit129 unwind label %lpad3837		; <i8*> [#uses=0]
+
+_ZN8ggStringC1Ei.exit129:		; preds = %_ZN8ggStringC1Ei.exit112
+	%tmp6.i.i132138 = invoke i8* @_Znam( i32 12 )
+			to label %_ZN8ggStringC1Ei.exit139 unwind label %lpad3841		; <i8*> [#uses=0]
+
+_ZN8ggStringC1Ei.exit139:		; preds = %_ZN8ggStringC1Ei.exit129
+	%tmp295 = invoke i8* @_Znwm( i32 16 )
+			to label %invcont294 unwind label %lpad3845		; <i8*> [#uses=0]
+
+invcont294:		; preds = %_ZN8ggStringC1Ei.exit139
+	%tmp10.i.i141 = invoke i8* @_Znam( i32 16 )
+			to label %_ZN13mrSurfaceListC1Ev.exit unwind label %lpad3849		; <i8*> [#uses=0]
+
+_ZN13mrSurfaceListC1Ev.exit:		; preds = %invcont294
+	%tmp3.i148 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
+			to label %tmp3.i.noexc unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
+
+tmp3.i.noexc:		; preds = %_ZN13mrSurfaceListC1Ev.exit
+	%tmp15.i149 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
+			to label %tmp15.i.noexc unwind label %lpad3845		; <i8*> [#uses=0]
+
+tmp15.i.noexc:		; preds = %tmp3.i.noexc
+	br i1 false, label %bb308, label %bb.i
+
+bb.i:		; preds = %tmp15.i.noexc
+	ret void
+
+bb308:		; preds = %tmp15.i.noexc
+	br i1 false, label %bb3743.preheader, label %bb315
+
+bb3743.preheader:		; preds = %bb308
+	%tmp16.i3862 = getelementptr %struct.ggPoint3* null, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	%tmp16.i3859 = getelementptr %struct.ggPoint3* null, i32 0, i32 0, i32 0		; <double*> [#uses=3]
+	br label %bb3743
+
+bb315:		; preds = %bb308
+	ret void
+
+bb333:		; preds = %invcont3758, %invcont335
+	%tmp3.i167180 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
+			to label %tmp3.i167.noexc unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
+
+tmp3.i167.noexc:		; preds = %bb333
+	%tmp15.i182 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
+			to label %tmp15.i.noexc181 unwind label %lpad3845		; <i8*> [#uses=0]
+
+tmp15.i.noexc181:		; preds = %tmp3.i167.noexc
+	br i1 false, label %invcont335, label %bb.i178
+
+bb.i178:		; preds = %tmp15.i.noexc181
+	ret void
+
+invcont335:		; preds = %tmp15.i.noexc181
+	br i1 false, label %bb3743, label %bb333
+
+bb345:		; preds = %invcont3758
+	br i1 false, label %bb353, label %bb360
+
+bb353:		; preds = %bb345
+	%tmp356 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, double* null )
+			to label %bb3743 unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
+
+bb360:		; preds = %bb345
+	br i1 false, label %bb368, label %bb374
+
+bb368:		; preds = %bb360
+	%tmp373 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, double* null )
+			to label %bb3743 unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
+
+bb374:		; preds = %bb360
+	br i1 false, label %bb396, label %bb421
+
+bb396:		; preds = %bb374
+	ret void
+
+bb421:		; preds = %bb374
+	br i1 false, label %bb429, label %bb530
+
+bb429:		; preds = %bb421
+	ret void
+
+bb530:		; preds = %bb421
+	br i1 false, label %bb538, label %bb673
+
+bb538:		; preds = %bb530
+	ret void
+
+bb673:		; preds = %bb530
+	br i1 false, label %bb681, label %bb778
+
+bb681:		; preds = %bb673
+	ret void
+
+bb778:		; preds = %bb673
+	br i1 false, label %bb786, label %bb891
+
+bb786:		; preds = %bb778
+	ret void
+
+bb891:		; preds = %bb778
+	br i1 false, label %bb899, label %bb998
+
+bb899:		; preds = %bb891
+	ret void
+
+bb998:		; preds = %bb891
+	br i1 false, label %bb1168, label %bb1190
+
+bb1168:		; preds = %bb998
+	ret void
+
+bb1190:		; preds = %bb998
+	br i1 false, label %bb1198, label %bb1220
+
+bb1198:		; preds = %bb1190
+	ret void
+
+bb1220:		; preds = %bb1190
+	br i1 false, label %bb1228, label %bb1250
+
+bb1228:		; preds = %bb1220
+	ret void
+
+bb1250:		; preds = %bb1220
+	br i1 false, label %bb1258, label %bb1303
+
+bb1258:		; preds = %bb1250
+	ret void
+
+bb1303:		; preds = %bb1250
+	br i1 false, label %bb1311, label %bb1366
+
+bb1311:		; preds = %bb1303
+	ret void
+
+bb1366:		; preds = %bb1303
+	br i1 false, label %bb1374, label %bb1432
+
+bb1374:		; preds = %bb1366
+	ret void
+
+bb1432:		; preds = %bb1366
+	br i1 false, label %bb1440, label %bb1495
+
+bb1440:		; preds = %bb1432
+	ret void
+
+bb1495:		; preds = %bb1432
+	br i1 false, label %bb1503, label %bb1561
+
+bb1503:		; preds = %bb1495
+	ret void
+
+bb1561:		; preds = %bb1495
+	br i1 false, label %bb1569, label %bb1624
+
+bb1569:		; preds = %bb1561
+	ret void
+
+bb1624:		; preds = %bb1561
+	br i1 false, label %bb1632, label %bb1654
+
+bb1632:		; preds = %bb1624
+	store double 0.000000e+00, double* %tmp16.i3859, align 8
+	%tmp3.i38383852 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
+			to label %tmp3.i3838.noexc unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
+
+tmp3.i3838.noexc:		; preds = %bb1632
+	%tmp15.i38473853 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
+			to label %tmp15.i3847.noexc unwind label %lpad3845		; <i8*> [#uses=0]
+
+tmp15.i3847.noexc:		; preds = %tmp3.i3838.noexc
+	br i1 false, label %invcont1634, label %bb.i3850
+
+bb.i3850:		; preds = %tmp15.i3847.noexc
+	ret void
+
+invcont1634:		; preds = %tmp15.i3847.noexc
+	%tmp3.i38173831 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
+			to label %tmp3.i3817.noexc unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
+
+tmp3.i3817.noexc:		; preds = %invcont1634
+	%tmp15.i38263832 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
+			to label %tmp15.i3826.noexc unwind label %lpad3845		; <i8*> [#uses=0]
+
+tmp15.i3826.noexc:		; preds = %tmp3.i3817.noexc
+	br i1 false, label %invcont1636, label %bb.i3829
+
+bb.i3829:		; preds = %tmp15.i3826.noexc
+	ret void
+
+invcont1636:		; preds = %tmp15.i3826.noexc
+	%tmp8.i38083811 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, double* %tmp16.i3862 )
+			to label %tmp8.i3808.noexc unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=1]
+
+tmp8.i3808.noexc:		; preds = %invcont1636
+	%tmp9.i38093812 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %tmp8.i38083811, double* null )
+			to label %tmp9.i3809.noexc unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=1]
+
+tmp9.i3809.noexc:		; preds = %tmp8.i3808.noexc
+	%tmp10.i38103813 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %tmp9.i38093812, double* null )
+			to label %invcont1638 unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
+
+invcont1638:		; preds = %tmp9.i3809.noexc
+	%tmp8.i37983801 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, double* %tmp16.i3859 )
+			to label %tmp8.i3798.noexc unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=1]
+
+tmp8.i3798.noexc:		; preds = %invcont1638
+	%tmp9.i37993802 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %tmp8.i37983801, double* null )
+			to label %tmp9.i3799.noexc unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=1]
+
+tmp9.i3799.noexc:		; preds = %tmp8.i3798.noexc
+	%tmp10.i38003803 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %tmp9.i37993802, double* null )
+			to label %invcont1640 unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
+
+invcont1640:		; preds = %tmp9.i3799.noexc
+	%tmp3.i3778 = load double* %tmp16.i3859, align 8		; <double> [#uses=1]
+	%tmp1643 = invoke i8* @_Znwm( i32 76 )
+			to label %invcont1642 unwind label %lpad3845		; <i8*> [#uses=0]
+
+invcont1642:		; preds = %invcont1640
+	%tmp18.i3770 = fsub double %tmp3.i3778, 0.000000e+00		; <double> [#uses=0]
+	invoke fastcc void @_ZN7mrScene9AddObjectEP9mrSurfaceRK8ggStringS4_i( %struct.mrScene* %this, %struct.ggBRDF* null, %struct.ggString* null, %struct.ggString* null, i32 0 )
+			to label %bb3743 unwind label %lpad3845
+
+bb1654:		; preds = %bb1624
+	br i1 false, label %bb1662, label %bb1693
+
+bb1662:		; preds = %bb1654
+	%tmp3.i37143728 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
+			to label %tmp3.i3714.noexc unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
+
+tmp3.i3714.noexc:		; preds = %bb1662
+	%tmp15.i37233729 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
+			to label %tmp15.i3723.noexc unwind label %lpad3845		; <i8*> [#uses=0]
+
+tmp15.i3723.noexc:		; preds = %tmp3.i3714.noexc
+	ret void
+
+bb1693:		; preds = %bb1654
+	br i1 false, label %bb1701, label %bb1745
+
+bb1701:		; preds = %bb1693
+	%tmp3.i36493663 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
+			to label %tmp3.i3649.noexc unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
+
+tmp3.i3649.noexc:		; preds = %bb1701
+	ret void
+
+bb1745:		; preds = %bb1693
+	br i1 false, label %bb1753, label %bb1797
+
+bb1753:		; preds = %bb1745
+	ret void
+
+bb1797:		; preds = %bb1745
+	br i1 false, label %bb1805, label %bb1847
+
+bb1805:		; preds = %bb1797
+	ret void
+
+bb1847:		; preds = %bb1797
+	br i1 false, label %bb1855, label %bb1897
+
+bb1855:		; preds = %bb1847
+	%tmp3.i34633477 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
+			to label %tmp3.i3463.noexc unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
+
+tmp3.i3463.noexc:		; preds = %bb1855
+	%tmp15.i34723478 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
+			to label %tmp15.i3472.noexc unwind label %lpad3845		; <i8*> [#uses=0]
+
+tmp15.i3472.noexc:		; preds = %tmp3.i3463.noexc
+	br i1 false, label %invcont1857, label %bb.i3475
+
+bb.i3475:		; preds = %tmp15.i3472.noexc
+	invoke fastcc void @_ZN8ggStringaSEPKc( %struct.ggString* null, i8* null )
+			to label %invcont1857 unwind label %lpad3845
+
+invcont1857:		; preds = %bb.i3475, %tmp15.i3472.noexc
+	%tmp1860 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, double* null )
+			to label %invcont1859 unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=1]
+
+invcont1859:		; preds = %invcont1857
+	%tmp1862 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %tmp1860, double* null )
+			to label %invcont1861 unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=1]
+
+invcont1861:		; preds = %invcont1859
+	%tmp1864 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %tmp1862, double* null )
+			to label %invcont1863 unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=1]
+
+invcont1863:		; preds = %invcont1861
+	%tmp1866 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %tmp1864, double* null )
+			to label %invcont1865 unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=1]
+
+invcont1865:		; preds = %invcont1863
+	%tmp1868 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %tmp1866, double* null )
+			to label %invcont1867 unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
+
+invcont1867:		; preds = %invcont1865
+	%tmp1881 = invoke i8 @_ZNKSt9basic_iosIcSt11char_traitsIcEE4goodEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null ) zeroext 
+			to label %invcont1880 unwind label %lpad3845		; <i8> [#uses=0]
+
+invcont1880:		; preds = %invcont1867
+	%tmp1883 = invoke i8* @_Znwm( i32 24 )
+			to label %invcont1882 unwind label %lpad3845		; <i8*> [#uses=0]
+
+invcont1882:		; preds = %invcont1880
+	invoke fastcc void @_ZN7mrScene9AddObjectEP9mrSurfaceRK8ggStringS4_i( %struct.mrScene* %this, %struct.ggBRDF* null, %struct.ggString* null, %struct.ggString* null, i32 0 )
+			to label %bb3743 unwind label %lpad3845
+
+bb1897:		; preds = %bb1847
+	br i1 false, label %bb1905, label %bb1947
+
+bb1905:		; preds = %bb1897
+	ret void
+
+bb1947:		; preds = %bb1897
+	br i1 false, label %bb1955, label %bb2000
+
+bb1955:		; preds = %bb1947
+	ret void
+
+bb2000:		; preds = %bb1947
+	br i1 false, label %bb2008, label %bb2053
+
+bb2008:		; preds = %bb2000
+	ret void
+
+bb2053:		; preds = %bb2000
+	br i1 false, label %bb2061, label %bb2106
+
+bb2061:		; preds = %bb2053
+	%tmp3.i32433257 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
+			to label %tmp3.i3243.noexc unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
+
+tmp3.i3243.noexc:		; preds = %bb2061
+	%tmp15.i32523258 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
+			to label %bb.i3255 unwind label %lpad3845		; <i8*> [#uses=0]
+
+bb.i3255:		; preds = %tmp3.i3243.noexc
+	invoke fastcc void @_ZN8ggStringaSEPKc( %struct.ggString* null, i8* null )
+			to label %invcont2063 unwind label %lpad3845
+
+invcont2063:		; preds = %bb.i3255
+	ret void
+
+bb2106:		; preds = %bb2053
+	%tmp7.i3214 = call i32 @strcmp( i8* %tmp5.i161, i8* getelementptr ([7 x i8]* @.str80, i32 0, i32 0) ) nounwind readonly 		; <i32> [#uses=0]
+	br i1 false, label %bb2114, label %bb2136
+
+bb2114:		; preds = %bb2106
+	%tmp3.i31923206 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
+			to label %tmp3.i3192.noexc unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
+
+tmp3.i3192.noexc:		; preds = %bb2114
+	%tmp15.i32013207 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
+			to label %tmp15.i3201.noexc unwind label %lpad3845		; <i8*> [#uses=0]
+
+tmp15.i3201.noexc:		; preds = %tmp3.i3192.noexc
+	br i1 false, label %invcont2116, label %bb.i3204
+
+bb.i3204:		; preds = %tmp15.i3201.noexc
+	ret void
+
+invcont2116:		; preds = %tmp15.i3201.noexc
+	%tmp3.i31713185 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
+			to label %tmp3.i3171.noexc unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
+
+tmp3.i3171.noexc:		; preds = %invcont2116
+	%tmp15.i31803186 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
+			to label %tmp15.i3180.noexc unwind label %lpad3845		; <i8*> [#uses=0]
+
+tmp15.i3180.noexc:		; preds = %tmp3.i3171.noexc
+	br i1 false, label %invcont2118, label %bb.i3183
+
+bb.i3183:		; preds = %tmp15.i3180.noexc
+	ret void
+
+invcont2118:		; preds = %tmp15.i3180.noexc
+	%tmp8.i31623165 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, double* null )
+			to label %tmp8.i3162.noexc unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=1]
+
+tmp8.i3162.noexc:		; preds = %invcont2118
+	%tmp9.i31633166 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %tmp8.i31623165, double* null )
+			to label %tmp9.i3163.noexc unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=1]
+
+tmp9.i3163.noexc:		; preds = %tmp8.i3162.noexc
+	%tmp10.i31643167 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %tmp9.i31633166, double* null )
+			to label %invcont2120 unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
+
+invcont2120:		; preds = %tmp9.i3163.noexc
+	%tmp2123 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, double* null )
+			to label %invcont2122 unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
+
+invcont2122:		; preds = %invcont2120
+	%tmp2125 = invoke i8* @_Znwm( i32 36 )
+			to label %invcont2124 unwind label %lpad3845		; <i8*> [#uses=0]
+
+invcont2124:		; preds = %invcont2122
+	invoke fastcc void @_ZN7mrScene9AddObjectEP9mrSurfaceRK8ggStringS4_i( %struct.mrScene* %this, %struct.ggBRDF* null, %struct.ggString* null, %struct.ggString* null, i32 0 )
+			to label %bb3743 unwind label %lpad3845
+
+bb2136:		; preds = %bb2106
+	%tmp7.i3128 = call i32 @strcmp( i8* %tmp5.i161, i8* getelementptr ([11 x i8]* @.str81, i32 0, i32 0) ) nounwind readonly 		; <i32> [#uses=0]
+	br i1 false, label %bb2144, label %bb3336
+
+bb2144:		; preds = %bb2136
+	%tmp6.i.i31173123 = invoke i8* @_Znam( i32 12 )
+			to label %_ZN8ggStringC1Ei.exit3124 unwind label %lpad3845		; <i8*> [#uses=0]
+
+_ZN8ggStringC1Ei.exit3124:		; preds = %bb2144
+	%tmp3.i30983112 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
+			to label %tmp3.i3098.noexc unwind label %lpad3921		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
+
+tmp3.i3098.noexc:		; preds = %_ZN8ggStringC1Ei.exit3124
+	%tmp15.i31073113 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
+			to label %tmp15.i3107.noexc unwind label %lpad3921		; <i8*> [#uses=0]
+
+tmp15.i3107.noexc:		; preds = %tmp3.i3098.noexc
+	br i1 false, label %invcont2147, label %bb.i3110
+
+bb.i3110:		; preds = %tmp15.i3107.noexc
+	ret void
+
+invcont2147:		; preds = %tmp15.i3107.noexc
+	%tmp2161 = invoke i8 @_ZNKSt9basic_iosIcSt11char_traitsIcEE4goodEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null ) zeroext 
+			to label %invcont2160 unwind label %lpad3921		; <i8> [#uses=0]
+
+invcont2160:		; preds = %invcont2147
+	%tmp4.i30933094 = invoke fastcc %struct.ggSpectrum* @_ZN5ggBSTI10ggSpectrumE4findERK8ggString3( %"struct.ggBSTNode<ggSpectrum>"* null, %struct.ggString* null )
+			to label %invcont2164 unwind label %lpad3921		; <%struct.ggSpectrum*> [#uses=0]
+
+invcont2164:		; preds = %invcont2160
+	br i1 false, label %bb2170, label %bb2181
+
+bb2170:		; preds = %invcont2164
+	ret void
+
+bb2181:		; preds = %invcont2164
+	invoke fastcc void @_ZN8ggStringD1Ev( %struct.ggString* null )
+			to label %bb3743 unwind label %lpad3845
+
+bb3336:		; preds = %bb2136
+	br i1 false, label %bb3344, label %bb3734
+
+bb3344:		; preds = %bb3336
+	%tmp6.i.i773779 = invoke i8* @_Znam( i32 12 )
+			to label %_ZN8ggStringC1Ei.exit780 unwind label %lpad3845		; <i8*> [#uses=0]
+
+_ZN8ggStringC1Ei.exit780:		; preds = %bb3344
+	%tmp6.i.i765771 = invoke i8* @_Znam( i32 12 )
+			to label %_ZN8ggStringC1Ei.exit772 unwind label %lpad4025		; <i8*> [#uses=0]
+
+_ZN8ggStringC1Ei.exit772:		; preds = %_ZN8ggStringC1Ei.exit780
+	%tmp3.i746760 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
+			to label %tmp3.i746.noexc unwind label %lpad4029		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
+
+tmp3.i746.noexc:		; preds = %_ZN8ggStringC1Ei.exit772
+	%tmp15.i755761 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
+			to label %tmp15.i755.noexc unwind label %lpad4029		; <i8*> [#uses=0]
+
+tmp15.i755.noexc:		; preds = %tmp3.i746.noexc
+	br i1 false, label %invcont3348, label %bb.i758
+
+bb.i758:		; preds = %tmp15.i755.noexc
+	ret void
+
+invcont3348:		; preds = %tmp15.i755.noexc
+	%tmp3.i726740 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
+			to label %tmp3.i726.noexc unwind label %lpad4029		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
+
+tmp3.i726.noexc:		; preds = %invcont3348
+	%tmp15.i735741 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
+			to label %tmp15.i735.noexc unwind label %lpad4029		; <i8*> [#uses=0]
+
+tmp15.i735.noexc:		; preds = %tmp3.i726.noexc
+	br i1 false, label %bb3458, label %bb.i738
+
+bb.i738:		; preds = %tmp15.i735.noexc
+	ret void
+
+bb3458:		; preds = %tmp15.i735.noexc
+	br i1 false, label %bb3466, label %bb3491
+
+bb3466:		; preds = %bb3458
+	%tmp3469 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, double* null )
+			to label %invcont3468 unwind label %lpad4029		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=1]
+
+invcont3468:		; preds = %bb3466
+	%tmp3471 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %tmp3469, double* null )
+			to label %invcont3470 unwind label %lpad4029		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=1]
+
+invcont3470:		; preds = %invcont3468
+	%tmp3473 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERi( %"struct.std::basic_istream<char,std::char_traits<char> >"* %tmp3471, i32* null )
+			to label %invcont3472 unwind label %lpad4029		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
+
+invcont3472:		; preds = %invcont3470
+	%tmp3475 = invoke i8* @_Znwm( i32 7196 )
+			to label %invcont3474 unwind label %lpad4029		; <i8*> [#uses=1]
+
+invcont3474:		; preds = %invcont3472
+	invoke fastcc void @_ZN13ggSolidNoise3C1Ev( %struct.ggSolidNoise3* null )
+			to label %_ZN22ggCoverageSolidTextureC1Eddi.exit unwind label %lpad4045
+
+_ZN22ggCoverageSolidTextureC1Eddi.exit:		; preds = %invcont3474
+	%tmp34823483 = bitcast i8* %tmp3475 to %struct.ggBRDF*		; <%struct.ggBRDF*> [#uses=2]
+	invoke fastcc void @_ZN5ggBSTI14ggSolidTextureE17InsertIntoSubtreeERK8ggStringPS0_RP9ggBSTNodeIS0_E( %"struct.ggBST<ggSolidTexture>"* null, %struct.ggString* null, %struct.ggBRDF* %tmp34823483, %"struct.ggBSTNode<ggSolidTexture>"** null )
+			to label %bb3662 unwind label %lpad4029
+
+bb3491:		; preds = %bb3458
+	ret void
+
+bb3662:		; preds = %_ZN22ggCoverageSolidTextureC1Eddi.exit
+	invoke fastcc void @_ZN8ggStringD1Ev( %struct.ggString* null )
+			to label %invcont3663 unwind label %lpad4025
+
+invcont3663:		; preds = %bb3662
+	invoke fastcc void @_ZN8ggStringD1Ev( %struct.ggString* null )
+			to label %bb3743 unwind label %lpad3845
+
+bb3734:		; preds = %bb3336
+	ret void
+
+bb3743:		; preds = %invcont3663, %bb2181, %invcont2124, %invcont1882, %invcont1642, %bb368, %bb353, %invcont335, %bb3743.preheader
+	%tex1.3 = phi %struct.ggBRDF* [ undef, %bb3743.preheader ], [ %tex1.3, %bb368 ], [ %tex1.3, %invcont1642 ], [ %tex1.3, %invcont1882 ], [ %tex1.3, %invcont2124 ], [ %tex1.3, %bb2181 ], [ %tex1.3, %invcont335 ], [ %tmp34823483, %invcont3663 ], [ %tex1.3, %bb353 ]		; <%struct.ggBRDF*> [#uses=7]
+	%tmp3.i312325 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
+			to label %tmp3.i312.noexc unwind label %lpad3845		; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
+
+tmp3.i312.noexc:		; preds = %bb3743
+	%tmp15.i327 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
+			to label %tmp15.i.noexc326 unwind label %lpad3845		; <i8*> [#uses=0]
+
+tmp15.i.noexc326:		; preds = %tmp3.i312.noexc
+	br i1 false, label %invcont3745, label %bb.i323
+
+bb.i323:		; preds = %tmp15.i.noexc326
+	ret void
+
+invcont3745:		; preds = %tmp15.i.noexc326
+	%tmp3759 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
+			to label %invcont3758 unwind label %lpad3845		; <i8*> [#uses=0]
+
+invcont3758:		; preds = %invcont3745
+	%tmp5.i161 = getelementptr %"struct.ggString::StringRep"* null, i32 0, i32 2, i32 0		; <i8*> [#uses=2]
+	br i1 false, label %bb333, label %bb345
+
+lpad:		; preds = %entry
+	ret void
+
+lpad3825:		; preds = %_ZN8ggStringC1Ei.exit
+	ret void
+
+lpad3829:		; preds = %_ZN8ggStringC1Ei.exit96
+	ret void
+
+lpad3833:		; preds = %_ZN8ggStringC1Ei.exit104
+	ret void
+
+lpad3837:		; preds = %_ZN8ggStringC1Ei.exit112
+	ret void
+
+lpad3841:		; preds = %_ZN8ggStringC1Ei.exit129
+	ret void
+
+lpad3845:		; preds = %invcont3745, %tmp3.i312.noexc, %bb3743, %invcont3663, %bb3344, %bb2181, %bb2144, %invcont2124, %invcont2122, %invcont2120, %tmp9.i3163.noexc, %tmp8.i3162.noexc, %invcont2118, %tmp3.i3171.noexc, %invcont2116, %tmp3.i3192.noexc, %bb2114, %bb.i3255, %tmp3.i3243.noexc, %bb2061, %invcont1882, %invcont1880, %invcont1867, %invcont1865, %invcont1863, %invcont1861, %invcont1859, %invcont1857, %bb.i3475, %tmp3.i3463.noexc, %bb1855, %bb1701, %tmp3.i3714.noexc, %bb1662, %invcont1642, %invcont1640, %tmp9.i3799.noexc, %tmp8.i3798.noexc, %invcont1638, %tmp9.i3809.noexc, %tmp8.i3808.noexc, %invcont1636, %tmp3.i3817.noexc, %invcont1634, %tmp3.i3838.noexc, %bb1632, %bb368, %bb353, %tmp3.i167.noexc, %bb333, %tmp3.i.noexc, %_ZN13mrSurfaceListC1Ev.exit, %_ZN8ggStringC1Ei.exit139
+	ret void
+
+lpad3849:		; preds = %invcont294
+	ret void
+
+lpad3921:		; preds = %invcont2160, %invcont2147, %tmp3.i3098.noexc, %_ZN8ggStringC1Ei.exit3124
+	ret void
+
+lpad4025:		; preds = %bb3662, %_ZN8ggStringC1Ei.exit780
+	ret void
+
+lpad4029:		; preds = %_ZN22ggCoverageSolidTextureC1Eddi.exit, %invcont3472, %invcont3470, %invcont3468, %bb3466, %tmp3.i726.noexc, %invcont3348, %tmp3.i746.noexc, %_ZN8ggStringC1Ei.exit772
+	ret void
+
+lpad4045:		; preds = %invcont3474
+	ret void
+}
+
+declare fastcc void @_ZN8ggStringD1Ev(%struct.ggString*)
+
+declare i8* @_Znam(i32)
+
+declare fastcc void @_ZN8ggStringaSEPKc(%struct.ggString*, i8*)
+
+declare i32 @strcmp(i8*, i8*) nounwind readonly 
+
+declare %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERi(%"struct.std::basic_istream<char,std::char_traits<char> >"*, i32*)
+
+declare i8* @_Znwm(i32)
+
+declare i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv(%"struct.std::basic_ios<char,std::char_traits<char> >"*)
+
+declare %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd(%"struct.std::basic_istream<char,std::char_traits<char> >"*, double*)
+
+declare %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_(%"struct.std::basic_istream<char,std::char_traits<char> >"*, i8*)
+
+declare fastcc void @_ZN13ggSolidNoise3C1Ev(%struct.ggSolidNoise3*)
+
+declare i8 @_ZNKSt9basic_iosIcSt11char_traitsIcEE4goodEv(%"struct.std::basic_ios<char,std::char_traits<char> >"*) zeroext 
+
+declare fastcc %struct.ggSpectrum* @_ZN5ggBSTI10ggSpectrumE4findERK8ggString3(%"struct.ggBSTNode<ggSpectrum>"*, %struct.ggString*)
+
+declare fastcc void @_ZN5ggBSTI14ggSolidTextureE17InsertIntoSubtreeERK8ggStringPS0_RP9ggBSTNodeIS0_E(%"struct.ggBST<ggSolidTexture>"*, %struct.ggString*, %struct.ggBRDF*, %"struct.ggBSTNode<ggSolidTexture>"**)
+
+declare fastcc void @_ZN7mrScene9AddObjectEP9mrSurfaceRK8ggStringS4_i(%struct.mrScene*, %struct.ggBRDF*, %struct.ggString*, %struct.ggString*, i32)
diff --git a/test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll b/test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll
new file mode 100644
index 0000000..455de91
--- /dev/null
+++ b/test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu
+; PR1799
+
+	%struct.c34007g__designated___XUB = type { i32, i32, i32, i32 }
+	%struct.c34007g__pkg__parent = type { i32*, %struct.c34007g__designated___XUB* }
+
+define void @_ada_c34007g() {
+entry:
+	%x8 = alloca %struct.c34007g__pkg__parent, align 8		; <%struct.c34007g__pkg__parent*> [#uses=2]
+	br i1 true, label %bb1271, label %bb848
+
+bb848:		; preds = %entry
+	ret void
+
+bb1271:		; preds = %bb898
+	%tmp1272 = getelementptr %struct.c34007g__pkg__parent* %x8, i32 0, i32 0		; <i32**> [#uses=1]
+	%x82167 = bitcast %struct.c34007g__pkg__parent* %x8 to i64*		; <i64*> [#uses=1]
+	br i1 true, label %bb4668, label %bb848
+
+bb4668:		; preds = %bb4648
+	%tmp5464 = load i64* %x82167, align 8		; <i64> [#uses=1]
+	%tmp5467 = icmp ne i64 0, %tmp5464		; <i1> [#uses=1]
+	%tmp5470 = load i32** %tmp1272, align 8		; <i32*> [#uses=1]
+	%tmp5471 = icmp eq i32* %tmp5470, null		; <i1> [#uses=1]
+	call fastcc void @c34007g__pkg__create.311( %struct.c34007g__pkg__parent* null, i32 7, i32 9, i32 2, i32 4, i32 1 )
+	%tmp5475 = or i1 %tmp5471, %tmp5467		; <i1> [#uses=1]
+	%tmp5497 = or i1 %tmp5475, false		; <i1> [#uses=1]
+	br i1 %tmp5497, label %bb848, label %bb5507
+
+bb5507:		; preds = %bb4668
+	ret void
+
+}
+
+declare fastcc void @c34007g__pkg__create.311(%struct.c34007g__pkg__parent*, i32, i32, i32, i32, i32)
diff --git a/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
new file mode 100644
index 0000000..265d968
--- /dev/null
+++ b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=x86 | grep {(%esp)} | count 2
+; PR1872
+
+	%struct.c34007g__designated___XUB = type { i32, i32, i32, i32 }
+	%struct.c34007g__pkg__parent = type { i32*, %struct.c34007g__designated___XUB* }
+
+define void @_ada_c34007g() {
+entry:
+	%x8 = alloca %struct.c34007g__pkg__parent, align 8		; <%struct.c34007g__pkg__parent*> [#uses=2]
+	%tmp1272 = getelementptr %struct.c34007g__pkg__parent* %x8, i32 0, i32 0		; <i32**> [#uses=1]
+	%x82167 = bitcast %struct.c34007g__pkg__parent* %x8 to i64*		; <i64*> [#uses=1]
+	br i1 true, label %bb4668, label %bb848
+
+bb4668:		; preds = %bb4648
+	%tmp5464 = load i64* %x82167, align 8		; <i64> [#uses=1]
+	%tmp5467 = icmp ne i64 0, %tmp5464		; <i1> [#uses=1]
+	%tmp5470 = load i32** %tmp1272, align 8		; <i32*> [#uses=1]
+	%tmp5471 = icmp eq i32* %tmp5470, null		; <i1> [#uses=1]
+	%tmp5475 = or i1 %tmp5471, %tmp5467		; <i1> [#uses=1]
+	%tmp5497 = or i1 %tmp5475, false		; <i1> [#uses=1]
+	br i1 %tmp5497, label %bb848, label %bb5507
+
+bb848:		; preds = %entry
+	ret void
+
+bb5507:		; preds = %bb4668
+	ret void
+}
diff --git a/test/CodeGen/X86/2008-01-08-IllegalCMP.ll b/test/CodeGen/X86/2008-01-08-IllegalCMP.ll
new file mode 100644
index 0000000..7aec613
--- /dev/null
+++ b/test/CodeGen/X86/2008-01-08-IllegalCMP.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i686-pc-linux-gnu"
+
+define i64 @__absvdi2(i64 %a) nounwind  {
+entry:
+	%w.0 = select i1 false, i64 0, i64 %a		; <i64> [#uses=2]
+	%tmp9 = icmp slt i64 %w.0, 0		; <i1> [#uses=1]
+	br i1 %tmp9, label %bb12, label %bb13
+
+bb12:		; preds = %entry
+	unreachable
+
+bb13:		; preds = %entry
+	ret i64 %w.0
+}
diff --git a/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll b/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
new file mode 100644
index 0000000..b040095
--- /dev/null
+++ b/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=x86 | not grep pushf
+
+	%struct.indexentry = type { i32, i8*, i8*, i8*, i8*, i8* }
+
+define i32 @_bfd_stab_section_find_nearest_line(i32 %offset) nounwind  {
+entry:
+	%tmp910 = add i32 0, %offset		; <i32> [#uses=1]
+	br i1 true, label %bb951, label %bb917
+
+bb917:		; preds = %entry
+	ret i32 0
+
+bb951:		; preds = %bb986, %entry
+	%tmp955 = sdiv i32 0, 2		; <i32> [#uses=3]
+	%tmp961 = getelementptr %struct.indexentry* null, i32 %tmp955, i32 0		; <i32*> [#uses=1]
+	br i1 true, label %bb986, label %bb967
+
+bb967:		; preds = %bb951
+	ret i32 0
+
+bb986:		; preds = %bb951
+	%tmp993 = load i32* %tmp961, align 4		; <i32> [#uses=1]
+	%tmp995 = icmp ugt i32 %tmp993, %tmp910		; <i1> [#uses=2]
+	%tmp1002 = add i32 %tmp955, 1		; <i32> [#uses=1]
+	%low.0 = select i1 %tmp995, i32 0, i32 %tmp1002		; <i32> [#uses=1]
+	%high.0 = select i1 %tmp995, i32 %tmp955, i32 0		; <i32> [#uses=1]
+	%tmp1006 = icmp eq i32 %low.0, %high.0		; <i1> [#uses=1]
+	br i1 %tmp1006, label %UnifiedReturnBlock, label %bb951
+
+UnifiedReturnBlock:		; preds = %bb986
+	ret i32 1
+}
diff --git a/test/CodeGen/X86/2008-01-09-LongDoubleSin.ll b/test/CodeGen/X86/2008-01-09-LongDoubleSin.ll
new file mode 100644
index 0000000..6997d53
--- /dev/null
+++ b/test/CodeGen/X86/2008-01-09-LongDoubleSin.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -o - | grep sinl
+
+target triple = "i686-pc-linux-gnu"
+
+define x86_fp80 @f(x86_fp80 %x) nounwind  {
+entry:
+	%tmp2 = tail call x86_fp80 @sinl( x86_fp80 %x ) nounwind readonly 		; <x86_fp80> [#uses=1]
+	ret x86_fp80 %tmp2
+}
+
+declare x86_fp80 @sinl(x86_fp80) nounwind readonly 
diff --git a/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll b/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
new file mode 100644
index 0000000..d795610
--- /dev/null
+++ b/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -regalloc=local
+
+define void @SolveCubic(double %a, double %b, double %c, double %d, i32* %solutions, double* %x) {
+entry:
+	%tmp71 = load x86_fp80* null, align 16		; <x86_fp80> [#uses=1]
+	%tmp72 = fdiv x86_fp80 %tmp71, 0xKC000C000000000000000		; <x86_fp80> [#uses=1]
+	%tmp73 = fadd x86_fp80 0xK00000000000000000000, %tmp72		; <x86_fp80> [#uses=1]
+	%tmp7374 = fptrunc x86_fp80 %tmp73 to double		; <double> [#uses=1]
+	store double %tmp7374, double* null, align 8
+	%tmp81 = load double* null, align 8		; <double> [#uses=1]
+	%tmp82 = fadd double %tmp81, 0x401921FB54442D18		; <double> [#uses=1]
+	%tmp83 = fdiv double %tmp82, 3.000000e+00		; <double> [#uses=1]
+	%tmp84 = call double @cos( double %tmp83 )		; <double> [#uses=1]
+	%tmp85 = fmul double 0.000000e+00, %tmp84		; <double> [#uses=1]
+	%tmp8586 = fpext double %tmp85 to x86_fp80		; <x86_fp80> [#uses=1]
+	%tmp87 = load x86_fp80* null, align 16		; <x86_fp80> [#uses=1]
+	%tmp88 = fdiv x86_fp80 %tmp87, 0xKC000C000000000000000		; <x86_fp80> [#uses=1]
+	%tmp89 = fadd x86_fp80 %tmp8586, %tmp88		; <x86_fp80> [#uses=1]
+	%tmp8990 = fptrunc x86_fp80 %tmp89 to double		; <double> [#uses=1]
+	store double %tmp8990, double* null, align 8
+	%tmp97 = load double* null, align 8		; <double> [#uses=1]
+	%tmp98 = fadd double %tmp97, 0x402921FB54442D18		; <double> [#uses=1]
+	%tmp99 = fdiv double %tmp98, 3.000000e+00		; <double> [#uses=1]
+	%tmp100 = call double @cos( double %tmp99 )		; <double> [#uses=1]
+	%tmp101 = fmul double 0.000000e+00, %tmp100		; <double> [#uses=1]
+	%tmp101102 = fpext double %tmp101 to x86_fp80		; <x86_fp80> [#uses=1]
+	%tmp103 = load x86_fp80* null, align 16		; <x86_fp80> [#uses=1]
+	%tmp104 = fdiv x86_fp80 %tmp103, 0xKC000C000000000000000		; <x86_fp80> [#uses=1]
+	%tmp105 = fadd x86_fp80 %tmp101102, %tmp104		; <x86_fp80> [#uses=1]
+	%tmp105106 = fptrunc x86_fp80 %tmp105 to double		; <double> [#uses=1]
+	store double %tmp105106, double* null, align 8
+	ret void
+}
+
+declare double @cos(double)
diff --git a/test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll b/test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll
new file mode 100644
index 0000000..e91f52e
--- /dev/null
+++ b/test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -march=x86 | not grep IMPLICIT_DEF
+
+	%struct.node_t = type { double*, %struct.node_t*, %struct.node_t**, double**, double*, i32, i32 }
+
+define void @localize_local_bb19_bb(%struct.node_t** %cur_node) {
+newFuncRoot:
+	%tmp1 = load %struct.node_t** %cur_node, align 4		; <%struct.node_t*> [#uses=1]
+	%tmp2 = getelementptr %struct.node_t* %tmp1, i32 0, i32 4		; <double**> [#uses=1]
+	%tmp3 = load double** %tmp2, align 4		; <double*> [#uses=1]
+	%tmp4 = load %struct.node_t** %cur_node, align 4		; <%struct.node_t*> [#uses=1]
+	%tmp5 = getelementptr %struct.node_t* %tmp4, i32 0, i32 4		; <double**> [#uses=1]
+	store double* %tmp3, double** %tmp5, align 4
+	%tmp6 = load %struct.node_t** %cur_node, align 4		; <%struct.node_t*> [#uses=1]
+	%tmp7 = getelementptr %struct.node_t* %tmp6, i32 0, i32 3		; <double***> [#uses=1]
+	%tmp8 = load double*** %tmp7, align 4		; <double**> [#uses=1]
+	%tmp9 = load %struct.node_t** %cur_node, align 4		; <%struct.node_t*> [#uses=1]
+	%tmp10 = getelementptr %struct.node_t* %tmp9, i32 0, i32 3		; <double***> [#uses=1]
+	store double** %tmp8, double*** %tmp10, align 4
+	%tmp11 = load %struct.node_t** %cur_node, align 4		; <%struct.node_t*> [#uses=1]
+	%tmp12 = getelementptr %struct.node_t* %tmp11, i32 0, i32 0		; <double**> [#uses=1]
+	%tmp13 = load double** %tmp12, align 4		; <double*> [#uses=1]
+	%tmp14 = load %struct.node_t** %cur_node, align 4		; <%struct.node_t*> [#uses=1]
+	%tmp15 = getelementptr %struct.node_t* %tmp14, i32 0, i32 0		; <double**> [#uses=1]
+	store double* %tmp13, double** %tmp15, align 4
+	%tmp16 = load %struct.node_t** %cur_node, align 4		; <%struct.node_t*> [#uses=1]
+	%tmp17 = getelementptr %struct.node_t* %tmp16, i32 0, i32 1		; <%struct.node_t**> [#uses=1]
+	%tmp18 = load %struct.node_t** %tmp17, align 4		; <%struct.node_t*> [#uses=1]
+	store %struct.node_t* %tmp18, %struct.node_t** %cur_node, align 4
+	ret void
+}
diff --git a/test/CodeGen/X86/2008-01-16-Trampoline.ll b/test/CodeGen/X86/2008-01-16-Trampoline.ll
new file mode 100644
index 0000000..704b2ba
--- /dev/null
+++ b/test/CodeGen/X86/2008-01-16-Trampoline.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -march=x86-64
+
+	%struct.FRAME.gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets = type { i32, i32, void (i32, i32)*, i8 (i32, i32)* }
+
+define fastcc i32 @gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets.5146(i64 %table.0.0, i64 %table.0.1, i32 %last, i32 %pos) {
+entry:
+	%tramp22 = call i8* @llvm.init.trampoline( i8* null, i8* bitcast (void (%struct.FRAME.gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets*, i32, i32)* @gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets__move.5177 to i8*), i8* null )		; <i8*> [#uses=0]
+	unreachable
+}
+
+declare void @gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets__move.5177(%struct.FRAME.gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets* nest , i32, i32) nounwind 
+
+declare i8* @llvm.init.trampoline(i8*, i8*, i8*) nounwind 
diff --git a/test/CodeGen/X86/2008-01-25-EmptyFunction.ll b/test/CodeGen/X86/2008-01-25-EmptyFunction.ll
new file mode 100644
index 0000000..387645f
--- /dev/null
+++ b/test/CodeGen/X86/2008-01-25-EmptyFunction.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 | grep {.byte	0}
+target triple = "i686-apple-darwin8"
+
+
+define void @bork() noreturn nounwind  {
+entry:
+        unreachable
+}
diff --git a/test/CodeGen/X86/2008-02-05-ISelCrash.ll b/test/CodeGen/X86/2008-02-05-ISelCrash.ll
new file mode 100644
index 0000000..443a32d
--- /dev/null
+++ b/test/CodeGen/X86/2008-02-05-ISelCrash.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86
+; PR1975
+
+@nodes = external global i64		; <i64*> [#uses=2]
+
+define fastcc i32 @ab(i32 %alpha, i32 %beta) nounwind  {
+entry:
+	%tmp1 = load i64* @nodes, align 8		; <i64> [#uses=1]
+	%tmp2 = add i64 %tmp1, 1		; <i64> [#uses=1]
+	store i64 %tmp2, i64* @nodes, align 8
+	ret i32 0
+}
diff --git a/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll b/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll
new file mode 100644
index 0000000..d2d5149
--- /dev/null
+++ b/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep xor | grep CPI
+
+define void @casin({ double, double }* sret  %agg.result, double %z.0, double %z.1) nounwind  {
+entry:
+	%memtmp = alloca { double, double }, align 8		; <{ double, double }*> [#uses=3]
+	%tmp4 = fsub double -0.000000e+00, %z.1		; <double> [#uses=1]
+	call void @casinh( { double, double }* sret  %memtmp, double %tmp4, double %z.0 ) nounwind 
+	%tmp19 = getelementptr { double, double }* %memtmp, i32 0, i32 0		; <double*> [#uses=1]
+	%tmp20 = load double* %tmp19, align 8		; <double> [#uses=1]
+	%tmp22 = getelementptr { double, double }* %memtmp, i32 0, i32 1		; <double*> [#uses=1]
+	%tmp23 = load double* %tmp22, align 8		; <double> [#uses=1]
+	%tmp32 = fsub double -0.000000e+00, %tmp20		; <double> [#uses=1]
+	%tmp37 = getelementptr { double, double }* %agg.result, i32 0, i32 0		; <double*> [#uses=1]
+	store double %tmp23, double* %tmp37, align 8
+	%tmp40 = getelementptr { double, double }* %agg.result, i32 0, i32 1		; <double*> [#uses=1]
+	store double %tmp32, double* %tmp40, align 8
+	ret void
+}
+
+declare void @casinh({ double, double }* sret , double, double) nounwind 
diff --git a/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll b/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll
new file mode 100644
index 0000000..b772d77
--- /dev/null
+++ b/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll
@@ -0,0 +1,99 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep andpd | not grep esp
+
+declare double @llvm.sqrt.f64(double) nounwind readnone 
+
+declare fastcc void @ApplyGivens(double**, double, double, i32, i32, i32, i32) nounwind 
+
+declare double @fabs(double)
+
+define void @main_bb114_2E_outer_2E_i_bb3_2E_i27(double** %tmp12.sub.i.i, [51 x double*]* %tmp12.i.i.i, i32 %i.0.reg2mem.0.ph.i, i32 %tmp11688.i, i32 %tmp19.i, i32 %tmp24.i, [51 x double*]* %tmp12.i.i) {
+newFuncRoot:
+	br label %bb3.i27
+
+bb111.i77.bb121.i_crit_edge.exitStub:		; preds = %bb111.i77
+	ret void
+
+bb3.i27:		; preds = %bb111.i77.bb3.i27_crit_edge, %newFuncRoot
+	%indvar94.i = phi i32 [ 0, %newFuncRoot ], [ %tmp113.i76, %bb111.i77.bb3.i27_crit_edge ]		; <i32> [#uses=6]
+	%tmp6.i20 = getelementptr [51 x double*]* %tmp12.i.i, i32 0, i32 %indvar94.i		; <double**> [#uses=1]
+	%tmp7.i21 = load double** %tmp6.i20, align 4		; <double*> [#uses=2]
+	%tmp10.i = add i32 %indvar94.i, %i.0.reg2mem.0.ph.i		; <i32> [#uses=5]
+	%tmp11.i22 = getelementptr double* %tmp7.i21, i32 %tmp10.i		; <double*> [#uses=1]
+	%tmp12.i23 = load double* %tmp11.i22, align 8		; <double> [#uses=4]
+	%tmp20.i24 = add i32 %tmp19.i, %indvar94.i		; <i32> [#uses=3]
+	%tmp21.i = getelementptr double* %tmp7.i21, i32 %tmp20.i24		; <double*> [#uses=1]
+	%tmp22.i25 = load double* %tmp21.i, align 8		; <double> [#uses=3]
+	%tmp1.i.i26 = fcmp oeq double %tmp12.i23, 0.000000e+00		; <i1> [#uses=1]
+	br i1 %tmp1.i.i26, label %bb3.i27.Givens.exit.i49_crit_edge, label %bb5.i.i31
+
+bb5.i.i31:		; preds = %bb3.i27
+	%tmp7.i.i28 = call double @fabs( double %tmp12.i23 ) nounwind 		; <double> [#uses=1]
+	%tmp9.i.i29 = call double @fabs( double %tmp22.i25 ) nounwind 		; <double> [#uses=1]
+	%tmp10.i.i30 = fcmp ogt double %tmp7.i.i28, %tmp9.i.i29		; <i1> [#uses=1]
+	br i1 %tmp10.i.i30, label %bb13.i.i37, label %bb30.i.i43
+
+bb13.i.i37:		; preds = %bb5.i.i31
+	%tmp15.i.i32 = fsub double -0.000000e+00, %tmp22.i25		; <double> [#uses=1]
+	%tmp17.i.i33 = fdiv double %tmp15.i.i32, %tmp12.i23		; <double> [#uses=3]
+	%tmp20.i4.i = fmul double %tmp17.i.i33, %tmp17.i.i33		; <double> [#uses=1]
+	%tmp21.i.i34 = fadd double %tmp20.i4.i, 1.000000e+00		; <double> [#uses=1]
+	%tmp22.i.i35 = call double @llvm.sqrt.f64( double %tmp21.i.i34 ) nounwind 		; <double> [#uses=1]
+	%tmp23.i5.i = fdiv double 1.000000e+00, %tmp22.i.i35		; <double> [#uses=2]
+	%tmp28.i.i36 = fmul double %tmp23.i5.i, %tmp17.i.i33		; <double> [#uses=1]
+	br label %Givens.exit.i49
+
+bb30.i.i43:		; preds = %bb5.i.i31
+	%tmp32.i.i38 = fsub double -0.000000e+00, %tmp12.i23		; <double> [#uses=1]
+	%tmp34.i.i39 = fdiv double %tmp32.i.i38, %tmp22.i25		; <double> [#uses=3]
+	%tmp37.i6.i = fmul double %tmp34.i.i39, %tmp34.i.i39		; <double> [#uses=1]
+	%tmp38.i.i40 = fadd double %tmp37.i6.i, 1.000000e+00		; <double> [#uses=1]
+	%tmp39.i7.i = call double @llvm.sqrt.f64( double %tmp38.i.i40 ) nounwind 		; <double> [#uses=1]
+	%tmp40.i.i41 = fdiv double 1.000000e+00, %tmp39.i7.i		; <double> [#uses=2]
+	%tmp45.i.i42 = fmul double %tmp40.i.i41, %tmp34.i.i39		; <double> [#uses=1]
+	br label %Givens.exit.i49
+
+Givens.exit.i49:		; preds = %bb3.i27.Givens.exit.i49_crit_edge, %bb30.i.i43, %bb13.i.i37
+	%s.0.i44 = phi double [ %tmp45.i.i42, %bb30.i.i43 ], [ %tmp23.i5.i, %bb13.i.i37 ], [ 0.000000e+00, %bb3.i27.Givens.exit.i49_crit_edge ]		; <double> [#uses=2]
+	%c.0.i45 = phi double [ %tmp40.i.i41, %bb30.i.i43 ], [ %tmp28.i.i36, %bb13.i.i37 ], [ 1.000000e+00, %bb3.i27.Givens.exit.i49_crit_edge ]		; <double> [#uses=2]
+	%tmp26.i46 = add i32 %tmp24.i, %indvar94.i		; <i32> [#uses=2]
+	%tmp27.i47 = icmp slt i32 %tmp26.i46, 51		; <i1> [#uses=1]
+	%min.i48 = select i1 %tmp27.i47, i32 %tmp26.i46, i32 50		; <i32> [#uses=1]
+	call fastcc void @ApplyGivens( double** %tmp12.sub.i.i, double %s.0.i44, double %c.0.i45, i32 %tmp20.i24, i32 %tmp10.i, i32 %indvar94.i, i32 %min.i48 ) nounwind 
+	br label %codeRepl
+
+codeRepl:		; preds = %Givens.exit.i49
+	call void @main_bb114_2E_outer_2E_i_bb3_2E_i27_bb_2E_i48_2E_i( i32 %tmp10.i, i32 %tmp20.i24, double %s.0.i44, double %c.0.i45, [51 x double*]* %tmp12.i.i.i )
+	br label %ApplyRGivens.exit49.i
+
+ApplyRGivens.exit49.i:		; preds = %codeRepl
+	%tmp10986.i = icmp sgt i32 %tmp11688.i, %tmp10.i		; <i1> [#uses=1]
+	br i1 %tmp10986.i, label %ApplyRGivens.exit49.i.bb52.i57_crit_edge, label %ApplyRGivens.exit49.i.bb111.i77_crit_edge
+
+codeRepl1:		; preds = %ApplyRGivens.exit49.i.bb52.i57_crit_edge
+	call void @main_bb114_2E_outer_2E_i_bb3_2E_i27_bb52_2E_i57( i32 %tmp10.i, double** %tmp12.sub.i.i, [51 x double*]* %tmp12.i.i.i, i32 %i.0.reg2mem.0.ph.i, i32 %tmp11688.i, i32 %tmp19.i, i32 %tmp24.i, [51 x double*]* %tmp12.i.i )
+	br label %bb105.i.bb111.i77_crit_edge
+
+bb111.i77:		; preds = %bb105.i.bb111.i77_crit_edge, %ApplyRGivens.exit49.i.bb111.i77_crit_edge
+	%tmp113.i76 = add i32 %indvar94.i, 1		; <i32> [#uses=2]
+	%tmp118.i = icmp sgt i32 %tmp11688.i, %tmp113.i76		; <i1> [#uses=1]
+	br i1 %tmp118.i, label %bb111.i77.bb3.i27_crit_edge, label %bb111.i77.bb121.i_crit_edge.exitStub
+
+bb3.i27.Givens.exit.i49_crit_edge:		; preds = %bb3.i27
+	br label %Givens.exit.i49
+
+ApplyRGivens.exit49.i.bb52.i57_crit_edge:		; preds = %ApplyRGivens.exit49.i
+	br label %codeRepl1
+
+ApplyRGivens.exit49.i.bb111.i77_crit_edge:		; preds = %ApplyRGivens.exit49.i
+	br label %bb111.i77
+
+bb105.i.bb111.i77_crit_edge:		; preds = %codeRepl1
+	br label %bb111.i77
+
+bb111.i77.bb3.i27_crit_edge:		; preds = %bb111.i77
+	br label %bb3.i27
+}
+
+declare void @main_bb114_2E_outer_2E_i_bb3_2E_i27_bb_2E_i48_2E_i(i32, i32, double, double, [51 x double*]*)
+
+declare void @main_bb114_2E_outer_2E_i_bb3_2E_i27_bb52_2E_i57(i32, double**, [51 x double*]*, i32, i32, i32, i32, [51 x double*]*)
diff --git a/test/CodeGen/X86/2008-02-14-BitMiscompile.ll b/test/CodeGen/X86/2008-02-14-BitMiscompile.ll
new file mode 100644
index 0000000..1983f1d
--- /dev/null
+++ b/test/CodeGen/X86/2008-02-14-BitMiscompile.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 | grep and
+define i32 @test(i1 %A) {
+	%B = zext i1 %A to i32		; <i32> [#uses=1]
+	%C = sub i32 0, %B		; <i32> [#uses=1]
+	%D = and i32 %C, 255		; <i32> [#uses=1]
+	ret i32 %D
+}
+
diff --git a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
new file mode 100644
index 0000000..7463a0e
--- /dev/null
+++ b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
@@ -0,0 +1,219 @@
+; RUN: llc < %s -march=x86 -mcpu=yonah -stats |& grep {Number of block tails merged} | grep 9
+; PR1909
+
+@.str = internal constant [48 x i8] c"transformed bounds: (%.2f, %.2f), (%.2f, %.2f)\0A\00"		; <[48 x i8]*> [#uses=1]
+
+define void @minmax(float* %result) nounwind optsize {
+entry:
+	%tmp2 = load float* %result, align 4		; <float> [#uses=6]
+	%tmp4 = getelementptr float* %result, i32 2		; <float*> [#uses=5]
+	%tmp5 = load float* %tmp4, align 4		; <float> [#uses=10]
+	%tmp7 = getelementptr float* %result, i32 4		; <float*> [#uses=5]
+	%tmp8 = load float* %tmp7, align 4		; <float> [#uses=8]
+	%tmp10 = getelementptr float* %result, i32 6		; <float*> [#uses=3]
+	%tmp11 = load float* %tmp10, align 4		; <float> [#uses=8]
+	%tmp12 = fcmp olt float %tmp8, %tmp11		; <i1> [#uses=5]
+	br i1 %tmp12, label %bb, label %bb21
+
+bb:		; preds = %entry
+	%tmp23469 = fcmp olt float %tmp5, %tmp8		; <i1> [#uses=1]
+	br i1 %tmp23469, label %bb26, label %bb30
+
+bb21:		; preds = %entry
+	%tmp23 = fcmp olt float %tmp5, %tmp11		; <i1> [#uses=1]
+	br i1 %tmp23, label %bb26, label %bb30
+
+bb26:		; preds = %bb21, %bb
+	%tmp52471 = fcmp olt float %tmp2, %tmp5		; <i1> [#uses=1]
+	br i1 %tmp52471, label %bb111, label %bb59
+
+bb30:		; preds = %bb21, %bb
+	br i1 %tmp12, label %bb40, label %bb50
+
+bb40:		; preds = %bb30
+	%tmp52473 = fcmp olt float %tmp2, %tmp8		; <i1> [#uses=1]
+	br i1 %tmp52473, label %bb111, label %bb59
+
+bb50:		; preds = %bb30
+	%tmp52 = fcmp olt float %tmp2, %tmp11		; <i1> [#uses=1]
+	br i1 %tmp52, label %bb111, label %bb59
+
+bb59:		; preds = %bb50, %bb40, %bb26
+	br i1 %tmp12, label %bb72, label %bb80
+
+bb72:		; preds = %bb59
+	%tmp82475 = fcmp olt float %tmp5, %tmp8		; <i1> [#uses=2]
+	%brmerge786 = or i1 %tmp82475, %tmp12		; <i1> [#uses=1]
+	%tmp4.mux787 = select i1 %tmp82475, float* %tmp4, float* %tmp7		; <float*> [#uses=1]
+	br i1 %brmerge786, label %bb111, label %bb103
+
+bb80:		; preds = %bb59
+	%tmp82 = fcmp olt float %tmp5, %tmp11		; <i1> [#uses=2]
+	%brmerge = or i1 %tmp82, %tmp12		; <i1> [#uses=1]
+	%tmp4.mux = select i1 %tmp82, float* %tmp4, float* %tmp7		; <float*> [#uses=1]
+	br i1 %brmerge, label %bb111, label %bb103
+
+bb103:		; preds = %bb80, %bb72
+	br label %bb111
+
+bb111:		; preds = %bb103, %bb80, %bb72, %bb50, %bb40, %bb26
+	%iftmp.0.0.in = phi float* [ %tmp10, %bb103 ], [ %result, %bb26 ], [ %result, %bb40 ], [ %result, %bb50 ], [ %tmp4.mux, %bb80 ], [ %tmp4.mux787, %bb72 ]		; <float*> [#uses=1]
+	%iftmp.0.0 = load float* %iftmp.0.0.in		; <float> [#uses=1]
+	%tmp125 = fcmp ogt float %tmp8, %tmp11		; <i1> [#uses=5]
+	br i1 %tmp125, label %bb128, label %bb136
+
+bb128:		; preds = %bb111
+	%tmp138477 = fcmp ogt float %tmp5, %tmp8		; <i1> [#uses=1]
+	br i1 %tmp138477, label %bb141, label %bb145
+
+bb136:		; preds = %bb111
+	%tmp138 = fcmp ogt float %tmp5, %tmp11		; <i1> [#uses=1]
+	br i1 %tmp138, label %bb141, label %bb145
+
+bb141:		; preds = %bb136, %bb128
+	%tmp167479 = fcmp ogt float %tmp2, %tmp5		; <i1> [#uses=1]
+	br i1 %tmp167479, label %bb226, label %bb174
+
+bb145:		; preds = %bb136, %bb128
+	br i1 %tmp125, label %bb155, label %bb165
+
+bb155:		; preds = %bb145
+	%tmp167481 = fcmp ogt float %tmp2, %tmp8		; <i1> [#uses=1]
+	br i1 %tmp167481, label %bb226, label %bb174
+
+bb165:		; preds = %bb145
+	%tmp167 = fcmp ogt float %tmp2, %tmp11		; <i1> [#uses=1]
+	br i1 %tmp167, label %bb226, label %bb174
+
+bb174:		; preds = %bb165, %bb155, %bb141
+	br i1 %tmp125, label %bb187, label %bb195
+
+bb187:		; preds = %bb174
+	%tmp197483 = fcmp ogt float %tmp5, %tmp8		; <i1> [#uses=2]
+	%brmerge790 = or i1 %tmp197483, %tmp125		; <i1> [#uses=1]
+	%tmp4.mux791 = select i1 %tmp197483, float* %tmp4, float* %tmp7		; <float*> [#uses=1]
+	br i1 %brmerge790, label %bb226, label %bb218
+
+bb195:		; preds = %bb174
+	%tmp197 = fcmp ogt float %tmp5, %tmp11		; <i1> [#uses=2]
+	%brmerge788 = or i1 %tmp197, %tmp125		; <i1> [#uses=1]
+	%tmp4.mux789 = select i1 %tmp197, float* %tmp4, float* %tmp7		; <float*> [#uses=1]
+	br i1 %brmerge788, label %bb226, label %bb218
+
+bb218:		; preds = %bb195, %bb187
+	br label %bb226
+
+bb226:		; preds = %bb218, %bb195, %bb187, %bb165, %bb155, %bb141
+	%iftmp.7.0.in = phi float* [ %tmp10, %bb218 ], [ %result, %bb141 ], [ %result, %bb155 ], [ %result, %bb165 ], [ %tmp4.mux789, %bb195 ], [ %tmp4.mux791, %bb187 ]		; <float*> [#uses=1]
+	%iftmp.7.0 = load float* %iftmp.7.0.in		; <float> [#uses=1]
+	%tmp229 = getelementptr float* %result, i32 1		; <float*> [#uses=7]
+	%tmp230 = load float* %tmp229, align 4		; <float> [#uses=6]
+	%tmp232 = getelementptr float* %result, i32 3		; <float*> [#uses=5]
+	%tmp233 = load float* %tmp232, align 4		; <float> [#uses=10]
+	%tmp235 = getelementptr float* %result, i32 5		; <float*> [#uses=5]
+	%tmp236 = load float* %tmp235, align 4		; <float> [#uses=8]
+	%tmp238 = getelementptr float* %result, i32 7		; <float*> [#uses=3]
+	%tmp239 = load float* %tmp238, align 4		; <float> [#uses=8]
+	%tmp240 = fcmp olt float %tmp236, %tmp239		; <i1> [#uses=5]
+	br i1 %tmp240, label %bb243, label %bb251
+
+bb243:		; preds = %bb226
+	%tmp253485 = fcmp olt float %tmp233, %tmp236		; <i1> [#uses=1]
+	br i1 %tmp253485, label %bb256, label %bb260
+
+bb251:		; preds = %bb226
+	%tmp253 = fcmp olt float %tmp233, %tmp239		; <i1> [#uses=1]
+	br i1 %tmp253, label %bb256, label %bb260
+
+bb256:		; preds = %bb251, %bb243
+	%tmp282487 = fcmp olt float %tmp230, %tmp233		; <i1> [#uses=1]
+	br i1 %tmp282487, label %bb341, label %bb289
+
+bb260:		; preds = %bb251, %bb243
+	br i1 %tmp240, label %bb270, label %bb280
+
+bb270:		; preds = %bb260
+	%tmp282489 = fcmp olt float %tmp230, %tmp236		; <i1> [#uses=1]
+	br i1 %tmp282489, label %bb341, label %bb289
+
+bb280:		; preds = %bb260
+	%tmp282 = fcmp olt float %tmp230, %tmp239		; <i1> [#uses=1]
+	br i1 %tmp282, label %bb341, label %bb289
+
+bb289:		; preds = %bb280, %bb270, %bb256
+	br i1 %tmp240, label %bb302, label %bb310
+
+bb302:		; preds = %bb289
+	%tmp312491 = fcmp olt float %tmp233, %tmp236		; <i1> [#uses=2]
+	%brmerge793 = or i1 %tmp312491, %tmp240		; <i1> [#uses=1]
+	%tmp232.mux794 = select i1 %tmp312491, float* %tmp232, float* %tmp235		; <float*> [#uses=1]
+	br i1 %brmerge793, label %bb341, label %bb333
+
+bb310:		; preds = %bb289
+	%tmp312 = fcmp olt float %tmp233, %tmp239		; <i1> [#uses=2]
+	%brmerge792 = or i1 %tmp312, %tmp240		; <i1> [#uses=1]
+	%tmp232.mux = select i1 %tmp312, float* %tmp232, float* %tmp235		; <float*> [#uses=1]
+	br i1 %brmerge792, label %bb341, label %bb333
+
+bb333:		; preds = %bb310, %bb302
+	br label %bb341
+
+bb341:		; preds = %bb333, %bb310, %bb302, %bb280, %bb270, %bb256
+	%iftmp.14.0.in = phi float* [ %tmp238, %bb333 ], [ %tmp229, %bb280 ], [ %tmp229, %bb270 ], [ %tmp229, %bb256 ], [ %tmp232.mux, %bb310 ], [ %tmp232.mux794, %bb302 ]		; <float*> [#uses=1]
+	%iftmp.14.0 = load float* %iftmp.14.0.in		; <float> [#uses=1]
+	%tmp355 = fcmp ogt float %tmp236, %tmp239		; <i1> [#uses=5]
+	br i1 %tmp355, label %bb358, label %bb366
+
+bb358:		; preds = %bb341
+	%tmp368493 = fcmp ogt float %tmp233, %tmp236		; <i1> [#uses=1]
+	br i1 %tmp368493, label %bb371, label %bb375
+
+bb366:		; preds = %bb341
+	%tmp368 = fcmp ogt float %tmp233, %tmp239		; <i1> [#uses=1]
+	br i1 %tmp368, label %bb371, label %bb375
+
+bb371:		; preds = %bb366, %bb358
+	%tmp397495 = fcmp ogt float %tmp230, %tmp233		; <i1> [#uses=1]
+	br i1 %tmp397495, label %bb456, label %bb404
+
+bb375:		; preds = %bb366, %bb358
+	br i1 %tmp355, label %bb385, label %bb395
+
+bb385:		; preds = %bb375
+	%tmp397497 = fcmp ogt float %tmp230, %tmp236		; <i1> [#uses=1]
+	br i1 %tmp397497, label %bb456, label %bb404
+
+bb395:		; preds = %bb375
+	%tmp397 = fcmp ogt float %tmp230, %tmp239		; <i1> [#uses=1]
+	br i1 %tmp397, label %bb456, label %bb404
+
+bb404:		; preds = %bb395, %bb385, %bb371
+	br i1 %tmp355, label %bb417, label %bb425
+
+bb417:		; preds = %bb404
+	%tmp427499 = fcmp ogt float %tmp233, %tmp236		; <i1> [#uses=2]
+	%brmerge797 = or i1 %tmp427499, %tmp355		; <i1> [#uses=1]
+	%tmp232.mux798 = select i1 %tmp427499, float* %tmp232, float* %tmp235		; <float*> [#uses=1]
+	br i1 %brmerge797, label %bb456, label %bb448
+
+bb425:		; preds = %bb404
+	%tmp427 = fcmp ogt float %tmp233, %tmp239		; <i1> [#uses=2]
+	%brmerge795 = or i1 %tmp427, %tmp355		; <i1> [#uses=1]
+	%tmp232.mux796 = select i1 %tmp427, float* %tmp232, float* %tmp235		; <float*> [#uses=1]
+	br i1 %brmerge795, label %bb456, label %bb448
+
+bb448:		; preds = %bb425, %bb417
+	br label %bb456
+
+bb456:		; preds = %bb448, %bb425, %bb417, %bb395, %bb385, %bb371
+	%iftmp.21.0.in = phi float* [ %tmp238, %bb448 ], [ %tmp229, %bb395 ], [ %tmp229, %bb385 ], [ %tmp229, %bb371 ], [ %tmp232.mux796, %bb425 ], [ %tmp232.mux798, %bb417 ]		; <float*> [#uses=1]
+	%iftmp.21.0 = load float* %iftmp.21.0.in		; <float> [#uses=1]
+	%tmp458459 = fpext float %iftmp.21.0 to double		; <double> [#uses=1]
+	%tmp460461 = fpext float %iftmp.7.0 to double		; <double> [#uses=1]
+	%tmp462463 = fpext float %iftmp.14.0 to double		; <double> [#uses=1]
+	%tmp464465 = fpext float %iftmp.0.0 to double		; <double> [#uses=1]
+	%tmp467 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([48 x i8]* @.str, i32 0, i32 0), double %tmp464465, double %tmp462463, double %tmp460461, double %tmp458459 ) nounwind 		; <i32> [#uses=0]
+	ret void
+}
+
+declare i32 @printf(i8*, ...) nounwind 
diff --git a/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll b/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
new file mode 100644
index 0000000..5115e48
--- /dev/null
+++ b/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s | grep {a:} | not grep ax
+; RUN: llc < %s | grep {b:} | not grep ax
+; PR2078
+; The clobber list says that "ax" is clobbered.  Make sure that eax isn't 
+; allocated to the input/output register.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+@pixels = weak global i32 0		; <i32*> [#uses=2]
+
+define void @test() nounwind  {
+entry:
+	%tmp = load i32* @pixels, align 4		; <i32> [#uses=1]
+	%tmp1 = tail call i32 asm sideeffect "a: $0 $1", "=r,0,~{dirflag},~{fpsr},~{flags},~{ax}"( i32 %tmp ) nounwind 		; <i32> [#uses=1]
+	store i32 %tmp1, i32* @pixels, align 4
+	ret void
+}
+
+define void @test2(i16* %block, i8* %pixels, i32 %line_size) nounwind  {
+entry:
+	%tmp1 = getelementptr i16* %block, i32 64		; <i16*> [#uses=1]
+	%tmp3 = tail call i8* asm sideeffect "b: $0 $1 $2", "=r,r,0,~{dirflag},~{fpsr},~{flags},~{ax}"( i16* %tmp1, i8* %pixels ) nounwind 		; <i8*> [#uses=0]
+	ret void
+}
+
diff --git a/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll b/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll
new file mode 100644
index 0000000..6b1eefe
--- /dev/null
+++ b/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -regalloc=local -march=x86 -mattr=+mmx | grep esi
+; PR2082
+; Local register allocator was refusing to use ESI, EDI, and EBP so it ran out of
+; registers.
+define void @transpose4x4(i8* %dst, i8* %src, i32 %dst_stride, i32 %src_stride) {
+entry:
+	%dst_addr = alloca i8*		; <i8**> [#uses=5]
+	%src_addr = alloca i8*		; <i8**> [#uses=5]
+	%dst_stride_addr = alloca i32		; <i32*> [#uses=4]
+	%src_stride_addr = alloca i32		; <i32*> [#uses=4]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	store i8* %dst, i8** %dst_addr
+	store i8* %src, i8** %src_addr
+	store i32 %dst_stride, i32* %dst_stride_addr
+	store i32 %src_stride, i32* %src_stride_addr
+	%tmp = load i8** %dst_addr, align 4		; <i8*> [#uses=1]
+	%tmp1 = getelementptr i8* %tmp, i32 0		; <i8*> [#uses=1]
+	%tmp12 = bitcast i8* %tmp1 to i32*		; <i32*> [#uses=1]
+	%tmp3 = load i8** %dst_addr, align 4		; <i8*> [#uses=1]
+	%tmp4 = load i32* %dst_stride_addr, align 4		; <i32> [#uses=1]
+	%tmp5 = getelementptr i8* %tmp3, i32 %tmp4		; <i8*> [#uses=1]
+	%tmp56 = bitcast i8* %tmp5 to i32*		; <i32*> [#uses=1]
+	%tmp7 = load i32* %dst_stride_addr, align 4		; <i32> [#uses=1]
+	%tmp8 = mul i32 %tmp7, 2		; <i32> [#uses=1]
+	%tmp9 = load i8** %dst_addr, align 4		; <i8*> [#uses=1]
+	%tmp10 = getelementptr i8* %tmp9, i32 %tmp8		; <i8*> [#uses=1]
+	%tmp1011 = bitcast i8* %tmp10 to i32*		; <i32*> [#uses=1]
+	%tmp13 = load i32* %dst_stride_addr, align 4		; <i32> [#uses=1]
+	%tmp14 = mul i32 %tmp13, 3		; <i32> [#uses=1]
+	%tmp15 = load i8** %dst_addr, align 4		; <i8*> [#uses=1]
+	%tmp16 = getelementptr i8* %tmp15, i32 %tmp14		; <i8*> [#uses=1]
+	%tmp1617 = bitcast i8* %tmp16 to i32*		; <i32*> [#uses=1]
+	%tmp18 = load i8** %src_addr, align 4		; <i8*> [#uses=1]
+	%tmp19 = getelementptr i8* %tmp18, i32 0		; <i8*> [#uses=1]
+	%tmp1920 = bitcast i8* %tmp19 to i32*		; <i32*> [#uses=1]
+	%tmp21 = load i8** %src_addr, align 4		; <i8*> [#uses=1]
+	%tmp22 = load i32* %src_stride_addr, align 4		; <i32> [#uses=1]
+	%tmp23 = getelementptr i8* %tmp21, i32 %tmp22		; <i8*> [#uses=1]
+	%tmp2324 = bitcast i8* %tmp23 to i32*		; <i32*> [#uses=1]
+	%tmp25 = load i32* %src_stride_addr, align 4		; <i32> [#uses=1]
+	%tmp26 = mul i32 %tmp25, 2		; <i32> [#uses=1]
+	%tmp27 = load i8** %src_addr, align 4		; <i8*> [#uses=1]
+	%tmp28 = getelementptr i8* %tmp27, i32 %tmp26		; <i8*> [#uses=1]
+	%tmp2829 = bitcast i8* %tmp28 to i32*		; <i32*> [#uses=1]
+	%tmp30 = load i32* %src_stride_addr, align 4		; <i32> [#uses=1]
+	%tmp31 = mul i32 %tmp30, 3		; <i32> [#uses=1]
+	%tmp32 = load i8** %src_addr, align 4		; <i8*> [#uses=1]
+	%tmp33 = getelementptr i8* %tmp32, i32 %tmp31		; <i8*> [#uses=1]
+	%tmp3334 = bitcast i8* %tmp33 to i32*		; <i32*> [#uses=1]
+	call void asm sideeffect "movd  $4, %mm0                \0A\09movd  $5, %mm1                \0A\09movd  $6, %mm2                \0A\09movd  $7, %mm3                \0A\09punpcklbw %mm1, %mm0         \0A\09punpcklbw %mm3, %mm2         \0A\09movq %mm0, %mm1              \0A\09punpcklwd %mm2, %mm0         \0A\09punpckhwd %mm2, %mm1         \0A\09movd  %mm0, $0                \0A\09punpckhdq %mm0, %mm0         \0A\09movd  %mm0, $1                \0A\09movd  %mm1, $2                \0A\09punpckhdq %mm1, %mm1         \0A\09movd  %mm1, $3                \0A\09", "=*m,=*m,=*m,=*m,*m,*m,*m,*m,~{dirflag},~{fpsr},~{flags}"( i32* %tmp12, i32* %tmp56, i32* %tmp1011, i32* %tmp1617, i32* %tmp1920, i32* %tmp2324, i32* %tmp2829, i32* %tmp3334 ) nounwind 
+	br label %return
+
+return:		; preds = %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/2008-02-22-ReMatBug.ll b/test/CodeGen/X86/2008-02-22-ReMatBug.ll
new file mode 100644
index 0000000..8d6bb0d
--- /dev/null
+++ b/test/CodeGen/X86/2008-02-22-ReMatBug.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s -march=x86 -stats |& grep {Number of re-materialization} | grep 3
+; rdar://5761454
+
+	%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
+
+define  %struct.quad_struct* @MakeTree(i32 %size, i32 %center_x, i32 %center_y, i32 %lo_proc, i32 %hi_proc, %struct.quad_struct* %parent, i32 %ct, i32 %level) nounwind  {
+entry:
+	br i1 true, label %bb43.i, label %bb.i
+
+bb.i:		; preds = %entry
+	ret %struct.quad_struct* null
+
+bb43.i:		; preds = %entry
+	br i1 true, label %CheckOutside.exit40.i, label %bb11.i38.i
+
+bb11.i38.i:		; preds = %bb43.i
+	ret %struct.quad_struct* null
+
+CheckOutside.exit40.i:		; preds = %bb43.i
+	br i1 true, label %CheckOutside.exit30.i, label %bb11.i28.i
+
+bb11.i28.i:		; preds = %CheckOutside.exit40.i
+	ret %struct.quad_struct* null
+
+CheckOutside.exit30.i:		; preds = %CheckOutside.exit40.i
+	br i1 true, label %CheckOutside.exit20.i, label %bb11.i18.i
+
+bb11.i18.i:		; preds = %CheckOutside.exit30.i
+	ret %struct.quad_struct* null
+
+CheckOutside.exit20.i:		; preds = %CheckOutside.exit30.i
+	br i1 true, label %bb34, label %bb11.i8.i
+
+bb11.i8.i:		; preds = %CheckOutside.exit20.i
+	ret %struct.quad_struct* null
+
+bb34:		; preds = %CheckOutside.exit20.i
+	%tmp15.reg2mem.0 = sdiv i32 %size, 2		; <i32> [#uses=7]
+	%tmp85 = sub i32 %center_y, %tmp15.reg2mem.0		; <i32> [#uses=2]
+	%tmp88 = sub i32 %center_x, %tmp15.reg2mem.0		; <i32> [#uses=2]
+	%tmp92 = tail call  %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 %tmp88, i32 %tmp85, i32 0, i32 %hi_proc, %struct.quad_struct* null, i32 2, i32 0 ) nounwind 		; <%struct.quad_struct*> [#uses=0]
+	%tmp99 = add i32 0, %hi_proc		; <i32> [#uses=1]
+	%tmp100 = sdiv i32 %tmp99, 2		; <i32> [#uses=1]
+	%tmp110 = tail call  %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 0, i32 %tmp85, i32 0, i32 %tmp100, %struct.quad_struct* null, i32 3, i32 0 ) nounwind 		; <%struct.quad_struct*> [#uses=0]
+	%tmp122 = add i32 %tmp15.reg2mem.0, %center_y		; <i32> [#uses=2]
+	%tmp129 = tail call  %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 0, i32 %tmp122, i32 0, i32 0, %struct.quad_struct* null, i32 1, i32 0 ) nounwind 		; <%struct.quad_struct*> [#uses=0]
+	%tmp147 = tail call  %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 %tmp88, i32 %tmp122, i32 %lo_proc, i32 0, %struct.quad_struct* null, i32 0, i32 0 ) nounwind 		; <%struct.quad_struct*> [#uses=0]
+	unreachable
+}
diff --git a/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll b/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll
new file mode 100644
index 0000000..1d31859
--- /dev/null
+++ b/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -mattr=+sse2
+; PR2076
+
+define void @h264_h_loop_filter_luma_mmx2(i8* %pix, i32 %stride, i32 %alpha, i32 %beta, i8* %tc0) nounwind  {
+entry:
+	%tmp164 = getelementptr [16 x i32]* null, i32 0, i32 11		; <i32*> [#uses=1]
+	%tmp169 = getelementptr [16 x i32]* null, i32 0, i32 13		; <i32*> [#uses=1]
+	%tmp174 = getelementptr [16 x i32]* null, i32 0, i32 15		; <i32*> [#uses=1]
+	%tmp154.sum317 = add i32 0, %stride		; <i32> [#uses=1]
+	%tmp154.sum315 = mul i32 %stride, 6		; <i32> [#uses=1]
+	%tmp154.sum = mul i32 %stride, 7		; <i32> [#uses=1]
+	%pix_addr.0327.rec = mul i32 0, 0		; <i32> [#uses=4]
+	br i1 false, label %bb292, label %bb32
+
+bb32:		; preds = %entry
+	%pix_addr.0327.sum340 = add i32 %pix_addr.0327.rec, 0		; <i32> [#uses=1]
+	%tmp154 = getelementptr i8* %pix, i32 %pix_addr.0327.sum340		; <i8*> [#uses=1]
+	%tmp177178 = bitcast i8* %tmp154 to i32*		; <i32*> [#uses=1]
+	%pix_addr.0327.sum339 = add i32 %pix_addr.0327.rec, %tmp154.sum317		; <i32> [#uses=1]
+	%tmp181 = getelementptr i8* %pix, i32 %pix_addr.0327.sum339		; <i8*> [#uses=1]
+	%tmp181182 = bitcast i8* %tmp181 to i32*		; <i32*> [#uses=1]
+	%pix_addr.0327.sum338 = add i32 %pix_addr.0327.rec, %tmp154.sum315		; <i32> [#uses=1]
+	%tmp186 = getelementptr i8* %pix, i32 %pix_addr.0327.sum338		; <i8*> [#uses=1]
+	%tmp186187 = bitcast i8* %tmp186 to i32*		; <i32*> [#uses=1]
+	%pix_addr.0327.sum337 = add i32 %pix_addr.0327.rec, %tmp154.sum		; <i32> [#uses=1]
+	%tmp191 = getelementptr i8* %pix, i32 %pix_addr.0327.sum337		; <i8*> [#uses=1]
+	%tmp191192 = bitcast i8* %tmp191 to i32*		; <i32*> [#uses=1]
+	call void asm sideeffect "movd  $4, %mm0                \0A\09movd  $5, %mm1                \0A\09movd  $6, %mm2                \0A\09movd  $7, %mm3                \0A\09punpcklbw %mm1, %mm0         \0A\09punpcklbw %mm3, %mm2         \0A\09movq %mm0, %mm1              \0A\09punpcklwd %mm2, %mm0         \0A\09punpckhwd %mm2, %mm1         \0A\09movd  %mm0, $0                \0A\09punpckhdq %mm0, %mm0         \0A\09movd  %mm0, $1                \0A\09movd  %mm1, $2                \0A\09punpckhdq %mm1, %mm1         \0A\09movd  %mm1, $3                \0A\09", "=*m,=*m,=*m,=*m,*m,*m,*m,*m,~{dirflag},~{fpsr},~{flags}"( i32* null, i32* %tmp164, i32* %tmp169, i32* %tmp174, i32* %tmp177178, i32* %tmp181182, i32* %tmp186187, i32* %tmp191192 ) nounwind 
+	unreachable
+
+bb292:		; preds = %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll b/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll
new file mode 100644
index 0000000..6615b8c
--- /dev/null
+++ b/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -march=x86-64
+
+	%struct.XX = type <{ i8 }>
+	%struct.YY = type { i64 }
+	%struct.ZZ = type opaque
+
+define i8 @f(%struct.XX*** %fontMap, %struct.XX* %uen) signext  {
+entry:
+	%tmp45 = add i16 0, 1		; <i16> [#uses=2]
+	br i1 false, label %bb124, label %bb53
+
+bb53:		; preds = %entry
+	%tmp55 = call %struct.YY** @AA( i64 1, %struct.XX* %uen )		; <%struct.YY**> [#uses=3]
+	%tmp2728128 = load %struct.XX** null		; <%struct.XX*> [#uses=1]
+	%tmp61 = load %struct.YY** %tmp55, align 8		; <%struct.YY*> [#uses=1]
+	%tmp62 = getelementptr %struct.YY* %tmp61, i32 0, i32 0		; <i64*> [#uses=1]
+	%tmp63 = load i64* %tmp62, align 8		; <i64> [#uses=1]
+	%tmp6566 = zext i16 %tmp45 to i64		; <i64> [#uses=1]
+	%tmp67 = shl i64 %tmp6566, 1		; <i64> [#uses=1]
+	call void @BB( %struct.YY** %tmp55, i64 %tmp67, i8 signext  0, %struct.XX* %uen )
+	%tmp121131 = icmp eq i16 %tmp45, 1		; <i1> [#uses=1]
+	br i1 %tmp121131, label %bb124, label %bb70.preheader
+
+bb70.preheader:		; preds = %bb53
+	%tmp72 = bitcast %struct.XX* %tmp2728128 to %struct.ZZ***		; <%struct.ZZ***> [#uses=1]
+	br label %bb70
+
+bb70:		; preds = %bb119, %bb70.preheader
+	%indvar133 = phi i32 [ %indvar.next134, %bb119 ], [ 0, %bb70.preheader ]		; <i32> [#uses=2]
+	%tmp.135 = trunc i64 %tmp63 to i32		; <i32> [#uses=1]
+	%tmp136 = shl i32 %indvar133, 1		; <i32> [#uses=1]
+	%DD = add i32 %tmp136, %tmp.135		; <i32> [#uses=1]
+	%tmp73 = load %struct.ZZ*** %tmp72, align 8		; <%struct.ZZ**> [#uses=0]
+	br i1 false, label %bb119, label %bb77
+
+bb77:		; preds = %bb70
+	%tmp8384 = trunc i32 %DD to i16		; <i16> [#uses=1]
+	%tmp85 = sub i16 0, %tmp8384		; <i16> [#uses=1]
+	store i16 %tmp85, i16* null, align 8
+	call void @CC( %struct.YY** %tmp55, i64 0, i64 2, i8* null, %struct.XX* %uen )
+	ret i8 0
+
+bb119:		; preds = %bb70
+	%indvar.next134 = add i32 %indvar133, 1		; <i32> [#uses=1]
+	br label %bb70
+
+bb124:		; preds = %bb53, %entry
+	ret i8 undef
+}
+
+declare %struct.YY** @AA(i64, %struct.XX*)
+
+declare void @BB(%struct.YY**, i64, i8 signext , %struct.XX*)
+
+declare void @CC(%struct.YY**, i64, i64, i8*, %struct.XX*)
diff --git a/test/CodeGen/X86/2008-02-26-AsmDirectMemOp.ll b/test/CodeGen/X86/2008-02-26-AsmDirectMemOp.ll
new file mode 100644
index 0000000..0b4eb3a
--- /dev/null
+++ b/test/CodeGen/X86/2008-02-26-AsmDirectMemOp.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+
+define void @dct_unquantize_h263_intra_mmx(i16* %block, i32 %n, i32 %qscale) nounwind  {
+entry:
+	%tmp1 = shl i32 %qscale, 1		; <i32> [#uses=1]
+	br i1 false, label %bb46, label %bb59
+
+bb46:		; preds = %entry
+	ret void
+
+bb59:		; preds = %entry
+	tail call void asm sideeffect "movd $1, %mm6                 \0A\09packssdw %mm6, %mm6          \0A\09packssdw %mm6, %mm6          \0A\09movd $2, %mm5                 \0A\09pxor %mm7, %mm7              \0A\09packssdw %mm5, %mm5          \0A\09packssdw %mm5, %mm5          \0A\09psubw %mm5, %mm7             \0A\09pxor %mm4, %mm4              \0A\09.align 1<<4\0A\091:                             \0A\09movq ($0, $3), %mm0           \0A\09movq 8($0, $3), %mm1          \0A\09pmullw %mm6, %mm0            \0A\09pmullw %mm6, %mm1            \0A\09movq ($0, $3), %mm2           \0A\09movq 8($0, $3), %mm3          \0A\09pcmpgtw %mm4, %mm2           \0A\09pcmpgtw %mm4, %mm3           \0A\09pxor %mm2, %mm0              \0A\09pxor %mm3, %mm1              \0A\09paddw %mm7, %mm0             \0A\09paddw %mm7, %mm1             \0A\09pxor %mm0, %mm2              \0A\09pxor %mm1, %mm3              \0A\09pcmpeqw %mm7, %mm0           \0A\09pcmpeqw %mm7, %mm1           \0A\09pandn %mm2, %mm0             \0A\09pandn %mm3, %mm1             \0A\09movq %mm0, ($0, $3)           \0A\09movq %mm1, 8($0, $3)          \0A\09add $$16, $3                    \0A\09jng 1b                         \0A\09", "r,imr,imr,r,~{dirflag},~{fpsr},~{flags},~{memory}"( i16* null, i32 %tmp1, i32 0, i32 0 ) nounwind 
+	ret void
+}
diff --git a/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll b/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll
new file mode 100644
index 0000000..ad7950c
--- /dev/null
+++ b/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -march=x86
+
+	%struct.CompAtom = type <{ %struct.Position, float, i32 }>
+	%struct.Lattice = type { %struct.Position, %struct.Position, %struct.Position, %struct.Position, %struct.Position, %struct.Position, %struct.Position, i32, i32, i32 }
+	%struct.Position = type { double, double, double }
+
+define fastcc %struct.CompAtom* @_ZNK7Lattice6createEP8CompAtomii(%struct.Lattice* %this, %struct.CompAtom* %d, i32 %n, i32 %i) {
+entry:
+	%tmp18 = tail call i8* @_Znam( i32 0 )		; <i8*> [#uses=1]
+	%tmp1819 = bitcast i8* %tmp18 to %struct.CompAtom*		; <%struct.CompAtom*> [#uses=4]
+	%tmp3286 = icmp eq i32 %n, 0		; <i1> [#uses=1]
+	br i1 %tmp3286, label %bb35, label %bb24
+
+bb24:		; preds = %bb24, %entry
+	%tmp9.0.reg2mem.0.rec = phi i32 [ %indvar.next, %bb24 ], [ 0, %entry ]		; <i32> [#uses=3]
+	%tmp3.i.i = getelementptr %struct.CompAtom* %tmp1819, i32 %tmp9.0.reg2mem.0.rec, i32 0, i32 1		; <double*> [#uses=0]
+	%tmp5.i.i = getelementptr %struct.CompAtom* %tmp1819, i32 %tmp9.0.reg2mem.0.rec, i32 0, i32 2		; <double*> [#uses=1]
+	store double -9.999900e+04, double* %tmp5.i.i, align 4
+	%indvar.next = add i32 %tmp9.0.reg2mem.0.rec, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next, %n		; <i1> [#uses=1]
+	br i1 %exitcond, label %bb35, label %bb24
+
+bb35:		; preds = %bb24, %entry
+	%tmp42 = sdiv i32 %i, 9		; <i32> [#uses=1]
+	%tmp43 = add i32 %tmp42, -1		; <i32> [#uses=1]
+	%tmp4344 = sitofp i32 %tmp43 to double		; <double> [#uses=1]
+	%tmp17.i76 = fmul double %tmp4344, 0.000000e+00		; <double> [#uses=1]
+	%tmp48 = sdiv i32 %i, 3		; <i32> [#uses=1]
+	%tmp49 = srem i32 %tmp48, 3		; <i32> [#uses=1]
+	%tmp50 = add i32 %tmp49, -1		; <i32> [#uses=1]
+	%tmp5051 = sitofp i32 %tmp50 to double		; <double> [#uses=1]
+	%tmp17.i63 = fmul double %tmp5051, 0.000000e+00		; <double> [#uses=1]
+	%tmp55 = srem i32 %i, 3		; <i32> [#uses=1]
+	%tmp56 = add i32 %tmp55, -1		; <i32> [#uses=1]
+	%tmp5657 = sitofp i32 %tmp56 to double		; <double> [#uses=1]
+	%tmp15.i49 = getelementptr %struct.Lattice* %this, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	%tmp16.i50 = load double* %tmp15.i49, align 4		; <double> [#uses=1]
+	%tmp17.i = fmul double %tmp5657, %tmp16.i50		; <double> [#uses=1]
+	%tmp20.i39 = fadd double %tmp17.i, %tmp17.i63		; <double> [#uses=1]
+	%tmp20.i23 = fadd double %tmp20.i39, %tmp17.i76		; <double> [#uses=1]
+	br i1 false, label %bb58.preheader, label %bb81
+
+bb58.preheader:		; preds = %bb35
+	%smax = select i1 false, i32 1, i32 %n		; <i32> [#uses=1]
+	br label %bb58
+
+bb58:		; preds = %bb58, %bb58.preheader
+	%tmp20.i7 = getelementptr %struct.CompAtom* %d, i32 0, i32 2		; <i32*> [#uses=2]
+	%tmp25.i = getelementptr %struct.CompAtom* %tmp1819, i32 0, i32 2		; <i32*> [#uses=2]
+	%tmp74.i = load i32* %tmp20.i7, align 1		; <i32> [#uses=1]
+	%tmp82.i = and i32 %tmp74.i, 134217728		; <i32> [#uses=1]
+	%tmp85.i = or i32 0, %tmp82.i		; <i32> [#uses=1]
+	store i32 %tmp85.i, i32* %tmp25.i, align 1
+	%tmp88.i = load i32* %tmp20.i7, align 1		; <i32> [#uses=1]
+	%tmp95.i = and i32 %tmp88.i, -268435456		; <i32> [#uses=1]
+	%tmp97.i = or i32 0, %tmp95.i		; <i32> [#uses=1]
+	store i32 %tmp97.i, i32* %tmp25.i, align 1
+	%tmp6.i = fadd double 0.000000e+00, %tmp20.i23		; <double> [#uses=0]
+	%exitcond96 = icmp eq i32 0, %smax		; <i1> [#uses=1]
+	br i1 %exitcond96, label %bb81, label %bb58
+
+bb81:		; preds = %bb58, %bb35
+	ret %struct.CompAtom* %tmp1819
+}
+
+declare i8* @_Znam(i32)
diff --git a/test/CodeGen/X86/2008-02-27-PEICrash.ll b/test/CodeGen/X86/2008-02-27-PEICrash.ll
new file mode 100644
index 0000000..d842967
--- /dev/null
+++ b/test/CodeGen/X86/2008-02-27-PEICrash.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2
+
+define i64 @__divsc3(float %a, float %b, float %c, float %d) nounwind readnone  {
+entry:
+	br i1 false, label %bb56, label %bb33
+
+bb33:		; preds = %entry
+	br label %bb56
+
+bb56:		; preds = %bb33, %entry
+	%tmp36.pn = phi float [ 0.000000e+00, %bb33 ], [ 0.000000e+00, %entry ]		; <float> [#uses=1]
+	%b.pn509 = phi float [ %b, %bb33 ], [ %a, %entry ]		; <float> [#uses=1]
+	%a.pn = phi float [ %a, %bb33 ], [ %b, %entry ]		; <float> [#uses=1]
+	%tmp41.pn508 = phi float [ 0.000000e+00, %bb33 ], [ 0.000000e+00, %entry ]		; <float> [#uses=1]
+	%tmp51.pn = phi float [ 0.000000e+00, %bb33 ], [ %a, %entry ]		; <float> [#uses=1]
+	%tmp44.pn = fmul float %tmp36.pn, %b.pn509		; <float> [#uses=1]
+	%tmp46.pn = fadd float %tmp44.pn, %a.pn		; <float> [#uses=1]
+	%tmp53.pn = fsub float 0.000000e+00, %tmp51.pn		; <float> [#uses=1]
+	%x.0 = fdiv float %tmp46.pn, %tmp41.pn508		; <float> [#uses=1]
+	%y.0 = fdiv float %tmp53.pn, 0.000000e+00		; <float> [#uses=1]
+	br i1 false, label %bb433, label %bb98
+
+bb98:		; preds = %bb56
+	%tmp102 = fmul float 0.000000e+00, %a		; <float> [#uses=1]
+	%tmp106 = fmul float 0.000000e+00, %b		; <float> [#uses=1]
+	br label %bb433
+
+bb433:		; preds = %bb98, %bb56
+	%x.1 = phi float [ %tmp102, %bb98 ], [ %x.0, %bb56 ]		; <float> [#uses=0]
+	%y.1 = phi float [ %tmp106, %bb98 ], [ %y.0, %bb56 ]		; <float> [#uses=1]
+	%tmp460 = fadd float %y.1, 0.000000e+00		; <float> [#uses=0]
+	ret i64 0
+}
diff --git a/test/CodeGen/X86/2008-03-06-frem-fpstack.ll b/test/CodeGen/X86/2008-03-06-frem-fpstack.ll
new file mode 100644
index 0000000..70a83b5
--- /dev/null
+++ b/test/CodeGen/X86/2008-03-06-frem-fpstack.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=x86 -mcpu=i386
+; PR2122
+define float @func(float %a, float %b) nounwind  {
+entry:
+        %tmp3 = frem float %a, %b               ; <float> [#uses=1]
+        ret float %tmp3
+}
diff --git a/test/CodeGen/X86/2008-03-07-APIntBug.ll b/test/CodeGen/X86/2008-03-07-APIntBug.ll
new file mode 100644
index 0000000..84e4827
--- /dev/null
+++ b/test/CodeGen/X86/2008-03-07-APIntBug.ll
@@ -0,0 +1,94 @@
+; RUN: llc < %s -march=x86 -mcpu=i386 | not grep 255
+
+	%struct.CONSTRAINT = type { i32, i32, i32, i32 }
+	%struct.FIRST_UNION = type { %struct.anon }
+	%struct.FOURTH_UNION = type { %struct.CONSTRAINT }
+	%struct.LIST = type { %struct.rec*, %struct.rec* }
+	%struct.SECOND_UNION = type { { i16, i8, i8 } }
+	%struct.THIRD_UNION = type { { [2 x i32], [2 x i32] } }
+	%struct.anon = type { i8, i8, i32 }
+	%struct.head_type = type { [2 x %struct.LIST], %struct.FIRST_UNION, %struct.SECOND_UNION, %struct.THIRD_UNION, %struct.FOURTH_UNION, %struct.rec*, { %struct.rec* }, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, i32 }
+	%struct.rec = type { %struct.head_type }
+	%struct.symbol_type = type <{ [2 x %struct.LIST], %struct.FIRST_UNION, %struct.SECOND_UNION, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, i16, i16, i8, i8, i8, i8 }>
+	%struct.word_type = type { [2 x %struct.LIST], %struct.FIRST_UNION, %struct.SECOND_UNION, %struct.THIRD_UNION, [4 x i8] }
+
+define void @InsertSym_bb1163(%struct.rec** %s) {
+newFuncRoot:
+	br label %bb1163
+bb1233.exitStub:		; preds = %bb1163
+	ret void
+bb1163:		; preds = %newFuncRoot
+	%tmp1164 = load %struct.rec** %s, align 4		; <%struct.rec*> [#uses=1]
+	%tmp1165 = getelementptr %struct.rec* %tmp1164, i32 0, i32 0		; <%struct.head_type*> [#uses=1]
+	%tmp11651166 = bitcast %struct.head_type* %tmp1165 to %struct.symbol_type*		; <%struct.symbol_type*> [#uses=1]
+	%tmp1167 = getelementptr %struct.symbol_type* %tmp11651166, i32 0, i32 3		; <%struct.rec**> [#uses=1]
+	%tmp1168 = load %struct.rec** %tmp1167, align 1		; <%struct.rec*> [#uses=2]
+	%tmp1169 = load %struct.rec** %s, align 4		; <%struct.rec*> [#uses=1]
+	%tmp1170 = getelementptr %struct.rec* %tmp1169, i32 0, i32 0		; <%struct.head_type*> [#uses=1]
+	%tmp11701171 = bitcast %struct.head_type* %tmp1170 to %struct.symbol_type*		; <%struct.symbol_type*> [#uses=1]
+	%tmp1172 = getelementptr %struct.symbol_type* %tmp11701171, i32 0, i32 3		; <%struct.rec**> [#uses=1]
+	%tmp1173 = load %struct.rec** %tmp1172, align 1		; <%struct.rec*> [#uses=2]
+	%tmp1174 = getelementptr %struct.rec* %tmp1173, i32 0, i32 0		; <%struct.head_type*> [#uses=1]
+	%tmp11741175 = bitcast %struct.head_type* %tmp1174 to %struct.word_type*		; <%struct.word_type*> [#uses=1]
+	%tmp1176 = getelementptr %struct.word_type* %tmp11741175, i32 0, i32 2		; <%struct.SECOND_UNION*> [#uses=1]
+	%tmp1177 = getelementptr %struct.SECOND_UNION* %tmp1176, i32 0, i32 0		; <{ i16, i8, i8 }*> [#uses=1]
+	%tmp11771178 = bitcast { i16, i8, i8 }* %tmp1177 to <{ i8, i8, i8, i8 }>*		; <<{ i8, i8, i8, i8 }>*> [#uses=1]
+	%tmp1179 = getelementptr <{ i8, i8, i8, i8 }>* %tmp11771178, i32 0, i32 2		; <i8*> [#uses=2]
+	%mask1180 = and i8 1, 1		; <i8> [#uses=2]
+	%tmp1181 = load i8* %tmp1179, align 1		; <i8> [#uses=1]
+	%tmp1182 = shl i8 %mask1180, 7		; <i8> [#uses=1]
+	%tmp1183 = and i8 %tmp1181, 127		; <i8> [#uses=1]
+	%tmp1184 = or i8 %tmp1183, %tmp1182		; <i8> [#uses=1]
+	store i8 %tmp1184, i8* %tmp1179, align 1
+	%mask1185 = and i8 %mask1180, 1		; <i8> [#uses=0]
+	%tmp1186 = getelementptr %struct.rec* %tmp1173, i32 0, i32 0		; <%struct.head_type*> [#uses=1]
+	%tmp11861187 = bitcast %struct.head_type* %tmp1186 to %struct.word_type*		; <%struct.word_type*> [#uses=1]
+	%tmp1188 = getelementptr %struct.word_type* %tmp11861187, i32 0, i32 2		; <%struct.SECOND_UNION*> [#uses=1]
+	%tmp1189 = getelementptr %struct.SECOND_UNION* %tmp1188, i32 0, i32 0		; <{ i16, i8, i8 }*> [#uses=1]
+	%tmp11891190 = bitcast { i16, i8, i8 }* %tmp1189 to <{ i8, i8, i8, i8 }>*		; <<{ i8, i8, i8, i8 }>*> [#uses=1]
+	%tmp1191 = getelementptr <{ i8, i8, i8, i8 }>* %tmp11891190, i32 0, i32 2		; <i8*> [#uses=1]
+	%tmp1192 = load i8* %tmp1191, align 1		; <i8> [#uses=1]
+	%tmp1193 = lshr i8 %tmp1192, 7		; <i8> [#uses=1]
+	%mask1194 = and i8 %tmp1193, 1		; <i8> [#uses=2]
+	%mask1195 = and i8 %mask1194, 1		; <i8> [#uses=0]
+	%tmp1196 = getelementptr %struct.rec* %tmp1168, i32 0, i32 0		; <%struct.head_type*> [#uses=1]
+	%tmp11961197 = bitcast %struct.head_type* %tmp1196 to %struct.word_type*		; <%struct.word_type*> [#uses=1]
+	%tmp1198 = getelementptr %struct.word_type* %tmp11961197, i32 0, i32 2		; <%struct.SECOND_UNION*> [#uses=1]
+	%tmp1199 = getelementptr %struct.SECOND_UNION* %tmp1198, i32 0, i32 0		; <{ i16, i8, i8 }*> [#uses=1]
+	%tmp11991200 = bitcast { i16, i8, i8 }* %tmp1199 to <{ i8, i8, i8, i8 }>*		; <<{ i8, i8, i8, i8 }>*> [#uses=1]
+	%tmp1201 = getelementptr <{ i8, i8, i8, i8 }>* %tmp11991200, i32 0, i32 1		; <i8*> [#uses=2]
+	%mask1202 = and i8 %mask1194, 1		; <i8> [#uses=2]
+	%tmp1203 = load i8* %tmp1201, align 1		; <i8> [#uses=1]
+	%tmp1204 = shl i8 %mask1202, 1		; <i8> [#uses=1]
+	%tmp1205 = and i8 %tmp1204, 2		; <i8> [#uses=1]
+	%tmp1206 = and i8 %tmp1203, -3		; <i8> [#uses=1]
+	%tmp1207 = or i8 %tmp1206, %tmp1205		; <i8> [#uses=1]
+	store i8 %tmp1207, i8* %tmp1201, align 1
+	%mask1208 = and i8 %mask1202, 1		; <i8> [#uses=0]
+	%tmp1209 = getelementptr %struct.rec* %tmp1168, i32 0, i32 0		; <%struct.head_type*> [#uses=1]
+	%tmp12091210 = bitcast %struct.head_type* %tmp1209 to %struct.word_type*		; <%struct.word_type*> [#uses=1]
+	%tmp1211 = getelementptr %struct.word_type* %tmp12091210, i32 0, i32 2		; <%struct.SECOND_UNION*> [#uses=1]
+	%tmp1212 = getelementptr %struct.SECOND_UNION* %tmp1211, i32 0, i32 0		; <{ i16, i8, i8 }*> [#uses=1]
+	%tmp12121213 = bitcast { i16, i8, i8 }* %tmp1212 to <{ i8, i8, i8, i8 }>*		; <<{ i8, i8, i8, i8 }>*> [#uses=1]
+	%tmp1214 = getelementptr <{ i8, i8, i8, i8 }>* %tmp12121213, i32 0, i32 1		; <i8*> [#uses=1]
+	%tmp1215 = load i8* %tmp1214, align 1		; <i8> [#uses=1]
+	%tmp1216 = shl i8 %tmp1215, 6		; <i8> [#uses=1]
+	%tmp1217 = lshr i8 %tmp1216, 7		; <i8> [#uses=1]
+	%mask1218 = and i8 %tmp1217, 1		; <i8> [#uses=2]
+	%mask1219 = and i8 %mask1218, 1		; <i8> [#uses=0]
+	%tmp1220 = load %struct.rec** %s, align 4		; <%struct.rec*> [#uses=1]
+	%tmp1221 = getelementptr %struct.rec* %tmp1220, i32 0, i32 0		; <%struct.head_type*> [#uses=1]
+	%tmp12211222 = bitcast %struct.head_type* %tmp1221 to %struct.word_type*		; <%struct.word_type*> [#uses=1]
+	%tmp1223 = getelementptr %struct.word_type* %tmp12211222, i32 0, i32 2		; <%struct.SECOND_UNION*> [#uses=1]
+	%tmp1224 = getelementptr %struct.SECOND_UNION* %tmp1223, i32 0, i32 0		; <{ i16, i8, i8 }*> [#uses=1]
+	%tmp12241225 = bitcast { i16, i8, i8 }* %tmp1224 to <{ i8, i8, i8, i8 }>*		; <<{ i8, i8, i8, i8 }>*> [#uses=1]
+	%tmp1226 = getelementptr <{ i8, i8, i8, i8 }>* %tmp12241225, i32 0, i32 1		; <i8*> [#uses=2]
+	%mask1227 = and i8 %mask1218, 1		; <i8> [#uses=2]
+	%tmp1228 = load i8* %tmp1226, align 1		; <i8> [#uses=1]
+	%tmp1229 = and i8 %mask1227, 1		; <i8> [#uses=1]
+	%tmp1230 = and i8 %tmp1228, -2		; <i8> [#uses=1]
+	%tmp1231 = or i8 %tmp1230, %tmp1229		; <i8> [#uses=1]
+	store i8 %tmp1231, i8* %tmp1226, align 1
+	%mask1232 = and i8 %mask1227, 1		; <i8> [#uses=0]
+	br label %bb1233.exitStub
+}
diff --git a/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll b/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll
new file mode 100644
index 0000000..cd2d609
--- /dev/null
+++ b/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu -relocation-model=pic -disable-fp-elim
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu -relocation-model=pic -disable-fp-elim -schedule-livein-copies | not grep {Number of register spills}
+; PR2134
+
+declare fastcc i8* @w_addchar(i8*, i32*, i32*, i8 signext ) nounwind 
+
+define x86_stdcallcc i32 @parse_backslash(i8** inreg  %word, i32* inreg  %word_length, i32* inreg  %max_length) nounwind  {
+entry:
+	%tmp6 = load i8* null, align 1		; <i8> [#uses=1]
+	br label %bb13
+bb13:		; preds = %entry
+	%tmp26 = call fastcc i8* @w_addchar( i8* null, i32* %word_length, i32* %max_length, i8 signext  %tmp6 ) nounwind 		; <i8*> [#uses=1]
+	store i8* %tmp26, i8** %word, align 4
+	ret i32 0
+}
diff --git a/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll b/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll
new file mode 100644
index 0000000..e673d31
--- /dev/null
+++ b/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -relocation-model=pic | grep TLSGD | count 2
+; PR2137
+
+; ModuleID = '1.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+	%struct.__res_state = type { i32 }
+@__resp = thread_local global %struct.__res_state* @_res		; <%struct.__res_state**> [#uses=1]
+@_res = global %struct.__res_state zeroinitializer, section ".bss"		; <%struct.__res_state*> [#uses=1]
+
+@__libc_resp = hidden alias %struct.__res_state** @__resp		; <%struct.__res_state**> [#uses=2]
+
+define i32 @foo() {
+entry:
+	%retval = alloca i32		; <i32*> [#uses=1]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	%tmp = load %struct.__res_state** @__libc_resp, align 4		; <%struct.__res_state*> [#uses=1]
+	%tmp1 = getelementptr %struct.__res_state* %tmp, i32 0, i32 0		; <i32*> [#uses=1]
+	store i32 0, i32* %tmp1, align 4
+	br label %return
+return:		; preds = %entry
+	%retval2 = load i32* %retval		; <i32> [#uses=1]
+	ret i32 %retval2
+}
+
+define i32 @bar() {
+entry:
+	%retval = alloca i32		; <i32*> [#uses=1]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	%tmp = load %struct.__res_state** @__libc_resp, align 4		; <%struct.__res_state*> [#uses=1]
+	%tmp1 = getelementptr %struct.__res_state* %tmp, i32 0, i32 0		; <i32*> [#uses=1]
+	store i32 1, i32* %tmp1, align 4
+	br label %return
+return:		; preds = %entry
+	%retval2 = load i32* %retval		; <i32> [#uses=1]
+	ret i32 %retval2
+}
diff --git a/test/CodeGen/X86/2008-03-13-TwoAddrPassCrash.ll b/test/CodeGen/X86/2008-03-13-TwoAddrPassCrash.ll
new file mode 100644
index 0000000..c6ba22e
--- /dev/null
+++ b/test/CodeGen/X86/2008-03-13-TwoAddrPassCrash.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s -march=x86
+
+define i16 @t(i32 %depth) signext nounwind  {
+entry:
+	br i1 false, label %bb74, label %bb
+bb:		; preds = %entry
+	ret i16 0
+bb74:		; preds = %entry
+	switch i32 0, label %bail [
+		 i32 17, label %bb84
+		 i32 18, label %bb81
+		 i32 33, label %bb80
+		 i32 34, label %bb84
+	]
+bb80:		; preds = %bb74
+	switch i32 %depth, label %bb103 [
+		 i32 16, label %bb96
+		 i32 32, label %bb91
+		 i32 846624121, label %bb96
+		 i32 1094862674, label %bb91
+		 i32 1096368963, label %bb91
+		 i32 1111970369, label %bb91
+		 i32 1278555445, label %bb96
+		 i32 1278555701, label %bb96
+		 i32 1380401729, label %bb91
+		 i32 1668118891, label %bb91
+		 i32 1916022840, label %bb91
+		 i32 1983131704, label %bb91
+		 i32 2037741171, label %bb96
+		 i32 2037741173, label %bb96
+	]
+bb81:		; preds = %bb74
+	ret i16 0
+bb84:		; preds = %bb74, %bb74
+	switch i32 %depth, label %bb103 [
+		 i32 16, label %bb96
+		 i32 32, label %bb91
+		 i32 846624121, label %bb96
+		 i32 1094862674, label %bb91
+		 i32 1096368963, label %bb91
+		 i32 1111970369, label %bb91
+		 i32 1278555445, label %bb96
+		 i32 1278555701, label %bb96
+		 i32 1380401729, label %bb91
+		 i32 1668118891, label %bb91
+		 i32 1916022840, label %bb91
+		 i32 1983131704, label %bb91
+		 i32 2037741171, label %bb96
+		 i32 2037741173, label %bb96
+	]
+bb91:		; preds = %bb84, %bb84, %bb84, %bb84, %bb84, %bb84, %bb84, %bb84, %bb80, %bb80, %bb80, %bb80, %bb80, %bb80, %bb80, %bb80
+	%wMB.0.reg2mem.0 = phi i16 [ 16, %bb80 ], [ 16, %bb80 ], [ 16, %bb80 ], [ 16, %bb80 ], [ 16, %bb80 ], [ 16, %bb80 ], [ 16, %bb80 ], [ 16, %bb80 ], [ 0, %bb84 ], [ 0, %bb84 ], [ 0, %bb84 ], [ 0, %bb84 ], [ 0, %bb84 ], [ 0, %bb84 ], [ 0, %bb84 ], [ 0, %bb84 ]		; <i16> [#uses=2]
+	%tmp941478 = shl i16 %wMB.0.reg2mem.0, 2		; <i16> [#uses=1]
+	br label %bb103
+bb96:		; preds = %bb84, %bb84, %bb84, %bb84, %bb84, %bb84, %bb80, %bb80, %bb80, %bb80, %bb80, %bb80
+	ret i16 0
+bb103:		; preds = %bb91, %bb84, %bb80
+	%wMB.0.reg2mem.2 = phi i16 [ %wMB.0.reg2mem.0, %bb91 ], [ 16, %bb80 ], [ 0, %bb84 ]		; <i16> [#uses=1]
+	%bBump.0 = phi i16 [ %tmp941478, %bb91 ], [ 16, %bb80 ], [ 0, %bb84 ]		; <i16> [#uses=0]
+	br i1 false, label %bb164, label %UnifiedReturnBlock
+bb164:		; preds = %bb103
+	%tmp167168 = sext i16 %wMB.0.reg2mem.2 to i32		; <i32> [#uses=0]
+	ret i16 0
+bail:		; preds = %bb74
+	ret i16 0
+UnifiedReturnBlock:		; preds = %bb103
+	ret i16 0
+}
diff --git a/test/CodeGen/X86/2008-03-14-SpillerCrash.ll b/test/CodeGen/X86/2008-03-14-SpillerCrash.ll
new file mode 100644
index 0000000..8946415
--- /dev/null
+++ b/test/CodeGen/X86/2008-03-14-SpillerCrash.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu
+; PR2138
+
+	%struct.__locale_struct = type { [13 x %struct.locale_data*], i16*, i32*, i32*, [13 x i8*] }
+	%struct.anon = type { i8* }
+	%struct.locale_data = type { i8*, i8*, i32, i32, { void (%struct.locale_data*)*, %struct.anon }, i32, i32, i32, [0 x %struct.locale_data_value] }
+	%struct.locale_data_value = type { i32* }
+
+@wcstoll_l = alias i64 (i32*, i32**, i32, %struct.__locale_struct*)* @__wcstoll_l		; <i64 (i32*, i32**, i32, %struct.__locale_struct*)*> [#uses=0]
+
+define i64 @____wcstoll_l_internal(i32* %nptr, i32** %endptr, i32 %base, i32 %group, %struct.__locale_struct* %loc) nounwind  {
+entry:
+	%tmp27 = load i32* null, align 4		; <i32> [#uses=1]
+	%tmp83 = getelementptr i32* %nptr, i32 1		; <i32*> [#uses=1]
+	%tmp233 = add i32 0, -48		; <i32> [#uses=1]
+	br label %bb271.us
+bb271.us:		; preds = %entry
+	br label %bb374.outer
+bb311.split:		; preds = %bb305.us
+	%tmp313 = add i32 %tmp378.us, -48		; <i32> [#uses=1]
+	br i1 false, label %bb374.outer, label %bb383
+bb327.split:		; preds = %bb314.us
+	ret i64 0
+bb374.outer:		; preds = %bb311.split, %bb271.us
+	%tmp370371552.pn.in = phi i32 [ %tmp233, %bb271.us ], [ %tmp313, %bb311.split ]		; <i32> [#uses=1]
+	%tmp278279.pn = phi i64 [ 0, %bb271.us ], [ %tmp373.reg2mem.0.ph, %bb311.split ]		; <i64> [#uses=1]
+	%s.5.ph = phi i32* [ null, %bb271.us ], [ %tmp376.us, %bb311.split ]		; <i32*> [#uses=1]
+	%tmp366367550.pn = sext i32 %base to i64		; <i64> [#uses=1]
+	%tmp370371552.pn = zext i32 %tmp370371552.pn.in to i64		; <i64> [#uses=1]
+	%tmp369551.pn = mul i64 %tmp278279.pn, %tmp366367550.pn		; <i64> [#uses=1]
+	%tmp373.reg2mem.0.ph = add i64 %tmp370371552.pn, %tmp369551.pn		; <i64> [#uses=1]
+	br label %bb374.us
+bb374.us:		; preds = %bb314.us, %bb374.outer
+	%tmp376.us = getelementptr i32* %s.5.ph, i32 0		; <i32*> [#uses=3]
+	%tmp378.us = load i32* %tmp376.us, align 4		; <i32> [#uses=2]
+	%tmp302.us = icmp eq i32* %tmp376.us, %tmp83		; <i1> [#uses=1]
+	%bothcond484.us = or i1 false, %tmp302.us		; <i1> [#uses=1]
+	br i1 %bothcond484.us, label %bb383, label %bb305.us
+bb305.us:		; preds = %bb374.us
+	br i1 false, label %bb311.split, label %bb314.us
+bb314.us:		; preds = %bb305.us
+	%tmp320.us = icmp eq i32 %tmp378.us, %tmp27		; <i1> [#uses=1]
+	br i1 %tmp320.us, label %bb374.us, label %bb327.split
+bb383:		; preds = %bb374.us, %bb311.split
+	ret i64 0
+}
+
+declare i64 @__wcstoll_l(i32*, i32**, i32, %struct.__locale_struct*) nounwind 
diff --git a/test/CodeGen/X86/2008-03-18-CoalescerBug.ll b/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
new file mode 100644
index 0000000..ccc4d75
--- /dev/null
+++ b/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim | grep movss | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim -stats |& grep {Number of re-materialization} | grep 1
+
+	%struct..0objc_object = type opaque
+	%struct.OhBoy = type {  }
+	%struct.BooHoo = type { i32 }
+	%struct.objc_selector = type opaque
+@llvm.used = appending global [1 x i8*] [ i8* bitcast (void (%struct.OhBoy*, %struct.objc_selector*, i32, %struct.BooHoo*)* @"-[MessageHeaderDisplay adjustFontSizeBy:viewingState:]" to i8*) ], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define void @"-[MessageHeaderDisplay adjustFontSizeBy:viewingState:]"(%struct.OhBoy* %self, %struct.objc_selector* %_cmd, i32 %delta, %struct.BooHoo* %viewingState) nounwind  {
+entry:
+	%tmp19 = load i32* null, align 4		; <i32> [#uses=1]
+	%tmp24 = tail call float bitcast (void (%struct..0objc_object*, ...)* @objc_msgSend_fpret to float (%struct..0objc_object*, %struct.objc_selector*)*)( %struct..0objc_object* null, %struct.objc_selector* null ) nounwind 		; <float> [#uses=2]
+	%tmp30 = icmp sgt i32 %delta, 0		; <i1> [#uses=1]
+	br i1 %tmp30, label %bb33, label %bb87.preheader
+bb33:		; preds = %entry
+	%tmp28 = fadd float 0.000000e+00, %tmp24		; <float> [#uses=1]
+	%tmp35 = fcmp ogt float %tmp28, 1.800000e+01		; <i1> [#uses=1]
+	br i1 %tmp35, label %bb38, label %bb87.preheader
+bb38:		; preds = %bb33
+	%tmp53 = add i32 %tmp19, %delta		; <i32> [#uses=2]
+	br i1 false, label %bb50, label %bb43
+bb43:		; preds = %bb38
+	store i32 %tmp53, i32* null, align 4
+	ret void
+bb50:		; preds = %bb38
+	%tmp56 = fsub float 1.800000e+01, %tmp24		; <float> [#uses=1]
+	%tmp57 = fcmp ugt float 0.000000e+00, %tmp56		; <i1> [#uses=1]
+	br i1 %tmp57, label %bb64, label %bb87.preheader
+bb64:		; preds = %bb50
+	ret void
+bb87.preheader:		; preds = %bb50, %bb33, %entry
+	%usableDelta.0 = phi i32 [ %delta, %entry ], [ %delta, %bb33 ], [ %tmp53, %bb50 ]		; <i32> [#uses=1]
+	%tmp100 = tail call %struct..0objc_object* (%struct..0objc_object*, %struct.objc_selector*, ...)* @objc_msgSend( %struct..0objc_object* null, %struct.objc_selector* null, %struct..0objc_object* null ) nounwind 		; <%struct..0objc_object*> [#uses=2]
+	%tmp106 = tail call %struct..0objc_object* (%struct..0objc_object*, %struct.objc_selector*, ...)* @objc_msgSend( %struct..0objc_object* %tmp100, %struct.objc_selector* null ) nounwind 		; <%struct..0objc_object*> [#uses=0]
+	%umax = select i1 false, i32 1, i32 0		; <i32> [#uses=1]
+	br label %bb108
+bb108:		; preds = %bb108, %bb87.preheader
+	%attachmentIndex.0.reg2mem.0 = phi i32 [ 0, %bb87.preheader ], [ %indvar.next, %bb108 ]		; <i32> [#uses=2]
+	%tmp114 = tail call %struct..0objc_object* (%struct..0objc_object*, %struct.objc_selector*, ...)* @objc_msgSend( %struct..0objc_object* %tmp100, %struct.objc_selector* null, i32 %attachmentIndex.0.reg2mem.0 ) nounwind 		; <%struct..0objc_object*> [#uses=1]
+	%tmp121 = tail call %struct..0objc_object* (%struct..0objc_object*, %struct.objc_selector*, ...)* @objc_msgSend( %struct..0objc_object* %tmp114, %struct.objc_selector* null, i32 %usableDelta.0 ) nounwind 		; <%struct..0objc_object*> [#uses=0]
+	%indvar.next = add i32 %attachmentIndex.0.reg2mem.0, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next, %umax		; <i1> [#uses=1]
+	br i1 %exitcond, label %bb130, label %bb108
+bb130:		; preds = %bb108
+	ret void
+}
+
+declare %struct..0objc_object* @objc_msgSend(%struct..0objc_object*, %struct.objc_selector*, ...)
+
+declare void @objc_msgSend_fpret(%struct..0objc_object*, ...)
diff --git a/test/CodeGen/X86/2008-03-19-DAGCombinerBug.ll b/test/CodeGen/X86/2008-03-19-DAGCombinerBug.ll
new file mode 100644
index 0000000..eaa883c
--- /dev/null
+++ b/test/CodeGen/X86/2008-03-19-DAGCombinerBug.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86
+
+define i32 @t() nounwind  {
+entry:
+	%tmp54 = add i32 0, 1		; <i32> [#uses=1]
+	br i1 false, label %bb71, label %bb77
+bb71:		; preds = %entry
+	%tmp74 = shl i32 %tmp54, 1		; <i32> [#uses=1]
+	%tmp76 = ashr i32 %tmp74, 3		; <i32> [#uses=1]
+	br label %bb77
+bb77:		; preds = %bb71, %entry
+	%payLoadSize.0 = phi i32 [ %tmp76, %bb71 ], [ 0, %entry ]		; <i32> [#uses=0]
+	unreachable
+}
diff --git a/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll b/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll
new file mode 100644
index 0000000..4dc3a10
--- /dev/null
+++ b/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -asm-verbose | grep {#} | not grep -v {##}
+
+	%struct.AGenericCall = type { %struct.AGenericManager*, %struct.ComponentParameters*, i32* }
+	%struct.AGenericManager = type <{ i8 }>
+	%struct.ComponentInstanceRecord = type opaque
+	%struct.ComponentParameters = type { [1 x i64] }
+
+define i32 @_ZN12AGenericCall10MapIDPtrAtEsRP23ComponentInstanceRecord(%struct.AGenericCall* %this, i16 signext  %param, %struct.ComponentInstanceRecord** %instance) {
+entry:
+	%tmp4 = icmp slt i16 %param, 0		; <i1> [#uses=1]
+	br i1 %tmp4, label %cond_true, label %cond_next
+
+cond_true:		; preds = %entry
+	%tmp1415 = shl i16 %param, 3		; <i16> [#uses=1]
+	%tmp17 = getelementptr %struct.AGenericCall* %this, i32 0, i32 1		; <%struct.ComponentParameters**> [#uses=1]
+	%tmp18 = load %struct.ComponentParameters** %tmp17, align 8		; <%struct.ComponentParameters*> [#uses=1]
+	%tmp1920 = bitcast %struct.ComponentParameters* %tmp18 to i8*		; <i8*> [#uses=1]
+	%tmp212223 = sext i16 %tmp1415 to i64		; <i64> [#uses=1]
+	%tmp24 = getelementptr i8* %tmp1920, i64 %tmp212223		; <i8*> [#uses=1]
+	%tmp2425 = bitcast i8* %tmp24 to i64*		; <i64*> [#uses=1]
+	%tmp28 = load i64* %tmp2425, align 8		; <i64> [#uses=1]
+	%tmp2829 = inttoptr i64 %tmp28 to i32*		; <i32*> [#uses=1]
+	%tmp31 = getelementptr %struct.AGenericCall* %this, i32 0, i32 2		; <i32**> [#uses=1]
+	store i32* %tmp2829, i32** %tmp31, align 8
+	br label %cond_next
+
+cond_next:		; preds = %cond_true, %entry
+	%tmp4243 = shl i16 %param, 3		; <i16> [#uses=1]
+	%tmp46 = getelementptr %struct.AGenericCall* %this, i32 0, i32 1		; <%struct.ComponentParameters**> [#uses=1]
+	%tmp47 = load %struct.ComponentParameters** %tmp46, align 8		; <%struct.ComponentParameters*> [#uses=1]
+	%tmp4849 = bitcast %struct.ComponentParameters* %tmp47 to i8*		; <i8*> [#uses=1]
+	%tmp505152 = sext i16 %tmp4243 to i64		; <i64> [#uses=1]
+	%tmp53 = getelementptr i8* %tmp4849, i64 %tmp505152		; <i8*> [#uses=1]
+	%tmp5354 = bitcast i8* %tmp53 to i64*		; <i64*> [#uses=1]
+	%tmp58 = load i64* %tmp5354, align 8		; <i64> [#uses=1]
+	%tmp59 = icmp eq i64 %tmp58, 0		; <i1> [#uses=1]
+	br i1 %tmp59, label %UnifiedReturnBlock, label %cond_true63
+
+cond_true63:		; preds = %cond_next
+	%tmp65 = getelementptr %struct.AGenericCall* %this, i32 0, i32 0		; <%struct.AGenericManager**> [#uses=1]
+	%tmp66 = load %struct.AGenericManager** %tmp65, align 8		; <%struct.AGenericManager*> [#uses=1]
+	%tmp69 = tail call i32 @_ZN15AGenericManager24DefaultComponentInstanceERP23ComponentInstanceRecord( %struct.AGenericManager* %tmp66, %struct.ComponentInstanceRecord** %instance )		; <i32> [#uses=1]
+	ret i32 %tmp69
+
+UnifiedReturnBlock:		; preds = %cond_next
+	ret i32 undef
+}
+
+declare i32 @_ZN15AGenericManager24DefaultComponentInstanceERP23ComponentInstanceRecord(%struct.AGenericManager*, %struct.ComponentInstanceRecord**)
diff --git a/test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll b/test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll
new file mode 100644
index 0000000..2d868e0
--- /dev/null
+++ b/test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2
+
+define void @t() {
+entry:
+	%tmp455 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> < i32 1, i32 0, i32 3, i32 2 >		; <<4 x float>> [#uses=1]
+	%tmp457 = fmul <4 x float> zeroinitializer, %tmp455		; <<4 x float>> [#uses=2]
+	%tmp461 = shufflevector <4 x float> %tmp457, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>> [#uses=1]
+	%tmp465 = shufflevector <4 x float> %tmp457, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >		; <<4 x float>> [#uses=1]
+	%tmp466 = fsub <4 x float> %tmp461, %tmp465		; <<4 x float>> [#uses=1]
+	%tmp536 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp466, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>> [#uses=1]
+	%tmp542 = shufflevector <4 x float> %tmp536, <4 x float> zeroinitializer, <4 x i32> < i32 6, i32 7, i32 2, i32 3 >		; <<4 x float>> [#uses=1]
+	%tmp580 = bitcast <4 x float> %tmp542 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp582 = and <4 x i32> %tmp580, zeroinitializer		; <<4 x i32>> [#uses=1]
+	%tmp591 = or <4 x i32> %tmp582, zeroinitializer		; <<4 x i32>> [#uses=1]
+	%tmp592 = bitcast <4 x i32> %tmp591 to <4 x float>		; <<4 x float>> [#uses=1]
+	%tmp609 = fdiv <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, %tmp592		; <<4 x float>> [#uses=1]
+	%tmp652 = shufflevector <4 x float> %tmp609, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 >		; <<4 x float>> [#uses=1]
+	%tmp662 = fmul <4 x float> zeroinitializer, %tmp652		; <<4 x float>> [#uses=1]
+	%tmp678 = shufflevector <4 x float> %tmp662, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >		; <<4 x float>> [#uses=1]
+	%tmp753 = fmul <4 x float> zeroinitializer, %tmp678		; <<4 x float>> [#uses=1]
+	%tmp754 = fsub <4 x float> zeroinitializer, %tmp753		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp754, <4 x float>* null, align 16
+	unreachable
+}
diff --git a/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll b/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll
new file mode 100644
index 0000000..305968a
--- /dev/null
+++ b/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic -disable-fp-elim | grep add | grep 12 | not grep non_lazy_ptr
+; Don't fold re-materialized load into a two address instruction
+
+	%"struct.Smarts::Runnable" = type { i32 (...)**, i32 }
+	%struct.__sbuf = type { i8*, i32 }
+	%"struct.std::ios_base" = type { i32 (...)**, i32, i32, i32, i32, i32, %"struct.std::ios_base::_Callback_list"*, %struct.__sbuf, [8 x %struct.__sbuf], i32, %struct.__sbuf*, %"struct.std::locale" }
+	%"struct.std::ios_base::_Callback_list" = type { %"struct.std::ios_base::_Callback_list"*, void (i32, %"struct.std::ios_base"*, i32)*, i32, i32 }
+	%"struct.std::locale" = type { %"struct.std::locale::_Impl"* }
+	%"struct.std::locale::_Impl" = type { i32, %"struct.Smarts::Runnable"**, i32, %"struct.Smarts::Runnable"**, i8** }
+@_ZTVSt9basic_iosIcSt11char_traitsIcEE = external constant [4 x i32 (...)*]		; <[4 x i32 (...)*]*> [#uses=1]
+@_ZTTSt19basic_ostringstreamIcSt11char_traitsIcESaIcEE = external constant [4 x i8*]		; <[4 x i8*]*> [#uses=1]
+@_ZTVSt19basic_ostringstreamIcSt11char_traitsIcESaIcEE = external constant [10 x i32 (...)*]		; <[10 x i32 (...)*]*> [#uses=2]
+@_ZTVSt15basic_streambufIcSt11char_traitsIcEE = external constant [16 x i32 (...)*]		; <[16 x i32 (...)*]*> [#uses=1]
+@_ZTVSt15basic_stringbufIcSt11char_traitsIcESaIcEE = external constant [16 x i32 (...)*]		; <[16 x i32 (...)*]*> [#uses=1]
+
+define void @_GLOBAL__I__ZN5Pooma5pinfoE() nounwind  {
+entry:
+	store i32 (...)** getelementptr ([10 x i32 (...)*]* @_ZTVSt19basic_ostringstreamIcSt11char_traitsIcESaIcEE, i32 0, i32 8), i32 (...)*** null, align 4
+	%tmp96.i.i142.i = call i8* @_Znwm( i32 180 ) nounwind 		; <i8*> [#uses=2]
+	call void @_ZNSt8ios_baseC2Ev( %"struct.std::ios_base"* null ) nounwind 
+	store i32 (...)** getelementptr ([4 x i32 (...)*]* @_ZTVSt9basic_iosIcSt11char_traitsIcEE, i32 0, i32 2), i32 (...)*** null, align 4
+	store i32 (...)** null, i32 (...)*** null, align 4
+	%ctg2242.i.i163.i = getelementptr i8* %tmp96.i.i142.i, i32 0		; <i8*> [#uses=1]
+	%tmp150.i.i164.i = load i8** getelementptr ([4 x i8*]* @_ZTTSt19basic_ostringstreamIcSt11char_traitsIcESaIcEE, i32 0, i64 2), align 4		; <i8*> [#uses=1]
+	%tmp150151.i.i165.i = bitcast i8* %tmp150.i.i164.i to i32 (...)**		; <i32 (...)**> [#uses=1]
+	%tmp153.i.i166.i = bitcast i8* %ctg2242.i.i163.i to i32 (...)***		; <i32 (...)***> [#uses=1]
+	store i32 (...)** %tmp150151.i.i165.i, i32 (...)*** %tmp153.i.i166.i, align 4
+	%tmp159.i.i167.i = bitcast i8* %tmp96.i.i142.i to i32 (...)***		; <i32 (...)***> [#uses=1]
+	store i32 (...)** getelementptr ([10 x i32 (...)*]* @_ZTVSt19basic_ostringstreamIcSt11char_traitsIcESaIcEE, i32 0, i32 3), i32 (...)*** %tmp159.i.i167.i, align 4
+	store i32 (...)** getelementptr ([16 x i32 (...)*]* @_ZTVSt15basic_streambufIcSt11char_traitsIcEE, i32 0, i32 2), i32 (...)*** null, align 4
+	call void @_ZNSt6localeC1Ev( %"struct.std::locale"* null ) nounwind 
+	store i32 (...)** getelementptr ([16 x i32 (...)*]* @_ZTVSt15basic_stringbufIcSt11char_traitsIcESaIcEE, i32 0, i32 2), i32 (...)*** null, align 4
+	unreachable
+}
+
+declare i8* @_Znwm(i32)
+
+declare void @_ZNSt8ios_baseC2Ev(%"struct.std::ios_base"*)
+
+declare void @_ZNSt6localeC1Ev(%"struct.std::locale"*) nounwind 
diff --git a/test/CodeGen/X86/2008-04-02-unnamedEH.ll b/test/CodeGen/X86/2008-04-02-unnamedEH.ll
new file mode 100644
index 0000000..27bbbaa
--- /dev/null
+++ b/test/CodeGen/X86/2008-04-02-unnamedEH.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+
+define void @_Z3bazv() {
+	call void @0( )		; <i32>:1 [#uses=0]
+	ret void
+}
+
+define internal void @""() {
+	call i32 @_Z3barv( )		; <i32>:4 [#uses=1]
+	ret void
+}
+; CHECK: unnamed_1.eh
+
+declare i32 @_Z3barv()
diff --git a/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll b/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll
new file mode 100644
index 0000000..dc8c097
--- /dev/null
+++ b/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86 -mattr=+mmx
+
+define i32 @t2() nounwind  {
+entry:
+	tail call void asm sideeffect "# top of block", "~{dirflag},~{fpsr},~{flags},~{di},~{si},~{dx},~{cx},~{ax}"( ) nounwind 
+	tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind 
+	tail call void asm sideeffect ".line 8", "~{dirflag},~{fpsr},~{flags}"( ) nounwind 
+	%tmp1 = tail call <2 x i32> asm sideeffect "movd $1, $0", "=={mm4},{bp},~{dirflag},~{fpsr},~{flags},~{memory}"( i32 undef ) nounwind 		; <<2 x i32>> [#uses=1]
+	tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind 
+	tail call void asm sideeffect ".line 9", "~{dirflag},~{fpsr},~{flags}"( ) nounwind 
+	%tmp3 = tail call i32 asm sideeffect "movd $1, $0", "=={bp},{mm3},~{dirflag},~{fpsr},~{flags},~{memory}"( <2 x i32> undef ) nounwind 		; <i32> [#uses=1]
+	tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind 
+	tail call void asm sideeffect ".line 10", "~{dirflag},~{fpsr},~{flags}"( ) nounwind 
+	tail call void asm sideeffect "movntq $0, 0($1,$2)", "{mm0},{di},{bp},~{dirflag},~{fpsr},~{flags},~{memory}"( <2 x i32> undef, i32 undef, i32 %tmp3 ) nounwind 
+	tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind 
+	tail call void asm sideeffect ".line 11", "~{dirflag},~{fpsr},~{flags}"( ) nounwind 
+	%tmp8 = tail call i32 asm sideeffect "movd $1, $0", "=={bp},{mm4},~{dirflag},~{fpsr},~{flags},~{memory}"( <2 x i32> %tmp1 ) nounwind 		; <i32> [#uses=0]
+	ret i32 undef
+}
diff --git a/test/CodeGen/X86/2008-04-09-BranchFolding.ll b/test/CodeGen/X86/2008-04-09-BranchFolding.ll
new file mode 100644
index 0000000..41fbdd1
--- /dev/null
+++ b/test/CodeGen/X86/2008-04-09-BranchFolding.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -march=x86 | not grep jmp
+
+	%struct..0anon = type { i32 }
+	%struct.binding_level = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.binding_level*, i8, i8, i8, i8, i8, i32, %struct.tree_node* }
+	%struct.lang_decl = type opaque
+	%struct.rtx_def = type { i16, i8, i8, [1 x %struct..0anon] }
+	%struct.tree_decl = type { [12 x i8], i8*, i32, %struct.tree_node*, i32, i8, i8, i8, i8, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct..0anon, { %struct.rtx_def* }, %struct.tree_node*, %struct.lang_decl* }
+	%struct.tree_node = type { %struct.tree_decl }
+
+define fastcc %struct.tree_node* @pushdecl(%struct.tree_node* %x) nounwind  {
+entry:
+	%tmp3.i40 = icmp eq %struct.binding_level* null, null		; <i1> [#uses=2]
+	br i1 false, label %bb143, label %bb140
+bb140:		; preds = %entry
+	br i1 %tmp3.i40, label %bb160, label %bb17.i
+bb17.i:		; preds = %bb140
+	ret %struct.tree_node* null
+bb143:		; preds = %entry
+	%tmp8.i43 = load %struct.tree_node** null, align 4		; <%struct.tree_node*> [#uses=1]
+	br i1 %tmp3.i40, label %bb160, label %bb9.i48
+bb9.i48:		; preds = %bb143
+	ret %struct.tree_node* null
+bb160:		; preds = %bb143, %bb140
+	%t.0.reg2mem.0 = phi %struct.tree_node* [ null, %bb140 ], [ %tmp8.i43, %bb143 ]		; <%struct.tree_node*> [#uses=1]
+	%tmp162 = icmp eq %struct.tree_node* %t.0.reg2mem.0, null		; <i1> [#uses=2]
+	br i1 %tmp162, label %bb174, label %bb165
+bb165:		; preds = %bb160
+	br label %bb174
+bb174:		; preds = %bb165, %bb160
+	%line.0 = phi i32 [ 0, %bb165 ], [ undef, %bb160 ]		; <i32> [#uses=1]
+	%file.0 = phi i8* [ null, %bb165 ], [ undef, %bb160 ]		; <i8*> [#uses=1]
+	br i1 %tmp162, label %bb344, label %bb73.i
+bb73.i:		; preds = %bb174
+	br i1 false, label %bb226.i, label %bb220.i
+bb220.i:		; preds = %bb73.i
+	ret %struct.tree_node* null
+bb226.i:		; preds = %bb73.i
+	br i1 false, label %bb260, label %bb273.i
+bb273.i:		; preds = %bb226.i
+	ret %struct.tree_node* null
+bb260:		; preds = %bb226.i
+	tail call void (i8*, i32, ...)* @pedwarn_with_file_and_line( i8* %file.0, i32 %line.0, i8* null ) nounwind 
+	ret %struct.tree_node* null
+bb344:		; preds = %bb174
+	ret %struct.tree_node* null
+}
+
+declare void @pedwarn_with_file_and_line(i8*, i32, ...) nounwind 
diff --git a/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll b/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
new file mode 100644
index 0000000..2aea9c5
--- /dev/null
+++ b/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=pic -disable-fp-elim -O0 -regalloc=local
+; PR5534
+
+	%struct.CGPoint = type { double, double }
+	%struct.NSArray = type { %struct.NSObject }
+	%struct.NSAssertionHandler = type { %struct.NSObject, i8* }
+	%struct.NSDockTile = type { %struct.NSObject, %struct.NSObject*, i8*, %struct.NSView*, %struct.NSView*, %struct.NSView*, %struct.NSArray*, %struct._SPFlags, %struct.CGPoint, [5 x %struct.NSObject*] }
+	%struct.NSDocument = type { %struct.NSObject, %struct.NSWindow*, %struct.NSObject*, %struct.NSURL*, %struct.NSArray*, %struct.NSPrintInfo*, i64, %struct.NSView*, %struct.NSObject*, %struct.NSObject*, %struct.NSUndoManager*, %struct._BCFlags2, %struct.NSArray* }
+	%struct.AA = type { %struct.NSObject, %struct.NSDocument*, %struct.NSURL*, %struct.NSArray*, %struct.NSArray* }
+	%struct.NSError = type { %struct.NSObject, i8*, i64, %struct.NSArray*, %struct.NSArray* }
+	%struct.NSImage = type { %struct.NSObject, %struct.NSArray*, %struct.CGPoint, %struct._BCFlags2, %struct.NSObject*, %struct._NSImageAuxiliary* }
+	%struct.NSMutableArray = type { %struct.NSArray }
+	%struct.NSObject = type { %struct.NSObject* }
+	%struct.NSPrintInfo = type { %struct.NSObject, %struct.NSMutableArray*, %struct.NSObject* }
+	%struct.NSRect = type { %struct.CGPoint, %struct.CGPoint }
+	%struct.NSRegion = type opaque
+	%struct.NSResponder = type { %struct.NSObject, %struct.NSObject* }
+	%struct.NSToolbar = type { %struct.NSObject, %struct.NSArray*, %struct.NSMutableArray*, %struct.NSMutableArray*, %struct.NSArray*, %struct.NSObject*, %struct.NSArray*, i8*, %struct.NSObject*, %struct.NSWindow*, %struct.NSObject*, %struct.NSObject*, i64, %struct._BCFlags2, i64, %struct.NSObject* }
+	%struct.NSURL = type { %struct.NSObject, %struct.NSArray*, %struct.NSURL*, i8*, i8* }
+	%struct.NSUndoManager = type { %struct.NSObject, %struct.NSObject*, %struct.NSObject*, %struct.NSArray*, i64, %struct._SPFlags, %struct.NSObject*, i8*, i8*, i8* }
+	%struct.NSView = type { %struct.NSResponder, %struct.NSRect, %struct.NSRect, %struct.NSObject*, %struct.NSObject*, %struct.NSWindow*, %struct.NSObject*, %struct.NSObject*, %struct.NSObject*, %struct.NSObject*, %struct._NSViewAuxiliary*, %struct._BCFlags, %struct._SPFlags }
+	%struct.NSWindow = type { %struct.NSResponder, %struct.NSRect, %struct.NSObject*, %struct.NSObject*, %struct.NSResponder*, %struct.NSView*, %struct.NSView*, %struct.NSObject*, %struct.NSObject*, i32, i64, i32, %struct.NSArray*, %struct.NSObject*, i8, i8, i8, i8, i8*, i8*, %struct.NSImage*, i32, %struct.NSMutableArray*, %struct.NSURL*, %struct.CGPoint*, %struct.NSArray*, %struct.NSArray*, %struct.__wFlags, %struct.NSObject*, %struct.NSView*, %struct.NSWindowAuxiliary* }
+	%struct.NSWindowAuxiliary = type { %struct.NSObject, %struct.NSArray*, %struct.NSDockTile*, %struct._NSWindowAnimator*, %struct.NSRect, i32, %struct.NSAssertionHandler*, %struct.NSUndoManager*, %struct.NSWindowController*, %struct.NSAssertionHandler*, %struct.NSObject*, i32, %struct.__CFRunLoopObserver*, %struct.__CFRunLoopObserver*, %struct.NSArray*, %struct.NSArray*, %struct.NSView*, %struct.NSRegion*, %struct.NSWindow*, %struct.NSWindow*, %struct.NSArray*, %struct.NSMutableArray*, %struct.NSArray*, %struct.NSWindow*, %struct.CGPoint, %struct.NSObject*, i8*, i8*, i32, %struct.NSObject*, %struct.NSArray*, double, %struct.CGPoint, %struct.NSArray*, %struct.NSMutableArray*, %struct.NSMutableArray*, %struct.NSWindow*, %struct.NSView*, %struct.NSArray*, %struct.__auxWFlags, i32, i8*, double, %struct.NSObject*, %struct.NSObject*, %struct.__CFArray*, %struct.NSRegion*, %struct.NSArray*, %struct.NSRect, %struct.NSToolbar*, %struct.NSRect, %struct.NSMutableArray* }
+	%struct.NSWindowController = type { %struct.NSResponder, %struct.NSWindow*, %struct.NSArray*, %struct.NSDocument*, %struct.NSArray*, %struct.NSObject*, %struct._SPFlags, %struct.NSArray*, %struct.NSObject* }
+	%struct._BCFlags = type <{ i8, i8, i8, i8 }>
+	%struct._BCFlags2 = type <{ i8, [3 x i8] }>
+	%struct._NSImageAuxiliary = type opaque
+	%struct._NSViewAuxiliary = type opaque
+	%struct._NSWindowAnimator = type opaque
+	%struct._SPFlags = type <{ i32 }>
+	%struct.__CFArray = type opaque
+	%struct.__CFRunLoopObserver = type opaque
+	%struct.__auxWFlags = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i16 }
+	%struct.__wFlags = type <{ i8, i8, i8, i8, i8, i8, i8, i8 }>
+	%struct._message_ref_t = type { %struct.NSObject* (%struct.NSObject*, %struct._message_ref_t*, ...)*, %struct.objc_selector* }
+	%struct.objc_selector = type opaque
+@"\01L_OBJC_MESSAGE_REF_228" = internal global %struct._message_ref_t zeroinitializer		; <%struct._message_ref_t*> [#uses=1]
+@llvm.used1 = appending global [1 x i8*] [ i8* bitcast (void (%struct.AA*, %struct._message_ref_t*, %struct.NSError*, i64, %struct.NSObject*, %struct.objc_selector*, i8*)* @"-[AA BB:optionIndex:delegate:CC:contextInfo:]" to i8*) ], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define void @"-[AA BB:optionIndex:delegate:CC:contextInfo:]"(%struct.AA* %self, %struct._message_ref_t* %_cmd, %struct.NSError* %inError, i64 %inOptionIndex, %struct.NSObject* %inDelegate, %struct.objc_selector* %inDidRecoverSelector, i8* %inContextInfo) {
+entry:
+	%tmp105 = load %struct.NSArray** null, align 8		; <%struct.NSArray*> [#uses=1]
+	%tmp107 = load %struct.NSObject** null, align 8		; <%struct.NSObject*> [#uses=1]
+	call void null( %struct.NSObject* %tmp107, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_228", %struct.NSArray* %tmp105, i8 signext  0 )
+	%tmp111 = call %struct.NSObject* (%struct.NSObject*, %struct.objc_selector*, ...)* @objc_msgSend( %struct.NSObject* null, %struct.objc_selector* null, i32 0, i8* null )		; <%struct.NSObject*> [#uses=0]
+	ret void
+}
+
+declare %struct.NSObject* @objc_msgSend(%struct.NSObject*, %struct.objc_selector*, ...)
diff --git a/test/CodeGen/X86/2008-04-16-CoalescerBug.ll b/test/CodeGen/X86/2008-04-16-CoalescerBug.ll
new file mode 100644
index 0000000..3ccc0fe
--- /dev/null
+++ b/test/CodeGen/X86/2008-04-16-CoalescerBug.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -march=x86
+
+define void @Hubba(i8* %saveunder, i32 %firstBlob, i32 %select) nounwind  {
+entry:
+	br i1 false, label %bb53.us, label %bb53
+bb53.us:		; preds = %bb94.us, %bb53.us, %entry
+	switch i8 1, label %bb71.us [
+		 i8 0, label %bb53.us
+		 i8 1, label %bb94.us
+	]
+bb94.us:		; preds = %bb71.us, %bb53.us
+	%result.0.us = phi i32 [ %tmp93.us, %bb71.us ], [ 0, %bb53.us ]		; <i32> [#uses=2]
+	%tmp101.us = lshr i32 %result.0.us, 3		; <i32> [#uses=1]
+	%result.0163.us = trunc i32 %result.0.us to i16		; <i16> [#uses=2]
+	shl i16 %result.0163.us, 7		; <i16>:0 [#uses=1]
+	%tmp106.us = and i16 %0, -1024		; <i16> [#uses=1]
+	shl i16 %result.0163.us, 2		; <i16>:1 [#uses=1]
+	%tmp109.us = and i16 %1, -32		; <i16> [#uses=1]
+	%tmp111112.us = trunc i32 %tmp101.us to i16		; <i16> [#uses=1]
+	%tmp110.us = or i16 %tmp109.us, %tmp111112.us		; <i16> [#uses=1]
+	%tmp113.us = or i16 %tmp110.us, %tmp106.us		; <i16> [#uses=1]
+	store i16 %tmp113.us, i16* null, align 2
+	br label %bb53.us
+bb71.us:		; preds = %bb53.us
+	%tmp80.us = load i8* null, align 1		; <i8> [#uses=1]
+	%tmp8081.us = zext i8 %tmp80.us to i32		; <i32> [#uses=1]
+	%tmp87.us = mul i32 %tmp8081.us, 0		; <i32> [#uses=1]
+	%tmp92.us = add i32 0, %tmp87.us		; <i32> [#uses=1]
+	%tmp93.us = udiv i32 %tmp92.us, 255		; <i32> [#uses=1]
+	br label %bb94.us
+bb53:		; preds = %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/2008-04-16-ReMatBug.ll b/test/CodeGen/X86/2008-04-16-ReMatBug.ll
new file mode 100644
index 0000000..6e8891b
--- /dev/null
+++ b/test/CodeGen/X86/2008-04-16-ReMatBug.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep movw | not grep {, %e}
+
+	%struct.DBC_t = type { i32, i8*, i16, %struct.DBC_t*, i8*, i8*, i8*, i8*, i8*, %struct.DBC_t*, i32, i32, i32, i32, i8*, i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i16, i16, i32*, i8, i16, %struct.DRVOPT*, i16 }
+	%struct.DRVOPT = type { i16, i32, i8, %struct.DRVOPT* }
+	%struct.GENV_t = type { i32, i8*, i16, i8*, i8*, i32, i32, i32, i32, %struct.DBC_t*, i16 }
+	%struct.pthread_mutex_t = type { i32, [40 x i8] }
+@iodbcdm_global_lock = external global %struct.pthread_mutex_t		; <%struct.pthread_mutex_t*> [#uses=1]
+
+define i16 @SQLDriversW(i8* %henv, i16 zeroext  %fDir, i32* %szDrvDesc, i16 signext  %cbDrvDescMax, i16* %pcbDrvDesc, i32* %szDrvAttr, i16 signext  %cbDrvAttrMax, i16* %pcbDrvAttr) signext nounwind  {
+entry:
+	%tmp12 = bitcast i8* %henv to %struct.GENV_t*		; <%struct.GENV_t*> [#uses=1]
+	br i1 true, label %bb28, label %bb
+bb:		; preds = %entry
+	ret i16 0
+bb28:		; preds = %entry
+	br i1 false, label %bb37, label %done
+bb37:		; preds = %bb28
+	%tmp46 = getelementptr %struct.GENV_t* %tmp12, i32 0, i32 10		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp46, align 4
+	br i1 false, label %bb74, label %bb92
+bb74:		; preds = %bb37
+	br label %bb92
+bb92:		; preds = %bb74, %bb37
+	%tmp95180 = shl i16 %cbDrvAttrMax, 2		; <i16> [#uses=1]
+	%tmp100178 = shl i16 %cbDrvDescMax, 2		; <i16> [#uses=1]
+	%tmp113 = tail call i16 @SQLDrivers_Internal( i8* %henv, i16 zeroext  %fDir, i8* null, i16 signext  %tmp100178, i16* %pcbDrvDesc, i8* null, i16 signext  %tmp95180, i16* %pcbDrvAttr, i8 zeroext  87 ) signext nounwind 		; <i16> [#uses=1]
+	br i1 false, label %done, label %bb137
+bb137:		; preds = %bb92
+	ret i16 0
+done:		; preds = %bb92, %bb28
+	%retcode.0 = phi i16 [ -2, %bb28 ], [ %tmp113, %bb92 ]		; <i16> [#uses=2]
+	br i1 false, label %bb167, label %bb150
+bb150:		; preds = %done
+	%tmp157158 = sext i16 %retcode.0 to i32		; <i32> [#uses=1]
+	tail call void @trace_SQLDriversW( i32 1, i32 %tmp157158, i8* %henv, i16 zeroext  %fDir, i32* %szDrvDesc, i16 signext  %cbDrvDescMax, i16* %pcbDrvDesc, i32* %szDrvAttr, i16 signext  %cbDrvAttrMax, i16* %pcbDrvAttr ) nounwind 
+	ret i16 0
+bb167:		; preds = %done
+	%tmp168 = tail call i32 @pthread_mutex_unlock( %struct.pthread_mutex_t* @iodbcdm_global_lock ) nounwind 		; <i32> [#uses=0]
+	ret i16 %retcode.0
+}
+
+declare i32 @pthread_mutex_unlock(%struct.pthread_mutex_t*)
+
+declare i16 @SQLDrivers_Internal(i8*, i16 zeroext , i8*, i16 signext , i16*, i8*, i16 signext , i16*, i8 zeroext ) signext nounwind 
+
+declare void @trace_SQLDriversW(i32, i32, i8*, i16 zeroext , i32*, i16 signext , i16*, i32*, i16 signext , i16*)
diff --git a/test/CodeGen/X86/2008-04-17-CoalescerBug.ll b/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
new file mode 100644
index 0000000..ac48285
--- /dev/null
+++ b/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
@@ -0,0 +1,171 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep xorl | grep {%e}
+; Make sure xorl operands are 32-bit registers.
+
+	%struct.tm = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8* }
+	%struct.wxDateTime = type { %struct.wxLongLong }
+	%"struct.wxDateTime::TimeZone" = type { i32 }
+	%struct.wxLongLong = type { i64 }
+	%struct.wxString = type { %struct.wxStringBase }
+	%struct.wxStringBase = type { i32* }
+@.str = external constant [27 x i32]		; <[27 x i32]*> [#uses=1]
+@.str4 = external constant [14 x i32]		; <[14 x i32]*> [#uses=1]
+@_ZZNK10wxDateTime5GetTmERKNS_8TimeZoneEE12__FUNCTION__ = external constant [6 x i8]		; <[6 x i8]*> [#uses=1]
+@.str33 = external constant [29 x i32]		; <[29 x i32]*> [#uses=1]
+@.str89 = external constant [5 x i32]		; <[5 x i32]*> [#uses=1]
+
+define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(%struct.wxString* noalias sret  %agg.result, %struct.wxDateTime* %this, i32* %format, %"struct.wxDateTime::TimeZone"* %tz, i1 %foo) {
+entry:
+	br i1 %foo, label %bb116.i, label %bb115.critedge.i
+bb115.critedge.i:		; preds = %entry
+	ret void
+bb116.i:		; preds = %entry
+	br i1 %foo, label %bb52.i.i, label %bb3118
+bb3118:		; preds = %bb116.i
+	ret void
+bb52.i.i:		; preds = %bb116.i
+	br i1 %foo, label %bb142.i, label %bb115.critedge.i.i
+bb115.critedge.i.i:		; preds = %bb52.i.i
+	ret void
+bb142.i:		; preds = %bb52.i.i
+	br i1 %foo, label %bb161.i, label %bb182.i
+bb161.i:		; preds = %bb142.i
+	br label %bb3261
+bb182.i:		; preds = %bb142.i
+	ret void
+bb3261:		; preds = %bb7834, %bb161.i
+	%tmp3263 = load i32* null, align 4		; <i32> [#uses=1]
+	%tmp3264 = icmp eq i32 %tmp3263, 37		; <i1> [#uses=1]
+	br i1 %tmp3264, label %bb3306, label %bb3267
+bb3267:		; preds = %bb3261
+	ret void
+bb3306:		; preds = %bb3261
+	%tmp3310 = invoke %struct.wxStringBase* @_ZN12wxStringBaseaSEPKw( %struct.wxStringBase* null, i32* getelementptr ([5 x i32]* @.str89, i32 0, i32 0) )
+			to label %bb3314 unwind label %lpad		; <%struct.wxStringBase*> [#uses=0]
+bb3314:		; preds = %bb3306
+	%tmp3316 = load i32* null, align 4		; <i32> [#uses=1]
+	switch i32 %tmp3316, label %bb7595 [
+		 i32 0, label %bb7819
+		 i32 37, label %bb7806
+		 i32 66, label %bb3477
+		 i32 72, label %bb5334
+		 i32 73, label %bb5484
+		 i32 77, label %bb6118
+		 i32 83, label %bb6406
+		 i32 85, label %bb6556
+		 i32 87, label %bb6708
+		 i32 89, label %bb7308
+		 i32 98, label %bb3477
+		 i32 99, label %bb3626
+		 i32 100, label %bb5184
+		 i32 106, label %bb5657
+		 i32 108, label %bb5809
+		 i32 109, label %bb5968
+		 i32 119, label %bb6860
+		 i32 120, label %bb3626
+		 i32 121, label %bb7158
+	]
+bb3477:		; preds = %bb3314, %bb3314
+	ret void
+bb3626:		; preds = %bb3314, %bb3314
+	ret void
+bb5184:		; preds = %bb3314
+	ret void
+bb5334:		; preds = %bb3314
+	ret void
+bb5484:		; preds = %bb3314
+	ret void
+bb5657:		; preds = %bb3314
+	%tmp5661 = invoke i16 @_ZNK10wxDateTime12GetDayOfYearERKNS_8TimeZoneE( %struct.wxDateTime* %this, %"struct.wxDateTime::TimeZone"* %tz ) zeroext 
+			to label %invcont5660 unwind label %lpad		; <i16> [#uses=0]
+invcont5660:		; preds = %bb5657
+	ret void
+bb5809:		; preds = %bb3314
+	%tmp61.i.i8486 = icmp sgt i64 0, -1		; <i1> [#uses=1]
+	%tmp95.i.i8490 = icmp slt i64 0, 2147483647000		; <i1> [#uses=1]
+	%bothcond9308 = and i1 %tmp61.i.i8486, %tmp95.i.i8490		; <i1> [#uses=1]
+	br i1 %bothcond9308, label %bb91.i8504, label %bb115.critedge.i.i8492
+bb115.critedge.i.i8492:		; preds = %bb5809
+	ret void
+bb91.i8504:		; preds = %bb5809
+	br i1 %foo, label %bb155.i8541, label %bb182.i8560
+bb155.i8541:		; preds = %bb91.i8504
+	%tmp156.i85398700 = invoke %struct.tm* @gmtime_r( i32* null, %struct.tm* null )
+			to label %bb182.i8560 unwind label %lpad		; <%struct.tm*> [#uses=1]
+bb182.i8560:		; preds = %bb155.i8541, %bb91.i8504
+	%tm48.0.i8558 = phi %struct.tm* [ null, %bb91.i8504 ], [ %tmp156.i85398700, %bb155.i8541 ]		; <%struct.tm*> [#uses=0]
+	br i1 %foo, label %bb278.i8617, label %bb187.i8591
+bb187.i8591:		; preds = %bb182.i8560
+	%tmp245.i8588 = srem i64 0, 86400000		; <i64> [#uses=1]
+	br i1 %foo, label %bb264.i8592, label %bb265.i8606
+bb264.i8592:		; preds = %bb187.i8591
+	ret void
+bb265.i8606:		; preds = %bb187.i8591
+	%tmp268269.i8593 = trunc i64 %tmp245.i8588 to i32		; <i32> [#uses=1]
+	%tmp273.i8594 = srem i32 %tmp268269.i8593, 1000		; <i32> [#uses=1]
+	%tmp273274.i8595 = trunc i32 %tmp273.i8594 to i16		; <i16> [#uses=1]
+	br label %invcont5814
+bb278.i8617:		; preds = %bb182.i8560
+	%timeOnly50.0.i8622 = add i32 0, 0		; <i32> [#uses=1]
+	br i1 %foo, label %bb440.i8663, label %bb448.i8694
+bb440.i8663:		; preds = %bb278.i8617
+	invoke void @_Z10wxOnAssertPKwiPKcS0_S0_( i32* getelementptr ([27 x i32]* @.str, i32 0, i32 0), i32 1717, i8* getelementptr ([6 x i8]* @_ZZNK10wxDateTime5GetTmERKNS_8TimeZoneEE12__FUNCTION__, i32 0, i32 0), i32* getelementptr ([29 x i32]* @.str33, i32 0, i32 0), i32* getelementptr ([14 x i32]* @.str4, i32 0, i32 0) )
+			to label %bb448.i8694 unwind label %lpad
+bb448.i8694:		; preds = %bb440.i8663, %bb278.i8617
+	%tmp477.i8669 = srem i32 %timeOnly50.0.i8622, 1000		; <i32> [#uses=1]
+	%tmp477478.i8670 = trunc i32 %tmp477.i8669 to i16		; <i16> [#uses=1]
+	br label %invcont5814
+invcont5814:		; preds = %bb448.i8694, %bb265.i8606
+	%tmp812.0.0 = phi i16 [ %tmp477478.i8670, %bb448.i8694 ], [ %tmp273274.i8595, %bb265.i8606 ]		; <i16> [#uses=1]
+	%tmp58165817 = zext i16 %tmp812.0.0 to i32		; <i32> [#uses=1]
+	invoke void (%struct.wxString*, i32*, ...)* @_ZN8wxString6FormatEPKwz( %struct.wxString* noalias sret  null, i32* null, i32 %tmp58165817 )
+			to label %invcont5831 unwind label %lpad
+invcont5831:		; preds = %invcont5814
+	%tmp5862 = invoke i8 @_ZN12wxStringBase10ConcatSelfEmPKwm( %struct.wxStringBase* null, i32 0, i32* null, i32 0 ) zeroext 
+			to label %bb7834 unwind label %lpad8185		; <i8> [#uses=0]
+bb5968:		; preds = %bb3314
+	invoke void (%struct.wxString*, i32*, ...)* @_ZN8wxString6FormatEPKwz( %struct.wxString* noalias sret  null, i32* null, i32 0 )
+			to label %invcont5981 unwind label %lpad
+invcont5981:		; preds = %bb5968
+	ret void
+bb6118:		; preds = %bb3314
+	ret void
+bb6406:		; preds = %bb3314
+	ret void
+bb6556:		; preds = %bb3314
+	ret void
+bb6708:		; preds = %bb3314
+	ret void
+bb6860:		; preds = %bb3314
+	ret void
+bb7158:		; preds = %bb3314
+	ret void
+bb7308:		; preds = %bb3314
+	ret void
+bb7595:		; preds = %bb3314
+	ret void
+bb7806:		; preds = %bb3314
+	%tmp7814 = invoke %struct.wxStringBase* @_ZN12wxStringBase6appendEmw( %struct.wxStringBase* null, i32 1, i32 0 )
+			to label %bb7834 unwind label %lpad		; <%struct.wxStringBase*> [#uses=0]
+bb7819:		; preds = %bb3314
+	ret void
+bb7834:		; preds = %bb7806, %invcont5831
+	br label %bb3261
+lpad:		; preds = %bb7806, %bb5968, %invcont5814, %bb440.i8663, %bb155.i8541, %bb5657, %bb3306
+	ret void
+lpad8185:		; preds = %invcont5831
+	ret void
+}
+
+declare void @_Z10wxOnAssertPKwiPKcS0_S0_(i32*, i32, i8*, i32*, i32*)
+
+declare i8 @_ZN12wxStringBase10ConcatSelfEmPKwm(%struct.wxStringBase*, i32, i32*, i32) zeroext 
+
+declare %struct.tm* @gmtime_r(i32*, %struct.tm*)
+
+declare i16 @_ZNK10wxDateTime12GetDayOfYearERKNS_8TimeZoneE(%struct.wxDateTime*, %"struct.wxDateTime::TimeZone"*) zeroext 
+
+declare %struct.wxStringBase* @_ZN12wxStringBase6appendEmw(%struct.wxStringBase*, i32, i32)
+
+declare %struct.wxStringBase* @_ZN12wxStringBaseaSEPKw(%struct.wxStringBase*, i32*)
+
+declare void @_ZN8wxString6FormatEPKwz(%struct.wxString* noalias sret , i32*, ...)
diff --git a/test/CodeGen/X86/2008-04-24-MemCpyBug.ll b/test/CodeGen/X86/2008-04-24-MemCpyBug.ll
new file mode 100644
index 0000000..6389267
--- /dev/null
+++ b/test/CodeGen/X86/2008-04-24-MemCpyBug.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 | not grep 120
+; Don't accidentally add the offset twice for trailing bytes.
+
+	%struct.S63 = type { [63 x i8] }
+@g1s63 = external global %struct.S63		; <%struct.S63*> [#uses=1]
+
+declare void @test63(%struct.S63* byval align 4 ) nounwind 
+
+define void @testit63_entry_2E_ce() nounwind  {
+	tail call void @test63( %struct.S63* byval align 4  @g1s63 ) nounwind 
+	ret void
+}
diff --git a/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll b/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll
new file mode 100644
index 0000000..4eaca17
--- /dev/null
+++ b/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mattr=+sse41
+; rdar://5886601
+; gcc testsuite:  gcc.target/i386/sse4_1-pblendw.c
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+
+define i32 @main() nounwind  {
+entry:
+	%tmp122 = load <2 x i64>* null, align 16		; <<2 x i64>> [#uses=1]
+	%tmp126 = bitcast <2 x i64> %tmp122 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp129 = call <8 x i16> @llvm.x86.sse41.pblendw( <8 x i16> zeroinitializer, <8 x i16> %tmp126, i32 2 ) nounwind 		; <<8 x i16>> [#uses=0]
+	ret i32 0
+}
+
+declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind 
diff --git a/test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll b/test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll
new file mode 100644
index 0000000..38d6aa6
--- /dev/null
+++ b/test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s | grep {1 \$2 3}
+; rdar://5720231
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+
+define void @test() nounwind  {
+entry:
+	tail call void asm sideeffect " ${0:c} $1 ${2:c} ", "imr,imr,i,~{dirflag},~{fpsr},~{flags}"( i32 1, i32 2, i32 3 ) nounwind 
+	ret void
+}
+
diff --git a/test/CodeGen/X86/2008-04-28-CoalescerBug.ll b/test/CodeGen/X86/2008-04-28-CoalescerBug.ll
new file mode 100644
index 0000000..5b97eb7
--- /dev/null
+++ b/test/CodeGen/X86/2008-04-28-CoalescerBug.ll
@@ -0,0 +1,167 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movl > %t
+; RUN: not grep {r\[abcd\]x} %t
+; RUN: not grep {r\[ds\]i} %t
+; RUN: not grep {r\[bs\]p} %t
+
+	%struct.BITMAP = type { i16, i16, i32, i32, i32, i32, i32, i32, i8*, i8* }
+	%struct.BltData = type { float, float, float, float }
+	%struct.BltDepth = type { i32, i8**, i32, %struct.BITMAP* (%struct.BltDepth**, %struct.BITMAP*, i32, i32, float*, float, i32)*, i32 (%struct.BltDepth**, %struct.BltOp*)*, i32 (%struct.BltDepth**, %struct.BltOp*, %struct.BltImg*)*, i32 (%struct.BltDepth**, %struct.BltOp*, %struct.BltSh*)*, [28 x [2 x [2 x i32]]]*, %struct.BltData* }
+	%struct.BltImg = type { i32, i8, i8, i8, float, float*, float*, i32, i32, float*, i32 (i8*, i8*, i8**, i32*, i8**, i32*)*, i8* }
+	%struct.BltOp = type { i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i8* }
+	%struct.BltSh = type { i8, i8, i8, i8, float, float*, float*, float*, float*, i32, i32, float*, float*, float* }
+
+define void @t(%struct.BltDepth* %depth, %struct.BltOp* %bop, i32 %mode) nounwind  {
+entry:
+	switch i32 %mode, label %return [
+		 i32 1, label %bb2898.us
+		 i32 18, label %bb13086.preheader
+	]
+
+bb13086.preheader:		; preds = %entry
+	%tmp13098 = icmp eq i32 0, 0		; <i1> [#uses=1]
+	%tmp13238 = icmp eq i32 0, 0		; <i1> [#uses=1]
+	br label %bb13088
+
+bb2898.us:		; preds = %bb2898.us, %entry
+	br label %bb2898.us
+
+bb13088:		; preds = %bb13572, %bb13567, %bb13107, %bb13086.preheader
+	br i1 %tmp13098, label %bb13107, label %bb13101
+
+bb13101:		; preds = %bb13088
+	br label %bb13107
+
+bb13107:		; preds = %bb13101, %bb13088
+	%iftmp.684.0 = phi i32 [ 0, %bb13101 ], [ 65535, %bb13088 ]		; <i32> [#uses=2]
+	%tmp13111 = load i64* null, align 8		; <i64> [#uses=3]
+	%tmp13116 = lshr i64 %tmp13111, 16		; <i64> [#uses=1]
+	%tmp1311613117 = trunc i64 %tmp13116 to i32		; <i32> [#uses=1]
+	%tmp13118 = and i32 %tmp1311613117, 65535		; <i32> [#uses=1]
+	%tmp13120 = lshr i64 %tmp13111, 32		; <i64> [#uses=1]
+	%tmp1312013121 = trunc i64 %tmp13120 to i32		; <i32> [#uses=1]
+	%tmp13122 = and i32 %tmp1312013121, 65535		; <i32> [#uses=2]
+	%tmp13124 = lshr i64 %tmp13111, 48		; <i64> [#uses=1]
+	%tmp1312413125 = trunc i64 %tmp13124 to i32		; <i32> [#uses=2]
+	%tmp1314013141not = xor i16 0, -1		; <i16> [#uses=1]
+	%tmp1314013141not13142 = zext i16 %tmp1314013141not to i32		; <i32> [#uses=3]
+	%tmp13151 = mul i32 %tmp13122, %tmp1314013141not13142		; <i32> [#uses=1]
+	%tmp13154 = mul i32 %tmp1312413125, %tmp1314013141not13142		; <i32> [#uses=1]
+	%tmp13157 = mul i32 %iftmp.684.0, %tmp1314013141not13142		; <i32> [#uses=1]
+	%tmp13171 = add i32 %tmp13151, 1		; <i32> [#uses=1]
+	%tmp13172 = add i32 %tmp13171, 0		; <i32> [#uses=1]
+	%tmp13176 = add i32 %tmp13154, 1		; <i32> [#uses=1]
+	%tmp13177 = add i32 %tmp13176, 0		; <i32> [#uses=1]
+	%tmp13181 = add i32 %tmp13157, 1		; <i32> [#uses=1]
+	%tmp13182 = add i32 %tmp13181, 0		; <i32> [#uses=1]
+	%tmp13188 = lshr i32 %tmp13172, 16		; <i32> [#uses=1]
+	%tmp13190 = lshr i32 %tmp13177, 16		; <i32> [#uses=1]
+	%tmp13192 = lshr i32 %tmp13182, 16		; <i32> [#uses=1]
+	%tmp13198 = sub i32 %tmp13118, 0		; <i32> [#uses=1]
+	%tmp13201 = sub i32 %tmp13122, %tmp13188		; <i32> [#uses=1]
+	%tmp13204 = sub i32 %tmp1312413125, %tmp13190		; <i32> [#uses=1]
+	%tmp13207 = sub i32 %iftmp.684.0, %tmp13192		; <i32> [#uses=1]
+	%tmp1320813209 = zext i32 %tmp13204 to i64		; <i64> [#uses=1]
+	%tmp13211 = shl i64 %tmp1320813209, 48		; <i64> [#uses=1]
+	%tmp1321213213 = zext i32 %tmp13201 to i64		; <i64> [#uses=1]
+	%tmp13214 = shl i64 %tmp1321213213, 32		; <i64> [#uses=1]
+	%tmp13215 = and i64 %tmp13214, 281470681743360		; <i64> [#uses=1]
+	%tmp1321713218 = zext i32 %tmp13198 to i64		; <i64> [#uses=1]
+	%tmp13219 = shl i64 %tmp1321713218, 16		; <i64> [#uses=1]
+	%tmp13220 = and i64 %tmp13219, 4294901760		; <i64> [#uses=1]
+	%tmp13216 = or i64 %tmp13211, 0		; <i64> [#uses=1]
+	%tmp13221 = or i64 %tmp13216, %tmp13215		; <i64> [#uses=1]
+	%tmp13225 = or i64 %tmp13221, %tmp13220		; <i64> [#uses=4]
+	%tmp1322713228 = trunc i32 %tmp13207 to i16		; <i16> [#uses=4]
+	%tmp13233 = icmp eq i16 %tmp1322713228, 0		; <i1> [#uses=1]
+	br i1 %tmp13233, label %bb13088, label %bb13236
+
+bb13236:		; preds = %bb13107
+	br i1 false, label %bb13567, label %bb13252
+
+bb13252:		; preds = %bb13236
+	%tmp1329013291 = zext i16 %tmp1322713228 to i64		; <i64> [#uses=8]
+	%tmp13296 = lshr i64 %tmp13225, 16		; <i64> [#uses=1]
+	%tmp13297 = and i64 %tmp13296, 65535		; <i64> [#uses=1]
+	%tmp13299 = lshr i64 %tmp13225, 32		; <i64> [#uses=1]
+	%tmp13300 = and i64 %tmp13299, 65535		; <i64> [#uses=1]
+	%tmp13302 = lshr i64 %tmp13225, 48		; <i64> [#uses=1]
+	%tmp13306 = sub i64 %tmp1329013291, 0		; <i64> [#uses=0]
+	%tmp13309 = sub i64 %tmp1329013291, %tmp13297		; <i64> [#uses=1]
+	%tmp13312 = sub i64 %tmp1329013291, %tmp13300		; <i64> [#uses=1]
+	%tmp13315 = sub i64 %tmp1329013291, %tmp13302		; <i64> [#uses=1]
+	%tmp13318 = mul i64 %tmp1329013291, %tmp1329013291		; <i64> [#uses=1]
+	br i1 false, label %bb13339, label %bb13324
+
+bb13324:		; preds = %bb13252
+	br i1 false, label %bb13339, label %bb13330
+
+bb13330:		; preds = %bb13324
+	%tmp13337 = sdiv i64 0, 0		; <i64> [#uses=1]
+	br label %bb13339
+
+bb13339:		; preds = %bb13330, %bb13324, %bb13252
+	%r0120.0 = phi i64 [ %tmp13337, %bb13330 ], [ 0, %bb13252 ], [ 4294836225, %bb13324 ]		; <i64> [#uses=1]
+	br i1 false, label %bb13360, label %bb13345
+
+bb13345:		; preds = %bb13339
+	br i1 false, label %bb13360, label %bb13351
+
+bb13351:		; preds = %bb13345
+	%tmp13354 = mul i64 0, %tmp13318		; <i64> [#uses=1]
+	%tmp13357 = sub i64 %tmp1329013291, %tmp13309		; <i64> [#uses=1]
+	%tmp13358 = sdiv i64 %tmp13354, %tmp13357		; <i64> [#uses=1]
+	br label %bb13360
+
+bb13360:		; preds = %bb13351, %bb13345, %bb13339
+	%r1121.0 = phi i64 [ %tmp13358, %bb13351 ], [ 0, %bb13339 ], [ 4294836225, %bb13345 ]		; <i64> [#uses=1]
+	br i1 false, label %bb13402, label %bb13387
+
+bb13387:		; preds = %bb13360
+	br label %bb13402
+
+bb13402:		; preds = %bb13387, %bb13360
+	%r3123.0 = phi i64 [ 0, %bb13360 ], [ 4294836225, %bb13387 ]		; <i64> [#uses=1]
+	%tmp13404 = icmp eq i16 %tmp1322713228, -1		; <i1> [#uses=1]
+	br i1 %tmp13404, label %bb13435, label %bb13407
+
+bb13407:		; preds = %bb13402
+	br label %bb13435
+
+bb13435:		; preds = %bb13407, %bb13402
+	%r0120.1 = phi i64 [ 0, %bb13407 ], [ %r0120.0, %bb13402 ]		; <i64> [#uses=0]
+	%r1121.1 = phi i64 [ 0, %bb13407 ], [ %r1121.0, %bb13402 ]		; <i64> [#uses=0]
+	%r3123.1 = phi i64 [ 0, %bb13407 ], [ %r3123.0, %bb13402 ]		; <i64> [#uses=0]
+	%tmp13450 = mul i64 0, %tmp13312		; <i64> [#uses=0]
+	%tmp13455 = mul i64 0, %tmp13315		; <i64> [#uses=0]
+	%tmp13461 = add i64 0, %tmp1329013291		; <i64> [#uses=1]
+	%tmp13462 = mul i64 %tmp13461, 65535		; <i64> [#uses=1]
+	%tmp13466 = sub i64 %tmp13462, 0		; <i64> [#uses=1]
+	%tmp13526 = add i64 %tmp13466, 1		; <i64> [#uses=1]
+	%tmp13527 = add i64 %tmp13526, 0		; <i64> [#uses=1]
+	%tmp13528 = ashr i64 %tmp13527, 16		; <i64> [#uses=4]
+	%tmp13536 = sub i64 %tmp13528, 0		; <i64> [#uses=1]
+	%tmp13537 = shl i64 %tmp13536, 32		; <i64> [#uses=1]
+	%tmp13538 = and i64 %tmp13537, 281470681743360		; <i64> [#uses=1]
+	%tmp13542 = sub i64 %tmp13528, 0		; <i64> [#uses=1]
+	%tmp13543 = shl i64 %tmp13542, 16		; <i64> [#uses=1]
+	%tmp13544 = and i64 %tmp13543, 4294901760		; <i64> [#uses=1]
+	%tmp13548 = sub i64 %tmp13528, 0		; <i64> [#uses=1]
+	%tmp13549 = and i64 %tmp13548, 65535		; <i64> [#uses=1]
+	%tmp13539 = or i64 %tmp13538, 0		; <i64> [#uses=1]
+	%tmp13545 = or i64 %tmp13539, %tmp13549		; <i64> [#uses=1]
+	%tmp13550 = or i64 %tmp13545, %tmp13544		; <i64> [#uses=1]
+	%tmp1355213553 = trunc i64 %tmp13528 to i16		; <i16> [#uses=1]
+	br label %bb13567
+
+bb13567:		; preds = %bb13435, %bb13236
+	%tsp1040.0.0 = phi i64 [ %tmp13550, %bb13435 ], [ %tmp13225, %bb13236 ]		; <i64> [#uses=0]
+	%tsp1040.1.0 = phi i16 [ %tmp1355213553, %bb13435 ], [ %tmp1322713228, %bb13236 ]		; <i16> [#uses=1]
+	br i1 %tmp13238, label %bb13088, label %bb13572
+
+bb13572:		; preds = %bb13567
+	store i16 %tsp1040.1.0, i16* null, align 2
+	br label %bb13088
+
+return:		; preds = %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/2008-04-28-CyclicSchedUnit.ll b/test/CodeGen/X86/2008-04-28-CyclicSchedUnit.ll
new file mode 100644
index 0000000..6e8e98d
--- /dev/null
+++ b/test/CodeGen/X86/2008-04-28-CyclicSchedUnit.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=x86
+
+define i64 @t(i64 %maxIdleDuration) nounwind  {
+	call void asm sideeffect "wrmsr", "{cx},A,~{dirflag},~{fpsr},~{flags}"( i32 416, i64 0 ) nounwind 
+	unreachable
+}
diff --git a/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll b/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll
new file mode 100644
index 0000000..a708224
--- /dev/null
+++ b/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -enable-unsafe-fp-math -march=x86 | grep jnp
+; rdar://5902801
+
+declare void @test2()
+
+define i32 @test(double %p) nounwind {
+	%tmp5 = fcmp uno double %p, 0.000000e+00
+	br i1 %tmp5, label %bb, label %UnifiedReturnBlock
+bb:
+	call void @test2()
+	ret i32 17
+UnifiedReturnBlock:
+	ret i32 42
+}
+
diff --git a/test/CodeGen/X86/2008-05-09-PHIElimBug.ll b/test/CodeGen/X86/2008-05-09-PHIElimBug.ll
new file mode 100644
index 0000000..cea0076
--- /dev/null
+++ b/test/CodeGen/X86/2008-05-09-PHIElimBug.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86
+
+	%struct.V = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x i32>, float*, float*, float*, float*, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, i32, i32, i32, i32, i32, i32, i32, i32 }
+
+define fastcc void @t() nounwind  {
+entry:
+	br i1 false, label %bb23816.preheader, label %bb23821
+
+bb23816.preheader:		; preds = %entry
+	%tmp23735 = and i32 0, 2		; <i32> [#uses=0]
+	br label %bb23830
+
+bb23821:		; preds = %entry
+	br i1 false, label %bb23830, label %bb23827
+
+bb23827:		; preds = %bb23821
+	%tmp23829 = getelementptr %struct.V* null, i32 0, i32 42		; <i32*> [#uses=0]
+	br label %bb23830
+
+bb23830:		; preds = %bb23827, %bb23821, %bb23816.preheader
+	%scaledInDst.2.reg2mem.5 = phi i8 [ undef, %bb23827 ], [ undef, %bb23821 ], [ undef, %bb23816.preheader ]		; <i8> [#uses=1]
+	%toBool35047 = icmp eq i8 %scaledInDst.2.reg2mem.5, 0		; <i1> [#uses=1]
+	%bothcond39107 = or i1 %toBool35047, false		; <i1> [#uses=0]
+	unreachable
+}
diff --git a/test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll b/test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll
new file mode 100644
index 0000000..5ceb546
--- /dev/null
+++ b/test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2
+
+define fastcc void @glgVectorFloatConversion() nounwind  {
+	%tmp12745 = load <4 x float>* null, align 16		; <<4 x float>> [#uses=1]
+	%tmp12773 = insertelement <4 x float> %tmp12745, float 1.000000e+00, i32 1		; <<4 x float>> [#uses=1]
+	%tmp12774 = insertelement <4 x float> %tmp12773, float 0.000000e+00, i32 2		; <<4 x float>> [#uses=1]
+	%tmp12775 = insertelement <4 x float> %tmp12774, float 1.000000e+00, i32 3		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp12775, <4 x float>* null, align 16
+	unreachable
+}
diff --git a/test/CodeGen/X86/2008-05-12-tailmerge-5.ll b/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
new file mode 100644
index 0000000..4852e89
--- /dev/null
+++ b/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
@@ -0,0 +1,145 @@
+; RUN: llc < %s | grep abort | count 1
+; Calls to abort should all be merged
+
+; ModuleID = '5898899.c'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin8"
+	%struct.BoundaryAlignment = type { [3 x i8], i8, i16, i16, i8, [2 x i8] }
+
+define void @passing2(i64 %str.0, i64 %str.1, i16 signext  %s, i32 %j, i8 signext  %c, i16 signext  %t, i16 signext  %u, i8 signext  %d) nounwind optsize {
+entry:
+	%str_addr = alloca %struct.BoundaryAlignment		; <%struct.BoundaryAlignment*> [#uses=7]
+	%s_addr = alloca i16		; <i16*> [#uses=1]
+	%j_addr = alloca i32		; <i32*> [#uses=2]
+	%c_addr = alloca i8		; <i8*> [#uses=2]
+	%t_addr = alloca i16		; <i16*> [#uses=2]
+	%u_addr = alloca i16		; <i16*> [#uses=2]
+	%d_addr = alloca i8		; <i8*> [#uses=2]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	%tmp = bitcast %struct.BoundaryAlignment* %str_addr to { i64, i64 }*		; <{ i64, i64 }*> [#uses=1]
+	%tmp1 = getelementptr { i64, i64 }* %tmp, i32 0, i32 0		; <i64*> [#uses=1]
+	store i64 %str.0, i64* %tmp1
+	%tmp2 = bitcast %struct.BoundaryAlignment* %str_addr to { i64, i64 }*		; <{ i64, i64 }*> [#uses=1]
+	%tmp3 = getelementptr { i64, i64 }* %tmp2, i32 0, i32 1		; <i64*> [#uses=1]
+	%bc = bitcast i64* %tmp3 to i8*		; <i8*> [#uses=2]
+	%byte = trunc i64 %str.1 to i8		; <i8> [#uses=1]
+	store i8 %byte, i8* %bc
+	%shft = lshr i64 %str.1, 8		; <i64> [#uses=2]
+	%Loc = getelementptr i8* %bc, i32 1		; <i8*> [#uses=2]
+	%byte4 = trunc i64 %shft to i8		; <i8> [#uses=1]
+	store i8 %byte4, i8* %Loc
+	%shft5 = lshr i64 %shft, 8		; <i64> [#uses=2]
+	%Loc6 = getelementptr i8* %Loc, i32 1		; <i8*> [#uses=2]
+	%byte7 = trunc i64 %shft5 to i8		; <i8> [#uses=1]
+	store i8 %byte7, i8* %Loc6
+	%shft8 = lshr i64 %shft5, 8		; <i64> [#uses=2]
+	%Loc9 = getelementptr i8* %Loc6, i32 1		; <i8*> [#uses=2]
+	%byte10 = trunc i64 %shft8 to i8		; <i8> [#uses=1]
+	store i8 %byte10, i8* %Loc9
+	%shft11 = lshr i64 %shft8, 8		; <i64> [#uses=0]
+	%Loc12 = getelementptr i8* %Loc9, i32 1		; <i8*> [#uses=0]
+	store i16 %s, i16* %s_addr
+	store i32 %j, i32* %j_addr
+	store i8 %c, i8* %c_addr
+	store i16 %t, i16* %t_addr
+	store i16 %u, i16* %u_addr
+	store i8 %d, i8* %d_addr
+	%tmp13 = getelementptr %struct.BoundaryAlignment* %str_addr, i32 0, i32 0		; <[3 x i8]*> [#uses=1]
+	%tmp1314 = bitcast [3 x i8]* %tmp13 to i32*		; <i32*> [#uses=1]
+	%tmp15 = load i32* %tmp1314, align 4		; <i32> [#uses=1]
+	%tmp16 = shl i32 %tmp15, 14		; <i32> [#uses=1]
+	%tmp17 = ashr i32 %tmp16, 23		; <i32> [#uses=1]
+	%tmp1718 = trunc i32 %tmp17 to i16		; <i16> [#uses=1]
+	%sextl = shl i16 %tmp1718, 7		; <i16> [#uses=1]
+	%sextr = ashr i16 %sextl, 7		; <i16> [#uses=2]
+	%sextl19 = shl i16 %sextr, 7		; <i16> [#uses=1]
+	%sextr20 = ashr i16 %sextl19, 7		; <i16> [#uses=0]
+	%sextl21 = shl i16 %sextr, 7		; <i16> [#uses=1]
+	%sextr22 = ashr i16 %sextl21, 7		; <i16> [#uses=1]
+	%sextr2223 = sext i16 %sextr22 to i32		; <i32> [#uses=1]
+	%tmp24 = load i32* %j_addr, align 4		; <i32> [#uses=1]
+	%tmp25 = icmp ne i32 %sextr2223, %tmp24		; <i1> [#uses=1]
+	%tmp2526 = zext i1 %tmp25 to i8		; <i8> [#uses=1]
+	%toBool = icmp ne i8 %tmp2526, 0		; <i1> [#uses=1]
+	br i1 %toBool, label %bb, label %bb27
+
+bb:		; preds = %entry
+	call void (...)* @abort( ) noreturn nounwind 
+	unreachable
+
+bb27:		; preds = %entry
+	%tmp28 = getelementptr %struct.BoundaryAlignment* %str_addr, i32 0, i32 1		; <i8*> [#uses=1]
+	%tmp29 = load i8* %tmp28, align 4		; <i8> [#uses=1]
+	%tmp30 = load i8* %c_addr, align 1		; <i8> [#uses=1]
+	%tmp31 = icmp ne i8 %tmp29, %tmp30		; <i1> [#uses=1]
+	%tmp3132 = zext i1 %tmp31 to i8		; <i8> [#uses=1]
+	%toBool33 = icmp ne i8 %tmp3132, 0		; <i1> [#uses=1]
+	br i1 %toBool33, label %bb34, label %bb35
+
+bb34:		; preds = %bb27
+	call void (...)* @abort( ) noreturn nounwind 
+	unreachable
+
+bb35:		; preds = %bb27
+	%tmp36 = getelementptr %struct.BoundaryAlignment* %str_addr, i32 0, i32 2		; <i16*> [#uses=1]
+	%tmp37 = load i16* %tmp36, align 4		; <i16> [#uses=1]
+	%tmp38 = shl i16 %tmp37, 7		; <i16> [#uses=1]
+	%tmp39 = ashr i16 %tmp38, 7		; <i16> [#uses=1]
+	%sextl40 = shl i16 %tmp39, 7		; <i16> [#uses=1]
+	%sextr41 = ashr i16 %sextl40, 7		; <i16> [#uses=2]
+	%sextl42 = shl i16 %sextr41, 7		; <i16> [#uses=1]
+	%sextr43 = ashr i16 %sextl42, 7		; <i16> [#uses=0]
+	%sextl44 = shl i16 %sextr41, 7		; <i16> [#uses=1]
+	%sextr45 = ashr i16 %sextl44, 7		; <i16> [#uses=1]
+	%tmp46 = load i16* %t_addr, align 2		; <i16> [#uses=1]
+	%tmp47 = icmp ne i16 %sextr45, %tmp46		; <i1> [#uses=1]
+	%tmp4748 = zext i1 %tmp47 to i8		; <i8> [#uses=1]
+	%toBool49 = icmp ne i8 %tmp4748, 0		; <i1> [#uses=1]
+	br i1 %toBool49, label %bb50, label %bb51
+
+bb50:		; preds = %bb35
+	call void (...)* @abort( ) noreturn nounwind 
+	unreachable
+
+bb51:		; preds = %bb35
+	%tmp52 = getelementptr %struct.BoundaryAlignment* %str_addr, i32 0, i32 3		; <i16*> [#uses=1]
+	%tmp53 = load i16* %tmp52, align 4		; <i16> [#uses=1]
+	%tmp54 = shl i16 %tmp53, 7		; <i16> [#uses=1]
+	%tmp55 = ashr i16 %tmp54, 7		; <i16> [#uses=1]
+	%sextl56 = shl i16 %tmp55, 7		; <i16> [#uses=1]
+	%sextr57 = ashr i16 %sextl56, 7		; <i16> [#uses=2]
+	%sextl58 = shl i16 %sextr57, 7		; <i16> [#uses=1]
+	%sextr59 = ashr i16 %sextl58, 7		; <i16> [#uses=0]
+	%sextl60 = shl i16 %sextr57, 7		; <i16> [#uses=1]
+	%sextr61 = ashr i16 %sextl60, 7		; <i16> [#uses=1]
+	%tmp62 = load i16* %u_addr, align 2		; <i16> [#uses=1]
+	%tmp63 = icmp ne i16 %sextr61, %tmp62		; <i1> [#uses=1]
+	%tmp6364 = zext i1 %tmp63 to i8		; <i8> [#uses=1]
+	%toBool65 = icmp ne i8 %tmp6364, 0		; <i1> [#uses=1]
+	br i1 %toBool65, label %bb66, label %bb67
+
+bb66:		; preds = %bb51
+	call void (...)* @abort( ) noreturn nounwind 
+	unreachable
+
+bb67:		; preds = %bb51
+	%tmp68 = getelementptr %struct.BoundaryAlignment* %str_addr, i32 0, i32 4		; <i8*> [#uses=1]
+	%tmp69 = load i8* %tmp68, align 4		; <i8> [#uses=1]
+	%tmp70 = load i8* %d_addr, align 1		; <i8> [#uses=1]
+	%tmp71 = icmp ne i8 %tmp69, %tmp70		; <i1> [#uses=1]
+	%tmp7172 = zext i1 %tmp71 to i8		; <i8> [#uses=1]
+	%toBool73 = icmp ne i8 %tmp7172, 0		; <i1> [#uses=1]
+	br i1 %toBool73, label %bb74, label %bb75
+
+bb74:		; preds = %bb67
+	call void (...)* @abort( ) noreturn nounwind 
+	unreachable
+
+bb75:		; preds = %bb67
+	br label %return
+
+return:		; preds = %bb75
+	ret void
+}
+
+declare void @abort(...) noreturn nounwind 
diff --git a/test/CodeGen/X86/2008-05-21-CoalescerBug.ll b/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
new file mode 100644
index 0000000..9cf50f4
--- /dev/null
+++ b/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
@@ -0,0 +1,98 @@
+; RUN: llc < %s -march=x86 -O0 -fast-isel=false | grep mov | count 5
+; PR2343
+
+	%llvm.dbg.anchor.type = type { i32, i32 }
+	%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+	%struct.VEC_basic_block_base = type { i32, i32, [1 x %struct.basic_block_def*] }
+	%struct.VEC_basic_block_gc = type { %struct.VEC_basic_block_base }
+	%struct.VEC_edge_base = type { i32, i32, [1 x %struct.edge_def*] }
+	%struct.VEC_edge_gc = type { %struct.VEC_edge_base }
+	%struct.VEC_rtx_base = type { i32, i32, [1 x %struct.rtx_def*] }
+	%struct.VEC_rtx_gc = type { %struct.VEC_rtx_base }
+	%struct.VEC_temp_slot_p_base = type { i32, i32, [1 x %struct.temp_slot*] }
+	%struct.VEC_temp_slot_p_gc = type { %struct.VEC_temp_slot_p_base }
+	%struct.VEC_tree_base = type { i32, i32, [1 x %struct.tree_node*] }
+	%struct.VEC_tree_gc = type { %struct.VEC_tree_base }
+	%struct.__sbuf = type { i8*, i32 }
+	%struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
+	%struct.basic_block_def = type { %struct.tree_node*, %struct.VEC_edge_gc*, %struct.VEC_edge_gc*, i8*, %struct.loop*, [2 x %struct.et_node*], %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_il_dependent, %struct.tree_node*, %struct.edge_prediction*, i64, i32, i32, i32, i32 }
+	%struct.basic_block_il_dependent = type { %struct.rtl_bb_info* }
+	%struct.bitmap_element_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, [4 x i32] }
+	%struct.bitmap_head_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, %struct.bitmap_obstack* }
+	%struct.bitmap_obstack = type { %struct.bitmap_element_def*, %struct.bitmap_head_def*, %struct.obstack }
+	%struct.block_symbol = type { [3 x %struct.cfg_stats_d], %struct.object_block*, i64 }
+	%struct.cfg_stats_d = type { i32 }
+	%struct.control_flow_graph = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.VEC_basic_block_gc*, i32, i32, i32, %struct.VEC_basic_block_gc*, i32 }
+	%struct.def_optype_d = type { %struct.def_optype_d*, %struct.tree_node** }
+	%struct.edge_def = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.edge_def_insns, i8*, %struct.__sbuf*, i32, i32, i64, i32 }
+	%struct.edge_def_insns = type { %struct.rtx_def* }
+	%struct.edge_prediction = type { %struct.edge_prediction*, %struct.edge_def*, i32, i32 }
+	%struct.eh_status = type opaque
+	%struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.__sbuf, i32, i8*, %struct.rtx_def** }
+	%struct.et_node = type opaque
+	%struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
+	%struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.control_flow_graph*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.VEC_temp_slot_p_gc*, %struct.temp_slot*, %struct.var_refs_queue*, i32, i32, i32, i32, %struct.machine_function*, i32, i32, %struct.language_function*, %struct.htab*, %struct.rtx_def*, i32, i32, i32, %struct.__sbuf, %struct.VEC_tree_gc*, %struct.tree_node*, i8*, i8*, i8*, i8*, i8*, %struct.tree_node*, i8, i8, i8, i8, i8, i8 }
+	%struct.htab = type { i32 (i8*)*, i32 (i8*, i8*)*, void (i8*)*, i8**, i32, i32, i32, i32, i32, i8* (i32, i32)*, void (i8*)*, i8*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i32 }
+	%struct.initial_value_struct = type opaque
+	%struct.lang_decl = type opaque
+	%struct.language_function = type opaque
+	%struct.loop = type { i32, %struct.basic_block_def*, %struct.basic_block_def*, %llvm.dbg.anchor.type, i32, i32, i32, i32, %struct.loop**, i32, %struct.loop*, %struct.loop*, %struct.loop*, %struct.loop*, i8*, %struct.tree_node*, %struct.tree_node*, %struct.nb_iter_bound*, %struct.edge_def*, i32 }
+	%struct.machine_function = type opaque
+	%struct.maydef_optype_d = type { %struct.maydef_optype_d*, %struct.tree_node*, %struct.tree_node*, %struct.ssa_use_operand_d }
+	%struct.nb_iter_bound = type { %struct.tree_node*, %struct.tree_node*, %struct.nb_iter_bound* }
+	%struct.object_block = type { %struct.section*, i32, i64, %struct.VEC_rtx_gc*, %struct.VEC_rtx_gc* }
+	%struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (i8*, i32)*, void (i8*, %struct._obstack_chunk*)*, i8*, i8 }
+	%struct.rtl_bb_info = type { %struct.rtx_def*, %struct.rtx_def*, %struct.bitmap_head_def*, %struct.bitmap_head_def*, %struct.rtx_def*, %struct.rtx_def*, i32 }
+	%struct.rtx_def = type { i16, i8, i8, %struct.u }
+	%struct.section = type { %struct.unnamed_section }
+	%struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* }
+	%struct.ssa_use_operand_d = type { %struct.ssa_use_operand_d*, %struct.ssa_use_operand_d*, %struct.tree_node*, %struct.tree_node** }
+	%struct.stmt_ann_d = type { %struct.tree_ann_common_d, i8, %struct.basic_block_def*, %struct.stmt_operands_d, %struct.bitmap_head_def*, i32, i8* }
+	%struct.stmt_operands_d = type { %struct.def_optype_d*, %struct.use_optype_d*, %struct.maydef_optype_d*, %struct.vuse_optype_d*, %struct.maydef_optype_d* }
+	%struct.temp_slot = type opaque
+	%struct.tree_ann_common_d = type { i32, i8*, %struct.tree_node* }
+	%struct.tree_ann_d = type { %struct.stmt_ann_d }
+	%struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_ann_d*, i8, i8, i8, i8, i8 }
+	%struct.tree_decl_common = type { %struct.tree_decl_minimal, %struct.tree_node*, i8, i8, i8, i8, i8, i32, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* }
+	%struct.tree_decl_minimal = type { %struct.tree_common, %struct.__sbuf, i32, %struct.tree_node*, %struct.tree_node* }
+	%struct.tree_decl_non_common = type { %struct.tree_decl_with_vis, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node* }
+	%struct.tree_decl_u1 = type { i64 }
+	%struct.tree_decl_with_rtl = type { %struct.tree_decl_common, %struct.rtx_def*, i32 }
+	%struct.tree_decl_with_vis = type { %struct.tree_decl_with_rtl, %struct.tree_node*, %struct.tree_node*, i8, i8, i8 }
+	%struct.tree_function_decl = type { %struct.tree_decl_non_common, i8, i8, i64, %struct.function* }
+	%struct.tree_node = type { %struct.tree_function_decl }
+	%struct.u = type { %struct.block_symbol }
+	%struct.unnamed_section = type { %struct.cfg_stats_d, void (i8*)*, i8*, %struct.section* }
+	%struct.use_optype_d = type { %struct.use_optype_d*, %struct.ssa_use_operand_d }
+	%struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
+	%struct.varasm_status = type opaque
+	%struct.vuse_optype_d = type { %struct.vuse_optype_d*, %struct.tree_node*, %struct.ssa_use_operand_d }
+@llvm.used = appending global [1 x i8*] [ i8* bitcast (%struct.edge_def* (%struct.edge_def*, %struct.basic_block_def*)* @tree_redirect_edge_and_branch to i8*) ], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define %struct.edge_def* @tree_redirect_edge_and_branch(%struct.edge_def* %e1, %struct.basic_block_def* %dest2) nounwind  {
+entry:
+	br label %bb497
+
+bb483:		; preds = %bb497
+	%tmp496 = load %struct.tree_node** null, align 4		; <%struct.tree_node*> [#uses=1]
+	br label %bb497
+
+bb497:		; preds = %bb483, %entry
+	%cases.0 = phi %struct.tree_node* [ %tmp496, %bb483 ], [ null, %entry ]		; <%struct.tree_node*> [#uses=1]
+	%last.0 = phi %struct.tree_node* [ %cases.0, %bb483 ], [ undef, %entry ]		; <%struct.tree_node*> [#uses=1]
+	%foo = phi i1 [ 0, %bb483 ], [ 1, %entry ]
+	br i1 %foo, label %bb483, label %bb502
+
+bb502:		; preds = %bb497
+	br i1 %foo, label %bb507, label %bb841
+
+bb507:		; preds = %bb502
+	%tmp517 = getelementptr %struct.tree_node* %last.0, i32 0, i32 0		; <%struct.tree_function_decl*> [#uses=1]
+	%tmp517518 = bitcast %struct.tree_function_decl* %tmp517 to %struct.tree_common*		; <%struct.tree_common*> [#uses=1]
+	%tmp519 = getelementptr %struct.tree_common* %tmp517518, i32 0, i32 0		; <%struct.tree_node**> [#uses=1]
+	store %struct.tree_node* null, %struct.tree_node** %tmp519, align 4
+	br label %bb841
+
+bb841:		; preds = %bb507, %bb502
+	unreachable
+}
diff --git a/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll b/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll
new file mode 100644
index 0000000..19a7354
--- /dev/null
+++ b/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movups | count 2
+
+define void @a(<4 x float>* %x) nounwind  {
+entry:
+        %tmp2 = load <4 x float>* %x, align 1
+        %inv = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %tmp2)
+        store <4 x float> %inv, <4 x float>* %x, align 1
+        ret void
+}
+
+declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>)
diff --git a/test/CodeGen/X86/2008-05-28-CoalescerBug.ll b/test/CodeGen/X86/2008-05-28-CoalescerBug.ll
new file mode 100644
index 0000000..32bf8d4
--- /dev/null
+++ b/test/CodeGen/X86/2008-05-28-CoalescerBug.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu
+; PR2289
+
+define void @_ada_ca11001() {
+entry:
+        %tmp59 = call i16 @ca11001_0__cartesian_assign( i8 zeroext  0, i8 zeroext  0, i16 undef )               ; <i16> [#uses=0]
+        unreachable
+}
+
+declare i16 @ca11001_0__cartesian_assign(i8 zeroext , i8 zeroext , i16)
diff --git a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
new file mode 100644
index 0000000..f1a19ec
--- /dev/null
+++ b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -regalloc=local
+
+@_ZTVN10Evaluation10GridOutputILi3EEE = external constant [5 x i32 (...)*]		; <[5 x i32 (...)*]*> [#uses=1]
+
+declare i8* @llvm.eh.exception() nounwind 
+
+declare i8* @_Znwm(i32)
+
+declare i8* @__cxa_begin_catch(i8*) nounwind 
+
+define i32 @main(i32 %argc, i8** %argv) {
+entry:
+	br i1 false, label %bb37, label %bb34
+
+bb34:		; preds = %entry
+	ret i32 1
+
+bb37:		; preds = %entry
+	%tmp12.i.i.i.i.i66 = invoke i8* @_Znwm( i32 12 )
+			to label %tmp12.i.i.i.i.i.noexc65 unwind label %lpad243		; <i8*> [#uses=0]
+
+tmp12.i.i.i.i.i.noexc65:		; preds = %bb37
+	unreachable
+
+lpad243:		; preds = %bb37
+	%eh_ptr244 = call i8* @llvm.eh.exception( )		; <i8*> [#uses=1]
+	store i32 (...)** getelementptr ([5 x i32 (...)*]* @_ZTVN10Evaluation10GridOutputILi3EEE, i32 0, i32 2), i32 (...)*** null, align 8
+	%tmp133 = call i8* @__cxa_begin_catch( i8* %eh_ptr244 ) nounwind 		; <i8*> [#uses=0]
+	unreachable
+}
diff --git a/test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll b/test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll
new file mode 100644
index 0000000..236b7cd
--- /dev/null
+++ b/test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim | grep subl | grep 24
+
+	%struct.argument_t = type { i8*, %struct.argument_t*, i32, %struct.ipc_type_t*, i32, void (...)*, void (...)*, void (...)*, void (...)*, void (...)*, i8*, i8*, i8*, i8*, i8*, i32, i32, i32, %struct.routine*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, i32, i32, i32, i32, i32, i32 }
+	%struct.ipc_type_t = type { i8*, %struct.ipc_type_t*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i32, i32, i32, %struct.ipc_type_t*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8* }
+	%struct.routine = type opaque
+@"\01LC" = external constant [11 x i8]		; <[11 x i8]*> [#uses=1]
+
+define i8* @InArgMsgField(%struct.argument_t* %arg, i8* %str) nounwind  {
+entry:
+	%who = alloca [20 x i8]		; <[20 x i8]*> [#uses=1]
+	%who1 = getelementptr [20 x i8]* %who, i32 0, i32 0		; <i8*> [#uses=2]
+	call void @llvm.memset.i32( i8* %who1, i8 0, i32 20, i32 1 )
+	call void @llvm.memcpy.i32( i8* %who1, i8* getelementptr ([11 x i8]* @"\01LC", i32 0, i32 0), i32 11, i32 1 )
+	unreachable
+}
+
+declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind 
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind 
diff --git a/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll b/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
new file mode 100644
index 0000000..90af387
--- /dev/null
+++ b/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=x86 | not grep movsd
+; RUN: llc < %s -march=x86 | grep movw
+; RUN: llc < %s -march=x86 | grep addw
+; These transforms are turned off for volatile loads and stores.
+; Check that they weren't turned off for all loads and stores!
+
+@atomic = global double 0.000000e+00		; <double*> [#uses=1]
+@atomic2 = global double 0.000000e+00		; <double*> [#uses=1]
+@ioport = global i32 0		; <i32*> [#uses=1]
+@ioport2 = global i32 0		; <i32*> [#uses=1]
+
+define i16 @f(i64 %x) {
+	%b = bitcast i64 %x to double		; <double> [#uses=1]
+	store double %b, double* @atomic
+	store double 0.000000e+00, double* @atomic2
+	%l = load i32* @ioport		; <i32> [#uses=1]
+	%t = trunc i32 %l to i16		; <i16> [#uses=1]
+	%l2 = load i32* @ioport2		; <i32> [#uses=1]
+	%tmp = lshr i32 %l2, 16		; <i32> [#uses=1]
+	%t2 = trunc i32 %tmp to i16		; <i16> [#uses=1]
+	%f = add i16 %t, %t2		; <i16> [#uses=1]
+	ret i16 %f
+}
diff --git a/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll b/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
new file mode 100644
index 0000000..500cd1f
--- /dev/null
+++ b/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd | count 5
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movl | count 2
+
+@atomic = global double 0.000000e+00		; <double*> [#uses=1]
+@atomic2 = global double 0.000000e+00		; <double*> [#uses=1]
+@anything = global i64 0		; <i64*> [#uses=1]
+@ioport = global i32 0		; <i32*> [#uses=2]
+
+define i16 @f(i64 %x, double %y) {
+	%b = bitcast i64 %x to double		; <double> [#uses=1]
+	volatile store double %b, double* @atomic ; one processor operation only
+	volatile store double 0.000000e+00, double* @atomic2 ; one processor operation only
+	%b2 = bitcast double %y to i64		; <i64> [#uses=1]
+	volatile store i64 %b2, i64* @anything ; may transform to store of double
+	%l = volatile load i32* @ioport		; must not narrow
+	%t = trunc i32 %l to i16		; <i16> [#uses=1]
+	%l2 = volatile load i32* @ioport		; must not narrow
+	%tmp = lshr i32 %l2, 16		; <i32> [#uses=1]
+	%t2 = trunc i32 %tmp to i16		; <i16> [#uses=1]
+	%f = add i16 %t, %t2		; <i16> [#uses=1]
+	ret i16 %f
+}
diff --git a/test/CodeGen/X86/2008-06-16-SubregsBug.ll b/test/CodeGen/X86/2008-06-16-SubregsBug.ll
new file mode 100644
index 0000000..4d4819a
--- /dev/null
+++ b/test/CodeGen/X86/2008-06-16-SubregsBug.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep mov | count 4
+
+define i16 @test(i16* %tmp179) nounwind  {
+	%tmp180 = load i16* %tmp179, align 2		; <i16> [#uses=2]
+	%tmp184 = and i16 %tmp180, -1024		; <i16> [#uses=1]
+	%tmp186 = icmp eq i16 %tmp184, -32768		; <i1> [#uses=1]
+	br i1 %tmp186, label %bb189, label %bb288
+
+bb189:		; preds = %0
+	ret i16 %tmp180
+
+bb288:		; preds = %0
+	ret i16 32
+}
diff --git a/test/CodeGen/X86/2008-06-18-BadShuffle.ll b/test/CodeGen/X86/2008-06-18-BadShuffle.ll
new file mode 100644
index 0000000..66f9065
--- /dev/null
+++ b/test/CodeGen/X86/2008-06-18-BadShuffle.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86 -mcpu=i386 -mattr=+sse2 | grep pinsrw
+
+; Test to make sure we actually insert the bottom element of the vector
+define <8 x i16> @a(<8 x i16> %a) nounwind  {
+entry:
+	shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> < i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8 >
+	%add = add <8 x i16> %0, %a
+	ret <8 x i16> %add
+}
+
diff --git a/test/CodeGen/X86/2008-06-25-VecISelBug.ll b/test/CodeGen/X86/2008-06-25-VecISelBug.ll
new file mode 100644
index 0000000..72d1907
--- /dev/null
+++ b/test/CodeGen/X86/2008-06-25-VecISelBug.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep pslldq
+
+define void @t() nounwind  {
+entry:
+	%tmp1 = shufflevector <4 x float> zeroinitializer, <4 x float> < float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 4, i32 5 >
+	%tmp2 = insertelement <4 x float> %tmp1, float 1.000000e+00, i32 3
+	store <4 x float> %tmp2, <4 x float>* null, align 16
+	unreachable
+}
diff --git a/test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll b/test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll
new file mode 100644
index 0000000..46341fc
--- /dev/null
+++ b/test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll
@@ -0,0 +1,99 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin9
+
+	%struct.ogg_stream_state = type { i8*, i32, i32, i32, i32*, i64*, i32, i32, i32, i32, [282 x i8], i32, i32, i32, i32, i32, i64, i64 }
+	%struct.res_state = type { i32, i32, i32, i32, float*, float*, i32, i32 }
+	%struct.vorbis_comment = type { i8**, i32*, i32, i8* }
+
+declare i32 @strlen(i8*) nounwind readonly 
+
+define i32 @res_init(%struct.res_state* %state, i32 %channels, i32 %outfreq, i32 %infreq, i32 %op1, ...) nounwind  {
+entry:
+	br i1 false, label %bb95, label %bb
+
+bb:		; preds = %entry
+	br i1 false, label %bb95, label %bb24
+
+bb24:		; preds = %bb
+	br i1 false, label %bb40.preheader, label %bb26
+
+bb26:		; preds = %bb24
+	ret i32 -1
+
+bb40.preheader:		; preds = %bb24
+	br i1 false, label %bb39, label %bb49.outer
+
+bb39:		; preds = %bb39, %bb40.preheader
+	shl i32 0, 1		; <i32>:0 [#uses=0]
+	br i1 false, label %bb39, label %bb49.outer
+
+bb49.outer:		; preds = %bb39, %bb40.preheader
+	getelementptr %struct.res_state* %state, i32 0, i32 3		; <i32*>:1 [#uses=0]
+	getelementptr %struct.res_state* %state, i32 0, i32 7		; <i32*>:2 [#uses=0]
+	%base10.1 = select i1 false, float* null, float* null		; <float*> [#uses=1]
+	br label %bb74
+
+bb69:		; preds = %bb74
+	br label %bb71
+
+bb71:		; preds = %bb74, %bb69
+	store float 0.000000e+00, float* null, align 4
+	add i32 0, 1		; <i32>:3 [#uses=1]
+	%indvar.next137 = add i32 %indvar136, 1		; <i32> [#uses=1]
+	br i1 false, label %bb74, label %bb73
+
+bb73:		; preds = %bb71
+	%.rec = add i32 %base10.2.ph.rec, 1		; <i32> [#uses=2]
+	getelementptr float* %base10.1, i32 %.rec		; <float*>:4 [#uses=1]
+	br label %bb74
+
+bb74:		; preds = %bb73, %bb71, %bb49.outer
+	%N13.1.ph = phi i32 [ 0, %bb49.outer ], [ 0, %bb73 ], [ %N13.1.ph, %bb71 ]		; <i32> [#uses=1]
+	%dest12.2.ph = phi float* [ null, %bb49.outer ], [ %4, %bb73 ], [ %dest12.2.ph, %bb71 ]		; <float*> [#uses=1]
+	%x8.0.ph = phi i32 [ 0, %bb49.outer ], [ %3, %bb73 ], [ %x8.0.ph, %bb71 ]		; <i32> [#uses=1]
+	%base10.2.ph.rec = phi i32 [ 0, %bb49.outer ], [ %.rec, %bb73 ], [ %base10.2.ph.rec, %bb71 ]		; <i32> [#uses=2]
+	%indvar136 = phi i32 [ %indvar.next137, %bb71 ], [ 0, %bb73 ], [ 0, %bb49.outer ]		; <i32> [#uses=1]
+	br i1 false, label %bb71, label %bb69
+
+bb95:		; preds = %bb, %entry
+	ret i32 -1
+}
+
+define i32 @read_resampled(i8* %d, float** %buffer, i32 %samples) nounwind  {
+entry:
+	br i1 false, label %bb17.preheader, label %bb30
+
+bb17.preheader:		; preds = %entry
+	load i32* null, align 4		; <i32>:0 [#uses=0]
+	br label %bb16
+
+bb16:		; preds = %bb16, %bb17.preheader
+	%i1.036 = phi i32 [ 0, %bb17.preheader ], [ %1, %bb16 ]		; <i32> [#uses=1]
+	add i32 %i1.036, 1		; <i32>:1 [#uses=2]
+	icmp ult i32 %1, 0		; <i1>:2 [#uses=0]
+	br label %bb16
+
+bb30:		; preds = %entry
+	ret i32 0
+}
+
+define i32 @ogg_stream_reset_serialno(%struct.ogg_stream_state* %os, i32 %serialno) nounwind  {
+entry:
+	unreachable
+}
+
+define void @vorbis_lsp_to_curve(float* %curve, i32* %map, i32 %n, i32 %ln, float* %lsp, i32 %m, float %amp, float %ampoffset) nounwind  {
+entry:
+	unreachable
+}
+
+define i32 @vorbis_comment_query_count(%struct.vorbis_comment* %vc, i8* %tag) nounwind  {
+entry:
+	%strlen = call i32 @strlen( i8* null )		; <i32> [#uses=1]
+	%endptr = getelementptr i8* null, i32 %strlen		; <i8*> [#uses=0]
+	unreachable
+}
+
+define fastcc i32 @push(%struct.res_state* %state, float* %pool, i32* %poolfill, i32* %offset, float* %dest, i32 %dststep, float* %source, i32 %srcstep, i32 %srclen) nounwind  {
+entry:
+	unreachable
+}
diff --git a/test/CodeGen/X86/2008-07-09-ELFSectionAttributes.ll b/test/CodeGen/X86/2008-07-09-ELFSectionAttributes.ll
new file mode 100644
index 0000000..1a786ef
--- /dev/null
+++ b/test/CodeGen/X86/2008-07-09-ELFSectionAttributes.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s | grep ax
+; PR2024
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+
+define i32 @foo(i32 %A, i32 %B) nounwind  section ".init.text" {
+entry:
+	tail call i32 @bar( i32 %A, i32 %B ) nounwind 		; <i32>:0 [#uses=1]
+	ret i32 %0
+}
+
+declare i32 @bar(i32, i32)
diff --git a/test/CodeGen/X86/2008-07-11-SHLBy1.ll b/test/CodeGen/X86/2008-07-11-SHLBy1.ll
new file mode 100644
index 0000000..ff2b05f
--- /dev/null
+++ b/test/CodeGen/X86/2008-07-11-SHLBy1.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=x86-64 -o - | not grep shr
+define i128 @sl(i128 %x) {
+        %t = shl i128 %x, 1
+        ret i128 %t
+}
diff --git a/test/CodeGen/X86/2008-07-11-SpillerBug.ll b/test/CodeGen/X86/2008-07-11-SpillerBug.ll
new file mode 100644
index 0000000..cd99c0e
--- /dev/null
+++ b/test/CodeGen/X86/2008-07-11-SpillerBug.ll
@@ -0,0 +1,54 @@
+; RUN: llc < %s -march=x86 -relocation-model=static -disable-fp-elim -post-RA-scheduler=false -asm-verbose=0 | FileCheck %s
+; PR2536
+
+
+; CHECK: movw %cx
+; CHECK-NEXT: andl    $65534, %
+; CHECK-NEXT: movl %
+; CHECK-NEXT: movl $17
+
+@g_5 = external global i16		; <i16*> [#uses=2]
+@g_107 = external global i16		; <i16*> [#uses=1]
+@g_229 = external global i32		; <i32*> [#uses=1]
+@g_227 = external global i16		; <i16*> [#uses=1]
+
+define i32 @func_54(i32 %p_55, i16 zeroext  %p_56) nounwind  {
+entry:
+	load i16* @g_5, align 2		; <i16>:0 [#uses=1]
+	zext i16 %0 to i32		; <i32>:1 [#uses=1]
+	%.mask = and i32 %1, 65534		; <i32> [#uses=1]
+	icmp eq i32 %.mask, 0		; <i1>:2 [#uses=1]
+	load i32* @g_229, align 4		; <i32>:3 [#uses=1]
+	load i16* @g_227, align 2		; <i16>:4 [#uses=1]
+	icmp eq i16 %4, 0		; <i1>:5 [#uses=1]
+	load i16* @g_5, align 2		; <i16>:6 [#uses=1]
+	br label %bb
+
+bb:		; preds = %bb7.preheader, %entry
+	%indvar4 = phi i32 [ 0, %entry ], [ %indvar.next5, %bb7.preheader ]		; <i32> [#uses=1]
+	%p_56_addr.1.reg2mem.0 = phi i16 [ %p_56, %entry ], [ %p_56_addr.0, %bb7.preheader ]		; <i16> [#uses=2]
+	br i1 %2, label %bb7.preheader, label %bb5
+
+bb5:		; preds = %bb
+	store i16 %6, i16* @g_107, align 2
+	br label %bb7.preheader
+
+bb7.preheader:		; preds = %bb5, %bb
+	icmp eq i16 %p_56_addr.1.reg2mem.0, 0		; <i1>:7 [#uses=1]
+	%.0 = select i1 %7, i32 1, i32 %3		; <i32> [#uses=1]
+	urem i32 1, %.0		; <i32>:8 [#uses=1]
+	icmp eq i32 %8, 0		; <i1>:9 [#uses=1]
+	%.not = xor i1 %9, true		; <i1> [#uses=1]
+	%.not1 = xor i1 %5, true		; <i1> [#uses=1]
+	%brmerge = or i1 %.not, %.not1		; <i1> [#uses=1]
+	%iftmp.6.0 = select i1 %brmerge, i32 3, i32 0		; <i32> [#uses=1]
+	mul i32 %iftmp.6.0, %3		; <i32>:10 [#uses=1]
+	icmp eq i32 %10, 0		; <i1>:11 [#uses=1]
+	%p_56_addr.0 = select i1 %11, i16 %p_56_addr.1.reg2mem.0, i16 1		; <i16> [#uses=1]
+	%indvar.next5 = add i32 %indvar4, 1		; <i32> [#uses=2]
+	%exitcond6 = icmp eq i32 %indvar.next5, 17		; <i1> [#uses=1]
+	br i1 %exitcond6, label %bb25, label %bb
+
+bb25:		; preds = %bb7.preheader
+	ret i32 1
+}
diff --git a/test/CodeGen/X86/2008-07-16-CoalescerCrash.ll b/test/CodeGen/X86/2008-07-16-CoalescerCrash.ll
new file mode 100644
index 0000000..f56604b
--- /dev/null
+++ b/test/CodeGen/X86/2008-07-16-CoalescerCrash.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin
+
+	%struct.SV = type { i8*, i64, i64 }
+@"\01LC25" = external constant [8 x i8]		; <[8 x i8]*> [#uses=1]
+
+declare void @Perl_sv_catpvf(%struct.SV*, i8*, ...) nounwind 
+
+declare fastcc i64 @Perl_utf8n_to_uvuni(i8*, i64, i64*, i64) nounwind 
+
+define fastcc i8* @Perl_pv_uni_display(%struct.SV* %dsv, i8* %spv, i64 %len, i64 %pvlim, i64 %flags) nounwind  {
+entry:
+	br i1 false, label %bb, label %bb40
+
+bb:		; preds = %entry
+	tail call fastcc i64 @Perl_utf8n_to_uvuni( i8* null, i64 13, i64* null, i64 255 ) nounwind 		; <i64>:0 [#uses=1]
+	br i1 false, label %bb6, label %bb33
+
+bb6:		; preds = %bb
+	br i1 false, label %bb30, label %bb31
+
+bb30:		; preds = %bb6
+	unreachable
+
+bb31:		; preds = %bb6
+	icmp eq i8 0, 0		; <i1>:1 [#uses=0]
+	br label %bb33
+
+bb33:		; preds = %bb31, %bb
+	tail call void (%struct.SV*, i8*, ...)* @Perl_sv_catpvf( %struct.SV* %dsv, i8* getelementptr ([8 x i8]* @"\01LC25", i32 0, i64 0), i64 %0 ) nounwind 
+	unreachable
+
+bb40:		; preds = %entry
+	ret i8* null
+}
diff --git a/test/CodeGen/X86/2008-07-19-movups-spills.ll b/test/CodeGen/X86/2008-07-19-movups-spills.ll
new file mode 100644
index 0000000..98919ee
--- /dev/null
+++ b/test/CodeGen/X86/2008-07-19-movups-spills.ll
@@ -0,0 +1,636 @@
+; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=1 -mattr=sse2 | grep movaps | count 75
+; RUN: llc < %s -mtriple=i686-pc-linux -realign-stack=0 -mattr=sse2 | grep movaps | count 1
+; PR2539
+
+external global <4 x float>, align 1		; <<4 x float>*>:0 [#uses=2]
+external global <4 x float>, align 1		; <<4 x float>*>:1 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:2 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:3 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:4 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:5 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:6 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:7 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:8 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:9 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:10 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:11 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:12 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:13 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:14 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:15 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:16 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:17 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:18 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:19 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:20 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:21 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:22 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:23 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:24 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:25 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:26 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:27 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:28 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:29 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:30 [#uses=1]
+external global <4 x float>, align 1		; <<4 x float>*>:31 [#uses=1]
+
+declare void @abort()
+
+define void @""() {
+	load <4 x float>* @0, align 1		; <<4 x float>>:1 [#uses=2]
+	load <4 x float>* @1, align 1		; <<4 x float>>:2 [#uses=3]
+	load <4 x float>* @2, align 1		; <<4 x float>>:3 [#uses=4]
+	load <4 x float>* @3, align 1		; <<4 x float>>:4 [#uses=5]
+	load <4 x float>* @4, align 1		; <<4 x float>>:5 [#uses=6]
+	load <4 x float>* @5, align 1		; <<4 x float>>:6 [#uses=7]
+	load <4 x float>* @6, align 1		; <<4 x float>>:7 [#uses=8]
+	load <4 x float>* @7, align 1		; <<4 x float>>:8 [#uses=9]
+	load <4 x float>* @8, align 1		; <<4 x float>>:9 [#uses=10]
+	load <4 x float>* @9, align 1		; <<4 x float>>:10 [#uses=11]
+	load <4 x float>* @10, align 1		; <<4 x float>>:11 [#uses=12]
+	load <4 x float>* @11, align 1		; <<4 x float>>:12 [#uses=13]
+	load <4 x float>* @12, align 1		; <<4 x float>>:13 [#uses=14]
+	load <4 x float>* @13, align 1		; <<4 x float>>:14 [#uses=15]
+	load <4 x float>* @14, align 1		; <<4 x float>>:15 [#uses=16]
+	load <4 x float>* @15, align 1		; <<4 x float>>:16 [#uses=17]
+	load <4 x float>* @16, align 1		; <<4 x float>>:17 [#uses=18]
+	load <4 x float>* @17, align 1		; <<4 x float>>:18 [#uses=19]
+	load <4 x float>* @18, align 1		; <<4 x float>>:19 [#uses=20]
+	load <4 x float>* @19, align 1		; <<4 x float>>:20 [#uses=21]
+	load <4 x float>* @20, align 1		; <<4 x float>>:21 [#uses=22]
+	load <4 x float>* @21, align 1		; <<4 x float>>:22 [#uses=23]
+	load <4 x float>* @22, align 1		; <<4 x float>>:23 [#uses=24]
+	load <4 x float>* @23, align 1		; <<4 x float>>:24 [#uses=25]
+	load <4 x float>* @24, align 1		; <<4 x float>>:25 [#uses=26]
+	load <4 x float>* @25, align 1		; <<4 x float>>:26 [#uses=27]
+	load <4 x float>* @26, align 1		; <<4 x float>>:27 [#uses=28]
+	load <4 x float>* @27, align 1		; <<4 x float>>:28 [#uses=29]
+	load <4 x float>* @28, align 1		; <<4 x float>>:29 [#uses=30]
+	load <4 x float>* @29, align 1		; <<4 x float>>:30 [#uses=31]
+	load <4 x float>* @30, align 1		; <<4 x float>>:31 [#uses=32]
+	load <4 x float>* @31, align 1		; <<4 x float>>:32 [#uses=33]
+	fmul <4 x float> %1, %1		; <<4 x float>>:33 [#uses=1]
+	fmul <4 x float> %33, %2		; <<4 x float>>:34 [#uses=1]
+	fmul <4 x float> %34, %3		; <<4 x float>>:35 [#uses=1]
+	fmul <4 x float> %35, %4		; <<4 x float>>:36 [#uses=1]
+	fmul <4 x float> %36, %5		; <<4 x float>>:37 [#uses=1]
+	fmul <4 x float> %37, %6		; <<4 x float>>:38 [#uses=1]
+	fmul <4 x float> %38, %7		; <<4 x float>>:39 [#uses=1]
+	fmul <4 x float> %39, %8		; <<4 x float>>:40 [#uses=1]
+	fmul <4 x float> %40, %9		; <<4 x float>>:41 [#uses=1]
+	fmul <4 x float> %41, %10		; <<4 x float>>:42 [#uses=1]
+	fmul <4 x float> %42, %11		; <<4 x float>>:43 [#uses=1]
+	fmul <4 x float> %43, %12		; <<4 x float>>:44 [#uses=1]
+	fmul <4 x float> %44, %13		; <<4 x float>>:45 [#uses=1]
+	fmul <4 x float> %45, %14		; <<4 x float>>:46 [#uses=1]
+	fmul <4 x float> %46, %15		; <<4 x float>>:47 [#uses=1]
+	fmul <4 x float> %47, %16		; <<4 x float>>:48 [#uses=1]
+	fmul <4 x float> %48, %17		; <<4 x float>>:49 [#uses=1]
+	fmul <4 x float> %49, %18		; <<4 x float>>:50 [#uses=1]
+	fmul <4 x float> %50, %19		; <<4 x float>>:51 [#uses=1]
+	fmul <4 x float> %51, %20		; <<4 x float>>:52 [#uses=1]
+	fmul <4 x float> %52, %21		; <<4 x float>>:53 [#uses=1]
+	fmul <4 x float> %53, %22		; <<4 x float>>:54 [#uses=1]
+	fmul <4 x float> %54, %23		; <<4 x float>>:55 [#uses=1]
+	fmul <4 x float> %55, %24		; <<4 x float>>:56 [#uses=1]
+	fmul <4 x float> %56, %25		; <<4 x float>>:57 [#uses=1]
+	fmul <4 x float> %57, %26		; <<4 x float>>:58 [#uses=1]
+	fmul <4 x float> %58, %27		; <<4 x float>>:59 [#uses=1]
+	fmul <4 x float> %59, %28		; <<4 x float>>:60 [#uses=1]
+	fmul <4 x float> %60, %29		; <<4 x float>>:61 [#uses=1]
+	fmul <4 x float> %61, %30		; <<4 x float>>:62 [#uses=1]
+	fmul <4 x float> %62, %31		; <<4 x float>>:63 [#uses=1]
+	fmul <4 x float> %63, %32		; <<4 x float>>:64 [#uses=3]
+	fmul <4 x float> %2, %2		; <<4 x float>>:65 [#uses=1]
+	fmul <4 x float> %65, %3		; <<4 x float>>:66 [#uses=1]
+	fmul <4 x float> %66, %4		; <<4 x float>>:67 [#uses=1]
+	fmul <4 x float> %67, %5		; <<4 x float>>:68 [#uses=1]
+	fmul <4 x float> %68, %6		; <<4 x float>>:69 [#uses=1]
+	fmul <4 x float> %69, %7		; <<4 x float>>:70 [#uses=1]
+	fmul <4 x float> %70, %8		; <<4 x float>>:71 [#uses=1]
+	fmul <4 x float> %71, %9		; <<4 x float>>:72 [#uses=1]
+	fmul <4 x float> %72, %10		; <<4 x float>>:73 [#uses=1]
+	fmul <4 x float> %73, %11		; <<4 x float>>:74 [#uses=1]
+	fmul <4 x float> %74, %12		; <<4 x float>>:75 [#uses=1]
+	fmul <4 x float> %75, %13		; <<4 x float>>:76 [#uses=1]
+	fmul <4 x float> %76, %14		; <<4 x float>>:77 [#uses=1]
+	fmul <4 x float> %77, %15		; <<4 x float>>:78 [#uses=1]
+	fmul <4 x float> %78, %16		; <<4 x float>>:79 [#uses=1]
+	fmul <4 x float> %79, %17		; <<4 x float>>:80 [#uses=1]
+	fmul <4 x float> %80, %18		; <<4 x float>>:81 [#uses=1]
+	fmul <4 x float> %81, %19		; <<4 x float>>:82 [#uses=1]
+	fmul <4 x float> %82, %20		; <<4 x float>>:83 [#uses=1]
+	fmul <4 x float> %83, %21		; <<4 x float>>:84 [#uses=1]
+	fmul <4 x float> %84, %22		; <<4 x float>>:85 [#uses=1]
+	fmul <4 x float> %85, %23		; <<4 x float>>:86 [#uses=1]
+	fmul <4 x float> %86, %24		; <<4 x float>>:87 [#uses=1]
+	fmul <4 x float> %87, %25		; <<4 x float>>:88 [#uses=1]
+	fmul <4 x float> %88, %26		; <<4 x float>>:89 [#uses=1]
+	fmul <4 x float> %89, %27		; <<4 x float>>:90 [#uses=1]
+	fmul <4 x float> %90, %28		; <<4 x float>>:91 [#uses=1]
+	fmul <4 x float> %91, %29		; <<4 x float>>:92 [#uses=1]
+	fmul <4 x float> %92, %30		; <<4 x float>>:93 [#uses=1]
+	fmul <4 x float> %93, %31		; <<4 x float>>:94 [#uses=1]
+	fmul <4 x float> %94, %32		; <<4 x float>>:95 [#uses=1]
+	fmul <4 x float> %3, %3		; <<4 x float>>:96 [#uses=1]
+	fmul <4 x float> %96, %4		; <<4 x float>>:97 [#uses=1]
+	fmul <4 x float> %97, %5		; <<4 x float>>:98 [#uses=1]
+	fmul <4 x float> %98, %6		; <<4 x float>>:99 [#uses=1]
+	fmul <4 x float> %99, %7		; <<4 x float>>:100 [#uses=1]
+	fmul <4 x float> %100, %8		; <<4 x float>>:101 [#uses=1]
+	fmul <4 x float> %101, %9		; <<4 x float>>:102 [#uses=1]
+	fmul <4 x float> %102, %10		; <<4 x float>>:103 [#uses=1]
+	fmul <4 x float> %103, %11		; <<4 x float>>:104 [#uses=1]
+	fmul <4 x float> %104, %12		; <<4 x float>>:105 [#uses=1]
+	fmul <4 x float> %105, %13		; <<4 x float>>:106 [#uses=1]
+	fmul <4 x float> %106, %14		; <<4 x float>>:107 [#uses=1]
+	fmul <4 x float> %107, %15		; <<4 x float>>:108 [#uses=1]
+	fmul <4 x float> %108, %16		; <<4 x float>>:109 [#uses=1]
+	fmul <4 x float> %109, %17		; <<4 x float>>:110 [#uses=1]
+	fmul <4 x float> %110, %18		; <<4 x float>>:111 [#uses=1]
+	fmul <4 x float> %111, %19		; <<4 x float>>:112 [#uses=1]
+	fmul <4 x float> %112, %20		; <<4 x float>>:113 [#uses=1]
+	fmul <4 x float> %113, %21		; <<4 x float>>:114 [#uses=1]
+	fmul <4 x float> %114, %22		; <<4 x float>>:115 [#uses=1]
+	fmul <4 x float> %115, %23		; <<4 x float>>:116 [#uses=1]
+	fmul <4 x float> %116, %24		; <<4 x float>>:117 [#uses=1]
+	fmul <4 x float> %117, %25		; <<4 x float>>:118 [#uses=1]
+	fmul <4 x float> %118, %26		; <<4 x float>>:119 [#uses=1]
+	fmul <4 x float> %119, %27		; <<4 x float>>:120 [#uses=1]
+	fmul <4 x float> %120, %28		; <<4 x float>>:121 [#uses=1]
+	fmul <4 x float> %121, %29		; <<4 x float>>:122 [#uses=1]
+	fmul <4 x float> %122, %30		; <<4 x float>>:123 [#uses=1]
+	fmul <4 x float> %123, %31		; <<4 x float>>:124 [#uses=1]
+	fmul <4 x float> %124, %32		; <<4 x float>>:125 [#uses=1]
+	fmul <4 x float> %4, %4		; <<4 x float>>:126 [#uses=1]
+	fmul <4 x float> %126, %5		; <<4 x float>>:127 [#uses=1]
+	fmul <4 x float> %127, %6		; <<4 x float>>:128 [#uses=1]
+	fmul <4 x float> %128, %7		; <<4 x float>>:129 [#uses=1]
+	fmul <4 x float> %129, %8		; <<4 x float>>:130 [#uses=1]
+	fmul <4 x float> %130, %9		; <<4 x float>>:131 [#uses=1]
+	fmul <4 x float> %131, %10		; <<4 x float>>:132 [#uses=1]
+	fmul <4 x float> %132, %11		; <<4 x float>>:133 [#uses=1]
+	fmul <4 x float> %133, %12		; <<4 x float>>:134 [#uses=1]
+	fmul <4 x float> %134, %13		; <<4 x float>>:135 [#uses=1]
+	fmul <4 x float> %135, %14		; <<4 x float>>:136 [#uses=1]
+	fmul <4 x float> %136, %15		; <<4 x float>>:137 [#uses=1]
+	fmul <4 x float> %137, %16		; <<4 x float>>:138 [#uses=1]
+	fmul <4 x float> %138, %17		; <<4 x float>>:139 [#uses=1]
+	fmul <4 x float> %139, %18		; <<4 x float>>:140 [#uses=1]
+	fmul <4 x float> %140, %19		; <<4 x float>>:141 [#uses=1]
+	fmul <4 x float> %141, %20		; <<4 x float>>:142 [#uses=1]
+	fmul <4 x float> %142, %21		; <<4 x float>>:143 [#uses=1]
+	fmul <4 x float> %143, %22		; <<4 x float>>:144 [#uses=1]
+	fmul <4 x float> %144, %23		; <<4 x float>>:145 [#uses=1]
+	fmul <4 x float> %145, %24		; <<4 x float>>:146 [#uses=1]
+	fmul <4 x float> %146, %25		; <<4 x float>>:147 [#uses=1]
+	fmul <4 x float> %147, %26		; <<4 x float>>:148 [#uses=1]
+	fmul <4 x float> %148, %27		; <<4 x float>>:149 [#uses=1]
+	fmul <4 x float> %149, %28		; <<4 x float>>:150 [#uses=1]
+	fmul <4 x float> %150, %29		; <<4 x float>>:151 [#uses=1]
+	fmul <4 x float> %151, %30		; <<4 x float>>:152 [#uses=1]
+	fmul <4 x float> %152, %31		; <<4 x float>>:153 [#uses=1]
+	fmul <4 x float> %153, %32		; <<4 x float>>:154 [#uses=1]
+	fmul <4 x float> %5, %5		; <<4 x float>>:155 [#uses=1]
+	fmul <4 x float> %155, %6		; <<4 x float>>:156 [#uses=1]
+	fmul <4 x float> %156, %7		; <<4 x float>>:157 [#uses=1]
+	fmul <4 x float> %157, %8		; <<4 x float>>:158 [#uses=1]
+	fmul <4 x float> %158, %9		; <<4 x float>>:159 [#uses=1]
+	fmul <4 x float> %159, %10		; <<4 x float>>:160 [#uses=1]
+	fmul <4 x float> %160, %11		; <<4 x float>>:161 [#uses=1]
+	fmul <4 x float> %161, %12		; <<4 x float>>:162 [#uses=1]
+	fmul <4 x float> %162, %13		; <<4 x float>>:163 [#uses=1]
+	fmul <4 x float> %163, %14		; <<4 x float>>:164 [#uses=1]
+	fmul <4 x float> %164, %15		; <<4 x float>>:165 [#uses=1]
+	fmul <4 x float> %165, %16		; <<4 x float>>:166 [#uses=1]
+	fmul <4 x float> %166, %17		; <<4 x float>>:167 [#uses=1]
+	fmul <4 x float> %167, %18		; <<4 x float>>:168 [#uses=1]
+	fmul <4 x float> %168, %19		; <<4 x float>>:169 [#uses=1]
+	fmul <4 x float> %169, %20		; <<4 x float>>:170 [#uses=1]
+	fmul <4 x float> %170, %21		; <<4 x float>>:171 [#uses=1]
+	fmul <4 x float> %171, %22		; <<4 x float>>:172 [#uses=1]
+	fmul <4 x float> %172, %23		; <<4 x float>>:173 [#uses=1]
+	fmul <4 x float> %173, %24		; <<4 x float>>:174 [#uses=1]
+	fmul <4 x float> %174, %25		; <<4 x float>>:175 [#uses=1]
+	fmul <4 x float> %175, %26		; <<4 x float>>:176 [#uses=1]
+	fmul <4 x float> %176, %27		; <<4 x float>>:177 [#uses=1]
+	fmul <4 x float> %177, %28		; <<4 x float>>:178 [#uses=1]
+	fmul <4 x float> %178, %29		; <<4 x float>>:179 [#uses=1]
+	fmul <4 x float> %179, %30		; <<4 x float>>:180 [#uses=1]
+	fmul <4 x float> %180, %31		; <<4 x float>>:181 [#uses=1]
+	fmul <4 x float> %181, %32		; <<4 x float>>:182 [#uses=1]
+	fmul <4 x float> %6, %6		; <<4 x float>>:183 [#uses=1]
+	fmul <4 x float> %183, %7		; <<4 x float>>:184 [#uses=1]
+	fmul <4 x float> %184, %8		; <<4 x float>>:185 [#uses=1]
+	fmul <4 x float> %185, %9		; <<4 x float>>:186 [#uses=1]
+	fmul <4 x float> %186, %10		; <<4 x float>>:187 [#uses=1]
+	fmul <4 x float> %187, %11		; <<4 x float>>:188 [#uses=1]
+	fmul <4 x float> %188, %12		; <<4 x float>>:189 [#uses=1]
+	fmul <4 x float> %189, %13		; <<4 x float>>:190 [#uses=1]
+	fmul <4 x float> %190, %14		; <<4 x float>>:191 [#uses=1]
+	fmul <4 x float> %191, %15		; <<4 x float>>:192 [#uses=1]
+	fmul <4 x float> %192, %16		; <<4 x float>>:193 [#uses=1]
+	fmul <4 x float> %193, %17		; <<4 x float>>:194 [#uses=1]
+	fmul <4 x float> %194, %18		; <<4 x float>>:195 [#uses=1]
+	fmul <4 x float> %195, %19		; <<4 x float>>:196 [#uses=1]
+	fmul <4 x float> %196, %20		; <<4 x float>>:197 [#uses=1]
+	fmul <4 x float> %197, %21		; <<4 x float>>:198 [#uses=1]
+	fmul <4 x float> %198, %22		; <<4 x float>>:199 [#uses=1]
+	fmul <4 x float> %199, %23		; <<4 x float>>:200 [#uses=1]
+	fmul <4 x float> %200, %24		; <<4 x float>>:201 [#uses=1]
+	fmul <4 x float> %201, %25		; <<4 x float>>:202 [#uses=1]
+	fmul <4 x float> %202, %26		; <<4 x float>>:203 [#uses=1]
+	fmul <4 x float> %203, %27		; <<4 x float>>:204 [#uses=1]
+	fmul <4 x float> %204, %28		; <<4 x float>>:205 [#uses=1]
+	fmul <4 x float> %205, %29		; <<4 x float>>:206 [#uses=1]
+	fmul <4 x float> %206, %30		; <<4 x float>>:207 [#uses=1]
+	fmul <4 x float> %207, %31		; <<4 x float>>:208 [#uses=1]
+	fmul <4 x float> %208, %32		; <<4 x float>>:209 [#uses=1]
+	fmul <4 x float> %7, %7		; <<4 x float>>:210 [#uses=1]
+	fmul <4 x float> %210, %8		; <<4 x float>>:211 [#uses=1]
+	fmul <4 x float> %211, %9		; <<4 x float>>:212 [#uses=1]
+	fmul <4 x float> %212, %10		; <<4 x float>>:213 [#uses=1]
+	fmul <4 x float> %213, %11		; <<4 x float>>:214 [#uses=1]
+	fmul <4 x float> %214, %12		; <<4 x float>>:215 [#uses=1]
+	fmul <4 x float> %215, %13		; <<4 x float>>:216 [#uses=1]
+	fmul <4 x float> %216, %14		; <<4 x float>>:217 [#uses=1]
+	fmul <4 x float> %217, %15		; <<4 x float>>:218 [#uses=1]
+	fmul <4 x float> %218, %16		; <<4 x float>>:219 [#uses=1]
+	fmul <4 x float> %219, %17		; <<4 x float>>:220 [#uses=1]
+	fmul <4 x float> %220, %18		; <<4 x float>>:221 [#uses=1]
+	fmul <4 x float> %221, %19		; <<4 x float>>:222 [#uses=1]
+	fmul <4 x float> %222, %20		; <<4 x float>>:223 [#uses=1]
+	fmul <4 x float> %223, %21		; <<4 x float>>:224 [#uses=1]
+	fmul <4 x float> %224, %22		; <<4 x float>>:225 [#uses=1]
+	fmul <4 x float> %225, %23		; <<4 x float>>:226 [#uses=1]
+	fmul <4 x float> %226, %24		; <<4 x float>>:227 [#uses=1]
+	fmul <4 x float> %227, %25		; <<4 x float>>:228 [#uses=1]
+	fmul <4 x float> %228, %26		; <<4 x float>>:229 [#uses=1]
+	fmul <4 x float> %229, %27		; <<4 x float>>:230 [#uses=1]
+	fmul <4 x float> %230, %28		; <<4 x float>>:231 [#uses=1]
+	fmul <4 x float> %231, %29		; <<4 x float>>:232 [#uses=1]
+	fmul <4 x float> %232, %30		; <<4 x float>>:233 [#uses=1]
+	fmul <4 x float> %233, %31		; <<4 x float>>:234 [#uses=1]
+	fmul <4 x float> %234, %32		; <<4 x float>>:235 [#uses=1]
+	fmul <4 x float> %8, %8		; <<4 x float>>:236 [#uses=1]
+	fmul <4 x float> %236, %9		; <<4 x float>>:237 [#uses=1]
+	fmul <4 x float> %237, %10		; <<4 x float>>:238 [#uses=1]
+	fmul <4 x float> %238, %11		; <<4 x float>>:239 [#uses=1]
+	fmul <4 x float> %239, %12		; <<4 x float>>:240 [#uses=1]
+	fmul <4 x float> %240, %13		; <<4 x float>>:241 [#uses=1]
+	fmul <4 x float> %241, %14		; <<4 x float>>:242 [#uses=1]
+	fmul <4 x float> %242, %15		; <<4 x float>>:243 [#uses=1]
+	fmul <4 x float> %243, %16		; <<4 x float>>:244 [#uses=1]
+	fmul <4 x float> %244, %17		; <<4 x float>>:245 [#uses=1]
+	fmul <4 x float> %245, %18		; <<4 x float>>:246 [#uses=1]
+	fmul <4 x float> %246, %19		; <<4 x float>>:247 [#uses=1]
+	fmul <4 x float> %247, %20		; <<4 x float>>:248 [#uses=1]
+	fmul <4 x float> %248, %21		; <<4 x float>>:249 [#uses=1]
+	fmul <4 x float> %249, %22		; <<4 x float>>:250 [#uses=1]
+	fmul <4 x float> %250, %23		; <<4 x float>>:251 [#uses=1]
+	fmul <4 x float> %251, %24		; <<4 x float>>:252 [#uses=1]
+	fmul <4 x float> %252, %25		; <<4 x float>>:253 [#uses=1]
+	fmul <4 x float> %253, %26		; <<4 x float>>:254 [#uses=1]
+	fmul <4 x float> %254, %27		; <<4 x float>>:255 [#uses=1]
+	fmul <4 x float> %255, %28		; <<4 x float>>:256 [#uses=1]
+	fmul <4 x float> %256, %29		; <<4 x float>>:257 [#uses=1]
+	fmul <4 x float> %257, %30		; <<4 x float>>:258 [#uses=1]
+	fmul <4 x float> %258, %31		; <<4 x float>>:259 [#uses=1]
+	fmul <4 x float> %259, %32		; <<4 x float>>:260 [#uses=1]
+	fmul <4 x float> %9, %9		; <<4 x float>>:261 [#uses=1]
+	fmul <4 x float> %261, %10		; <<4 x float>>:262 [#uses=1]
+	fmul <4 x float> %262, %11		; <<4 x float>>:263 [#uses=1]
+	fmul <4 x float> %263, %12		; <<4 x float>>:264 [#uses=1]
+	fmul <4 x float> %264, %13		; <<4 x float>>:265 [#uses=1]
+	fmul <4 x float> %265, %14		; <<4 x float>>:266 [#uses=1]
+	fmul <4 x float> %266, %15		; <<4 x float>>:267 [#uses=1]
+	fmul <4 x float> %267, %16		; <<4 x float>>:268 [#uses=1]
+	fmul <4 x float> %268, %17		; <<4 x float>>:269 [#uses=1]
+	fmul <4 x float> %269, %18		; <<4 x float>>:270 [#uses=1]
+	fmul <4 x float> %270, %19		; <<4 x float>>:271 [#uses=1]
+	fmul <4 x float> %271, %20		; <<4 x float>>:272 [#uses=1]
+	fmul <4 x float> %272, %21		; <<4 x float>>:273 [#uses=1]
+	fmul <4 x float> %273, %22		; <<4 x float>>:274 [#uses=1]
+	fmul <4 x float> %274, %23		; <<4 x float>>:275 [#uses=1]
+	fmul <4 x float> %275, %24		; <<4 x float>>:276 [#uses=1]
+	fmul <4 x float> %276, %25		; <<4 x float>>:277 [#uses=1]
+	fmul <4 x float> %277, %26		; <<4 x float>>:278 [#uses=1]
+	fmul <4 x float> %278, %27		; <<4 x float>>:279 [#uses=1]
+	fmul <4 x float> %279, %28		; <<4 x float>>:280 [#uses=1]
+	fmul <4 x float> %280, %29		; <<4 x float>>:281 [#uses=1]
+	fmul <4 x float> %281, %30		; <<4 x float>>:282 [#uses=1]
+	fmul <4 x float> %282, %31		; <<4 x float>>:283 [#uses=1]
+	fmul <4 x float> %283, %32		; <<4 x float>>:284 [#uses=1]
+	fmul <4 x float> %10, %10		; <<4 x float>>:285 [#uses=1]
+	fmul <4 x float> %285, %11		; <<4 x float>>:286 [#uses=1]
+	fmul <4 x float> %286, %12		; <<4 x float>>:287 [#uses=1]
+	fmul <4 x float> %287, %13		; <<4 x float>>:288 [#uses=1]
+	fmul <4 x float> %288, %14		; <<4 x float>>:289 [#uses=1]
+	fmul <4 x float> %289, %15		; <<4 x float>>:290 [#uses=1]
+	fmul <4 x float> %290, %16		; <<4 x float>>:291 [#uses=1]
+	fmul <4 x float> %291, %17		; <<4 x float>>:292 [#uses=1]
+	fmul <4 x float> %292, %18		; <<4 x float>>:293 [#uses=1]
+	fmul <4 x float> %293, %19		; <<4 x float>>:294 [#uses=1]
+	fmul <4 x float> %294, %20		; <<4 x float>>:295 [#uses=1]
+	fmul <4 x float> %295, %21		; <<4 x float>>:296 [#uses=1]
+	fmul <4 x float> %296, %22		; <<4 x float>>:297 [#uses=1]
+	fmul <4 x float> %297, %23		; <<4 x float>>:298 [#uses=1]
+	fmul <4 x float> %298, %24		; <<4 x float>>:299 [#uses=1]
+	fmul <4 x float> %299, %25		; <<4 x float>>:300 [#uses=1]
+	fmul <4 x float> %300, %26		; <<4 x float>>:301 [#uses=1]
+	fmul <4 x float> %301, %27		; <<4 x float>>:302 [#uses=1]
+	fmul <4 x float> %302, %28		; <<4 x float>>:303 [#uses=1]
+	fmul <4 x float> %303, %29		; <<4 x float>>:304 [#uses=1]
+	fmul <4 x float> %304, %30		; <<4 x float>>:305 [#uses=1]
+	fmul <4 x float> %305, %31		; <<4 x float>>:306 [#uses=1]
+	fmul <4 x float> %306, %32		; <<4 x float>>:307 [#uses=1]
+	fmul <4 x float> %11, %11		; <<4 x float>>:308 [#uses=1]
+	fmul <4 x float> %308, %12		; <<4 x float>>:309 [#uses=1]
+	fmul <4 x float> %309, %13		; <<4 x float>>:310 [#uses=1]
+	fmul <4 x float> %310, %14		; <<4 x float>>:311 [#uses=1]
+	fmul <4 x float> %311, %15		; <<4 x float>>:312 [#uses=1]
+	fmul <4 x float> %312, %16		; <<4 x float>>:313 [#uses=1]
+	fmul <4 x float> %313, %17		; <<4 x float>>:314 [#uses=1]
+	fmul <4 x float> %314, %18		; <<4 x float>>:315 [#uses=1]
+	fmul <4 x float> %315, %19		; <<4 x float>>:316 [#uses=1]
+	fmul <4 x float> %316, %20		; <<4 x float>>:317 [#uses=1]
+	fmul <4 x float> %317, %21		; <<4 x float>>:318 [#uses=1]
+	fmul <4 x float> %318, %22		; <<4 x float>>:319 [#uses=1]
+	fmul <4 x float> %319, %23		; <<4 x float>>:320 [#uses=1]
+	fmul <4 x float> %320, %24		; <<4 x float>>:321 [#uses=1]
+	fmul <4 x float> %321, %25		; <<4 x float>>:322 [#uses=1]
+	fmul <4 x float> %322, %26		; <<4 x float>>:323 [#uses=1]
+	fmul <4 x float> %323, %27		; <<4 x float>>:324 [#uses=1]
+	fmul <4 x float> %324, %28		; <<4 x float>>:325 [#uses=1]
+	fmul <4 x float> %325, %29		; <<4 x float>>:326 [#uses=1]
+	fmul <4 x float> %326, %30		; <<4 x float>>:327 [#uses=1]
+	fmul <4 x float> %327, %31		; <<4 x float>>:328 [#uses=1]
+	fmul <4 x float> %328, %32		; <<4 x float>>:329 [#uses=1]
+	fmul <4 x float> %12, %12		; <<4 x float>>:330 [#uses=1]
+	fmul <4 x float> %330, %13		; <<4 x float>>:331 [#uses=1]
+	fmul <4 x float> %331, %14		; <<4 x float>>:332 [#uses=1]
+	fmul <4 x float> %332, %15		; <<4 x float>>:333 [#uses=1]
+	fmul <4 x float> %333, %16		; <<4 x float>>:334 [#uses=1]
+	fmul <4 x float> %334, %17		; <<4 x float>>:335 [#uses=1]
+	fmul <4 x float> %335, %18		; <<4 x float>>:336 [#uses=1]
+	fmul <4 x float> %336, %19		; <<4 x float>>:337 [#uses=1]
+	fmul <4 x float> %337, %20		; <<4 x float>>:338 [#uses=1]
+	fmul <4 x float> %338, %21		; <<4 x float>>:339 [#uses=1]
+	fmul <4 x float> %339, %22		; <<4 x float>>:340 [#uses=1]
+	fmul <4 x float> %340, %23		; <<4 x float>>:341 [#uses=1]
+	fmul <4 x float> %341, %24		; <<4 x float>>:342 [#uses=1]
+	fmul <4 x float> %342, %25		; <<4 x float>>:343 [#uses=1]
+	fmul <4 x float> %343, %26		; <<4 x float>>:344 [#uses=1]
+	fmul <4 x float> %344, %27		; <<4 x float>>:345 [#uses=1]
+	fmul <4 x float> %345, %28		; <<4 x float>>:346 [#uses=1]
+	fmul <4 x float> %346, %29		; <<4 x float>>:347 [#uses=1]
+	fmul <4 x float> %347, %30		; <<4 x float>>:348 [#uses=1]
+	fmul <4 x float> %348, %31		; <<4 x float>>:349 [#uses=1]
+	fmul <4 x float> %349, %32		; <<4 x float>>:350 [#uses=1]
+	fmul <4 x float> %13, %13		; <<4 x float>>:351 [#uses=1]
+	fmul <4 x float> %351, %14		; <<4 x float>>:352 [#uses=1]
+	fmul <4 x float> %352, %15		; <<4 x float>>:353 [#uses=1]
+	fmul <4 x float> %353, %16		; <<4 x float>>:354 [#uses=1]
+	fmul <4 x float> %354, %17		; <<4 x float>>:355 [#uses=1]
+	fmul <4 x float> %355, %18		; <<4 x float>>:356 [#uses=1]
+	fmul <4 x float> %356, %19		; <<4 x float>>:357 [#uses=1]
+	fmul <4 x float> %357, %20		; <<4 x float>>:358 [#uses=1]
+	fmul <4 x float> %358, %21		; <<4 x float>>:359 [#uses=1]
+	fmul <4 x float> %359, %22		; <<4 x float>>:360 [#uses=1]
+	fmul <4 x float> %360, %23		; <<4 x float>>:361 [#uses=1]
+	fmul <4 x float> %361, %24		; <<4 x float>>:362 [#uses=1]
+	fmul <4 x float> %362, %25		; <<4 x float>>:363 [#uses=1]
+	fmul <4 x float> %363, %26		; <<4 x float>>:364 [#uses=1]
+	fmul <4 x float> %364, %27		; <<4 x float>>:365 [#uses=1]
+	fmul <4 x float> %365, %28		; <<4 x float>>:366 [#uses=1]
+	fmul <4 x float> %366, %29		; <<4 x float>>:367 [#uses=1]
+	fmul <4 x float> %367, %30		; <<4 x float>>:368 [#uses=1]
+	fmul <4 x float> %368, %31		; <<4 x float>>:369 [#uses=1]
+	fmul <4 x float> %369, %32		; <<4 x float>>:370 [#uses=1]
+	fmul <4 x float> %14, %14		; <<4 x float>>:371 [#uses=1]
+	fmul <4 x float> %371, %15		; <<4 x float>>:372 [#uses=1]
+	fmul <4 x float> %372, %16		; <<4 x float>>:373 [#uses=1]
+	fmul <4 x float> %373, %17		; <<4 x float>>:374 [#uses=1]
+	fmul <4 x float> %374, %18		; <<4 x float>>:375 [#uses=1]
+	fmul <4 x float> %375, %19		; <<4 x float>>:376 [#uses=1]
+	fmul <4 x float> %376, %20		; <<4 x float>>:377 [#uses=1]
+	fmul <4 x float> %377, %21		; <<4 x float>>:378 [#uses=1]
+	fmul <4 x float> %378, %22		; <<4 x float>>:379 [#uses=1]
+	fmul <4 x float> %379, %23		; <<4 x float>>:380 [#uses=1]
+	fmul <4 x float> %380, %24		; <<4 x float>>:381 [#uses=1]
+	fmul <4 x float> %381, %25		; <<4 x float>>:382 [#uses=1]
+	fmul <4 x float> %382, %26		; <<4 x float>>:383 [#uses=1]
+	fmul <4 x float> %383, %27		; <<4 x float>>:384 [#uses=1]
+	fmul <4 x float> %384, %28		; <<4 x float>>:385 [#uses=1]
+	fmul <4 x float> %385, %29		; <<4 x float>>:386 [#uses=1]
+	fmul <4 x float> %386, %30		; <<4 x float>>:387 [#uses=1]
+	fmul <4 x float> %387, %31		; <<4 x float>>:388 [#uses=1]
+	fmul <4 x float> %388, %32		; <<4 x float>>:389 [#uses=1]
+	fmul <4 x float> %15, %15		; <<4 x float>>:390 [#uses=1]
+	fmul <4 x float> %390, %16		; <<4 x float>>:391 [#uses=1]
+	fmul <4 x float> %391, %17		; <<4 x float>>:392 [#uses=1]
+	fmul <4 x float> %392, %18		; <<4 x float>>:393 [#uses=1]
+	fmul <4 x float> %393, %19		; <<4 x float>>:394 [#uses=1]
+	fmul <4 x float> %394, %20		; <<4 x float>>:395 [#uses=1]
+	fmul <4 x float> %395, %21		; <<4 x float>>:396 [#uses=1]
+	fmul <4 x float> %396, %22		; <<4 x float>>:397 [#uses=1]
+	fmul <4 x float> %397, %23		; <<4 x float>>:398 [#uses=1]
+	fmul <4 x float> %398, %24		; <<4 x float>>:399 [#uses=1]
+	fmul <4 x float> %399, %25		; <<4 x float>>:400 [#uses=1]
+	fmul <4 x float> %400, %26		; <<4 x float>>:401 [#uses=1]
+	fmul <4 x float> %401, %27		; <<4 x float>>:402 [#uses=1]
+	fmul <4 x float> %402, %28		; <<4 x float>>:403 [#uses=1]
+	fmul <4 x float> %403, %29		; <<4 x float>>:404 [#uses=1]
+	fmul <4 x float> %404, %30		; <<4 x float>>:405 [#uses=1]
+	fmul <4 x float> %405, %31		; <<4 x float>>:406 [#uses=1]
+	fmul <4 x float> %406, %32		; <<4 x float>>:407 [#uses=1]
+	fmul <4 x float> %16, %16		; <<4 x float>>:408 [#uses=1]
+	fmul <4 x float> %408, %17		; <<4 x float>>:409 [#uses=1]
+	fmul <4 x float> %409, %18		; <<4 x float>>:410 [#uses=1]
+	fmul <4 x float> %410, %19		; <<4 x float>>:411 [#uses=1]
+	fmul <4 x float> %411, %20		; <<4 x float>>:412 [#uses=1]
+	fmul <4 x float> %412, %21		; <<4 x float>>:413 [#uses=1]
+	fmul <4 x float> %413, %22		; <<4 x float>>:414 [#uses=1]
+	fmul <4 x float> %414, %23		; <<4 x float>>:415 [#uses=1]
+	fmul <4 x float> %415, %24		; <<4 x float>>:416 [#uses=1]
+	fmul <4 x float> %416, %25		; <<4 x float>>:417 [#uses=1]
+	fmul <4 x float> %417, %26		; <<4 x float>>:418 [#uses=1]
+	fmul <4 x float> %418, %27		; <<4 x float>>:419 [#uses=1]
+	fmul <4 x float> %419, %28		; <<4 x float>>:420 [#uses=1]
+	fmul <4 x float> %420, %29		; <<4 x float>>:421 [#uses=1]
+	fmul <4 x float> %421, %30		; <<4 x float>>:422 [#uses=1]
+	fmul <4 x float> %422, %31		; <<4 x float>>:423 [#uses=1]
+	fmul <4 x float> %423, %32		; <<4 x float>>:424 [#uses=1]
+	fmul <4 x float> %17, %17		; <<4 x float>>:425 [#uses=1]
+	fmul <4 x float> %425, %18		; <<4 x float>>:426 [#uses=1]
+	fmul <4 x float> %426, %19		; <<4 x float>>:427 [#uses=1]
+	fmul <4 x float> %427, %20		; <<4 x float>>:428 [#uses=1]
+	fmul <4 x float> %428, %21		; <<4 x float>>:429 [#uses=1]
+	fmul <4 x float> %429, %22		; <<4 x float>>:430 [#uses=1]
+	fmul <4 x float> %430, %23		; <<4 x float>>:431 [#uses=1]
+	fmul <4 x float> %431, %24		; <<4 x float>>:432 [#uses=1]
+	fmul <4 x float> %432, %25		; <<4 x float>>:433 [#uses=1]
+	fmul <4 x float> %433, %26		; <<4 x float>>:434 [#uses=1]
+	fmul <4 x float> %434, %27		; <<4 x float>>:435 [#uses=1]
+	fmul <4 x float> %435, %28		; <<4 x float>>:436 [#uses=1]
+	fmul <4 x float> %436, %29		; <<4 x float>>:437 [#uses=1]
+	fmul <4 x float> %437, %30		; <<4 x float>>:438 [#uses=1]
+	fmul <4 x float> %438, %31		; <<4 x float>>:439 [#uses=1]
+	fmul <4 x float> %439, %32		; <<4 x float>>:440 [#uses=1]
+	fmul <4 x float> %18, %18		; <<4 x float>>:441 [#uses=1]
+	fmul <4 x float> %441, %19		; <<4 x float>>:442 [#uses=1]
+	fmul <4 x float> %442, %20		; <<4 x float>>:443 [#uses=1]
+	fmul <4 x float> %443, %21		; <<4 x float>>:444 [#uses=1]
+	fmul <4 x float> %444, %22		; <<4 x float>>:445 [#uses=1]
+	fmul <4 x float> %445, %23		; <<4 x float>>:446 [#uses=1]
+	fmul <4 x float> %446, %24		; <<4 x float>>:447 [#uses=1]
+	fmul <4 x float> %447, %25		; <<4 x float>>:448 [#uses=1]
+	fmul <4 x float> %448, %26		; <<4 x float>>:449 [#uses=1]
+	fmul <4 x float> %449, %27		; <<4 x float>>:450 [#uses=1]
+	fmul <4 x float> %450, %28		; <<4 x float>>:451 [#uses=1]
+	fmul <4 x float> %451, %29		; <<4 x float>>:452 [#uses=1]
+	fmul <4 x float> %452, %30		; <<4 x float>>:453 [#uses=1]
+	fmul <4 x float> %453, %31		; <<4 x float>>:454 [#uses=1]
+	fmul <4 x float> %454, %32		; <<4 x float>>:455 [#uses=1]
+	fmul <4 x float> %19, %19		; <<4 x float>>:456 [#uses=1]
+	fmul <4 x float> %456, %20		; <<4 x float>>:457 [#uses=1]
+	fmul <4 x float> %457, %21		; <<4 x float>>:458 [#uses=1]
+	fmul <4 x float> %458, %22		; <<4 x float>>:459 [#uses=1]
+	fmul <4 x float> %459, %23		; <<4 x float>>:460 [#uses=1]
+	fmul <4 x float> %460, %24		; <<4 x float>>:461 [#uses=1]
+	fmul <4 x float> %461, %25		; <<4 x float>>:462 [#uses=1]
+	fmul <4 x float> %462, %26		; <<4 x float>>:463 [#uses=1]
+	fmul <4 x float> %463, %27		; <<4 x float>>:464 [#uses=1]
+	fmul <4 x float> %464, %28		; <<4 x float>>:465 [#uses=1]
+	fmul <4 x float> %465, %29		; <<4 x float>>:466 [#uses=1]
+	fmul <4 x float> %466, %30		; <<4 x float>>:467 [#uses=1]
+	fmul <4 x float> %467, %31		; <<4 x float>>:468 [#uses=1]
+	fmul <4 x float> %468, %32		; <<4 x float>>:469 [#uses=1]
+	fmul <4 x float> %20, %20		; <<4 x float>>:470 [#uses=1]
+	fmul <4 x float> %470, %21		; <<4 x float>>:471 [#uses=1]
+	fmul <4 x float> %471, %22		; <<4 x float>>:472 [#uses=1]
+	fmul <4 x float> %472, %23		; <<4 x float>>:473 [#uses=1]
+	fmul <4 x float> %473, %24		; <<4 x float>>:474 [#uses=1]
+	fmul <4 x float> %474, %25		; <<4 x float>>:475 [#uses=1]
+	fmul <4 x float> %475, %26		; <<4 x float>>:476 [#uses=1]
+	fmul <4 x float> %476, %27		; <<4 x float>>:477 [#uses=1]
+	fmul <4 x float> %477, %28		; <<4 x float>>:478 [#uses=1]
+	fmul <4 x float> %478, %29		; <<4 x float>>:479 [#uses=1]
+	fmul <4 x float> %479, %30		; <<4 x float>>:480 [#uses=1]
+	fmul <4 x float> %480, %31		; <<4 x float>>:481 [#uses=1]
+	fmul <4 x float> %481, %32		; <<4 x float>>:482 [#uses=1]
+	fmul <4 x float> %21, %21		; <<4 x float>>:483 [#uses=1]
+	fmul <4 x float> %483, %22		; <<4 x float>>:484 [#uses=1]
+	fmul <4 x float> %484, %23		; <<4 x float>>:485 [#uses=1]
+	fmul <4 x float> %485, %24		; <<4 x float>>:486 [#uses=1]
+	fmul <4 x float> %486, %25		; <<4 x float>>:487 [#uses=1]
+	fmul <4 x float> %487, %26		; <<4 x float>>:488 [#uses=1]
+	fmul <4 x float> %488, %27		; <<4 x float>>:489 [#uses=1]
+	fmul <4 x float> %489, %28		; <<4 x float>>:490 [#uses=1]
+	fmul <4 x float> %490, %29		; <<4 x float>>:491 [#uses=1]
+	fmul <4 x float> %491, %30		; <<4 x float>>:492 [#uses=1]
+	fmul <4 x float> %492, %31		; <<4 x float>>:493 [#uses=1]
+	fmul <4 x float> %493, %32		; <<4 x float>>:494 [#uses=1]
+	fmul <4 x float> %22, %22		; <<4 x float>>:495 [#uses=1]
+	fmul <4 x float> %495, %23		; <<4 x float>>:496 [#uses=1]
+	fmul <4 x float> %496, %24		; <<4 x float>>:497 [#uses=1]
+	fmul <4 x float> %497, %25		; <<4 x float>>:498 [#uses=1]
+	fmul <4 x float> %498, %26		; <<4 x float>>:499 [#uses=1]
+	fmul <4 x float> %499, %27		; <<4 x float>>:500 [#uses=1]
+	fmul <4 x float> %500, %28		; <<4 x float>>:501 [#uses=1]
+	fmul <4 x float> %501, %29		; <<4 x float>>:502 [#uses=1]
+	fmul <4 x float> %502, %30		; <<4 x float>>:503 [#uses=1]
+	fmul <4 x float> %503, %31		; <<4 x float>>:504 [#uses=1]
+	fmul <4 x float> %504, %32		; <<4 x float>>:505 [#uses=1]
+	fmul <4 x float> %23, %23		; <<4 x float>>:506 [#uses=1]
+	fmul <4 x float> %506, %24		; <<4 x float>>:507 [#uses=1]
+	fmul <4 x float> %507, %25		; <<4 x float>>:508 [#uses=1]
+	fmul <4 x float> %508, %26		; <<4 x float>>:509 [#uses=1]
+	fmul <4 x float> %509, %27		; <<4 x float>>:510 [#uses=1]
+	fmul <4 x float> %510, %28		; <<4 x float>>:511 [#uses=1]
+	fmul <4 x float> %511, %29		; <<4 x float>>:512 [#uses=1]
+	fmul <4 x float> %512, %30		; <<4 x float>>:513 [#uses=1]
+	fmul <4 x float> %513, %31		; <<4 x float>>:514 [#uses=1]
+	fmul <4 x float> %514, %32		; <<4 x float>>:515 [#uses=1]
+	fmul <4 x float> %24, %24		; <<4 x float>>:516 [#uses=1]
+	fmul <4 x float> %516, %25		; <<4 x float>>:517 [#uses=1]
+	fmul <4 x float> %517, %26		; <<4 x float>>:518 [#uses=1]
+	fmul <4 x float> %518, %27		; <<4 x float>>:519 [#uses=1]
+	fmul <4 x float> %519, %28		; <<4 x float>>:520 [#uses=1]
+	fmul <4 x float> %520, %29		; <<4 x float>>:521 [#uses=1]
+	fmul <4 x float> %521, %30		; <<4 x float>>:522 [#uses=1]
+	fmul <4 x float> %522, %31		; <<4 x float>>:523 [#uses=1]
+	fmul <4 x float> %523, %32		; <<4 x float>>:524 [#uses=1]
+	fmul <4 x float> %25, %25		; <<4 x float>>:525 [#uses=1]
+	fmul <4 x float> %525, %26		; <<4 x float>>:526 [#uses=1]
+	fmul <4 x float> %526, %27		; <<4 x float>>:527 [#uses=1]
+	fmul <4 x float> %527, %28		; <<4 x float>>:528 [#uses=1]
+	fmul <4 x float> %528, %29		; <<4 x float>>:529 [#uses=1]
+	fmul <4 x float> %529, %30		; <<4 x float>>:530 [#uses=1]
+	fmul <4 x float> %530, %31		; <<4 x float>>:531 [#uses=1]
+	fmul <4 x float> %531, %32		; <<4 x float>>:532 [#uses=1]
+	fmul <4 x float> %26, %26		; <<4 x float>>:533 [#uses=1]
+	fmul <4 x float> %533, %27		; <<4 x float>>:534 [#uses=1]
+	fmul <4 x float> %534, %28		; <<4 x float>>:535 [#uses=1]
+	fmul <4 x float> %535, %29		; <<4 x float>>:536 [#uses=1]
+	fmul <4 x float> %536, %30		; <<4 x float>>:537 [#uses=1]
+	fmul <4 x float> %537, %31		; <<4 x float>>:538 [#uses=1]
+	fmul <4 x float> %538, %32		; <<4 x float>>:539 [#uses=1]
+	fmul <4 x float> %27, %27		; <<4 x float>>:540 [#uses=1]
+	fmul <4 x float> %540, %28		; <<4 x float>>:541 [#uses=1]
+	fmul <4 x float> %541, %29		; <<4 x float>>:542 [#uses=1]
+	fmul <4 x float> %542, %30		; <<4 x float>>:543 [#uses=1]
+	fmul <4 x float> %543, %31		; <<4 x float>>:544 [#uses=1]
+	fmul <4 x float> %544, %32		; <<4 x float>>:545 [#uses=1]
+	fmul <4 x float> %28, %28		; <<4 x float>>:546 [#uses=1]
+	fmul <4 x float> %546, %29		; <<4 x float>>:547 [#uses=1]
+	fmul <4 x float> %547, %30		; <<4 x float>>:548 [#uses=1]
+	fmul <4 x float> %548, %31		; <<4 x float>>:549 [#uses=1]
+	fmul <4 x float> %549, %32		; <<4 x float>>:550 [#uses=1]
+	fmul <4 x float> %29, %29		; <<4 x float>>:551 [#uses=1]
+	fmul <4 x float> %551, %30		; <<4 x float>>:552 [#uses=1]
+	fmul <4 x float> %552, %31		; <<4 x float>>:553 [#uses=1]
+	fmul <4 x float> %553, %32		; <<4 x float>>:554 [#uses=1]
+	fmul <4 x float> %30, %30		; <<4 x float>>:555 [#uses=1]
+	fmul <4 x float> %555, %31		; <<4 x float>>:556 [#uses=1]
+	fmul <4 x float> %556, %32		; <<4 x float>>:557 [#uses=1]
+	fmul <4 x float> %31, %31		; <<4 x float>>:558 [#uses=1]
+	fmul <4 x float> %558, %32		; <<4 x float>>:559 [#uses=1]
+	fmul <4 x float> %32, %32		; <<4 x float>>:560 [#uses=1]
+	fadd <4 x float> %64, %64		; <<4 x float>>:561 [#uses=1]
+	fadd <4 x float> %561, %64		; <<4 x float>>:562 [#uses=1]
+	fadd <4 x float> %562, %95		; <<4 x float>>:563 [#uses=1]
+	fadd <4 x float> %563, %125		; <<4 x float>>:564 [#uses=1]
+	fadd <4 x float> %564, %154		; <<4 x float>>:565 [#uses=1]
+	fadd <4 x float> %565, %182		; <<4 x float>>:566 [#uses=1]
+	fadd <4 x float> %566, %209		; <<4 x float>>:567 [#uses=1]
+	fadd <4 x float> %567, %235		; <<4 x float>>:568 [#uses=1]
+	fadd <4 x float> %568, %260		; <<4 x float>>:569 [#uses=1]
+	fadd <4 x float> %569, %284		; <<4 x float>>:570 [#uses=1]
+	fadd <4 x float> %570, %307		; <<4 x float>>:571 [#uses=1]
+	fadd <4 x float> %571, %329		; <<4 x float>>:572 [#uses=1]
+	fadd <4 x float> %572, %350		; <<4 x float>>:573 [#uses=1]
+	fadd <4 x float> %573, %370		; <<4 x float>>:574 [#uses=1]
+	fadd <4 x float> %574, %389		; <<4 x float>>:575 [#uses=1]
+	fadd <4 x float> %575, %407		; <<4 x float>>:576 [#uses=1]
+	fadd <4 x float> %576, %424		; <<4 x float>>:577 [#uses=1]
+	fadd <4 x float> %577, %440		; <<4 x float>>:578 [#uses=1]
+	fadd <4 x float> %578, %455		; <<4 x float>>:579 [#uses=1]
+	fadd <4 x float> %579, %469		; <<4 x float>>:580 [#uses=1]
+	fadd <4 x float> %580, %482		; <<4 x float>>:581 [#uses=1]
+	fadd <4 x float> %581, %494		; <<4 x float>>:582 [#uses=1]
+	fadd <4 x float> %582, %505		; <<4 x float>>:583 [#uses=1]
+	fadd <4 x float> %583, %515		; <<4 x float>>:584 [#uses=1]
+	fadd <4 x float> %584, %524		; <<4 x float>>:585 [#uses=1]
+	fadd <4 x float> %585, %532		; <<4 x float>>:586 [#uses=1]
+	fadd <4 x float> %586, %539		; <<4 x float>>:587 [#uses=1]
+	fadd <4 x float> %587, %545		; <<4 x float>>:588 [#uses=1]
+	fadd <4 x float> %588, %550		; <<4 x float>>:589 [#uses=1]
+	fadd <4 x float> %589, %554		; <<4 x float>>:590 [#uses=1]
+	fadd <4 x float> %590, %557		; <<4 x float>>:591 [#uses=1]
+	fadd <4 x float> %591, %559		; <<4 x float>>:592 [#uses=1]
+	fadd <4 x float> %592, %560		; <<4 x float>>:593 [#uses=1]
+	store <4 x float> %593, <4 x float>* @0, align 1
+	ret void
+}
diff --git a/test/CodeGen/X86/2008-07-22-CombinerCrash.ll b/test/CodeGen/X86/2008-07-22-CombinerCrash.ll
new file mode 100644
index 0000000..0f67145
--- /dev/null
+++ b/test/CodeGen/X86/2008-07-22-CombinerCrash.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2
+; PR2566
+
+external global i16		; <i16*>:0 [#uses=1]
+external global <4 x i16>		; <<4 x i16>*>:1 [#uses=1]
+
+declare void @abort()
+
+define void @t() nounwind {
+	load i16* @0		; <i16>:1 [#uses=1]
+	zext i16 %1 to i64		; <i64>:2 [#uses=1]
+	bitcast i64 %2 to <4 x i16>		; <<4 x i16>>:3 [#uses=1]
+	shufflevector <4 x i16> %3, <4 x i16> undef, <4 x i32> zeroinitializer		; <<4 x i16>>:4 [#uses=1]
+	store <4 x i16> %4, <4 x i16>* @1
+	ret void
+}
diff --git a/test/CodeGen/X86/2008-07-23-VSetCC.ll b/test/CodeGen/X86/2008-07-23-VSetCC.ll
new file mode 100644
index 0000000..684ca5c
--- /dev/null
+++ b/test/CodeGen/X86/2008-07-23-VSetCC.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=x86 -mcpu=pentium
+; PR2575
+
+define void @entry(i32 %m_task_id, i32 %start_x, i32 %end_x) nounwind  {
+	br i1 false, label %bb.nph, label %._crit_edge
+
+bb.nph:		; preds = %bb.nph, %0
+	%X = icmp sgt <4 x i32> zeroinitializer, < i32 -128, i32 -128, i32 -128, i32 -128 >		; <<4 x i32>>:1 [#uses=1]
+        sext <4 x i1> %X to <4 x i32>
+	extractelement <4 x i32> %1, i32 3		; <i32>:2 [#uses=1]
+	lshr i32 %2, 31		; <i32>:3 [#uses=1]
+	trunc i32 %3 to i1		; <i1>:4 [#uses=1]
+	select i1 %4, i32 -1, i32 0		; <i32>:5 [#uses=1]
+	insertelement <4 x i32> zeroinitializer, i32 %5, i32 3		; <<4 x i32>>:6 [#uses=1]
+	and <4 x i32> zeroinitializer, %6		; <<4 x i32>>:7 [#uses=1]
+	bitcast <4 x i32> %7 to <4 x float>		; <<4 x float>>:8 [#uses=1]
+	fmul <4 x float> zeroinitializer, %8		; <<4 x float>>:9 [#uses=1]
+	bitcast <4 x float> %9 to <4 x i32>		; <<4 x i32>>:10 [#uses=1]
+	or <4 x i32> %10, zeroinitializer		; <<4 x i32>>:11 [#uses=1]
+	bitcast <4 x i32> %11 to <4 x float>		; <<4 x float>>:12 [#uses=1]
+	fmul <4 x float> %12, < float 1.000000e+02, float 1.000000e+02, float 1.000000e+02, float 1.000000e+02 >		; <<4 x float>>:13 [#uses=1]
+	fsub <4 x float> %13, < float 1.000000e+02, float 1.000000e+02, float 1.000000e+02, float 1.000000e+02 >		; <<4 x float>>:14 [#uses=1]
+	extractelement <4 x float> %14, i32 3		; <float>:15 [#uses=1]
+	call float @fmaxf( float 0.000000e+00, float %15 )		; <float>:16 [#uses=0]
+	br label %bb.nph
+
+._crit_edge:		; preds = %0
+	ret void
+}
+
+
+declare float @fmaxf(float, float)
diff --git a/test/CodeGen/X86/2008-08-05-SpillerBug.ll b/test/CodeGen/X86/2008-08-05-SpillerBug.ll
new file mode 100644
index 0000000..67e14ff
--- /dev/null
+++ b/test/CodeGen/X86/2008-08-05-SpillerBug.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 58
+; PR2568
+
+@g_3 = external global i16		; <i16*> [#uses=1]
+@g_5 = external global i32		; <i32*> [#uses=3]
+
+declare i32 @func_15(i16 signext , i16 signext , i32) nounwind 
+
+define void @func_9_entry_2E_ce(i8 %p_11) nounwind {
+newFuncRoot:
+	br label %entry.ce
+
+entry.ce.ret.exitStub:		; preds = %entry.ce
+	ret void
+
+entry.ce:		; preds = %newFuncRoot
+	load i16* @g_3, align 2		; <i16>:0 [#uses=1]
+	icmp sgt i16 %0, 0		; <i1>:1 [#uses=1]
+	zext i1 %1 to i32		; <i32>:2 [#uses=1]
+	load i32* @g_5, align 4		; <i32>:3 [#uses=4]
+	icmp ugt i32 %2, %3		; <i1>:4 [#uses=1]
+	zext i1 %4 to i32		; <i32>:5 [#uses=1]
+	icmp eq i32 %3, 0		; <i1>:6 [#uses=1]
+	%.0 = select i1 %6, i32 1, i32 %3		; <i32> [#uses=1]
+	urem i32 1, %.0		; <i32>:7 [#uses=2]
+	sext i8 %p_11 to i16		; <i16>:8 [#uses=1]
+	trunc i32 %3 to i16		; <i16>:9 [#uses=1]
+	tail call i32 @func_15( i16 signext  %8, i16 signext  %9, i32 1 ) nounwind 		; <i32>:10 [#uses=0]
+	load i32* @g_5, align 4		; <i32>:11 [#uses=1]
+	trunc i32 %11 to i16		; <i16>:12 [#uses=1]
+	tail call i32 @func_15( i16 signext  %12, i16 signext  1, i32 %7 ) nounwind 		; <i32>:13 [#uses=0]
+	sext i8 %p_11 to i32		; <i32>:14 [#uses=1]
+	%p_11.lobit = lshr i8 %p_11, 7		; <i8> [#uses=1]
+	%tmp = zext i8 %p_11.lobit to i32		; <i32> [#uses=1]
+	%tmp.not = xor i32 %tmp, 1		; <i32> [#uses=1]
+	%.015 = ashr i32 %14, %tmp.not		; <i32> [#uses=2]
+	icmp eq i32 %.015, 0		; <i1>:15 [#uses=1]
+	%.016 = select i1 %15, i32 1, i32 %.015		; <i32> [#uses=1]
+	udiv i32 %7, %.016		; <i32>:16 [#uses=1]
+	icmp ult i32 %5, %16		; <i1>:17 [#uses=1]
+	zext i1 %17 to i32		; <i32>:18 [#uses=1]
+	store i32 %18, i32* @g_5, align 4
+	br label %entry.ce.ret.exitStub
+}
diff --git a/test/CodeGen/X86/2008-08-06-RewriterBug.ll b/test/CodeGen/X86/2008-08-06-RewriterBug.ll
new file mode 100644
index 0000000..4428035
--- /dev/null
+++ b/test/CodeGen/X86/2008-08-06-RewriterBug.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -march=x86
+; PR2596
+
+@data = external global [400 x i64]		; <[400 x i64]*> [#uses=5]
+
+define void @foo(double* noalias, double* noalias) {
+	load i64* getelementptr ([400 x i64]* @data, i32 0, i64 200), align 4		; <i64>:3 [#uses=1]
+	load i64* getelementptr ([400 x i64]* @data, i32 0, i64 199), align 4		; <i64>:4 [#uses=1]
+	load i64* getelementptr ([400 x i64]* @data, i32 0, i64 198), align 4		; <i64>:5 [#uses=2]
+	load i64* getelementptr ([400 x i64]* @data, i32 0, i64 197), align 4		; <i64>:6 [#uses=1]
+	br i1 false, label %28, label %7
+
+; <label>:7		; preds = %2
+	load double** getelementptr (double** bitcast ([400 x i64]* @data to double**), i64 180), align 8		; <double*>:8 [#uses=1]
+	bitcast double* %8 to double*		; <double*>:9 [#uses=1]
+	ptrtoint double* %9 to i64		; <i64>:10 [#uses=1]
+	mul i64 %4, %3		; <i64>:11 [#uses=1]
+	add i64 0, %11		; <i64>:12 [#uses=1]
+	shl i64 %12, 3		; <i64>:13 [#uses=1]
+	sub i64 %10, %13		; <i64>:14 [#uses=1]
+	add i64 %5, 0		; <i64>:15 [#uses=1]
+	shl i64 %15, 3		; <i64>:16 [#uses=1]
+	bitcast i64 %16 to i64		; <i64>:17 [#uses=1]
+	mul i64 %6, %5		; <i64>:18 [#uses=1]
+	add i64 0, %18		; <i64>:19 [#uses=1]
+	shl i64 %19, 3		; <i64>:20 [#uses=1]
+	sub i64 %17, %20		; <i64>:21 [#uses=1]
+	add i64 0, %21		; <i64>:22 [#uses=1]
+	add i64 0, %14		; <i64>:23 [#uses=1]
+	br label %24
+
+; <label>:24		; preds = %24, %7
+	phi i64 [ 0, %24 ], [ %22, %7 ]		; <i64>:25 [#uses=1]
+	phi i64 [ 0, %24 ], [ %23, %7 ]		; <i64>:26 [#uses=0]
+	add i64 %25, 24		; <i64>:27 [#uses=0]
+	br label %24
+
+; <label>:28		; preds = %2
+	unreachable
+}
diff --git a/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll b/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll
new file mode 100644
index 0000000..32f6ca0
--- /dev/null
+++ b/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movzbl
+
+define i32 @foo(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+	tail call i32 @llvm.x86.sse.ucomige.ss( <4 x float> %a, <4 x float> %b ) nounwind readnone
+	ret i32 %0
+}
+
+declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/X86/2008-08-19-SubAndFetch.ll b/test/CodeGen/X86/2008-08-19-SubAndFetch.ll
new file mode 100644
index 0000000..8475e8d
--- /dev/null
+++ b/test/CodeGen/X86/2008-08-19-SubAndFetch.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+@var = external global i64		; <i64*> [#uses=1]
+
+define i32 @main() nounwind {
+entry:
+; CHECK: main:
+; CHECK: lock
+; CHECK: decq
+	tail call i64 @llvm.atomic.load.sub.i64.p0i64( i64* @var, i64 1 )		; <i64>:0 [#uses=0]
+	unreachable
+}
+
+declare i64 @llvm.atomic.load.sub.i64.p0i64(i64*, i64) nounwind
diff --git a/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll b/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll
new file mode 100644
index 0000000..c76dd7d
--- /dev/null
+++ b/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=x86-64
+
+	%struct.DrawHelper = type { void (i32, %struct.QT_FT_Span*, i8*)*, void (i32, %struct.QT_FT_Span*, i8*)*, void (%struct.QRasterBuffer*, i32, i32, i32, i8*, i32, i32, i32)*, void (%struct.QRasterBuffer*, i32, i32, i32, i8*, i32, i32, i32)*, void (%struct.QRasterBuffer*, i32, i32, i32, i32, i32)* }
+	%struct.QBasicAtomic = type { i32 }
+	%struct.QClipData = type { i32, %"struct.QClipData::ClipLine"*, i32, i32, %struct.QT_FT_Span*, i32, i32, i32, i32 }
+	%"struct.QClipData::ClipLine" = type { i32, %struct.QT_FT_Span* }
+	%struct.QRasterBuffer = type { %struct.QRect, %struct.QRect, %struct.QRegion, %struct.QRegion, %struct.QClipData*, %struct.QClipData*, i8, i8, i32, i32, i32, i32, %struct.DrawHelper*, i32, i32, i32, i8* }
+	%struct.QRect = type { i32, i32, i32, i32 }
+	%struct.QRegion = type { %"struct.QRegion::QRegionData"* }
+	%"struct.QRegion::QRegionData" = type { %struct.QBasicAtomic, %struct._XRegion*, i8*, %struct.QRegionPrivate* }
+	%struct.QRegionPrivate = type opaque
+	%struct.QT_FT_Span = type { i16, i16, i16, i8 }
+	%struct._XRegion = type opaque
+
+define hidden void @_Z24qt_bitmapblit16_sse3dnowP13QRasterBufferiijPKhiii(%struct.QRasterBuffer* %rasterBuffer, i32 %x, i32 %y, i32 %color, i8* %src, i32 %width, i32 %height, i32 %stride) nounwind {
+entry:
+	br i1 false, label %bb.nph144.split, label %bb133
+
+bb.nph144.split:		; preds = %entry
+	tail call void @llvm.x86.mmx.maskmovq( <8 x i8> zeroinitializer, <8 x i8> zeroinitializer, i8* null ) nounwind
+	unreachable
+
+bb133:		; preds = %entry
+	ret void
+}
+
+declare void @llvm.x86.mmx.maskmovq(<8 x i8>, <8 x i8>, i8*) nounwind
diff --git a/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll b/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll
new file mode 100644
index 0000000..eacb4a5
--- /dev/null
+++ b/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movd | count 1
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movq
+; PR2677
+
+
+	%struct.Bigint = type { %struct.Bigint*, i32, i32, i32, i32, [1 x i32] }
+
+define double @_Z7qstrtodPKcPS0_Pb(i8* %s00, i8** %se, i8* %ok) nounwind {
+entry:
+	br i1 false, label %bb151, label %bb163
+
+bb151:		; preds = %entry
+	br label %bb163
+
+bb163:		; preds = %bb151, %entry
+	%tmp366 = load double* null, align 8		; <double> [#uses=1]
+	%tmp368 = fmul double %tmp366, 0.000000e+00		; <double> [#uses=1]
+	%tmp368226 = bitcast double %tmp368 to i64		; <i64> [#uses=1]
+	br label %bb5.i
+
+bb5.i:		; preds = %bb5.i57.i, %bb163
+	%b.0.i = phi %struct.Bigint* [ null, %bb163 ], [ %tmp9.i.i41.i, %bb5.i57.i ]		; <%struct.Bigint*> [#uses=1]
+	%tmp3.i7.i728 = load i32* null, align 4		; <i32> [#uses=1]
+	br label %bb.i27.i
+
+bb.i27.i:		; preds = %bb.i27.i, %bb5.i
+	%tmp23.i20.i = lshr i32 0, 16		; <i32> [#uses=1]
+	br i1 false, label %bb.i27.i, label %bb5.i57.i
+
+bb5.i57.i:		; preds = %bb.i27.i
+	%tmp50.i35.i = load i32* null, align 4		; <i32> [#uses=1]
+	%tmp51.i36.i = add i32 %tmp50.i35.i, 1		; <i32> [#uses=2]
+	%tmp2.i.i37.i = shl i32 1, %tmp51.i36.i		; <i32> [#uses=2]
+	%tmp4.i.i38.i = shl i32 %tmp2.i.i37.i, 2		; <i32> [#uses=1]
+	%tmp7.i.i39.i = add i32 %tmp4.i.i38.i, 28		; <i32> [#uses=1]
+	%tmp8.i.i40.i = malloc i8, i32 %tmp7.i.i39.i		; <i8*> [#uses=1]
+	%tmp9.i.i41.i = bitcast i8* %tmp8.i.i40.i to %struct.Bigint*		; <%struct.Bigint*> [#uses=2]
+	store i32 %tmp51.i36.i, i32* null, align 8
+	store i32 %tmp2.i.i37.i, i32* null, align 4
+	free %struct.Bigint* %b.0.i
+	store i32 %tmp23.i20.i, i32* null, align 4
+	%tmp74.i61.i = add i32 %tmp3.i7.i728, 1		; <i32> [#uses=1]
+	store i32 %tmp74.i61.i, i32* null, align 4
+	br i1 false, label %bb5.i, label %bb7.i
+
+bb7.i:		; preds = %bb5.i57.i
+	%tmp514 = load i32* null, align 4		; <i32> [#uses=1]
+	%tmp515 = sext i32 %tmp514 to i64		; <i64> [#uses=1]
+	%tmp516 = shl i64 %tmp515, 2		; <i64> [#uses=1]
+	%tmp517 = add i64 %tmp516, 8		; <i64> [#uses=1]
+	%tmp519 = getelementptr %struct.Bigint* %tmp9.i.i41.i, i32 0, i32 3		; <i32*> [#uses=1]
+	%tmp523 = bitcast i32* %tmp519 to i8*		; <i8*> [#uses=1]
+	call void @llvm.memcpy.i64( i8* null, i8* %tmp523, i64 %tmp517, i32 1 )
+	%tmp524136 = bitcast i64 %tmp368226 to double		; <double> [#uses=1]
+	store double %tmp524136, double* null
+	unreachable
+}
+
+declare void @llvm.memcpy.i64(i8*, i8*, i64, i32) nounwind
diff --git a/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll b/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll
new file mode 100644
index 0000000..101b3c5
--- /dev/null
+++ b/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mcpu=core2 | grep pxor | count 2
+; RUN: llc < %s -mcpu=core2 | not grep movapd
+; PR2715
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+	%struct.XPTTypeDescriptorPrefix = type { i8 }
+	%struct.nsISupports = type { i32 (...)** }
+	%struct.nsXPTCMiniVariant = type { %"struct.nsXPTCMiniVariant::._39" }
+	%"struct.nsXPTCMiniVariant::._39" = type { i64 }
+	%struct.nsXPTCVariant = type { %struct.nsXPTCMiniVariant, i8*, %struct.nsXPTType, i8 }
+	%struct.nsXPTType = type { %struct.XPTTypeDescriptorPrefix }
+
+define i32 @XPTC_InvokeByIndex(%struct.nsISupports* %that, i32 %methodIndex, i32 %paramCount, %struct.nsXPTCVariant* %params) nounwind {
+entry:
+	call void asm sideeffect "", "{xmm0},{xmm1},{xmm2},{xmm3},{xmm4},{xmm5},{xmm6},{xmm7},~{dirflag},~{fpsr},~{flags}"( double undef, double undef, double undef, double 1.0, double undef, double 0.0, double undef, double 0.0 ) nounwind
+	ret i32 0
+}
diff --git a/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll b/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll
new file mode 100644
index 0000000..b92c789
--- /dev/null
+++ b/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll
@@ -0,0 +1,17 @@
+; Check that eh_return & unwind_init were properly lowered
+; RUN: llc < %s | grep %ebp | count 7
+; RUN: llc < %s | grep %ecx | count 5
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i386-pc-linux"
+
+define i8* @test(i32 %a, i8* %b)  {
+entry:
+  call void @llvm.eh.unwind.init()
+  %foo   = alloca i32
+  call void @llvm.eh.return.i32(i32 %a, i8* %b)
+  unreachable
+}
+
+declare void @llvm.eh.return.i32(i32, i8*)
+declare void @llvm.eh.unwind.init()
diff --git a/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll b/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
new file mode 100644
index 0000000..00ab735
--- /dev/null
+++ b/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
@@ -0,0 +1,17 @@
+; Check that eh_return & unwind_init were properly lowered
+; RUN: llc < %s | grep %rbp | count 5
+; RUN: llc < %s | grep %rcx | count 3
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i8* @test(i64 %a, i8* %b)  {
+entry:
+  call void @llvm.eh.unwind.init()
+  %foo   = alloca i32
+  call void @llvm.eh.return.i64(i64 %a, i8* %b)
+  unreachable
+}
+
+declare void @llvm.eh.return.i64(i64, i8*)
+declare void @llvm.eh.unwind.init()
diff --git a/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
new file mode 100644
index 0000000..60be0d5
--- /dev/null
+++ b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvttpd2pi | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvtpi2pd | count 1
+; PR2687
+
+define <2 x double> @a(<2 x i32> %x) nounwind {
+entry:
+  %y = sitofp <2 x i32> %x to <2 x double>
+  ret <2 x double> %y
+}
+
+define <2 x i32> @b(<2 x double> %x) nounwind {
+entry:
+  %y = fptosi <2 x double> %x to <2 x i32>
+  ret <2 x i32> %y
+}
diff --git a/test/CodeGen/X86/2008-09-09-LinearScanBug.ll b/test/CodeGen/X86/2008-09-09-LinearScanBug.ll
new file mode 100644
index 0000000..b3312d9
--- /dev/null
+++ b/test/CodeGen/X86/2008-09-09-LinearScanBug.ll
@@ -0,0 +1,65 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin
+; PR2757
+
+@g_3 = external global i32		; <i32*> [#uses=1]
+
+define i32 @func_125(i32 %p_126, i32 %p_128, i32 %p_129) nounwind {
+entry:
+	%tmp2.i = load i32* @g_3		; <i32> [#uses=2]
+	%conv = trunc i32 %tmp2.i to i16		; <i16> [#uses=3]
+	br label %forcond1.preheader.i.i7
+
+forcond1.preheader.i.i7:		; preds = %forinc6.i.i25, %entry
+	%p_86.addr.06.i.i4 = phi i32 [ 0, %entry ], [ %sub.i.i.i23, %forinc6.i.i25 ]		; <i32> [#uses=1]
+	%p_87.addr.15.i.i5 = phi i32 [ 0, %entry ], [ %p_87.addr.0.lcssa.i.i21, %forinc6.i.i25 ]		; <i32> [#uses=2]
+	br i1 false, label %forinc6.i.i25, label %forinc.i.i11
+
+forinc.i.i11:		; preds = %forcond1.backedge.i.i20, %forcond1.preheader.i.i7
+	%p_87.addr.02.i.i8 = phi i32 [ %p_87.addr.15.i.i5, %forcond1.preheader.i.i7 ], [ %p_87.addr.0.be.i.i18, %forcond1.backedge.i.i20 ]		; <i32> [#uses=1]
+	%conv.i.i9 = trunc i32 %p_87.addr.02.i.i8 to i8		; <i8> [#uses=1]
+	br i1 false, label %land_rhs3.i.i.i14, label %lor_rhs.i.i.i17
+
+land_rhs3.i.i.i14:		; preds = %forinc.i.i11
+	br i1 false, label %forcond1.backedge.i.i20, label %lor_rhs.i.i.i17
+
+lor_rhs.i.i.i17:		; preds = %land_rhs3.i.i.i14, %forinc.i.i11
+	%conv29.i.i.i15 = sext i8 %conv.i.i9 to i32		; <i32> [#uses=1]
+	%add.i.i.i16 = add i32 %conv29.i.i.i15, 1		; <i32> [#uses=1]
+	br label %forcond1.backedge.i.i20
+
+forcond1.backedge.i.i20:		; preds = %lor_rhs.i.i.i17, %land_rhs3.i.i.i14
+	%p_87.addr.0.be.i.i18 = phi i32 [ %add.i.i.i16, %lor_rhs.i.i.i17 ], [ 0, %land_rhs3.i.i.i14 ]		; <i32> [#uses=3]
+	%tobool3.i.i19 = icmp eq i32 %p_87.addr.0.be.i.i18, 0		; <i1> [#uses=1]
+	br i1 %tobool3.i.i19, label %forinc6.i.i25, label %forinc.i.i11
+
+forinc6.i.i25:		; preds = %forcond1.backedge.i.i20, %forcond1.preheader.i.i7
+	%p_87.addr.0.lcssa.i.i21 = phi i32 [ %p_87.addr.15.i.i5, %forcond1.preheader.i.i7 ], [ %p_87.addr.0.be.i.i18, %forcond1.backedge.i.i20 ]		; <i32> [#uses=1]
+	%conv.i.i.i22 = and i32 %p_86.addr.06.i.i4, 255		; <i32> [#uses=1]
+	%sub.i.i.i23 = add i32 %conv.i.i.i22, -1		; <i32> [#uses=2]
+	%phitmp.i.i24 = icmp eq i32 %sub.i.i.i23, 0		; <i1> [#uses=1]
+	br i1 %phitmp.i.i24, label %func_106.exit27, label %forcond1.preheader.i.i7
+
+func_106.exit27:		; preds = %forinc6.i.i25
+	%cmp = icmp ne i32 %tmp2.i, 1		; <i1> [#uses=3]
+	%cmp.ext = zext i1 %cmp to i32		; <i32> [#uses=1]
+	br i1 %cmp, label %safe_mod_int16_t_s_s.exit, label %lor_rhs.i
+
+lor_rhs.i:		; preds = %func_106.exit27
+	%tobool.i = xor i1 %cmp, true		; <i1> [#uses=1]
+	%or.cond.i = or i1 false, %tobool.i		; <i1> [#uses=1]
+	br i1 %or.cond.i, label %ifend.i, label %safe_mod_int16_t_s_s.exit
+
+ifend.i:		; preds = %lor_rhs.i
+	%conv6.i = sext i16 %conv to i32		; <i32> [#uses=1]
+	%rem.i = urem i32 %conv6.i, %cmp.ext		; <i32> [#uses=1]
+	%conv8.i = trunc i32 %rem.i to i16		; <i16> [#uses=1]
+	br label %safe_mod_int16_t_s_s.exit
+
+safe_mod_int16_t_s_s.exit:		; preds = %ifend.i, %lor_rhs.i, %func_106.exit27
+	%call31 = phi i16 [ %conv8.i, %ifend.i ], [ %conv, %func_106.exit27 ], [ %conv, %lor_rhs.i ]		; <i16> [#uses=1]
+	%conv4 = sext i16 %call31 to i32		; <i32> [#uses=1]
+	%call5 = tail call i32 (...)* @func_104( i32 %conv4 )		; <i32> [#uses=0]
+	ret i32 undef
+}
+
+declare i32 @func_104(...)
diff --git a/test/CodeGen/X86/2008-09-11-CoalescerBug.ll b/test/CodeGen/X86/2008-09-11-CoalescerBug.ll
new file mode 100644
index 0000000..108f243
--- /dev/null
+++ b/test/CodeGen/X86/2008-09-11-CoalescerBug.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -march=x86
+; PR2783
+
+@g_15 = external global i16		; <i16*> [#uses=2]
+
+define i32 @func_3(i32 %p_5) nounwind {
+entry:
+	%0 = srem i32 1, 0		; <i32> [#uses=2]
+	%1 = load i16* @g_15, align 2		; <i16> [#uses=1]
+	%2 = zext i16 %1 to i32		; <i32> [#uses=1]
+	%3 = and i32 %2, 1		; <i32> [#uses=1]
+	%4 = tail call i32 (...)* @rshift_u_s( i32 1 ) nounwind		; <i32> [#uses=1]
+	%5 = icmp slt i32 %4, 2		; <i1> [#uses=1]
+	%6 = zext i1 %5 to i32		; <i32> [#uses=1]
+	%7 = icmp sge i32 %3, %6		; <i1> [#uses=1]
+	%8 = zext i1 %7 to i32		; <i32> [#uses=1]
+	%9 = load i16* @g_15, align 2		; <i16> [#uses=1]
+	%10 = icmp eq i16 %9, 0		; <i1> [#uses=1]
+	%11 = zext i1 %10 to i32		; <i32> [#uses=1]
+	%12 = tail call i32 (...)* @func_20( i32 1 ) nounwind		; <i32> [#uses=1]
+	%13 = icmp sge i32 %11, %12		; <i1> [#uses=1]
+	%14 = zext i1 %13 to i32		; <i32> [#uses=1]
+	%15 = sub i32 %8, %14		; <i32> [#uses=1]
+	%16 = icmp ult i32 %15, 2		; <i1> [#uses=1]
+	%17 = zext i1 %16 to i32		; <i32> [#uses=1]
+	%18 = icmp ugt i32 %0, 3		; <i1> [#uses=1]
+	%or.cond = or i1 false, %18		; <i1> [#uses=1]
+	%19 = select i1 %or.cond, i32 0, i32 %0		; <i32> [#uses=1]
+	%.0 = lshr i32 %17, %19		; <i32> [#uses=1]
+	%20 = tail call i32 (...)* @func_7( i32 %.0 ) nounwind		; <i32> [#uses=0]
+	ret i32 undef
+}
+
+declare i32 @rshift_u_s(...)
+
+declare i32 @func_20(...)
+
+declare i32 @func_7(...)
diff --git a/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll b/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
new file mode 100644
index 0000000..534f990
--- /dev/null
+++ b/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -march=x86
+; PR2748
+
+@g_73 = external global i32		; <i32*> [#uses=1]
+@g_5 = external global i32		; <i32*> [#uses=1]
+
+define i32 @func_44(i16 signext %p_46) nounwind {
+entry:
+	%0 = load i32* @g_5, align 4		; <i32> [#uses=1]
+	%1 = ashr i32 %0, 1		; <i32> [#uses=1]
+	%2 = icmp sgt i32 %1, 1		; <i1> [#uses=1]
+	%3 = zext i1 %2 to i32		; <i32> [#uses=1]
+	%4 = load i32* @g_73, align 4		; <i32> [#uses=1]
+	%5 = zext i16 %p_46 to i64		; <i64> [#uses=1]
+	%6 = sub i64 0, %5		; <i64> [#uses=1]
+	%7 = trunc i64 %6 to i8		; <i8> [#uses=2]
+	%8 = trunc i32 %4 to i8		; <i8> [#uses=2]
+	%9 = icmp eq i8 %8, 0		; <i1> [#uses=1]
+	br i1 %9, label %bb11, label %bb12
+
+bb11:		; preds = %entry
+	%10 = urem i8 %7, %8		; <i8> [#uses=1]
+	br label %bb12
+
+bb12:		; preds = %bb11, %entry
+	%.014.in = phi i8 [ %10, %bb11 ], [ %7, %entry ]		; <i8> [#uses=1]
+	%11 = icmp ne i8 %.014.in, 0		; <i1> [#uses=1]
+	%12 = zext i1 %11 to i32		; <i32> [#uses=1]
+	%13 = tail call i32 (...)* @func_48( i32 %12, i32 %3, i32 0 ) nounwind		; <i32> [#uses=0]
+	ret i32 undef
+}
+
+declare i32 @func_48(...)
diff --git a/test/CodeGen/X86/2008-09-17-inline-asm-1.ll b/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
new file mode 100644
index 0000000..74429c3
--- /dev/null
+++ b/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=x86 | not grep "movl %eax, %eax"
+; RUN: llc < %s -march=x86 | not grep "movl %edx, %edx"
+; RUN: llc < %s -march=x86 | not grep "movl (%eax), %eax"
+; RUN: llc < %s -march=x86 | not grep "movl (%edx), %edx"
+; RUN: llc < %s -march=x86 -regalloc=local | not grep "movl %eax, %eax"
+; RUN: llc < %s -march=x86 -regalloc=local | not grep "movl %edx, %edx"
+; RUN: llc < %s -march=x86 -regalloc=local | not grep "movl (%eax), %eax"
+; RUN: llc < %s -march=x86 -regalloc=local | not grep "movl (%edx), %edx"
+
+; %0 must not be put in EAX or EDX.
+; In the first asm, $0 and $2 must not be put in EAX.
+; In the second asm, $0 and $2 must not be put in EDX.
+; This is kind of hard to test thoroughly, but the things above should continue
+; to pass, I think.
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+@x = common global i32 0		; <i32*> [#uses=1]
+
+define i32 @aci(i32* %pw) nounwind {
+entry:
+	%0 = load i32* @x, align 4		; <i32> [#uses=1]
+	%asmtmp = tail call { i32, i32 } asm "movl $0, %eax\0A\090:\0A\09test %eax, %eax\0A\09je 1f\0A\09movl %eax, $2\0A\09incl $2\0A\09lock\0A\09cmpxchgl $2, $0\0A\09jne 0b\0A\091:", "=*m,=&{ax},=&r,*m,~{dirflag},~{fpsr},~{flags},~{memory},~{cc}"(i32* %pw, i32* %pw) nounwind		; <{ i32, i32 }> [#uses=0]
+	%asmtmp2 = tail call { i32, i32 } asm "movl $0, %edx\0A\090:\0A\09test %edx, %edx\0A\09je 1f\0A\09movl %edx, $2\0A\09incl $2\0A\09lock\0A\09cmpxchgl $2, $0\0A\09jne 0b\0A\091:", "=*m,=&{dx},=&r,*m,~{dirflag},~{fpsr},~{flags},~{memory},~{cc}"(i32* %pw, i32* %pw) nounwind		; <{ i32, i32 }> [#uses=1]
+	%asmresult3 = extractvalue { i32, i32 } %asmtmp2, 0		; <i32> [#uses=1]
+	%1 = add i32 %asmresult3, %0		; <i32> [#uses=1]
+	ret i32 %1
+}
diff --git a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
new file mode 100644
index 0000000..e3b6fdf
--- /dev/null
+++ b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=x86 | grep "#%ebp %esi %edi 8(%edx) %eax (%ebx)"
+; RUN: llc < %s -march=x86 -regalloc=local | grep "#%edi %ebp %edx 8(%ebx) %eax (%esi)"
+; The 1st, 2nd, 3rd and 5th registers above must all be different.  The registers
+; referenced in the 4th and 6th operands must not be the same as the 1st or 5th
+; operand.  There are many combinations that work; this is what llc puts out now.
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+	%struct.foo = type { i32, i32, i8* }
+
+define i32 @get(%struct.foo* %c, i8* %state) nounwind {
+entry:
+	%0 = getelementptr %struct.foo* %c, i32 0, i32 0		; <i32*> [#uses=2]
+	%1 = getelementptr %struct.foo* %c, i32 0, i32 1		; <i32*> [#uses=2]
+	%2 = getelementptr %struct.foo* %c, i32 0, i32 2		; <i8**> [#uses=2]
+	%3 = load i32* %0, align 4		; <i32> [#uses=1]
+	%4 = load i32* %1, align 4		; <i32> [#uses=1]
+	%5 = load i8* %state, align 1		; <i8> [#uses=1]
+	%asmtmp = tail call { i32, i32, i32, i32 } asm sideeffect "#$0 $1 $2 $3 $4 $5", "=&r,=r,=r,=*m,=&q,=*imr,1,2,*m,5,~{dirflag},~{fpsr},~{flags},~{cx}"(i8** %2, i8* %state, i32 %3, i32 %4, i8** %2, i8 %5) nounwind		; <{ i32, i32, i32, i32 }> [#uses=3]
+	%asmresult = extractvalue { i32, i32, i32, i32 } %asmtmp, 0		; <i32> [#uses=1]
+	%asmresult1 = extractvalue { i32, i32, i32, i32 } %asmtmp, 1		; <i32> [#uses=1]
+	store i32 %asmresult1, i32* %0
+	%asmresult2 = extractvalue { i32, i32, i32, i32 } %asmtmp, 2		; <i32> [#uses=1]
+	store i32 %asmresult2, i32* %1
+	ret i32 %asmresult
+}
diff --git a/test/CodeGen/X86/2008-09-19-RegAllocBug.ll b/test/CodeGen/X86/2008-09-19-RegAllocBug.ll
new file mode 100644
index 0000000..a8f2912
--- /dev/null
+++ b/test/CodeGen/X86/2008-09-19-RegAllocBug.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin
+; PR2808
+
+@g_3 = external global i32		; <i32*> [#uses=1]
+
+define i32 @func_4() nounwind {
+entry:
+	%0 = load i32* @g_3, align 4		; <i32> [#uses=2]
+	%1 = trunc i32 %0 to i8		; <i8> [#uses=1]
+	%2 = sub i8 1, %1		; <i8> [#uses=1]
+	%3 = sext i8 %2 to i32		; <i32> [#uses=1]
+	%.0 = ashr i32 %3, select (i1 icmp ne (i8 zext (i1 icmp ugt (i32 ptrtoint (i32 ()* @func_4 to i32), i32 3) to i8), i8 0), i32 0, i32 ptrtoint (i32 ()* @func_4 to i32))		; <i32> [#uses=1]
+	%4 = urem i32 %0, %.0		; <i32> [#uses=1]
+	%5 = icmp eq i32 %4, 0		; <i1> [#uses=1]
+	br i1 %5, label %return, label %bb4
+
+bb4:		; preds = %entry
+	ret i32 undef
+
+return:		; preds = %entry
+	ret i32 undef
+}
diff --git a/test/CodeGen/X86/2008-09-25-sseregparm-1.ll b/test/CodeGen/X86/2008-09-25-sseregparm-1.ll
new file mode 100644
index 0000000..c92a8f4
--- /dev/null
+++ b/test/CodeGen/X86/2008-09-25-sseregparm-1.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movs | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep fld | count 2
+; check 'inreg' attribute for sse_regparm
+
+define double @foo1() inreg nounwind {
+  ret double 1.0
+}
+
+define float @foo2() inreg nounwind {
+  ret float 1.0
+}
+
+define double @bar() nounwind {
+  ret double 1.0
+}
+
+define float @bar2() nounwind {
+  ret float 1.0
+}
diff --git a/test/CodeGen/X86/2008-09-26-FrameAddrBug.ll b/test/CodeGen/X86/2008-09-26-FrameAddrBug.ll
new file mode 100644
index 0000000..f1ada28
--- /dev/null
+++ b/test/CodeGen/X86/2008-09-26-FrameAddrBug.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin9
+
+	%struct._Unwind_Context = type { [18 x i8*], i8*, i8*, i8*, %struct.dwarf_eh_bases, i32, i32, i32, [18 x i8] }
+	%struct._Unwind_Exception = type { i64, void (i32, %struct._Unwind_Exception*)*, i32, i32, [3 x i32] }
+	%struct.dwarf_eh_bases = type { i8*, i8*, i8* }
+
+declare fastcc void @uw_init_context_1(%struct._Unwind_Context*, i8*, i8*)
+
+declare i8* @llvm.eh.dwarf.cfa(i32) nounwind
+
+define hidden void @_Unwind_Resume(%struct._Unwind_Exception* %exc) noreturn noreturn {
+entry:
+	%0 = call i8* @llvm.eh.dwarf.cfa(i32 0)		; <i8*> [#uses=1]
+	call fastcc void @uw_init_context_1(%struct._Unwind_Context* null, i8* %0, i8* null)
+	unreachable
+}
diff --git a/test/CodeGen/X86/2008-09-29-ReMatBug.ll b/test/CodeGen/X86/2008-09-29-ReMatBug.ll
new file mode 100644
index 0000000..c36cf39
--- /dev/null
+++ b/test/CodeGen/X86/2008-09-29-ReMatBug.ll
@@ -0,0 +1,85 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic -disable-fp-elim
+
+	%struct..0objc_selector = type opaque
+	%struct.NSString = type opaque
+	%struct.XCStringList = type { i32, %struct._XCStringListNode* }
+	%struct._XCStringListNode = type { [3 x i8], [0 x i8], i8 }
+	%struct.__builtin_CFString = type { i32*, i32, i8*, i32 }
+internal constant %struct.__builtin_CFString { i32* getelementptr ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr ([3 x i8]* @"\01LC", i32 0, i32 0), i32 2 }		; <%struct.__builtin_CFString*>:0 [#uses=1]
+@__CFConstantStringClassReference = external global [0 x i32]		; <[0 x i32]*> [#uses=1]
+@"\01LC" = internal constant [3 x i8] c"NO\00"		; <[3 x i8]*> [#uses=1]
+@"\01LC1" = internal constant [1 x i8] zeroinitializer		; <[1 x i8]*> [#uses=1]
+@llvm.used1 = appending global [1 x i8*] [ i8* bitcast (%struct.NSString* (%struct.XCStringList*, %struct..0objc_selector*)* @"-[XCStringList stringRepresentation]" to i8*) ], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define %struct.NSString* @"-[XCStringList stringRepresentation]"(%struct.XCStringList* %self, %struct..0objc_selector* %_cmd) nounwind {
+entry:
+	%0 = load i32* null, align 4		; <i32> [#uses=1]
+	%1 = and i32 %0, 16777215		; <i32> [#uses=1]
+	%2 = icmp eq i32 %1, 0		; <i1> [#uses=1]
+	br i1 %2, label %bb44, label %bb4
+
+bb4:		; preds = %entry
+	%3 = load %struct._XCStringListNode** null, align 4		; <%struct._XCStringListNode*> [#uses=2]
+	%4 = icmp eq %struct._XCStringListNode* %3, null		; <i1> [#uses=1]
+	%5 = bitcast %struct._XCStringListNode* %3 to i32*		; <i32*> [#uses=1]
+	br label %bb37.outer
+
+bb6:		; preds = %bb37
+	br label %bb19
+
+bb19:		; preds = %bb37, %bb6
+	%.rle = phi i32 [ 0, %bb6 ], [ %10, %bb37 ]		; <i32> [#uses=1]
+	%bufptr.0.lcssa = phi i8* [ null, %bb6 ], [ null, %bb37 ]		; <i8*> [#uses=2]
+	%6 = and i32 %.rle, 16777215		; <i32> [#uses=1]
+	%7 = icmp eq i32 %6, 0		; <i1> [#uses=1]
+	br i1 %7, label %bb25.split, label %bb37
+
+bb25.split:		; preds = %bb19
+	call void @foo(i8* getelementptr ([1 x i8]* @"\01LC1", i32 0, i32 0)) nounwind nounwind
+	br label %bb35.outer
+
+bb34:		; preds = %bb35, %bb35, %bb35, %bb35
+	%8 = getelementptr i8* %bufptr.0.lcssa, i32 %totalLength.0.ph		; <i8*> [#uses=1]
+	store i8 92, i8* %8, align 1
+	br label %bb35.outer
+
+bb35.outer:		; preds = %bb34, %bb25.split
+	%totalLength.0.ph = add i32 0, %totalLength.1.ph		; <i32> [#uses=2]
+	br label %bb35
+
+bb35:		; preds = %bb35, %bb35.outer
+	%9 = load i8* null, align 1		; <i8> [#uses=1]
+	switch i8 %9, label %bb35 [
+		i8 0, label %bb37.outer
+		i8 32, label %bb34
+		i8 92, label %bb34
+		i8 34, label %bb34
+		i8 39, label %bb34
+	]
+
+bb37.outer:		; preds = %bb35, %bb4
+	%totalLength.1.ph = phi i32 [ 0, %bb4 ], [ %totalLength.0.ph, %bb35 ]		; <i32> [#uses=1]
+	%bufptr.1.ph = phi i8* [ null, %bb4 ], [ %bufptr.0.lcssa, %bb35 ]		; <i8*> [#uses=2]
+	br i1 %4, label %bb39.split, label %bb37
+
+bb37:		; preds = %bb37.outer, %bb19
+	%10 = load i32* %5, align 4		; <i32> [#uses=1]
+	br i1 false, label %bb6, label %bb19
+
+bb39.split:		; preds = %bb37.outer
+	%11 = bitcast i8* null to %struct.NSString*		; <%struct.NSString*> [#uses=2]
+	%12 = icmp eq i8* null, %bufptr.1.ph		; <i1> [#uses=1]
+	br i1 %12, label %bb44, label %bb42
+
+bb42:		; preds = %bb39.split
+	call void @quux(i8* %bufptr.1.ph) nounwind nounwind
+	ret %struct.NSString* %11
+
+bb44:		; preds = %bb39.split, %entry
+	%.0 = phi %struct.NSString* [ bitcast (%struct.__builtin_CFString* @0 to %struct.NSString*), %entry ], [ %11, %bb39.split ]		; <%struct.NSString*> [#uses=1]
+	ret %struct.NSString* %.0
+}
+
+declare void @foo(i8*)
+
+declare void @quux(i8*)
diff --git a/test/CodeGen/X86/2008-09-29-VolatileBug.ll b/test/CodeGen/X86/2008-09-29-VolatileBug.ll
new file mode 100644
index 0000000..935c4c5
--- /dev/null
+++ b/test/CodeGen/X86/2008-09-29-VolatileBug.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 | not grep movz
+; PR2835
+
+@g_407 = internal global i32 0		; <i32*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [ i8* bitcast (i32 ()* @main to i8*) ], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define i32 @main() nounwind {
+entry:
+	%0 = volatile load i32* @g_407, align 4		; <i32> [#uses=1]
+	%1 = trunc i32 %0 to i8		; <i8> [#uses=1]
+	%2 = tail call i32 @func_45(i8 zeroext %1) nounwind		; <i32> [#uses=0]
+	ret i32 0
+}
+
+declare i32 @func_45(i8 zeroext) nounwind
diff --git a/test/CodeGen/X86/2008-10-02-Atomics32-2.ll b/test/CodeGen/X86/2008-10-02-Atomics32-2.ll
new file mode 100644
index 0000000..b48c4ad
--- /dev/null
+++ b/test/CodeGen/X86/2008-10-02-Atomics32-2.ll
@@ -0,0 +1,969 @@
+; RUN: llc < %s -march=x86 > %t
+;; This version includes 64-bit version of binary operators (in 32-bit mode).
+;; Swap, cmp-and-swap not supported yet in this mode.
+; ModuleID = 'Atomics.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+@sc = common global i8 0		; <i8*> [#uses=52]
+@uc = common global i8 0		; <i8*> [#uses=112]
+@ss = common global i16 0		; <i16*> [#uses=15]
+@us = common global i16 0		; <i16*> [#uses=15]
+@si = common global i32 0		; <i32*> [#uses=15]
+@ui = common global i32 0		; <i32*> [#uses=23]
+@sl = common global i32 0		; <i32*> [#uses=15]
+@ul = common global i32 0		; <i32*> [#uses=15]
+@sll = common global i64 0, align 8		; <i64*> [#uses=13]
+@ull = common global i64 0, align 8		; <i64*> [#uses=13]
+
+define void @test_op_ignore() nounwind {
+entry:
+	%0 = call i8 @llvm.atomic.load.add.i8.p0i8(i8* @sc, i8 1)		; <i8> [#uses=0]
+	%1 = call i8 @llvm.atomic.load.add.i8.p0i8(i8* @uc, i8 1)		; <i8> [#uses=0]
+	%2 = bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*> [#uses=1]
+	%3 = call i16 @llvm.atomic.load.add.i16.p0i16(i16* %2, i16 1)		; <i16> [#uses=0]
+	%4 = bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*> [#uses=1]
+	%5 = call i16 @llvm.atomic.load.add.i16.p0i16(i16* %4, i16 1)		; <i16> [#uses=0]
+	%6 = bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*> [#uses=1]
+	%7 = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %6, i32 1)		; <i32> [#uses=0]
+	%8 = bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*> [#uses=1]
+	%9 = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %8, i32 1)		; <i32> [#uses=0]
+	%10 = bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*> [#uses=1]
+	%11 = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %10, i32 1)		; <i32> [#uses=0]
+	%12 = bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*> [#uses=1]
+	%13 = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %12, i32 1)		; <i32> [#uses=0]
+	%14 = bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*> [#uses=1]
+	%15 = call i64 @llvm.atomic.load.add.i64.p0i64(i64* %14, i64 1)		; <i64> [#uses=0]
+	%16 = bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*> [#uses=1]
+	%17 = call i64 @llvm.atomic.load.add.i64.p0i64(i64* %16, i64 1)		; <i64> [#uses=0]
+	%18 = call i8 @llvm.atomic.load.sub.i8.p0i8(i8* @sc, i8 1)		; <i8> [#uses=0]
+	%19 = call i8 @llvm.atomic.load.sub.i8.p0i8(i8* @uc, i8 1)		; <i8> [#uses=0]
+	%20 = bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*> [#uses=1]
+	%21 = call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %20, i16 1)		; <i16> [#uses=0]
+	%22 = bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*> [#uses=1]
+	%23 = call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %22, i16 1)		; <i16> [#uses=0]
+	%24 = bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*> [#uses=1]
+	%25 = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %24, i32 1)		; <i32> [#uses=0]
+	%26 = bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*> [#uses=1]
+	%27 = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %26, i32 1)		; <i32> [#uses=0]
+	%28 = bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*> [#uses=1]
+	%29 = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %28, i32 1)		; <i32> [#uses=0]
+	%30 = bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*> [#uses=1]
+	%31 = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %30, i32 1)		; <i32> [#uses=0]
+	%32 = bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*> [#uses=1]
+	%33 = call i64 @llvm.atomic.load.sub.i64.p0i64(i64* %32, i64 1)		; <i64> [#uses=0]
+	%34 = bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*> [#uses=1]
+	%35 = call i64 @llvm.atomic.load.sub.i64.p0i64(i64* %34, i64 1)		; <i64> [#uses=0]
+	%36 = call i8 @llvm.atomic.load.or.i8.p0i8(i8* @sc, i8 1)		; <i8> [#uses=0]
+	%37 = call i8 @llvm.atomic.load.or.i8.p0i8(i8* @uc, i8 1)		; <i8> [#uses=0]
+	%38 = bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*> [#uses=1]
+	%39 = call i16 @llvm.atomic.load.or.i16.p0i16(i16* %38, i16 1)		; <i16> [#uses=0]
+	%40 = bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*> [#uses=1]
+	%41 = call i16 @llvm.atomic.load.or.i16.p0i16(i16* %40, i16 1)		; <i16> [#uses=0]
+	%42 = bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*> [#uses=1]
+	%43 = call i32 @llvm.atomic.load.or.i32.p0i32(i32* %42, i32 1)		; <i32> [#uses=0]
+	%44 = bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*> [#uses=1]
+	%45 = call i32 @llvm.atomic.load.or.i32.p0i32(i32* %44, i32 1)		; <i32> [#uses=0]
+	%46 = bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*> [#uses=1]
+	%47 = call i32 @llvm.atomic.load.or.i32.p0i32(i32* %46, i32 1)		; <i32> [#uses=0]
+	%48 = bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*> [#uses=1]
+	%49 = call i32 @llvm.atomic.load.or.i32.p0i32(i32* %48, i32 1)		; <i32> [#uses=0]
+	%50 = bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*> [#uses=1]
+	%51 = call i64 @llvm.atomic.load.or.i64.p0i64(i64* %50, i64 1)		; <i64> [#uses=0]
+	%52 = bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*> [#uses=1]
+	%53 = call i64 @llvm.atomic.load.or.i64.p0i64(i64* %52, i64 1)		; <i64> [#uses=0]
+	%54 = call i8 @llvm.atomic.load.xor.i8.p0i8(i8* @sc, i8 1)		; <i8> [#uses=0]
+	%55 = call i8 @llvm.atomic.load.xor.i8.p0i8(i8* @uc, i8 1)		; <i8> [#uses=0]
+	%56 = bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*> [#uses=1]
+	%57 = call i16 @llvm.atomic.load.xor.i16.p0i16(i16* %56, i16 1)		; <i16> [#uses=0]
+	%58 = bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*> [#uses=1]
+	%59 = call i16 @llvm.atomic.load.xor.i16.p0i16(i16* %58, i16 1)		; <i16> [#uses=0]
+	%60 = bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*> [#uses=1]
+	%61 = call i32 @llvm.atomic.load.xor.i32.p0i32(i32* %60, i32 1)		; <i32> [#uses=0]
+	%62 = bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*> [#uses=1]
+	%63 = call i32 @llvm.atomic.load.xor.i32.p0i32(i32* %62, i32 1)		; <i32> [#uses=0]
+	%64 = bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*> [#uses=1]
+	%65 = call i32 @llvm.atomic.load.xor.i32.p0i32(i32* %64, i32 1)		; <i32> [#uses=0]
+	%66 = bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*> [#uses=1]
+	%67 = call i32 @llvm.atomic.load.xor.i32.p0i32(i32* %66, i32 1)		; <i32> [#uses=0]
+	%68 = bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*> [#uses=1]
+	%69 = call i64 @llvm.atomic.load.xor.i64.p0i64(i64* %68, i64 1)		; <i64> [#uses=0]
+	%70 = bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*> [#uses=1]
+	%71 = call i64 @llvm.atomic.load.xor.i64.p0i64(i64* %70, i64 1)		; <i64> [#uses=0]
+	%72 = call i8 @llvm.atomic.load.and.i8.p0i8(i8* @sc, i8 1)		; <i8> [#uses=0]
+	%73 = call i8 @llvm.atomic.load.and.i8.p0i8(i8* @uc, i8 1)		; <i8> [#uses=0]
+	%74 = bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*> [#uses=1]
+	%75 = call i16 @llvm.atomic.load.and.i16.p0i16(i16* %74, i16 1)		; <i16> [#uses=0]
+	%76 = bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*> [#uses=1]
+	%77 = call i16 @llvm.atomic.load.and.i16.p0i16(i16* %76, i16 1)		; <i16> [#uses=0]
+	%78 = bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*> [#uses=1]
+	%79 = call i32 @llvm.atomic.load.and.i32.p0i32(i32* %78, i32 1)		; <i32> [#uses=0]
+	%80 = bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*> [#uses=1]
+	%81 = call i32 @llvm.atomic.load.and.i32.p0i32(i32* %80, i32 1)		; <i32> [#uses=0]
+	%82 = bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*> [#uses=1]
+	%83 = call i32 @llvm.atomic.load.and.i32.p0i32(i32* %82, i32 1)		; <i32> [#uses=0]
+	%84 = bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*> [#uses=1]
+	%85 = call i32 @llvm.atomic.load.and.i32.p0i32(i32* %84, i32 1)		; <i32> [#uses=0]
+	%86 = bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*> [#uses=1]
+	%87 = call i64 @llvm.atomic.load.and.i64.p0i64(i64* %86, i64 1)		; <i64> [#uses=0]
+	%88 = bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*> [#uses=1]
+	%89 = call i64 @llvm.atomic.load.and.i64.p0i64(i64* %88, i64 1)		; <i64> [#uses=0]
+	%90 = call i8 @llvm.atomic.load.nand.i8.p0i8(i8* @sc, i8 1)		; <i8> [#uses=0]
+	%91 = call i8 @llvm.atomic.load.nand.i8.p0i8(i8* @uc, i8 1)		; <i8> [#uses=0]
+	%92 = bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*> [#uses=1]
+	%93 = call i16 @llvm.atomic.load.nand.i16.p0i16(i16* %92, i16 1)		; <i16> [#uses=0]
+	%94 = bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*> [#uses=1]
+	%95 = call i16 @llvm.atomic.load.nand.i16.p0i16(i16* %94, i16 1)		; <i16> [#uses=0]
+	%96 = bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*> [#uses=1]
+	%97 = call i32 @llvm.atomic.load.nand.i32.p0i32(i32* %96, i32 1)		; <i32> [#uses=0]
+	%98 = bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*> [#uses=1]
+	%99 = call i32 @llvm.atomic.load.nand.i32.p0i32(i32* %98, i32 1)		; <i32> [#uses=0]
+	%100 = bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*> [#uses=1]
+	%101 = call i32 @llvm.atomic.load.nand.i32.p0i32(i32* %100, i32 1)		; <i32> [#uses=0]
+	%102 = bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*> [#uses=1]
+	%103 = call i32 @llvm.atomic.load.nand.i32.p0i32(i32* %102, i32 1)		; <i32> [#uses=0]
+	%104 = bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*> [#uses=1]
+	%105 = call i64 @llvm.atomic.load.nand.i64.p0i64(i64* %104, i64 1)		; <i64> [#uses=0]
+	%106 = bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*> [#uses=1]
+	%107 = call i64 @llvm.atomic.load.nand.i64.p0i64(i64* %106, i64 1)		; <i64> [#uses=0]
+	br label %return
+
+return:		; preds = %entry
+	ret void
+}
+
+declare i8 @llvm.atomic.load.add.i8.p0i8(i8*, i8) nounwind
+
+declare i16 @llvm.atomic.load.add.i16.p0i16(i16*, i16) nounwind
+
+declare i32 @llvm.atomic.load.add.i32.p0i32(i32*, i32) nounwind
+
+declare i64 @llvm.atomic.load.add.i64.p0i64(i64*, i64) nounwind
+
+declare i8 @llvm.atomic.load.sub.i8.p0i8(i8*, i8) nounwind
+
+declare i16 @llvm.atomic.load.sub.i16.p0i16(i16*, i16) nounwind
+
+declare i32 @llvm.atomic.load.sub.i32.p0i32(i32*, i32) nounwind
+
+declare i64 @llvm.atomic.load.sub.i64.p0i64(i64*, i64) nounwind
+
+declare i8 @llvm.atomic.load.or.i8.p0i8(i8*, i8) nounwind
+
+declare i16 @llvm.atomic.load.or.i16.p0i16(i16*, i16) nounwind
+
+declare i32 @llvm.atomic.load.or.i32.p0i32(i32*, i32) nounwind
+
+declare i64 @llvm.atomic.load.or.i64.p0i64(i64*, i64) nounwind
+
+declare i8 @llvm.atomic.load.xor.i8.p0i8(i8*, i8) nounwind
+
+declare i16 @llvm.atomic.load.xor.i16.p0i16(i16*, i16) nounwind
+
+declare i32 @llvm.atomic.load.xor.i32.p0i32(i32*, i32) nounwind
+
+declare i64 @llvm.atomic.load.xor.i64.p0i64(i64*, i64) nounwind
+
+declare i8 @llvm.atomic.load.and.i8.p0i8(i8*, i8) nounwind
+
+declare i16 @llvm.atomic.load.and.i16.p0i16(i16*, i16) nounwind
+
+declare i32 @llvm.atomic.load.and.i32.p0i32(i32*, i32) nounwind
+
+declare i64 @llvm.atomic.load.and.i64.p0i64(i64*, i64) nounwind
+
+declare i8 @llvm.atomic.load.nand.i8.p0i8(i8*, i8) nounwind
+
+declare i16 @llvm.atomic.load.nand.i16.p0i16(i16*, i16) nounwind
+
+declare i32 @llvm.atomic.load.nand.i32.p0i32(i32*, i32) nounwind
+
+declare i64 @llvm.atomic.load.nand.i64.p0i64(i64*, i64) nounwind
+
+define void @test_fetch_and_op() nounwind {
+entry:
+	%0 = call i8 @llvm.atomic.load.add.i8.p0i8(i8* @sc, i8 11)		; <i8> [#uses=1]
+	store i8 %0, i8* @sc, align 1
+	%1 = call i8 @llvm.atomic.load.add.i8.p0i8(i8* @uc, i8 11)		; <i8> [#uses=1]
+	store i8 %1, i8* @uc, align 1
+	%2 = bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*> [#uses=1]
+	%3 = call i16 @llvm.atomic.load.add.i16.p0i16(i16* %2, i16 11)		; <i16> [#uses=1]
+	store i16 %3, i16* @ss, align 2
+	%4 = bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*> [#uses=1]
+	%5 = call i16 @llvm.atomic.load.add.i16.p0i16(i16* %4, i16 11)		; <i16> [#uses=1]
+	store i16 %5, i16* @us, align 2
+	%6 = bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*> [#uses=1]
+	%7 = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %6, i32 11)		; <i32> [#uses=1]
+	store i32 %7, i32* @si, align 4
+	%8 = bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*> [#uses=1]
+	%9 = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %8, i32 11)		; <i32> [#uses=1]
+	store i32 %9, i32* @ui, align 4
+	%10 = bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*> [#uses=1]
+	%11 = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %10, i32 11)		; <i32> [#uses=1]
+	store i32 %11, i32* @sl, align 4
+	%12 = bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*> [#uses=1]
+	%13 = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %12, i32 11)		; <i32> [#uses=1]
+	store i32 %13, i32* @ul, align 4
+	%14 = bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*> [#uses=1]
+	%15 = call i64 @llvm.atomic.load.add.i64.p0i64(i64* %14, i64 11)		; <i64> [#uses=1]
+	store i64 %15, i64* @sll, align 8
+	%16 = bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*> [#uses=1]
+	%17 = call i64 @llvm.atomic.load.add.i64.p0i64(i64* %16, i64 11)		; <i64> [#uses=1]
+	store i64 %17, i64* @ull, align 8
+	%18 = call i8 @llvm.atomic.load.sub.i8.p0i8(i8* @sc, i8 11)		; <i8> [#uses=1]
+	store i8 %18, i8* @sc, align 1
+	%19 = call i8 @llvm.atomic.load.sub.i8.p0i8(i8* @uc, i8 11)		; <i8> [#uses=1]
+	store i8 %19, i8* @uc, align 1
+	%20 = bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*> [#uses=1]
+	%21 = call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %20, i16 11)		; <i16> [#uses=1]
+	store i16 %21, i16* @ss, align 2
+	%22 = bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*> [#uses=1]
+	%23 = call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %22, i16 11)		; <i16> [#uses=1]
+	store i16 %23, i16* @us, align 2
+	%24 = bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*> [#uses=1]
+	%25 = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %24, i32 11)		; <i32> [#uses=1]
+	store i32 %25, i32* @si, align 4
+	%26 = bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*> [#uses=1]
+	%27 = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %26, i32 11)		; <i32> [#uses=1]
+	store i32 %27, i32* @ui, align 4
+	%28 = bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*> [#uses=1]
+	%29 = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %28, i32 11)		; <i32> [#uses=1]
+	store i32 %29, i32* @sl, align 4
+	%30 = bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*> [#uses=1]
+	%31 = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %30, i32 11)		; <i32> [#uses=1]
+	store i32 %31, i32* @ul, align 4
+	%32 = bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*> [#uses=1]
+	%33 = call i64 @llvm.atomic.load.sub.i64.p0i64(i64* %32, i64 11)		; <i64> [#uses=1]
+	store i64 %33, i64* @sll, align 8
+	%34 = bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*> [#uses=1]
+	%35 = call i64 @llvm.atomic.load.sub.i64.p0i64(i64* %34, i64 11)		; <i64> [#uses=1]
+	store i64 %35, i64* @ull, align 8
+	%36 = call i8 @llvm.atomic.load.or.i8.p0i8(i8* @sc, i8 11)		; <i8> [#uses=1]
+	store i8 %36, i8* @sc, align 1
+	%37 = call i8 @llvm.atomic.load.or.i8.p0i8(i8* @uc, i8 11)		; <i8> [#uses=1]
+	store i8 %37, i8* @uc, align 1
+	%38 = bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*> [#uses=1]
+	%39 = call i16 @llvm.atomic.load.or.i16.p0i16(i16* %38, i16 11)		; <i16> [#uses=1]
+	store i16 %39, i16* @ss, align 2
+	%40 = bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*> [#uses=1]
+	%41 = call i16 @llvm.atomic.load.or.i16.p0i16(i16* %40, i16 11)		; <i16> [#uses=1]
+	store i16 %41, i16* @us, align 2
+	%42 = bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*> [#uses=1]
+	%43 = call i32 @llvm.atomic.load.or.i32.p0i32(i32* %42, i32 11)		; <i32> [#uses=1]
+	store i32 %43, i32* @si, align 4
+	%44 = bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*> [#uses=1]
+	%45 = call i32 @llvm.atomic.load.or.i32.p0i32(i32* %44, i32 11)		; <i32> [#uses=1]
+	store i32 %45, i32* @ui, align 4
+	%46 = bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*> [#uses=1]
+	%47 = call i32 @llvm.atomic.load.or.i32.p0i32(i32* %46, i32 11)		; <i32> [#uses=1]
+	store i32 %47, i32* @sl, align 4
+	%48 = bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*> [#uses=1]
+	%49 = call i32 @llvm.atomic.load.or.i32.p0i32(i32* %48, i32 11)		; <i32> [#uses=1]
+	store i32 %49, i32* @ul, align 4
+	%50 = bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*> [#uses=1]
+	%51 = call i64 @llvm.atomic.load.or.i64.p0i64(i64* %50, i64 11)		; <i64> [#uses=1]
+	store i64 %51, i64* @sll, align 8
+	%52 = bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*> [#uses=1]
+	%53 = call i64 @llvm.atomic.load.or.i64.p0i64(i64* %52, i64 11)		; <i64> [#uses=1]
+	store i64 %53, i64* @ull, align 8
+	%54 = call i8 @llvm.atomic.load.xor.i8.p0i8(i8* @sc, i8 11)		; <i8> [#uses=1]
+	store i8 %54, i8* @sc, align 1
+	%55 = call i8 @llvm.atomic.load.xor.i8.p0i8(i8* @uc, i8 11)		; <i8> [#uses=1]
+	store i8 %55, i8* @uc, align 1
+	%56 = bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*> [#uses=1]
+	%57 = call i16 @llvm.atomic.load.xor.i16.p0i16(i16* %56, i16 11)		; <i16> [#uses=1]
+	store i16 %57, i16* @ss, align 2
+	%58 = bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*> [#uses=1]
+	%59 = call i16 @llvm.atomic.load.xor.i16.p0i16(i16* %58, i16 11)		; <i16> [#uses=1]
+	store i16 %59, i16* @us, align 2
+	%60 = bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*> [#uses=1]
+	%61 = call i32 @llvm.atomic.load.xor.i32.p0i32(i32* %60, i32 11)		; <i32> [#uses=1]
+	store i32 %61, i32* @si, align 4
+	%62 = bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*> [#uses=1]
+	%63 = call i32 @llvm.atomic.load.xor.i32.p0i32(i32* %62, i32 11)		; <i32> [#uses=1]
+	store i32 %63, i32* @ui, align 4
+	%64 = bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*> [#uses=1]
+	%65 = call i32 @llvm.atomic.load.xor.i32.p0i32(i32* %64, i32 11)		; <i32> [#uses=1]
+	store i32 %65, i32* @sl, align 4
+	%66 = bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*> [#uses=1]
+	%67 = call i32 @llvm.atomic.load.xor.i32.p0i32(i32* %66, i32 11)		; <i32> [#uses=1]
+	store i32 %67, i32* @ul, align 4
+	%68 = bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*> [#uses=1]
+	%69 = call i64 @llvm.atomic.load.xor.i64.p0i64(i64* %68, i64 11)		; <i64> [#uses=1]
+	store i64 %69, i64* @sll, align 8
+	%70 = bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*> [#uses=1]
+	%71 = call i64 @llvm.atomic.load.xor.i64.p0i64(i64* %70, i64 11)		; <i64> [#uses=1]
+	store i64 %71, i64* @ull, align 8
+	%72 = call i8 @llvm.atomic.load.and.i8.p0i8(i8* @sc, i8 11)		; <i8> [#uses=1]
+	store i8 %72, i8* @sc, align 1
+	%73 = call i8 @llvm.atomic.load.and.i8.p0i8(i8* @uc, i8 11)		; <i8> [#uses=1]
+	store i8 %73, i8* @uc, align 1
+	%74 = bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*> [#uses=1]
+	%75 = call i16 @llvm.atomic.load.and.i16.p0i16(i16* %74, i16 11)		; <i16> [#uses=1]
+	store i16 %75, i16* @ss, align 2
+	%76 = bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*> [#uses=1]
+	%77 = call i16 @llvm.atomic.load.and.i16.p0i16(i16* %76, i16 11)		; <i16> [#uses=1]
+	store i16 %77, i16* @us, align 2
+	%78 = bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*> [#uses=1]
+	%79 = call i32 @llvm.atomic.load.and.i32.p0i32(i32* %78, i32 11)		; <i32> [#uses=1]
+	store i32 %79, i32* @si, align 4
+	%80 = bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*> [#uses=1]
+	%81 = call i32 @llvm.atomic.load.and.i32.p0i32(i32* %80, i32 11)		; <i32> [#uses=1]
+	store i32 %81, i32* @ui, align 4
+	%82 = bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*> [#uses=1]
+	%83 = call i32 @llvm.atomic.load.and.i32.p0i32(i32* %82, i32 11)		; <i32> [#uses=1]
+	store i32 %83, i32* @sl, align 4
+	%84 = bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*> [#uses=1]
+	%85 = call i32 @llvm.atomic.load.and.i32.p0i32(i32* %84, i32 11)		; <i32> [#uses=1]
+	store i32 %85, i32* @ul, align 4
+	%86 = bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*> [#uses=1]
+	%87 = call i64 @llvm.atomic.load.and.i64.p0i64(i64* %86, i64 11)		; <i64> [#uses=1]
+	store i64 %87, i64* @sll, align 8
+	%88 = bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*> [#uses=1]
+	%89 = call i64 @llvm.atomic.load.and.i64.p0i64(i64* %88, i64 11)		; <i64> [#uses=1]
+	store i64 %89, i64* @ull, align 8
+	%90 = call i8 @llvm.atomic.load.nand.i8.p0i8(i8* @sc, i8 11)		; <i8> [#uses=1]
+	store i8 %90, i8* @sc, align 1
+	%91 = call i8 @llvm.atomic.load.nand.i8.p0i8(i8* @uc, i8 11)		; <i8> [#uses=1]
+	store i8 %91, i8* @uc, align 1
+	%92 = bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*> [#uses=1]
+	%93 = call i16 @llvm.atomic.load.nand.i16.p0i16(i16* %92, i16 11)		; <i16> [#uses=1]
+	store i16 %93, i16* @ss, align 2
+	%94 = bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*> [#uses=1]
+	%95 = call i16 @llvm.atomic.load.nand.i16.p0i16(i16* %94, i16 11)		; <i16> [#uses=1]
+	store i16 %95, i16* @us, align 2
+	%96 = bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*> [#uses=1]
+	%97 = call i32 @llvm.atomic.load.nand.i32.p0i32(i32* %96, i32 11)		; <i32> [#uses=1]
+	store i32 %97, i32* @si, align 4
+	%98 = bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*> [#uses=1]
+	%99 = call i32 @llvm.atomic.load.nand.i32.p0i32(i32* %98, i32 11)		; <i32> [#uses=1]
+	store i32 %99, i32* @ui, align 4
+	%100 = bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*> [#uses=1]
+	%101 = call i32 @llvm.atomic.load.nand.i32.p0i32(i32* %100, i32 11)		; <i32> [#uses=1]
+	store i32 %101, i32* @sl, align 4
+	%102 = bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*> [#uses=1]
+	%103 = call i32 @llvm.atomic.load.nand.i32.p0i32(i32* %102, i32 11)		; <i32> [#uses=1]
+	store i32 %103, i32* @ul, align 4
+	%104 = bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*> [#uses=1]
+	%105 = call i64 @llvm.atomic.load.nand.i64.p0i64(i64* %104, i64 11)		; <i64> [#uses=1]
+	store i64 %105, i64* @sll, align 8
+	%106 = bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*> [#uses=1]
+	%107 = call i64 @llvm.atomic.load.nand.i64.p0i64(i64* %106, i64 11)		; <i64> [#uses=1]
+	store i64 %107, i64* @ull, align 8
+	br label %return
+
+return:		; preds = %entry
+	ret void
+}
+
+define void @test_op_and_fetch() nounwind {
+entry:
+	%0 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%1 = zext i8 %0 to i32		; <i32> [#uses=1]
+	%2 = trunc i32 %1 to i8		; <i8> [#uses=2]
+	%3 = call i8 @llvm.atomic.load.add.i8.p0i8(i8* @sc, i8 %2)		; <i8> [#uses=1]
+	%4 = add i8 %3, %2		; <i8> [#uses=1]
+	store i8 %4, i8* @sc, align 1
+	%5 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%6 = zext i8 %5 to i32		; <i32> [#uses=1]
+	%7 = trunc i32 %6 to i8		; <i8> [#uses=2]
+	%8 = call i8 @llvm.atomic.load.add.i8.p0i8(i8* @uc, i8 %7)		; <i8> [#uses=1]
+	%9 = add i8 %8, %7		; <i8> [#uses=1]
+	store i8 %9, i8* @uc, align 1
+	%10 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%11 = zext i8 %10 to i32		; <i32> [#uses=1]
+	%12 = bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*> [#uses=1]
+	%13 = trunc i32 %11 to i16		; <i16> [#uses=2]
+	%14 = call i16 @llvm.atomic.load.add.i16.p0i16(i16* %12, i16 %13)		; <i16> [#uses=1]
+	%15 = add i16 %14, %13		; <i16> [#uses=1]
+	store i16 %15, i16* @ss, align 2
+	%16 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%17 = zext i8 %16 to i32		; <i32> [#uses=1]
+	%18 = bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*> [#uses=1]
+	%19 = trunc i32 %17 to i16		; <i16> [#uses=2]
+	%20 = call i16 @llvm.atomic.load.add.i16.p0i16(i16* %18, i16 %19)		; <i16> [#uses=1]
+	%21 = add i16 %20, %19		; <i16> [#uses=1]
+	store i16 %21, i16* @us, align 2
+	%22 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%23 = zext i8 %22 to i32		; <i32> [#uses=2]
+	%24 = bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*> [#uses=1]
+	%25 = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %24, i32 %23)		; <i32> [#uses=1]
+	%26 = add i32 %25, %23		; <i32> [#uses=1]
+	store i32 %26, i32* @si, align 4
+	%27 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%28 = zext i8 %27 to i32		; <i32> [#uses=2]
+	%29 = bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*> [#uses=1]
+	%30 = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %29, i32 %28)		; <i32> [#uses=1]
+	%31 = add i32 %30, %28		; <i32> [#uses=1]
+	store i32 %31, i32* @ui, align 4
+	%32 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%33 = zext i8 %32 to i32		; <i32> [#uses=2]
+	%34 = bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*> [#uses=1]
+	%35 = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %34, i32 %33)		; <i32> [#uses=1]
+	%36 = add i32 %35, %33		; <i32> [#uses=1]
+	store i32 %36, i32* @sl, align 4
+	%37 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%38 = zext i8 %37 to i32		; <i32> [#uses=2]
+	%39 = bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*> [#uses=1]
+	%40 = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %39, i32 %38)		; <i32> [#uses=1]
+	%41 = add i32 %40, %38		; <i32> [#uses=1]
+	store i32 %41, i32* @ul, align 4
+	%42 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%43 = zext i8 %42 to i64		; <i64> [#uses=2]
+	%44 = bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*> [#uses=1]
+	%45 = call i64 @llvm.atomic.load.add.i64.p0i64(i64* %44, i64 %43)		; <i64> [#uses=1]
+	%46 = add i64 %45, %43		; <i64> [#uses=1]
+	store i64 %46, i64* @sll, align 8
+	%47 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%48 = zext i8 %47 to i64		; <i64> [#uses=2]
+	%49 = bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*> [#uses=1]
+	%50 = call i64 @llvm.atomic.load.add.i64.p0i64(i64* %49, i64 %48)		; <i64> [#uses=1]
+	%51 = add i64 %50, %48		; <i64> [#uses=1]
+	store i64 %51, i64* @ull, align 8
+	%52 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%53 = zext i8 %52 to i32		; <i32> [#uses=1]
+	%54 = trunc i32 %53 to i8		; <i8> [#uses=2]
+	%55 = call i8 @llvm.atomic.load.sub.i8.p0i8(i8* @sc, i8 %54)		; <i8> [#uses=1]
+	%56 = sub i8 %55, %54		; <i8> [#uses=1]
+	store i8 %56, i8* @sc, align 1
+	%57 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%58 = zext i8 %57 to i32		; <i32> [#uses=1]
+	%59 = trunc i32 %58 to i8		; <i8> [#uses=2]
+	%60 = call i8 @llvm.atomic.load.sub.i8.p0i8(i8* @uc, i8 %59)		; <i8> [#uses=1]
+	%61 = sub i8 %60, %59		; <i8> [#uses=1]
+	store i8 %61, i8* @uc, align 1
+	%62 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%63 = zext i8 %62 to i32		; <i32> [#uses=1]
+	%64 = bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*> [#uses=1]
+	%65 = trunc i32 %63 to i16		; <i16> [#uses=2]
+	%66 = call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %64, i16 %65)		; <i16> [#uses=1]
+	%67 = sub i16 %66, %65		; <i16> [#uses=1]
+	store i16 %67, i16* @ss, align 2
+	%68 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%69 = zext i8 %68 to i32		; <i32> [#uses=1]
+	%70 = bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*> [#uses=1]
+	%71 = trunc i32 %69 to i16		; <i16> [#uses=2]
+	%72 = call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %70, i16 %71)		; <i16> [#uses=1]
+	%73 = sub i16 %72, %71		; <i16> [#uses=1]
+	store i16 %73, i16* @us, align 2
+	%74 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%75 = zext i8 %74 to i32		; <i32> [#uses=2]
+	%76 = bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*> [#uses=1]
+	%77 = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %76, i32 %75)		; <i32> [#uses=1]
+	%78 = sub i32 %77, %75		; <i32> [#uses=1]
+	store i32 %78, i32* @si, align 4
+	%79 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%80 = zext i8 %79 to i32		; <i32> [#uses=2]
+	%81 = bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*> [#uses=1]
+	%82 = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %81, i32 %80)		; <i32> [#uses=1]
+	%83 = sub i32 %82, %80		; <i32> [#uses=1]
+	store i32 %83, i32* @ui, align 4
+	%84 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%85 = zext i8 %84 to i32		; <i32> [#uses=2]
+	%86 = bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*> [#uses=1]
+	%87 = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %86, i32 %85)		; <i32> [#uses=1]
+	%88 = sub i32 %87, %85		; <i32> [#uses=1]
+	store i32 %88, i32* @sl, align 4
+	%89 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%90 = zext i8 %89 to i32		; <i32> [#uses=2]
+	%91 = bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*> [#uses=1]
+	%92 = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %91, i32 %90)		; <i32> [#uses=1]
+	%93 = sub i32 %92, %90		; <i32> [#uses=1]
+	store i32 %93, i32* @ul, align 4
+	%94 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%95 = zext i8 %94 to i64		; <i64> [#uses=2]
+	%96 = bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*> [#uses=1]
+	%97 = call i64 @llvm.atomic.load.sub.i64.p0i64(i64* %96, i64 %95)		; <i64> [#uses=1]
+	%98 = sub i64 %97, %95		; <i64> [#uses=1]
+	store i64 %98, i64* @sll, align 8
+	%99 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%100 = zext i8 %99 to i64		; <i64> [#uses=2]
+	%101 = bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*> [#uses=1]
+	%102 = call i64 @llvm.atomic.load.sub.i64.p0i64(i64* %101, i64 %100)		; <i64> [#uses=1]
+	%103 = sub i64 %102, %100		; <i64> [#uses=1]
+	store i64 %103, i64* @ull, align 8
+	%104 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%105 = zext i8 %104 to i32		; <i32> [#uses=1]
+	%106 = trunc i32 %105 to i8		; <i8> [#uses=2]
+	%107 = call i8 @llvm.atomic.load.or.i8.p0i8(i8* @sc, i8 %106)		; <i8> [#uses=1]
+	%108 = or i8 %107, %106		; <i8> [#uses=1]
+	store i8 %108, i8* @sc, align 1
+	%109 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%110 = zext i8 %109 to i32		; <i32> [#uses=1]
+	%111 = trunc i32 %110 to i8		; <i8> [#uses=2]
+	%112 = call i8 @llvm.atomic.load.or.i8.p0i8(i8* @uc, i8 %111)		; <i8> [#uses=1]
+	%113 = or i8 %112, %111		; <i8> [#uses=1]
+	store i8 %113, i8* @uc, align 1
+	%114 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%115 = zext i8 %114 to i32		; <i32> [#uses=1]
+	%116 = bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*> [#uses=1]
+	%117 = trunc i32 %115 to i16		; <i16> [#uses=2]
+	%118 = call i16 @llvm.atomic.load.or.i16.p0i16(i16* %116, i16 %117)		; <i16> [#uses=1]
+	%119 = or i16 %118, %117		; <i16> [#uses=1]
+	store i16 %119, i16* @ss, align 2
+	%120 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%121 = zext i8 %120 to i32		; <i32> [#uses=1]
+	%122 = bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*> [#uses=1]
+	%123 = trunc i32 %121 to i16		; <i16> [#uses=2]
+	%124 = call i16 @llvm.atomic.load.or.i16.p0i16(i16* %122, i16 %123)		; <i16> [#uses=1]
+	%125 = or i16 %124, %123		; <i16> [#uses=1]
+	store i16 %125, i16* @us, align 2
+	%126 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%127 = zext i8 %126 to i32		; <i32> [#uses=2]
+	%128 = bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*> [#uses=1]
+	%129 = call i32 @llvm.atomic.load.or.i32.p0i32(i32* %128, i32 %127)		; <i32> [#uses=1]
+	%130 = or i32 %129, %127		; <i32> [#uses=1]
+	store i32 %130, i32* @si, align 4
+	%131 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%132 = zext i8 %131 to i32		; <i32> [#uses=2]
+	%133 = bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*> [#uses=1]
+	%134 = call i32 @llvm.atomic.load.or.i32.p0i32(i32* %133, i32 %132)		; <i32> [#uses=1]
+	%135 = or i32 %134, %132		; <i32> [#uses=1]
+	store i32 %135, i32* @ui, align 4
+	%136 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%137 = zext i8 %136 to i32		; <i32> [#uses=2]
+	%138 = bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*> [#uses=1]
+	%139 = call i32 @llvm.atomic.load.or.i32.p0i32(i32* %138, i32 %137)		; <i32> [#uses=1]
+	%140 = or i32 %139, %137		; <i32> [#uses=1]
+	store i32 %140, i32* @sl, align 4
+	%141 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%142 = zext i8 %141 to i32		; <i32> [#uses=2]
+	%143 = bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*> [#uses=1]
+	%144 = call i32 @llvm.atomic.load.or.i32.p0i32(i32* %143, i32 %142)		; <i32> [#uses=1]
+	%145 = or i32 %144, %142		; <i32> [#uses=1]
+	store i32 %145, i32* @ul, align 4
+	%146 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%147 = zext i8 %146 to i64		; <i64> [#uses=2]
+	%148 = bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*> [#uses=1]
+	%149 = call i64 @llvm.atomic.load.or.i64.p0i64(i64* %148, i64 %147)		; <i64> [#uses=1]
+	%150 = or i64 %149, %147		; <i64> [#uses=1]
+	store i64 %150, i64* @sll, align 8
+	%151 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%152 = zext i8 %151 to i64		; <i64> [#uses=2]
+	%153 = bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*> [#uses=1]
+	%154 = call i64 @llvm.atomic.load.or.i64.p0i64(i64* %153, i64 %152)		; <i64> [#uses=1]
+	%155 = or i64 %154, %152		; <i64> [#uses=1]
+	store i64 %155, i64* @ull, align 8
+	%156 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%157 = zext i8 %156 to i32		; <i32> [#uses=1]
+	%158 = trunc i32 %157 to i8		; <i8> [#uses=2]
+	%159 = call i8 @llvm.atomic.load.xor.i8.p0i8(i8* @sc, i8 %158)		; <i8> [#uses=1]
+	%160 = xor i8 %159, %158		; <i8> [#uses=1]
+	store i8 %160, i8* @sc, align 1
+	%161 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%162 = zext i8 %161 to i32		; <i32> [#uses=1]
+	%163 = trunc i32 %162 to i8		; <i8> [#uses=2]
+	%164 = call i8 @llvm.atomic.load.xor.i8.p0i8(i8* @uc, i8 %163)		; <i8> [#uses=1]
+	%165 = xor i8 %164, %163		; <i8> [#uses=1]
+	store i8 %165, i8* @uc, align 1
+	%166 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%167 = zext i8 %166 to i32		; <i32> [#uses=1]
+	%168 = bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*> [#uses=1]
+	%169 = trunc i32 %167 to i16		; <i16> [#uses=2]
+	%170 = call i16 @llvm.atomic.load.xor.i16.p0i16(i16* %168, i16 %169)		; <i16> [#uses=1]
+	%171 = xor i16 %170, %169		; <i16> [#uses=1]
+	store i16 %171, i16* @ss, align 2
+	%172 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%173 = zext i8 %172 to i32		; <i32> [#uses=1]
+	%174 = bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*> [#uses=1]
+	%175 = trunc i32 %173 to i16		; <i16> [#uses=2]
+	%176 = call i16 @llvm.atomic.load.xor.i16.p0i16(i16* %174, i16 %175)		; <i16> [#uses=1]
+	%177 = xor i16 %176, %175		; <i16> [#uses=1]
+	store i16 %177, i16* @us, align 2
+	%178 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%179 = zext i8 %178 to i32		; <i32> [#uses=2]
+	%180 = bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*> [#uses=1]
+	%181 = call i32 @llvm.atomic.load.xor.i32.p0i32(i32* %180, i32 %179)		; <i32> [#uses=1]
+	%182 = xor i32 %181, %179		; <i32> [#uses=1]
+	store i32 %182, i32* @si, align 4
+	%183 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%184 = zext i8 %183 to i32		; <i32> [#uses=2]
+	%185 = bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*> [#uses=1]
+	%186 = call i32 @llvm.atomic.load.xor.i32.p0i32(i32* %185, i32 %184)		; <i32> [#uses=1]
+	%187 = xor i32 %186, %184		; <i32> [#uses=1]
+	store i32 %187, i32* @ui, align 4
+	%188 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%189 = zext i8 %188 to i32		; <i32> [#uses=2]
+	%190 = bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*> [#uses=1]
+	%191 = call i32 @llvm.atomic.load.xor.i32.p0i32(i32* %190, i32 %189)		; <i32> [#uses=1]
+	%192 = xor i32 %191, %189		; <i32> [#uses=1]
+	store i32 %192, i32* @sl, align 4
+	%193 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%194 = zext i8 %193 to i32		; <i32> [#uses=2]
+	%195 = bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*> [#uses=1]
+	%196 = call i32 @llvm.atomic.load.xor.i32.p0i32(i32* %195, i32 %194)		; <i32> [#uses=1]
+	%197 = xor i32 %196, %194		; <i32> [#uses=1]
+	store i32 %197, i32* @ul, align 4
+	%198 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%199 = zext i8 %198 to i64		; <i64> [#uses=2]
+	%200 = bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*> [#uses=1]
+	%201 = call i64 @llvm.atomic.load.xor.i64.p0i64(i64* %200, i64 %199)		; <i64> [#uses=1]
+	%202 = xor i64 %201, %199		; <i64> [#uses=1]
+	store i64 %202, i64* @sll, align 8
+	%203 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%204 = zext i8 %203 to i64		; <i64> [#uses=2]
+	%205 = bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*> [#uses=1]
+	%206 = call i64 @llvm.atomic.load.xor.i64.p0i64(i64* %205, i64 %204)		; <i64> [#uses=1]
+	%207 = xor i64 %206, %204		; <i64> [#uses=1]
+	store i64 %207, i64* @ull, align 8
+	%208 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%209 = zext i8 %208 to i32		; <i32> [#uses=1]
+	%210 = trunc i32 %209 to i8		; <i8> [#uses=2]
+	%211 = call i8 @llvm.atomic.load.and.i8.p0i8(i8* @sc, i8 %210)		; <i8> [#uses=1]
+	%212 = and i8 %211, %210		; <i8> [#uses=1]
+	store i8 %212, i8* @sc, align 1
+	%213 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%214 = zext i8 %213 to i32		; <i32> [#uses=1]
+	%215 = trunc i32 %214 to i8		; <i8> [#uses=2]
+	%216 = call i8 @llvm.atomic.load.and.i8.p0i8(i8* @uc, i8 %215)		; <i8> [#uses=1]
+	%217 = and i8 %216, %215		; <i8> [#uses=1]
+	store i8 %217, i8* @uc, align 1
+	%218 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%219 = zext i8 %218 to i32		; <i32> [#uses=1]
+	%220 = bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*> [#uses=1]
+	%221 = trunc i32 %219 to i16		; <i16> [#uses=2]
+	%222 = call i16 @llvm.atomic.load.and.i16.p0i16(i16* %220, i16 %221)		; <i16> [#uses=1]
+	%223 = and i16 %222, %221		; <i16> [#uses=1]
+	store i16 %223, i16* @ss, align 2
+	%224 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%225 = zext i8 %224 to i32		; <i32> [#uses=1]
+	%226 = bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*> [#uses=1]
+	%227 = trunc i32 %225 to i16		; <i16> [#uses=2]
+	%228 = call i16 @llvm.atomic.load.and.i16.p0i16(i16* %226, i16 %227)		; <i16> [#uses=1]
+	%229 = and i16 %228, %227		; <i16> [#uses=1]
+	store i16 %229, i16* @us, align 2
+	%230 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%231 = zext i8 %230 to i32		; <i32> [#uses=2]
+	%232 = bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*> [#uses=1]
+	%233 = call i32 @llvm.atomic.load.and.i32.p0i32(i32* %232, i32 %231)		; <i32> [#uses=1]
+	%234 = and i32 %233, %231		; <i32> [#uses=1]
+	store i32 %234, i32* @si, align 4
+	%235 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%236 = zext i8 %235 to i32		; <i32> [#uses=2]
+	%237 = bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*> [#uses=1]
+	%238 = call i32 @llvm.atomic.load.and.i32.p0i32(i32* %237, i32 %236)		; <i32> [#uses=1]
+	%239 = and i32 %238, %236		; <i32> [#uses=1]
+	store i32 %239, i32* @ui, align 4
+	%240 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%241 = zext i8 %240 to i32		; <i32> [#uses=2]
+	%242 = bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*> [#uses=1]
+	%243 = call i32 @llvm.atomic.load.and.i32.p0i32(i32* %242, i32 %241)		; <i32> [#uses=1]
+	%244 = and i32 %243, %241		; <i32> [#uses=1]
+	store i32 %244, i32* @sl, align 4
+	%245 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%246 = zext i8 %245 to i32		; <i32> [#uses=2]
+	%247 = bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*> [#uses=1]
+	%248 = call i32 @llvm.atomic.load.and.i32.p0i32(i32* %247, i32 %246)		; <i32> [#uses=1]
+	%249 = and i32 %248, %246		; <i32> [#uses=1]
+	store i32 %249, i32* @ul, align 4
+	%250 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%251 = zext i8 %250 to i64		; <i64> [#uses=2]
+	%252 = bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*> [#uses=1]
+	%253 = call i64 @llvm.atomic.load.and.i64.p0i64(i64* %252, i64 %251)		; <i64> [#uses=1]
+	%254 = and i64 %253, %251		; <i64> [#uses=1]
+	store i64 %254, i64* @sll, align 8
+	%255 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%256 = zext i8 %255 to i64		; <i64> [#uses=2]
+	%257 = bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*> [#uses=1]
+	%258 = call i64 @llvm.atomic.load.and.i64.p0i64(i64* %257, i64 %256)		; <i64> [#uses=1]
+	%259 = and i64 %258, %256		; <i64> [#uses=1]
+	store i64 %259, i64* @ull, align 8
+	%260 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%261 = zext i8 %260 to i32		; <i32> [#uses=1]
+	%262 = trunc i32 %261 to i8		; <i8> [#uses=2]
+	%263 = call i8 @llvm.atomic.load.nand.i8.p0i8(i8* @sc, i8 %262)		; <i8> [#uses=1]
+	%264 = xor i8 %263, -1		; <i8> [#uses=1]
+	%265 = and i8 %264, %262		; <i8> [#uses=1]
+	store i8 %265, i8* @sc, align 1
+	%266 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%267 = zext i8 %266 to i32		; <i32> [#uses=1]
+	%268 = trunc i32 %267 to i8		; <i8> [#uses=2]
+	%269 = call i8 @llvm.atomic.load.nand.i8.p0i8(i8* @uc, i8 %268)		; <i8> [#uses=1]
+	%270 = xor i8 %269, -1		; <i8> [#uses=1]
+	%271 = and i8 %270, %268		; <i8> [#uses=1]
+	store i8 %271, i8* @uc, align 1
+	%272 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%273 = zext i8 %272 to i32		; <i32> [#uses=1]
+	%274 = bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*> [#uses=1]
+	%275 = trunc i32 %273 to i16		; <i16> [#uses=2]
+	%276 = call i16 @llvm.atomic.load.nand.i16.p0i16(i16* %274, i16 %275)		; <i16> [#uses=1]
+	%277 = xor i16 %276, -1		; <i16> [#uses=1]
+	%278 = and i16 %277, %275		; <i16> [#uses=1]
+	store i16 %278, i16* @ss, align 2
+	%279 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%280 = zext i8 %279 to i32		; <i32> [#uses=1]
+	%281 = bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*> [#uses=1]
+	%282 = trunc i32 %280 to i16		; <i16> [#uses=2]
+	%283 = call i16 @llvm.atomic.load.nand.i16.p0i16(i16* %281, i16 %282)		; <i16> [#uses=1]
+	%284 = xor i16 %283, -1		; <i16> [#uses=1]
+	%285 = and i16 %284, %282		; <i16> [#uses=1]
+	store i16 %285, i16* @us, align 2
+	%286 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%287 = zext i8 %286 to i32		; <i32> [#uses=2]
+	%288 = bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*> [#uses=1]
+	%289 = call i32 @llvm.atomic.load.nand.i32.p0i32(i32* %288, i32 %287)		; <i32> [#uses=1]
+	%290 = xor i32 %289, -1		; <i32> [#uses=1]
+	%291 = and i32 %290, %287		; <i32> [#uses=1]
+	store i32 %291, i32* @si, align 4
+	%292 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%293 = zext i8 %292 to i32		; <i32> [#uses=2]
+	%294 = bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*> [#uses=1]
+	%295 = call i32 @llvm.atomic.load.nand.i32.p0i32(i32* %294, i32 %293)		; <i32> [#uses=1]
+	%296 = xor i32 %295, -1		; <i32> [#uses=1]
+	%297 = and i32 %296, %293		; <i32> [#uses=1]
+	store i32 %297, i32* @ui, align 4
+	%298 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%299 = zext i8 %298 to i32		; <i32> [#uses=2]
+	%300 = bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*> [#uses=1]
+	%301 = call i32 @llvm.atomic.load.nand.i32.p0i32(i32* %300, i32 %299)		; <i32> [#uses=1]
+	%302 = xor i32 %301, -1		; <i32> [#uses=1]
+	%303 = and i32 %302, %299		; <i32> [#uses=1]
+	store i32 %303, i32* @sl, align 4
+	%304 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%305 = zext i8 %304 to i32		; <i32> [#uses=2]
+	%306 = bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*> [#uses=1]
+	%307 = call i32 @llvm.atomic.load.nand.i32.p0i32(i32* %306, i32 %305)		; <i32> [#uses=1]
+	%308 = xor i32 %307, -1		; <i32> [#uses=1]
+	%309 = and i32 %308, %305		; <i32> [#uses=1]
+	store i32 %309, i32* @ul, align 4
+	%310 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%311 = zext i8 %310 to i64		; <i64> [#uses=2]
+	%312 = bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*> [#uses=1]
+	%313 = call i64 @llvm.atomic.load.nand.i64.p0i64(i64* %312, i64 %311)		; <i64> [#uses=1]
+	%314 = xor i64 %313, -1		; <i64> [#uses=1]
+	%315 = and i64 %314, %311		; <i64> [#uses=1]
+	store i64 %315, i64* @sll, align 8
+	%316 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%317 = zext i8 %316 to i64		; <i64> [#uses=2]
+	%318 = bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*> [#uses=1]
+	%319 = call i64 @llvm.atomic.load.nand.i64.p0i64(i64* %318, i64 %317)		; <i64> [#uses=1]
+	%320 = xor i64 %319, -1		; <i64> [#uses=1]
+	%321 = and i64 %320, %317		; <i64> [#uses=1]
+	store i64 %321, i64* @ull, align 8
+	br label %return
+
+return:		; preds = %entry
+	ret void
+}
+
+define void @test_compare_and_swap() nounwind {	; first half stores the value returned by cmp.swap; second half stores (old == expected)
+entry:
+	%0 = load i8* @sc, align 1		; <i8> [#uses=1]
+	%1 = zext i8 %0 to i32		; <i32> [#uses=1]
+	%2 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%3 = zext i8 %2 to i32		; <i32> [#uses=1]
+	%4 = trunc i32 %3 to i8		; <i8> [#uses=1]
+	%5 = trunc i32 %1 to i8		; <i8> [#uses=1]
+	%6 = call i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* @sc, i8 %4, i8 %5)		; <i8> [#uses=1]
+	store i8 %6, i8* @sc, align 1		; value variant: store the old value swapped out
+	%7 = load i8* @sc, align 1		; <i8> [#uses=1]
+	%8 = zext i8 %7 to i32		; <i32> [#uses=1]
+	%9 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%10 = zext i8 %9 to i32		; <i32> [#uses=1]
+	%11 = trunc i32 %10 to i8		; <i8> [#uses=1]
+	%12 = trunc i32 %8 to i8		; <i8> [#uses=1]
+	%13 = call i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* @uc, i8 %11, i8 %12)		; <i8> [#uses=1]
+	store i8 %13, i8* @uc, align 1
+	%14 = load i8* @sc, align 1		; <i8> [#uses=1]
+	%15 = sext i8 %14 to i16		; <i16> [#uses=1]
+	%16 = zext i16 %15 to i32		; <i32> [#uses=1]
+	%17 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%18 = zext i8 %17 to i32		; <i32> [#uses=1]
+	%19 = bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*> [#uses=1]
+	%20 = trunc i32 %18 to i16		; <i16> [#uses=1]
+	%21 = trunc i32 %16 to i16		; <i16> [#uses=1]
+	%22 = call i16 @llvm.atomic.cmp.swap.i16.p0i16(i16* %19, i16 %20, i16 %21)		; <i16> [#uses=1]
+	store i16 %22, i16* @ss, align 2
+	%23 = load i8* @sc, align 1		; <i8> [#uses=1]
+	%24 = sext i8 %23 to i16		; <i16> [#uses=1]
+	%25 = zext i16 %24 to i32		; <i32> [#uses=1]
+	%26 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%27 = zext i8 %26 to i32		; <i32> [#uses=1]
+	%28 = bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*> [#uses=1]
+	%29 = trunc i32 %27 to i16		; <i16> [#uses=1]
+	%30 = trunc i32 %25 to i16		; <i16> [#uses=1]
+	%31 = call i16 @llvm.atomic.cmp.swap.i16.p0i16(i16* %28, i16 %29, i16 %30)		; <i16> [#uses=1]
+	store i16 %31, i16* @us, align 2
+	%32 = load i8* @sc, align 1		; <i8> [#uses=1]
+	%33 = sext i8 %32 to i32		; <i32> [#uses=1]
+	%34 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%35 = zext i8 %34 to i32		; <i32> [#uses=1]
+	%36 = bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*> [#uses=1]
+	%37 = call i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* %36, i32 %35, i32 %33)		; <i32> [#uses=1]
+	store i32 %37, i32* @si, align 4
+	%38 = load i8* @sc, align 1		; <i8> [#uses=1]
+	%39 = sext i8 %38 to i32		; <i32> [#uses=1]
+	%40 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%41 = zext i8 %40 to i32		; <i32> [#uses=1]
+	%42 = bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*> [#uses=1]
+	%43 = call i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* %42, i32 %41, i32 %39)		; <i32> [#uses=1]
+	store i32 %43, i32* @ui, align 4
+	%44 = load i8* @sc, align 1		; <i8> [#uses=1]
+	%45 = sext i8 %44 to i32		; <i32> [#uses=1]
+	%46 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%47 = zext i8 %46 to i32		; <i32> [#uses=1]
+	%48 = bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*> [#uses=1]
+	%49 = call i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* %48, i32 %47, i32 %45)		; <i32> [#uses=1]
+	store i32 %49, i32* @sl, align 4
+	%50 = load i8* @sc, align 1		; <i8> [#uses=1]
+	%51 = sext i8 %50 to i32		; <i32> [#uses=1]
+	%52 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%53 = zext i8 %52 to i32		; <i32> [#uses=1]
+	%54 = bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*> [#uses=1]
+	%55 = call i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* %54, i32 %53, i32 %51)		; <i32> [#uses=1]
+	store i32 %55, i32* @ul, align 4
+	%56 = load i8* @sc, align 1		; <i8> [#uses=1]
+	%57 = zext i8 %56 to i32		; <i32> [#uses=1]
+	%58 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%59 = zext i8 %58 to i32		; <i32> [#uses=1]
+	%60 = trunc i32 %59 to i8		; <i8> [#uses=2]
+	%61 = trunc i32 %57 to i8		; <i8> [#uses=1]
+	%62 = call i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* @sc, i8 %60, i8 %61)		; <i8> [#uses=1]
+	%63 = icmp eq i8 %62, %60		; <i1> [#uses=1]
+	%64 = zext i1 %63 to i8		; <i8> [#uses=1]
+	%65 = zext i8 %64 to i32		; <i32> [#uses=1]
+	store i32 %65, i32* @ui, align 4		; bool variant: store success flag (old == expected)
+	%66 = load i8* @sc, align 1		; <i8> [#uses=1]
+	%67 = zext i8 %66 to i32		; <i32> [#uses=1]
+	%68 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%69 = zext i8 %68 to i32		; <i32> [#uses=1]
+	%70 = trunc i32 %69 to i8		; <i8> [#uses=2]
+	%71 = trunc i32 %67 to i8		; <i8> [#uses=1]
+	%72 = call i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* @uc, i8 %70, i8 %71)		; <i8> [#uses=1]
+	%73 = icmp eq i8 %72, %70		; <i1> [#uses=1]
+	%74 = zext i1 %73 to i8		; <i8> [#uses=1]
+	%75 = zext i8 %74 to i32		; <i32> [#uses=1]
+	store i32 %75, i32* @ui, align 4
+	%76 = load i8* @sc, align 1		; <i8> [#uses=1]
+	%77 = sext i8 %76 to i16		; <i16> [#uses=1]
+	%78 = zext i16 %77 to i32		; <i32> [#uses=1]
+	%79 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%80 = zext i8 %79 to i32		; <i32> [#uses=1]
+	%81 = bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*> [#uses=1]
+	%82 = trunc i32 %80 to i16		; <i16> [#uses=2]
+	%83 = trunc i32 %78 to i16		; <i16> [#uses=1]
+	%84 = call i16 @llvm.atomic.cmp.swap.i16.p0i16(i16* %81, i16 %82, i16 %83)		; <i16> [#uses=1]
+	%85 = icmp eq i16 %84, %82		; <i1> [#uses=1]
+	%86 = zext i1 %85 to i8		; <i8> [#uses=1]
+	%87 = zext i8 %86 to i32		; <i32> [#uses=1]
+	store i32 %87, i32* @ui, align 4
+	%88 = load i8* @sc, align 1		; <i8> [#uses=1]
+	%89 = sext i8 %88 to i16		; <i16> [#uses=1]
+	%90 = zext i16 %89 to i32		; <i32> [#uses=1]
+	%91 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%92 = zext i8 %91 to i32		; <i32> [#uses=1]
+	%93 = bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*> [#uses=1]
+	%94 = trunc i32 %92 to i16		; <i16> [#uses=2]
+	%95 = trunc i32 %90 to i16		; <i16> [#uses=1]
+	%96 = call i16 @llvm.atomic.cmp.swap.i16.p0i16(i16* %93, i16 %94, i16 %95)		; <i16> [#uses=1]
+	%97 = icmp eq i16 %96, %94		; <i1> [#uses=1]
+	%98 = zext i1 %97 to i8		; <i8> [#uses=1]
+	%99 = zext i8 %98 to i32		; <i32> [#uses=1]
+	store i32 %99, i32* @ui, align 4
+	%100 = load i8* @sc, align 1		; <i8> [#uses=1]
+	%101 = sext i8 %100 to i32		; <i32> [#uses=1]
+	%102 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%103 = zext i8 %102 to i32		; <i32> [#uses=2]
+	%104 = bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*> [#uses=1]
+	%105 = call i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* %104, i32 %103, i32 %101)		; <i32> [#uses=1]
+	%106 = icmp eq i32 %105, %103		; <i1> [#uses=1]
+	%107 = zext i1 %106 to i8		; <i8> [#uses=1]
+	%108 = zext i8 %107 to i32		; <i32> [#uses=1]
+	store i32 %108, i32* @ui, align 4
+	%109 = load i8* @sc, align 1		; <i8> [#uses=1]
+	%110 = sext i8 %109 to i32		; <i32> [#uses=1]
+	%111 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%112 = zext i8 %111 to i32		; <i32> [#uses=2]
+	%113 = bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*> [#uses=1]
+	%114 = call i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* %113, i32 %112, i32 %110)		; <i32> [#uses=1]
+	%115 = icmp eq i32 %114, %112		; <i1> [#uses=1]
+	%116 = zext i1 %115 to i8		; <i8> [#uses=1]
+	%117 = zext i8 %116 to i32		; <i32> [#uses=1]
+	store i32 %117, i32* @ui, align 4
+	%118 = load i8* @sc, align 1		; <i8> [#uses=1]
+	%119 = sext i8 %118 to i32		; <i32> [#uses=1]
+	%120 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%121 = zext i8 %120 to i32		; <i32> [#uses=2]
+	%122 = bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*> [#uses=1]
+	%123 = call i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* %122, i32 %121, i32 %119)		; <i32> [#uses=1]
+	%124 = icmp eq i32 %123, %121		; <i1> [#uses=1]
+	%125 = zext i1 %124 to i8		; <i8> [#uses=1]
+	%126 = zext i8 %125 to i32		; <i32> [#uses=1]
+	store i32 %126, i32* @ui, align 4
+	%127 = load i8* @sc, align 1		; <i8> [#uses=1]
+	%128 = sext i8 %127 to i32		; <i32> [#uses=1]
+	%129 = load i8* @uc, align 1		; <i8> [#uses=1]
+	%130 = zext i8 %129 to i32		; <i32> [#uses=2]
+	%131 = bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*> [#uses=1]
+	%132 = call i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* %131, i32 %130, i32 %128)		; <i32> [#uses=1]
+	%133 = icmp eq i32 %132, %130		; <i1> [#uses=1]
+	%134 = zext i1 %133 to i8		; <i8> [#uses=1]
+	%135 = zext i8 %134 to i32		; <i32> [#uses=1]
+	store i32 %135, i32* @ui, align 4
+	br label %return
+
+return:		; preds = %entry
+	ret void
+}
+
+declare i8 @llvm.atomic.cmp.swap.i8.p0i8(i8*, i8, i8) nounwind
+
+declare i16 @llvm.atomic.cmp.swap.i16.p0i16(i16*, i16, i16) nounwind
+
+declare i32 @llvm.atomic.cmp.swap.i32.p0i32(i32*, i32, i32) nounwind
+
+define void @test_lock() nounwind {	; atomic swap-with-1 on each location, then a barrier followed by volatile zero stores
+entry:
+	%0 = call i8 @llvm.atomic.swap.i8.p0i8(i8* @sc, i8 1)		; <i8> [#uses=1]
+	store i8 %0, i8* @sc, align 1		; store the previous value returned by the swap
+	%1 = call i8 @llvm.atomic.swap.i8.p0i8(i8* @uc, i8 1)		; <i8> [#uses=1]
+	store i8 %1, i8* @uc, align 1
+	%2 = bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*> [#uses=1]
+	%3 = call i16 @llvm.atomic.swap.i16.p0i16(i16* %2, i16 1)		; <i16> [#uses=1]
+	store i16 %3, i16* @ss, align 2
+	%4 = bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*> [#uses=1]
+	%5 = call i16 @llvm.atomic.swap.i16.p0i16(i16* %4, i16 1)		; <i16> [#uses=1]
+	store i16 %5, i16* @us, align 2
+	%6 = bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*> [#uses=1]
+	%7 = call i32 @llvm.atomic.swap.i32.p0i32(i32* %6, i32 1)		; <i32> [#uses=1]
+	store i32 %7, i32* @si, align 4
+	%8 = bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*> [#uses=1]
+	%9 = call i32 @llvm.atomic.swap.i32.p0i32(i32* %8, i32 1)		; <i32> [#uses=1]
+	store i32 %9, i32* @ui, align 4
+	%10 = bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*> [#uses=1]
+	%11 = call i32 @llvm.atomic.swap.i32.p0i32(i32* %10, i32 1)		; <i32> [#uses=1]
+	store i32 %11, i32* @sl, align 4
+	%12 = bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*> [#uses=1]
+	%13 = call i32 @llvm.atomic.swap.i32.p0i32(i32* %12, i32 1)		; <i32> [#uses=1]
+	store i32 %13, i32* @ul, align 4
+	call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false)		; barrier between the swaps and the releasing stores
+	volatile store i8 0, i8* @sc, align 1
+	volatile store i8 0, i8* @uc, align 1
+	%14 = bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*> [#uses=1]
+	volatile store i16 0, i16* %14, align 2
+	%15 = bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*> [#uses=1]
+	volatile store i16 0, i16* %15, align 2
+	%16 = bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*> [#uses=1]
+	volatile store i32 0, i32* %16, align 4
+	%17 = bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*> [#uses=1]
+	volatile store i32 0, i32* %17, align 4
+	%18 = bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*> [#uses=1]
+	volatile store i32 0, i32* %18, align 4
+	%19 = bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*> [#uses=1]
+	volatile store i32 0, i32* %19, align 4
+	%20 = bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*> [#uses=1]
+	volatile store i64 0, i64* %20, align 8
+	%21 = bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*> [#uses=1]
+	volatile store i64 0, i64* %21, align 8
+	br label %return
+
+return:		; preds = %entry
+	ret void
+}
+
+declare i8 @llvm.atomic.swap.i8.p0i8(i8*, i8) nounwind
+
+declare i16 @llvm.atomic.swap.i16.p0i16(i16*, i16) nounwind
+
+declare i32 @llvm.atomic.swap.i32.p0i32(i32*, i32) nounwind
+
+declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind
diff --git a/test/CodeGen/X86/2008-10-06-MMXISelBug.ll b/test/CodeGen/X86/2008-10-06-MMXISelBug.ll
new file mode 100644
index 0000000..7f7b1a4
--- /dev/null
+++ b/test/CodeGen/X86/2008-10-06-MMXISelBug.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2
+; PR2850
+
+@tmp_V2i = common global <2 x i32> zeroinitializer		; <<2 x i32>*> [#uses=2]
+
+define void @f0() nounwind {	; PR2850: shuffle splatting element 0 of a v2i32 must select without crashing
+entry:
+	%0 = load <2 x i32>* @tmp_V2i, align 8		; <<2 x i32>> [#uses=1]
+	%1 = shufflevector <2 x i32> %0, <2 x i32> undef, <2 x i32> zeroinitializer		; <<2 x i32>> [#uses=1]
+	store <2 x i32> %1, <2 x i32>* @tmp_V2i, align 8
+	ret void
+}
diff --git a/test/CodeGen/X86/2008-10-06-x87ld-nan-1.ll b/test/CodeGen/X86/2008-10-06-x87ld-nan-1.ll
new file mode 100644
index 0000000..a135cd4
--- /dev/null
+++ b/test/CodeGen/X86/2008-10-06-x87ld-nan-1.ll
@@ -0,0 +1,13 @@
+; ModuleID = 'nan.bc'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-f80:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-apple-darwin8"
+; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3,-sse | grep fldl
+; This NaN should be shortened to a double (not a float).
+
+declare x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 %f)
+
+define i32 @main() {
+entry_nan.main:
+  call x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 0xK7FFFC001234000000800)	; per the RUN line, this constant should be emitted as a double (fldl)
+  ret i32 0
+}
diff --git a/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll b/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll
new file mode 100644
index 0000000..bd48105
--- /dev/null
+++ b/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll
@@ -0,0 +1,18 @@
+; ModuleID = 'nan.bc'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-f80:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-apple-darwin8"
+; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3,-sse | grep fldt | count 3
+; It is not safe to shorten any of these NaNs.
+
+declare x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 %f)
+
+@_D3nan4rvale = global x86_fp80 0xK7FFF8001234000000000   ; <x86_fp80*> [#uses=1]
+
+define i32 @main() {
+entry_nan.main:
+  %tmp = load x86_fp80* @_D3nan4rvale   ; <x86_fp80> [#uses=1]
+  call x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 %tmp)	; each of the 3 x86_fp80 loads must stay fldt per the RUN line
+  call x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 0xK7FFF8001234000000000)
+  call x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 0xK7FFFC001234000000400)
+  ret i32 0
+}
diff --git a/test/CodeGen/X86/2008-10-07-SSEISelBug.ll b/test/CodeGen/X86/2008-10-07-SSEISelBug.ll
new file mode 100644
index 0000000..bc57612
--- /dev/null
+++ b/test/CodeGen/X86/2008-10-07-SSEISelBug.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86 -mattr=+sse,-sse2
+
+define <4 x float> @f(float %w) nounwind {
+entry:
+	%retval = alloca <4 x float>		; <<4 x float>*> [#uses=2]
+	%w.addr = alloca float		; <float*> [#uses=2]
+	%.compoundliteral = alloca <4 x float>		; <<4 x float>*> [#uses=2]
+	store float %w, float* %w.addr
+	%tmp = load float* %w.addr		; <float> [#uses=1]
+	%0 = insertelement <4 x float> undef, float %tmp, i32 0		; <<4 x float>> [#uses=1]
+	%1 = insertelement <4 x float> %0, float 0.000000e+00, i32 1		; <<4 x float>> [#uses=1]
+	%2 = insertelement <4 x float> %1, float 0.000000e+00, i32 2		; <<4 x float>> [#uses=1]
+	%3 = insertelement <4 x float> %2, float 0.000000e+00, i32 3		; <<4 x float>> [#uses=1]
+	store <4 x float> %3, <4 x float>* %.compoundliteral
+	%tmp1 = load <4 x float>* %.compoundliteral		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp1, <4 x float>* %retval
+	br label %return
+
+return:		; preds = %entry
+	%4 = load <4 x float>* %retval		; <<4 x float>> [#uses=1]
+	ret <4 x float> %4
+}
diff --git a/test/CodeGen/X86/2008-10-11-CallCrash.ll b/test/CodeGen/X86/2008-10-11-CallCrash.ll
new file mode 100644
index 0000000..efc6125
--- /dev/null
+++ b/test/CodeGen/X86/2008-10-11-CallCrash.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s
+; PR2735
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin7"
+@g_385 = external global i32		; <i32*> [#uses=1]
+
+define i32 @func_45(i64 %p_46, i32 %p_48) nounwind {
+entry:
+	%0 = tail call i32 (...)* @lshift_s_u(i64 %p_46, i64 0) nounwind		; <i32> [#uses=0]
+	%1 = load i32* @g_385, align 4		; <i32> [#uses=1]
+	%2 = shl i32 %1, 1		; <i32> [#uses=1]
+	%3 = and i32 %2, 32		; <i32> [#uses=1]
+	%4 = tail call i32 (...)* @func_87(i32 undef, i32 %p_48, i32 1) nounwind		; <i32> [#uses=1]
+	%5 = add i32 %3, %4		; <i32> [#uses=1]
+	%6 = tail call i32 (...)* @div_rhs(i32 %5) nounwind		; <i32> [#uses=0]
+	ret i32 undef
+}
+
+declare i32 @lshift_s_u(...)
+declare i32 @func_87(...)
+declare i32 @div_rhs(...)
diff --git a/test/CodeGen/X86/2008-10-13-CoalescerBug.ll b/test/CodeGen/X86/2008-10-13-CoalescerBug.ll
new file mode 100644
index 0000000..4d3f8c2
--- /dev/null
+++ b/test/CodeGen/X86/2008-10-13-CoalescerBug.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=x86
+; PR2775
+
+define i32 @func_77(i8 zeroext %p_79) nounwind {
+entry:
+	%0 = tail call i32 (...)* @func_43(i32 1) nounwind		; <i32> [#uses=1]
+	%1 = icmp eq i32 %0, 0		; <i1> [#uses=1]
+	br i1 %1, label %bb3, label %bb
+
+bb:		; preds = %entry
+	br label %bb3
+
+bb3:		; preds = %bb, %entry
+	%p_79_addr.0 = phi i8 [ 0, %bb ], [ %p_79, %entry ]		; <i8> [#uses=1]
+	%2 = zext i8 %p_79_addr.0 to i32		; <i32> [#uses=2]
+	%3 = zext i1 false to i32		; <i32> [#uses=2]
+	%4 = tail call i32 (...)* @rshift_u_s(i32 1) nounwind		; <i32> [#uses=0]
+	%5 = lshr i32 %2, %2		; <i32> [#uses=3]
+	%6 = icmp eq i32 0, 0		; <i1> [#uses=1]
+	br i1 %6, label %bb6, label %bb9
+
+bb6:		; preds = %bb3
+	%7 = ashr i32 %5, %3		; <i32> [#uses=1]
+	%8 = icmp eq i32 %7, 0		; <i1> [#uses=1]
+	%9 = select i1 %8, i32 %3, i32 0		; <i32> [#uses=1]
+	%. = shl i32 %5, %9		; <i32> [#uses=1]
+	br label %bb9
+
+bb9:		; preds = %bb6, %bb3
+	%.0 = phi i32 [ %., %bb6 ], [ %5, %bb3 ]		; <i32> [#uses=0]
+	br i1 false, label %return, label %bb10
+
+bb10:		; preds = %bb9
+	ret i32 undef
+
+return:		; preds = %bb9
+	ret i32 undef
+}
+
+declare i32 @func_43(...)
+
+declare i32 @rshift_u_s(...)
diff --git a/test/CodeGen/X86/2008-10-16-SpillerBug.ll b/test/CodeGen/X86/2008-10-16-SpillerBug.ll
new file mode 100644
index 0000000..b8ca364
--- /dev/null
+++ b/test/CodeGen/X86/2008-10-16-SpillerBug.ll
@@ -0,0 +1,155 @@
+; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | grep {andl.*7.*edi}
+
+	%struct.XXDActiveTextureTargets = type { i64, i64, i64, i64, i64, i64 }
+	%struct.XXDAlphaTest = type { float, i16, i8, i8 }
+	%struct.XXDArrayRange = type { i8, i8, i8, i8 }
+	%struct.XXDBlendMode = type { i16, i16, i16, i16, %struct.XXTColor4, i16, i16, i8, i8, i8, i8 }
+	%struct.XXDClearColor = type { double, %struct.XXTColor4, %struct.XXTColor4, float, i32 }
+	%struct.XXDClipPlane = type { i32, [6 x %struct.XXTColor4] }
+	%struct.XXDColorBuffer = type { i16, i8, i8, [8 x i16], i8, i8, i8, i8 }
+	%struct.XXDColorMatrix = type { [16 x float]*, %struct.XXDImagingCC }
+	%struct.XXDConvolution = type { %struct.XXTColor4, %struct.XXDImagingCC, i16, i16, [0 x i32], float*, i32, i32 }
+	%struct.XXDDepthTest = type { i16, i16, i8, i8, i8, i8, double, double }
+	%struct.XXDFixedFunction = type { %struct.YYToken* }
+	%struct.XXDFogMode = type { %struct.XXTColor4, float, float, float, float, float, i16, i16, i16, i8, i8 }
+	%struct.XXDHintMode = type { i16, i16, i16, i16, i16, i16, i16, i16, i16, i16 }
+	%struct.XXDHistogram = type { %struct.XXTFixedColor4*, i32, i16, i8, i8 }
+	%struct.XXDImagingCC = type { { float, float }, { float, float }, { float, float }, { float, float } }
+	%struct.XXDImagingSubset = type { %struct.XXDConvolution, %struct.XXDConvolution, %struct.XXDConvolution, %struct.XXDColorMatrix, %struct.XXDMinmax, %struct.XXDHistogram, %struct.XXDImagingCC, %struct.XXDImagingCC, %struct.XXDImagingCC, %struct.XXDImagingCC, i32, [0 x i32] }
+	%struct.XXDLight = type { %struct.XXTColor4, %struct.XXTColor4, %struct.XXTColor4, %struct.XXTColor4, %struct.XXTCoord3, float, float, float, float, float, %struct.XXTCoord3, float, %struct.XXTCoord3, float, %struct.XXTCoord3, float, float, float, float, float }
+	%struct.XXDLightModel = type { %struct.XXTColor4, [8 x %struct.XXDLight], [2 x %struct.XXDMaterial], i32, i16, i16, i16, i8, i8, i8, i8, i8, i8 }
+	%struct.XXDLightProduct = type { %struct.XXTColor4, %struct.XXTColor4, %struct.XXTColor4 }
+	%struct.XXDLineMode = type { float, i32, i16, i16, i8, i8, i8, i8 }
+	%struct.XXDLogicOp = type { i16, i8, i8 }
+	%struct.XXDMaskMode = type { i32, [3 x i32], i8, i8, i8, i8, i8, i8, i8, i8 }
+	%struct.XXDMaterial = type { %struct.XXTColor4, %struct.XXTColor4, %struct.XXTColor4, %struct.XXTColor4, float, float, float, float, [8 x %struct.XXDLightProduct], %struct.XXTColor4, [8 x i32] }
+	%struct.XXDMinmax = type { %struct.XXDMinmaxTable*, i16, i8, i8, [0 x i32] }
+	%struct.XXDMinmaxTable = type { %struct.XXTColor4, %struct.XXTColor4 }
+	%struct.XXDMultisample = type { float, i8, i8, i8, i8, i8, i8, i8, i8 }
+	%struct.XXDPipelineProgramState = type { i8, i8, i8, i8, [0 x i32], %struct.XXTColor4* }
+	%struct.XXDPixelMap = type { i32*, float*, float*, float*, float*, float*, float*, float*, float*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+	%struct.XXDPixelMode = type { float, float, %struct.XXDPixelStore, %struct.XXDPixelTransfer, %struct.XXDPixelMap, %struct.XXDImagingSubset, i32, i32 }
+	%struct.XXDPixelPack = type { i32, i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, i8 }
+	%struct.XXDPixelStore = type { %struct.XXDPixelPack, %struct.XXDPixelPack }
+	%struct.XXDPixelTransfer = type { float, float, float, float, float, float, float, float, float, float, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }
+	%struct.XXDPointMode = type { float, float, float, float, %struct.XXTCoord3, float, i8, i8, i8, i8, i16, i16, i32, i16, i16 }
+	%struct.XXDPolygonMode = type { [128 x i8], float, float, i16, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8 }
+	%struct.XXDRegisterCombiners = type { i8, i8, i8, i8, i32, [2 x %struct.XXTColor4], [8 x %struct.XXDRegisterCombinersPerStageState], %struct.XXDRegisterCombinersFinalStageState }
+	%struct.XXDRegisterCombinersFinalStageState = type { i8, i8, i8, i8, [7 x %struct.XXDRegisterCombinersPerVariableState] }
+	%struct.XXDRegisterCombinersPerPortionState = type { [4 x %struct.XXDRegisterCombinersPerVariableState], i8, i8, i8, i8, i16, i16, i16, i16, i16, i16 }
+	%struct.XXDRegisterCombinersPerStageState = type { [2 x %struct.XXDRegisterCombinersPerPortionState], [2 x %struct.XXTColor4] }
+	%struct.XXDRegisterCombinersPerVariableState = type { i16, i16, i16, i16 }
+	%struct.XXDScissorTest = type { %struct.XXTFixedColor4, i8, i8, i8, i8 }
+	%struct.XXDState = type <{ i16, i16, i16, i16, i32, i32, [256 x %struct.XXTColor4], [128 x %struct.XXTColor4], %struct.XXDViewport, %struct.XXDTransform, %struct.XXDLightModel, %struct.XXDActiveTextureTargets, %struct.XXDAlphaTest, %struct.XXDBlendMode, %struct.XXDClearColor, %struct.XXDColorBuffer, %struct.XXDDepthTest, %struct.XXDArrayRange, %struct.XXDFogMode, %struct.XXDHintMode, %struct.XXDLineMode, %struct.XXDLogicOp, %struct.XXDMaskMode, %struct.XXDPixelMode, %struct.XXDPointMode, %struct.XXDPolygonMode, %struct.XXDScissorTest, i32, %struct.XXDStencilTest, [8 x %struct.XXDTextureMode], [16 x %struct.XXDTextureImageMode], %struct.XXDArrayRange, [8 x %struct.XXDTextureCoordGen], %struct.XXDClipPlane, %struct.XXDMultisample, %struct.XXDRegisterCombiners, %struct.XXDArrayRange, %struct.XXDArrayRange, [3 x %struct.XXDPipelineProgramState], %struct.XXDArrayRange, %struct.XXDTransformFeedback, i32*, %struct.XXDFixedFunction, [3 x i32], [2 x i32] }>
+	%struct.XXDStencilTest = type { [3 x { i32, i32, i16, i16, i16, i16 }], i32, [4 x i8] }
+	%struct.XXDTextureCoordGen = type { { i16, i16, %struct.XXTColor4, %struct.XXTColor4 }, { i16, i16, %struct.XXTColor4, %struct.XXTColor4 }, { i16, i16, %struct.XXTColor4, %struct.XXTColor4 }, { i16, i16, %struct.XXTColor4, %struct.XXTColor4 }, i8, i8, i8, i8 }
+	%struct.XXDTextureImageMode = type { float }
+	%struct.XXDTextureMode = type { %struct.XXTColor4, i32, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, float, float, i16, i16, i16, i16, i16, i16, [4 x i16], i8, i8, i8, i8, [3 x float], [4 x float], float, float }
+	%struct.XXDTextureRec = type opaque
+	%struct.XXDTransform = type <{ [24 x [16 x float]], [24 x [16 x float]], [16 x float], float, float, float, float, float, i8, i8, i8, i8, i32, i32, i32, i16, i16, i8, i8, i8, i8, i32 }>
+	%struct.XXDTransformFeedback = type { i8, i8, i8, i8, [0 x i32], [16 x i32], [16 x i32] }
+	%struct.XXDViewport = type { float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, double, double, i32, i32, i32, i32, float, float, float, float }
+	%struct.XXTColor4 = type { float, float, float, float }
+	%struct.XXTCoord3 = type { float, float, float }
+	%struct.XXTFixedColor4 = type { i32, i32, i32, i32 }
+	%struct.XXVMTextures = type { [16 x %struct.XXDTextureRec*] }
+	%struct.XXVMVPContext = type { i32 }
+	%struct.XXVMVPStack = type { i32, i32 }
+	%struct.YYToken = type { { i16, i16, i32 } }
+	%struct._XXVMConstants = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, float, float, float, float, float, float, float, float, float, float, float, float, [256 x float], [4096 x i8], [8 x float], [48 x float], [128 x float], [528 x i8], { void (i8*, i8*, i32, i8*)*, float (float)*, float (float)*, float (float)*, i32 (float)* } }
+@llvm.used = appending global [1 x i8*] [ i8* bitcast (void (%struct.XXDState*, <4 x float>*, <4 x float>**, %struct._XXVMConstants*, %struct.YYToken*, %struct.XXVMVPContext*, %struct.XXVMTextures*, %struct.XXVMVPStack*, <4 x float>*, <4 x float>*, <4 x float>*, <4 x float>*, <4 x float>*, <4 x float>*, <4 x float>*, <4 x float>*, [4 x <4 x float>]*, i32*, <4 x i32>*, i64)* @t to i8*) ], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define void @t(%struct.XXDState* %gldst, <4 x float>* %prgrm, <4 x float>** %buffs, %struct._XXVMConstants* %cnstn, %struct.YYToken* %pstrm, %struct.XXVMVPContext* %vmctx, %struct.XXVMTextures* %txtrs, %struct.XXVMVPStack* %vpstk, <4 x float>* %atr0, <4 x float>* %atr1, <4 x float>* %atr2, <4 x float>* %atr3, <4 x float>* %vtx0, <4 x float>* %vtx1, <4 x float>* %vtx2, <4 x float>* %vtx3, [4 x <4 x float>]* %tmpGbl, i32* %oldMsk, <4 x i32>* %adrGbl, i64 %key_token) nounwind {
+entry:
+	%0 = trunc i64 %key_token to i32		; <i32> [#uses=1]
+	%1 = getelementptr %struct.YYToken* %pstrm, i32 %0		; <%struct.YYToken*> [#uses=5]
+	br label %bb1132
+
+bb51:		; preds = %bb1132
+	%2 = getelementptr %struct.YYToken* %1, i32 %operation.0.rec, i32 0, i32 0		; <i16*> [#uses=1]
+	%3 = load i16* %2, align 1		; <i16> [#uses=3]
+	%4 = lshr i16 %3, 6		; <i16> [#uses=1]
+	%5 = trunc i16 %4 to i8		; <i8> [#uses=1]
+	%6 = zext i8 %5 to i32		; <i32> [#uses=1]
+	%7 = trunc i16 %3 to i8		; <i8> [#uses=1]
+	%8 = and i8 %7, 7		; <i8> [#uses=1]
+	%mask5556 = zext i8 %8 to i32		; <i32> [#uses=3]
+	%.sum1324 = add i32 %mask5556, 2		; <i32> [#uses=1]
+	%.rec = add i32 %operation.0.rec, %.sum1324		; <i32> [#uses=1]
+	%9 = bitcast %struct.YYToken* %operation.0 to i32*		; <i32*> [#uses=1]
+	%10 = load i32* %9, align 1		; <i32> [#uses=1]
+	%11 = lshr i32 %10, 16		; <i32> [#uses=2]
+	%12 = trunc i32 %11 to i8		; <i8> [#uses=1]
+	%13 = and i8 %12, 1		; <i8> [#uses=1]
+	%14 = lshr i16 %3, 15		; <i16> [#uses=1]
+	%15 = trunc i16 %14 to i8		; <i8> [#uses=1]
+	%16 = or i8 %13, %15		; <i8> [#uses=1]
+	%17 = icmp eq i8 %16, 0		; <i1> [#uses=1]
+	br i1 %17, label %bb94, label %bb75
+
+bb75:		; preds = %bb51
+	%18 = getelementptr %struct.YYToken* %1, i32 0, i32 0, i32 0		; <i16*> [#uses=1]
+	%19 = load i16* %18, align 4		; <i16> [#uses=1]
+	%20 = load i16* null, align 2		; <i16> [#uses=1]
+	%21 = zext i16 %19 to i64		; <i64> [#uses=1]
+	%22 = zext i16 %20 to i64		; <i64> [#uses=1]
+	%23 = shl i64 %22, 16		; <i64> [#uses=1]
+	%.ins1177 = or i64 %23, %21		; <i64> [#uses=1]
+	%.ins1175 = or i64 %.ins1177, 0		; <i64> [#uses=1]
+	%24 = and i32 %11, 1		; <i32> [#uses=1]
+	%.neg1333 = sub i32 %mask5556, %24		; <i32> [#uses=1]
+	%.neg1335 = sub i32 %.neg1333, 0		; <i32> [#uses=1]
+	%25 = sub i32 %.neg1335, 0		; <i32> [#uses=1]
+	br label %bb94
+
+bb94:		; preds = %bb75, %bb51
+	%extraToken.0 = phi i64 [ %.ins1175, %bb75 ], [ %extraToken.1, %bb51 ]		; <i64> [#uses=1]
+	%argCount.0 = phi i32 [ %25, %bb75 ], [ %mask5556, %bb51 ]		; <i32> [#uses=1]
+	%operation.0.sum1392 = add i32 %operation.0.rec, 1		; <i32> [#uses=2]
+	%26 = getelementptr %struct.YYToken* %1, i32 %operation.0.sum1392, i32 0, i32 0		; <i16*> [#uses=1]
+	%27 = load i16* %26, align 4		; <i16> [#uses=1]
+	%28 = getelementptr %struct.YYToken* %1, i32 %operation.0.sum1392, i32 0, i32 1		; <i16*> [#uses=1]
+	%29 = load i16* %28, align 2		; <i16> [#uses=1]
+	store i16 %27, i16* null, align 8
+	store i16 %29, i16* null, align 2
+	br i1 false, label %bb1132, label %bb110
+
+bb110:		; preds = %bb94
+	switch i32 %6, label %bb1078 [
+		i32 30, label %bb960
+		i32 32, label %bb801
+		i32 38, label %bb809
+		i32 78, label %bb1066
+	]
+
+bb801:		; preds = %bb110
+	unreachable
+
+bb809:		; preds = %bb110
+	unreachable
+
+bb960:		; preds = %bb110
+	%30 = icmp eq i32 %argCount.0, 1		; <i1> [#uses=1]
+	br i1 %30, label %bb962, label %bb965
+
+bb962:		; preds = %bb960
+	unreachable
+
+bb965:		; preds = %bb960
+	unreachable
+
+bb1066:		; preds = %bb110
+	unreachable
+
+bb1078:		; preds = %bb110
+	unreachable
+
+bb1132:		; preds = %bb94, %entry
+	%extraToken.1 = phi i64 [ undef, %entry ], [ %extraToken.0, %bb94 ]		; <i64> [#uses=1]
+	%operation.0.rec = phi i32 [ 0, %entry ], [ %.rec, %bb94 ]		; <i32> [#uses=4]
+	%operation.0 = getelementptr %struct.YYToken* %1, i32 %operation.0.rec		; <%struct.YYToken*> [#uses=1]
+	br i1 false, label %bb1134, label %bb51
+
+bb1134:		; preds = %bb1132
+	ret void
+}
diff --git a/test/CodeGen/X86/2008-10-16-VecUnaryOp.ll b/test/CodeGen/X86/2008-10-16-VecUnaryOp.ll
new file mode 100644
index 0000000..de4c1e7
--- /dev/null
+++ b/test/CodeGen/X86/2008-10-16-VecUnaryOp.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2
+; PR2762
+define void @foo(<4 x i32>* %p, <4 x double>* %q) {
+  %n = load <4 x i32>* %p
+  %z = sitofp <4 x i32> %n to <4 x double>
+  store <4 x double> %z, <4 x double>* %q
+  ret void
+}
diff --git a/test/CodeGen/X86/2008-10-17-Asm64bitRConstraint.ll b/test/CodeGen/X86/2008-10-17-Asm64bitRConstraint.ll
new file mode 100644
index 0000000..b2e6061
--- /dev/null
+++ b/test/CodeGen/X86/2008-10-17-Asm64bitRConstraint.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -march=x86-64
+
+define void @test(i64 %x) nounwind {
+entry:
+	tail call void asm sideeffect "ASM: $0", "r,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind
+	ret void
+}
+
diff --git a/test/CodeGen/X86/2008-10-20-AsmDoubleInI32.ll b/test/CodeGen/X86/2008-10-20-AsmDoubleInI32.ll
new file mode 100644
index 0000000..353d1c7
--- /dev/null
+++ b/test/CodeGen/X86/2008-10-20-AsmDoubleInI32.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -march=x86-64
+
+; from gcc.c-torture/compile/920520-1.c
+
+define i32 @g() nounwind {
+entry:
+	call void asm sideeffect "$0", "r"(double 1.500000e+00) nounwind
+	ret i32 0
+}
+
diff --git a/test/CodeGen/X86/2008-10-24-FlippedCompare.ll b/test/CodeGen/X86/2008-10-24-FlippedCompare.ll
new file mode 100644
index 0000000..421b931
--- /dev/null
+++ b/test/CodeGen/X86/2008-10-24-FlippedCompare.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o - | not grep {ucomiss\[^,\]*esp}
+
+define void @f(float %wt) {
+entry:
+	%0 = fcmp ogt float %wt, 0.000000e+00		; <i1> [#uses=1]
+	%1 = tail call i32 @g(i32 44)		; <i32> [#uses=3]
+	%2 = inttoptr i32 %1 to i8*		; <i8*> [#uses=2]
+	br i1 %0, label %bb, label %bb1
+
+bb:		; preds = %entry
+	ret void
+
+bb1:		; preds = %entry
+	ret void
+}
+
+declare i32 @g(i32)
diff --git a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
new file mode 100644
index 0000000..afeb358
--- /dev/null
+++ b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& not grep {Number of register spills}
+
+define fastcc void @fourn(double* %data, i32 %isign) nounwind {
+entry:
+	br label %bb
+
+bb:		; preds = %bb, %entry
+	%indvar93 = phi i32 [ 0, %entry ], [ %idim.030, %bb ]		; <i32> [#uses=2]
+	%idim.030 = add i32 %indvar93, 1		; <i32> [#uses=1]
+	%0 = add i32 %indvar93, 2		; <i32> [#uses=1]
+	%1 = icmp sgt i32 %0, 2		; <i1> [#uses=1]
+	br i1 %1, label %bb30.loopexit, label %bb
+
+bb3:		; preds = %bb30.loopexit, %bb25, %bb3
+	%2 = load i32* null, align 4		; <i32> [#uses=1]
+	%3 = mul i32 %2, 0		; <i32> [#uses=1]
+	%4 = icmp slt i32 0, %3		; <i1> [#uses=1]
+	br i1 %4, label %bb18, label %bb3
+
+bb18:		; preds = %bb3
+	%5 = fdiv double %11, 0.000000e+00		; <double> [#uses=1]
+	%6 = tail call double @sin(double %5) nounwind readonly		; <double> [#uses=1]
+	br label %bb24.preheader
+
+bb22.preheader:		; preds = %bb24.preheader, %bb22.preheader
+	br label %bb22.preheader
+
+bb25:		; preds = %bb24.preheader
+	%7 = fmul double 0.000000e+00, %6		; <double> [#uses=0]
+	%8 = add i32 %i3.122100, 0		; <i32> [#uses=1]
+	%9 = icmp sgt i32 %8, 0		; <i1> [#uses=1]
+	br i1 %9, label %bb3, label %bb24.preheader
+
+bb24.preheader:		; preds = %bb25, %bb18
+	%i3.122100 = or i32 0, 1		; <i32> [#uses=2]
+	%10 = icmp slt i32 0, %i3.122100		; <i1> [#uses=1]
+	br i1 %10, label %bb25, label %bb22.preheader
+
+bb30.loopexit:		; preds = %bb
+	%11 = fmul double 0.000000e+00, 0x401921FB54442D1C		; <double> [#uses=1]
+	br label %bb3
+}
+
+declare double @sin(double) nounwind readonly
diff --git a/test/CodeGen/X86/2008-10-27-StackRealignment.ll b/test/CodeGen/X86/2008-10-27-StackRealignment.ll
new file mode 100644
index 0000000..784bc72
--- /dev/null
+++ b/test/CodeGen/X86/2008-10-27-StackRealignment.ll
@@ -0,0 +1,22 @@
+; Linux doesn't support stack realignment for functions with allocas (PR2888).
+; Until it does, we shouldn't use movaps to access the stack.  On targets with
+; sufficiently aligned stack (e.g. darwin) we should.
+
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mcpu=yonah | not grep movaps
+; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=yonah | grep movaps | count 2
+
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+  
+define void @foo(i32 %t) nounwind {
+  %tmp1210 = alloca i8, i32 32, align 4
+  call void @llvm.memset.i64(i8* %tmp1210, i8 0, i64 32, i32 4)
+  
+  %x = alloca i8, i32 %t
+  call void @dummy(i8* %x)
+  ret void
+}
+
+declare void @dummy(i8* %x)
+declare void @llvm.memset.i64(i8*, i8, i64, i32) nounwind
diff --git a/test/CodeGen/X86/2008-10-29-ExpandVAARG.ll b/test/CodeGen/X86/2008-10-29-ExpandVAARG.ll
new file mode 100644
index 0000000..7ad94f1
--- /dev/null
+++ b/test/CodeGen/X86/2008-10-29-ExpandVAARG.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86
+; PR2977
+define i8* @ap_php_conv_p2(){
+entry:
+        %ap.addr = alloca i8*           ; <i8**> [#uses=36]
+        br label %sw.bb301
+sw.bb301:
+        %0 = va_arg i8** %ap.addr, i64          ; <i64> [#uses=1]
+        br label %sw.bb301
+}
diff --git a/test/CodeGen/X86/2008-11-03-F80VAARG.ll b/test/CodeGen/X86/2008-11-03-F80VAARG.ll
new file mode 100644
index 0000000..507799b
--- /dev/null
+++ b/test/CodeGen/X86/2008-11-03-F80VAARG.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86 -o - | not grep 10
+
+declare void @llvm.va_start(i8*) nounwind
+
+declare void @llvm.va_copy(i8*, i8*) nounwind
+
+declare void @llvm.va_end(i8*) nounwind
+
+define x86_fp80 @test(...) nounwind {
+	%ap = alloca i8*		; <i8**> [#uses=3]
+	%v1 = bitcast i8** %ap to i8*		; <i8*> [#uses=1]
+	call void @llvm.va_start(i8* %v1)
+	%t1 = va_arg i8** %ap, x86_fp80		; <x86_fp80> [#uses=1]
+	%t2 = va_arg i8** %ap, x86_fp80		; <x86_fp80> [#uses=1]
+	%t = fadd x86_fp80 %t1, %t2		; <x86_fp80> [#uses=1]
+	ret x86_fp80 %t
+}
diff --git a/test/CodeGen/X86/2008-11-06-testb.ll b/test/CodeGen/X86/2008-11-06-testb.ll
new file mode 100644
index 0000000..f8f317c
--- /dev/null
+++ b/test/CodeGen/X86/2008-11-06-testb.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep testb
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.5"
+	%struct.x = type <{ i8, i8, i16 }>
+
+define i32 @foo(%struct.x* %p) nounwind {
+entry:
+	%0 = getelementptr %struct.x* %p, i32 0, i32 0		; <i8*> [#uses=1]
+	store i8 55, i8* %0, align 1
+	%1 = bitcast %struct.x* %p to i32*		; <i32*> [#uses=1]
+	%2 = load i32* %1, align 1		; <i32> [#uses=1]
+	%3 = and i32 %2, 512		; <i32> [#uses=1]
+	%4 = icmp eq i32 %3, 0		; <i1> [#uses=1]
+	br i1 %4, label %bb5, label %bb
+
+bb:		; preds = %entry
+	%5 = tail call i32 (...)* @xx() nounwind		; <i32> [#uses=1]
+	ret i32 %5
+
+bb5:		; preds = %entry
+	ret i32 0
+}
+
+declare i32 @xx(...)
diff --git a/test/CodeGen/X86/2008-11-13-inlineasm-3.ll b/test/CodeGen/X86/2008-11-13-inlineasm-3.ll
new file mode 100644
index 0000000..1dc97fc
--- /dev/null
+++ b/test/CodeGen/X86/2008-11-13-inlineasm-3.ll
@@ -0,0 +1,19 @@
+; RUN:  llc < %s -mtriple=i686-pc-linux-gnu
+; PR 1779
+; Using 'A' constraint and a tied constraint together used to crash.
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i686-pc-linux-gnu"
+	%struct.linux_dirent64 = type { i64, i64, i16, i8, [0 x i8] }
+
+define i32 @sys_getdents64(i32 %fd, %struct.linux_dirent64* %dirent, i32 %count) {
+entry:
+	br i1 true, label %cond_next29, label %UnifiedReturnBlock
+
+cond_next29:		; preds = %entry
+	%tmp83 = call i32 asm sideeffect "1:\09movl %eax,0($2)\0A2:\09movl %edx,4($2)\0A3:\0A.section .fixup,\22ax\22\0A4:\09movl $3,$0\0A\09jmp 3b\0A.previous\0A .section __ex_table,\22a\22\0A .balign 4 \0A .long 1b,4b\0A .previous\0A .section __ex_table,\22a\22\0A .balign 4 \0A .long 2b,4b\0A .previous\0A", "=r,A,r,i,0,~{dirflag},~{fpsr},~{flags}"(i64 0, i64* null, i32 -14, i32 0) nounwind		; <i32> [#uses=0]
+        br label %UnifiedReturnBlock
+
+UnifiedReturnBlock:		; preds = %entry
+	ret i32 -14
+}
diff --git a/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll b/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll
new file mode 100644
index 0000000..2e114ab
--- /dev/null
+++ b/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu | grep -- -1985 | count 1
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i686-pc-linux-gnu"
+
+define zeroext i16 @a(i16 zeroext %x) nounwind {
+entry:
+	%div = udiv i16 %x, 33		; <i32> [#uses=1]
+	ret i16 %div
+}
diff --git a/test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll b/test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll
new file mode 100644
index 0000000..7c811af
--- /dev/null
+++ b/test/CodeGen/X86/2008-11-29-DivideConstant16bitSigned.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu | grep -- -1985
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i686-pc-linux-gnu"
+
+define signext i16 @a(i16 signext %x) nounwind {
+entry:
+	%div = sdiv i16 %x, 33		; <i32> [#uses=1]
+	ret i16 %div
+}
diff --git a/test/CodeGen/X86/2008-11-29-ULT-Sign.ll b/test/CodeGen/X86/2008-11-29-ULT-Sign.ll
new file mode 100644
index 0000000..6dca141
--- /dev/null
+++ b/test/CodeGen/X86/2008-11-29-ULT-Sign.ll
@@ -0,0 +1,22 @@
+; RUN:  llc < %s -mtriple=i686-pc-linux-gnu | grep "jns" | count 1
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i686-pc-linux-gnu"
+
+define i32 @a(i32 %x) nounwind {
+entry:
+	%cmp = icmp ult i32 %x, -2147483648		; <i1> [#uses=1]
+	br i1 %cmp, label %if.end, label %if.then
+
+if.then:		; preds = %entry
+	%call = call i32 (...)* @b()		; <i32> [#uses=0]
+	br label %if.end
+
+if.end:		; preds = %if.then, %entry
+	br label %return
+
+return:		; preds = %if.end
+	ret i32 undef
+}
+
+declare i32 @b(...)
+
diff --git a/test/CodeGen/X86/2008-12-01-SpillerAssert.ll b/test/CodeGen/X86/2008-12-01-SpillerAssert.ll
new file mode 100644
index 0000000..d96d806
--- /dev/null
+++ b/test/CodeGen/X86/2008-12-01-SpillerAssert.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu
+; PR3124
+
+        %struct.cpuinfo_x86 = type { i8, i8, i8, i8, i32, i8, i8, i8, i32, i32, [9 x i32], [16 x i8], [64 x i8], i32, i32, i32, i64, %struct.cpumask_t, i16, i16, i16, i16, i16, i16, i16, i16, i32 }
+        %struct.cpumask_t = type { [1 x i64] }
+@.str10 = external constant [70 x i8]           ; <[70 x i8]*> [#uses=1]
+
+declare i32 @printk(i8*, ...)
+
+define void @display_cacheinfo(%struct.cpuinfo_x86* %c) nounwind section ".cpuinit.text" {
+entry:
+        %asmtmp = tail call { i32, i32, i32, i32 } asm "cpuid", "={ax},={bx},={cx},={dx},0,2,~{dirflag},~{fpsr},~{flags}"(i32 -2147483643, i32 0) nounwind          ; <{ i32, i32, i32, i32 }> [#uses=0]
+        %0 = tail call i32 (i8*, ...)* @printk(i8* getelementptr ([70 x i8]* @.str10, i32 0, i64 0), i32 0, i32 0, i32 0, i32 0) nounwind           ; <i32> [#uses=0]
+        unreachable
+}
diff --git a/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll b/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll
new file mode 100644
index 0000000..1f8bd45
--- /dev/null
+++ b/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin | not grep lea
+; The inner loop should use [reg] addressing, not [reg+reg] addressing.
+; rdar://6403965
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.5"
+
+define i8* @test(i8* %Q, i32* %L) nounwind {
+entry:
+	br label %bb1
+
+bb:		; preds = %bb1, %bb1
+	%indvar.next = add i32 %P.0.rec, 1		; <i32> [#uses=1]
+	br label %bb1
+
+bb1:		; preds = %bb, %entry
+	%P.0.rec = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]		; <i32> [#uses=3]
+	%P.0 = getelementptr i8* %Q, i32 %P.0.rec		; <i8*> [#uses=2]
+	%0 = load i8* %P.0, align 1		; <i8> [#uses=1]
+	switch i8 %0, label %bb3 [
+		i8 12, label %bb
+		i8 42, label %bb
+	]
+
+bb3:		; preds = %bb1
+	%P.0.sum = add i32 %P.0.rec, 2		; <i32> [#uses=1]
+	%1 = getelementptr i8* %Q, i32 %P.0.sum		; <i8*> [#uses=1]
+	store i8 4, i8* %1, align 1
+	ret i8* %P.0
+}
diff --git a/test/CodeGen/X86/2008-12-02-IllegalResultType.ll b/test/CodeGen/X86/2008-12-02-IllegalResultType.ll
new file mode 100644
index 0000000..4b72cb9
--- /dev/null
+++ b/test/CodeGen/X86/2008-12-02-IllegalResultType.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s
+; PR3117
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+@g_118 = external global i8		; <i8*> [#uses=1]
+@g_7 = external global i32		; <i32*> [#uses=1]
+
+define i32 @func_73(i32 %p_74) nounwind {
+entry:
+	%0 = load i32* @g_7, align 4		; <i32> [#uses=1]
+	%1 = or i8 0, 118		; <i8> [#uses=1]
+	%2 = zext i8 %1 to i64		; <i64> [#uses=1]
+	%3 = icmp ne i32 %0, 0		; <i1> [#uses=1]
+	%4 = zext i1 %3 to i64		; <i64> [#uses=1]
+	%5 = or i64 %4, -758998846		; <i64> [#uses=3]
+	%6 = icmp sle i64 %2, %5		; <i1> [#uses=1]
+	%7 = zext i1 %6 to i8		; <i8> [#uses=1]
+	%8 = or i8 %7, 118		; <i8> [#uses=1]
+	%9 = zext i8 %8 to i64		; <i64> [#uses=1]
+	%10 = icmp sle i64 %9, 0		; <i1> [#uses=1]
+	%11 = zext i1 %10 to i8		; <i8> [#uses=1]
+	%12 = or i8 %11, 118		; <i8> [#uses=1]
+	%13 = zext i8 %12 to i64		; <i64> [#uses=1]
+	%14 = icmp sle i64 %13, %5		; <i1> [#uses=1]
+	%15 = zext i1 %14 to i8		; <i8> [#uses=1]
+	%16 = or i8 %15, 118		; <i8> [#uses=1]
+	%17 = zext i8 %16 to i64		; <i64> [#uses=1]
+	%18 = icmp sle i64 %17, 0		; <i1> [#uses=1]
+	%19 = zext i1 %18 to i8		; <i8> [#uses=1]
+	%20 = or i8 %19, 118		; <i8> [#uses=1]
+	%21 = zext i8 %20 to i64		; <i64> [#uses=1]
+	%22 = icmp sle i64 %21, %5		; <i1> [#uses=1]
+	%23 = zext i1 %22 to i8		; <i8> [#uses=1]
+	%24 = or i8 %23, 118		; <i8> [#uses=1]
+	store i8 %24, i8* @g_118, align 1
+	ret i32 undef
+}
diff --git a/test/CodeGen/X86/2008-12-02-dagcombine-1.ll b/test/CodeGen/X86/2008-12-02-dagcombine-1.ll
new file mode 100644
index 0000000..fe5bff3
--- /dev/null
+++ b/test/CodeGen/X86/2008-12-02-dagcombine-1.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86 | grep "(%esp)" | count 2
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.5"
+; a - a should be found and removed, leaving refs to only L and P
+define i8* @test(i8* %a, i8* %L, i8* %P) nounwind {
+entry:
+        %0 = ptrtoint i8* %a to i32
+        %1 = sub i32 -2, %0
+        %2 = ptrtoint i8* %P to i32
+        %3 = sub i32 0, %2
+        %4 = ptrtoint i8* %L to i32
+        %5 = add i32 %4, %3
+	%6 = add i32 %5, %1         	; <i32> [#uses=1]
+	%7 = getelementptr i8* %a, i32 %6		; <i8*> [#uses=1]
+	br label %return
+
+return:		; preds = %bb3
+	ret i8* %7
+}
diff --git a/test/CodeGen/X86/2008-12-02-dagcombine-2.ll b/test/CodeGen/X86/2008-12-02-dagcombine-2.ll
new file mode 100644
index 0000000..4cb1b42
--- /dev/null
+++ b/test/CodeGen/X86/2008-12-02-dagcombine-2.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86 | grep "(%esp)" | count 2
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.5"
+; a - a should be found and removed, leaving refs to only L and P
+define i8* @test(i8* %a, i8* %L, i8* %P) nounwind {
+entry:
+        %0 = ptrtoint i8* %a to i32
+        %1 = ptrtoint i8* %P to i32
+        %2 = sub i32 %1, %0
+        %3 = ptrtoint i8* %L to i32
+	%4 = sub i32 %2, %3         	; <i32> [#uses=1]
+	%5 = getelementptr i8* %a, i32 %4		; <i8*> [#uses=1]
+	br label %return
+
+return:		; preds = %bb3
+	ret i8* %5
+}
diff --git a/test/CodeGen/X86/2008-12-02-dagcombine-3.ll b/test/CodeGen/X86/2008-12-02-dagcombine-3.ll
new file mode 100644
index 0000000..d5a676a
--- /dev/null
+++ b/test/CodeGen/X86/2008-12-02-dagcombine-3.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=x86 | grep add | count 2
+; RUN: llc < %s -march=x86 | grep sub | grep -v subsections | count 1
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.5"
+; this should be rearranged to have two +s and one -
+define i32 @test(i8* %a, i8* %L, i8* %P) nounwind {
+entry:
+        %0 = ptrtoint i8* %P to i32
+        %1 = sub i32 -2, %0
+        %2 = ptrtoint i8* %L to i32
+        %3 = ptrtoint i8* %a to i32
+	%4 = sub i32 %2, %3         	; <i32> [#uses=1]
+	%5 = add i32 %1, %4		; <i32> [#uses=1]
+	br label %return
+
+return:		; preds = %bb3
+	ret i32 %5
+}
diff --git a/test/CodeGen/X86/2008-12-05-SpillerCrash.ll b/test/CodeGen/X86/2008-12-05-SpillerCrash.ll
new file mode 100644
index 0000000..7fd2e6f
--- /dev/null
+++ b/test/CodeGen/X86/2008-12-05-SpillerCrash.ll
@@ -0,0 +1,237 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin9.5 -mattr=+sse41 -relocation-model=pic
+
+	%struct.XXActiveTextureTargets = type { i64, i64, i64, i64, i64, i64 }
+	%struct.XXAlphaTest = type { float, i16, i8, i8 }
+	%struct.XXArrayRange = type { i8, i8, i8, i8 }
+	%struct.XXBlendMode = type { i16, i16, i16, i16, %struct.ZZIColor4, i16, i16, i8, i8, i8, i8 }
+	%struct.XXBBRec = type opaque
+	%struct.XXBBstate = type { %struct.ZZGTransformKey, %struct.ZZGTransformKey, %struct.XXProgramLimits, %struct.XXProgramLimits, i8, i8, i8, i8, %struct.ZZSBB, %struct.ZZSBB, [4 x %struct.ZZSBB], %struct.ZZSBB, %struct.ZZSBB, %struct.ZZSBB, [8 x %struct.ZZSBB], %struct.ZZSBB }
+	%struct.XXClearColor = type { double, %struct.ZZIColor4, %struct.ZZIColor4, float, i32 }
+	%struct.XXClipPlane = type { i32, [6 x %struct.ZZIColor4] }
+	%struct.XXColorBB = type { i16, i8, i8, [8 x i16], i8, i8, i8, i8 }
+	%struct.XXColorMatrix = type { [16 x float]*, %struct.XXImagingColorScale }
+	%struct.XXConfig = type { i32, float, %struct.ZZGTransformKey, %struct.ZZGTransformKey, i8, i8, i8, i8, i8, i8, i16, i32, i32, i32, %struct.XXPixelFormatInfo, %struct.XXPointLineLimits, %struct.XXPointLineLimits, %struct.XXRenderFeatures, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.XXTextureLimits, [3 x %struct.XXPipelineProgramLimits], %struct.XXFragmentProgramLimits, %struct.XXVertexProgramLimits, %struct.XXGeometryShaderLimits, %struct.XXProgramLimits, %struct.XXGeometryShaderLimits, %struct.XXVertexDescriptor*, %struct.XXVertexDescriptor*, [3 x i32], [4 x i32], [0 x i32] }
+	%struct.XXContextRec = type { float, float, float, float, float, float, float, float, %struct.ZZIColor4, %struct.ZZIColor4, %struct.YYFPContext, [16 x [2 x %struct.PPStreamToken]], %struct.ZZGProcessor, %struct._YYConstants*, void (%struct.XXContextRec*, i32, i32, %struct.YYFragmentAttrib*, %struct.YYFragmentAttrib*, i32)*, %struct._YYFunction*, %struct.PPStreamToken*, void (%struct.XXContextRec*, %struct.XXVertex*)*, void (%struct.XXContextRec*, %struct.XXVertex*, %struct.XXVertex*)*, void (%struct.XXContextRec*, %struct.XXVertex*, %struct.XXVertex*, %struct.XXVertex*)*, %struct._YYFunction*, %struct._YYFunction*, %struct._YYFunction*, [4 x i32], [3 x i32], [3 x i32], float, float, float, %struct.PPStreamToken, i32, %struct.ZZSDrawable, %struct.XXFramebufferRec*, %struct.XXFramebufferRec*, %struct.XXRect, %struct.XXFormat, %struct.XXFormat, %struct.XXFormat, %struct.XXConfig*, %struct.XXBBstate, %struct.XXBBstate, %struct.XXSharedRec*, %struct.XXState*, %struct.XXPluginState*, %struct.XXVertex*, %struct.YYFragmentAttrib*, %struct.YYFragmentAttrib*, %struct.YYFragmentAttrib*, %struct.XXProgramRec*, %struct.XXPipelineProgramRec*, %struct.YYTextures, %struct.XXStippleData, i8, i16, i8, i32, i32, i32, %struct.XXQueryRec*, %struct.XXQueryRec*, %struct.XXFallback, { void (i8*, i8*, i32, i8*)* } }
+	%struct.XXConvolution = type { %struct.ZZIColor4, %struct.XXImagingColorScale, i16, i16, [0 x i32], float*, i32, i32 }
+	%struct.XXCurrent16A = type { [8 x %struct.ZZIColor4], [16 x %struct.ZZIColor4], %struct.ZZIColor4, %struct.XXPointLineLimits, float, %struct.XXPointLineLimits, float, [4 x float], %struct.XXPointLineLimits, float, float, float, float, i8, i8, i8, i8 }
+	%struct.XXDepthTest = type { i16, i16, i8, i8, i8, i8, double, double }
+	%struct.XXDrawableWindow = type { i32, i32, i32 }
+	%struct.XXFallback = type { float*, %struct.XXRenderDispatch*, %struct.XXConfig*, i8*, i8*, i32, i32 }
+	%struct.XXFenceRec = type opaque
+	%struct.XXFixedFunction = type { %struct.PPStreamToken* }
+	%struct.XXFogMode = type { %struct.ZZIColor4, float, float, float, float, float, i16, i16, i16, i8, i8 }
+	%struct.XXFormat = type { i32, i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, i8, i32, i32, i32 }
+	%struct.XXFragmentProgramLimits = type { i32, i32, i32, i16, i16, i32, i32 }
+	%struct.XXFramebufferAttachment = type { i16, i16, i32, i32, i32 }
+	%struct.XXFramebufferData = type { [10 x %struct.XXFramebufferAttachment], [8 x i16], i16, i16, i16, i8, i8, i32, i32 }
+	%struct.XXFramebufferRec = type { %struct.XXFramebufferData*, %struct.XXPluginFramebufferData*, %struct.XXFormat, i8, i8, i8, i8 }
+	%struct.XXGeometryShaderLimits = type { i32, i32, i32, i32, i32 }
+	%struct.XXHintMode = type { i16, i16, i16, i16, i16, i16, i16, i16, i16, i16 }
+	%struct.XXHistogram = type { %struct.XXProgramLimits*, i32, i16, i8, i8 }
+	%struct.XXImagingColorScale = type { %struct.ZZTCoord2, %struct.ZZTCoord2, %struct.ZZTCoord2, %struct.ZZTCoord2 }
+	%struct.XXImagingSubset = type { %struct.XXConvolution, %struct.XXConvolution, %struct.XXConvolution, %struct.XXColorMatrix, %struct.XXMinmax, %struct.XXHistogram, %struct.XXImagingColorScale, %struct.XXImagingColorScale, %struct.XXImagingColorScale, %struct.XXImagingColorScale, i32, [0 x i32] }
+	%struct.XXLight = type { %struct.ZZIColor4, %struct.ZZIColor4, %struct.ZZIColor4, %struct.ZZIColor4, %struct.XXPointLineLimits, float, float, float, float, float, %struct.XXPointLineLimits, float, %struct.XXPointLineLimits, float, %struct.XXPointLineLimits, float, float, float, float, float }
+	%struct.XXLightModel = type { %struct.ZZIColor4, [8 x %struct.XXLight], [2 x %struct.XXMaterial], i32, i16, i16, i16, i8, i8, i8, i8, i8, i8 }
+	%struct.XXLightProduct = type { %struct.ZZIColor4, %struct.ZZIColor4, %struct.ZZIColor4 }
+	%struct.XXLineMode = type { float, i32, i16, i16, i8, i8, i8, i8 }
+	%struct.XXLogicOp = type { i16, i8, i8 }
+	%struct.XXMaskMode = type { i32, [3 x i32], i8, i8, i8, i8, i8, i8, i8, i8 }
+	%struct.XXMaterial = type { %struct.ZZIColor4, %struct.ZZIColor4, %struct.ZZIColor4, %struct.ZZIColor4, float, float, float, float, [8 x %struct.XXLightProduct], %struct.ZZIColor4, [8 x i32] }
+	%struct.XXMinmax = type { %struct.XXMinmaxTable*, i16, i8, i8, [0 x i32] }
+	%struct.XXMinmaxTable = type { %struct.ZZIColor4, %struct.ZZIColor4 }
+	%struct.XXMipmaplevel = type { [4 x i32], [4 x i32], [4 x float], [4 x i32], i32, i32, float*, i8*, i16, i16, i16, i16, [2 x float] }
+	%struct.XXMultisample = type { float, i8, i8, i8, i8, i8, i8, i8, i8 }
+	%struct.XXPipelineProgramData = type { i16, i8, i8, i32, %struct.PPStreamToken*, i64, %struct.ZZIColor4*, i32, [0 x i32] }
+	%struct.XXPipelineProgramLimits = type { i32, i16, i16, i32, i16, i16, i32, i32 }
+	%struct.XXPipelineProgramRec = type { %struct.XXPipelineProgramData*, %struct.PPStreamToken*, %struct.XXContextRec*, { %struct._YYFunction*, \2, \2, [20 x i32], [64 x i32], i32, i32, i32 }*, i32, i32 }
+	%struct.XXPipelineProgramState = type { i8, i8, i8, i8, [0 x i32], %struct.ZZIColor4* }
+	%struct.XXPixelFormatInfo = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
+	%struct.XXPixelMap = type { i32*, float*, float*, float*, float*, float*, float*, float*, float*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+	%struct.XXPixelMode = type { float, float, %struct.XXPixelStore, %struct.XXPixelTransfer, %struct.XXPixelMap, %struct.XXImagingSubset, i32, i32 }
+	%struct.XXPixelPack = type { i32, i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, i8 }
+	%struct.XXPixelStore = type { %struct.XXPixelPack, %struct.XXPixelPack }
+	%struct.XXPixelTransfer = type { float, float, float, float, float, float, float, float, float, float, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }
+	%struct.XXPluginFramebufferData = type { [10 x %struct.XXTextureRec*], i8, i8, i8, i8 }
+	%struct.XXPluginProgramData = type { [3 x %struct.XXPipelineProgramRec*], %struct.XXBBRec**, i32, [0 x i32] }
+	%struct.XXPluginState = type { [16 x [5 x %struct.XXTextureRec*]], [3 x %struct.XXTextureRec*], [3 x %struct.XXPipelineProgramRec*], [3 x %struct.XXPipelineProgramRec*], %struct.XXProgramRec*, %struct.XXVertexArrayRec*, [16 x %struct.XXBBRec*], %struct.XXFramebufferRec*, %struct.XXFramebufferRec* }
+	%struct.XXPointLineLimits = type { float, float, float }
+	%struct.XXPointMode = type { float, float, float, float, %struct.XXPointLineLimits, float, i8, i8, i8, i8, i16, i16, i32, i16, i16 }
+	%struct.XXPolygonMode = type { [128 x i8], float, float, i16, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8 }
+	%struct.XXProgramData = type { i32, i32, i32, i32, %struct.PPStreamToken*, i32*, i32, i32, i32, i32, i8, i8, i8, i8, [0 x i32] }
+	%struct.XXProgramLimits = type { i32, i32, i32, i32 }
+	%struct.XXProgramRec = type { %struct.XXProgramData*, %struct.XXPluginProgramData*, %struct.ZZIColor4**, i32 }
+	%struct.XXQueryRec = type { i32, i32, %struct.XXQueryRec* }
+	%struct.XXRect = type { i32, i32, i32, i32, i32, i32 }
+	%struct.XXRegisterCombiners = type { i8, i8, i8, i8, i32, [2 x %struct.ZZIColor4], [8 x %struct.XXRegisterCombinersPerStageState], %struct.XXRegisterCombinersFinalStageState }
+	%struct.XXRegisterCombinersFinalStageState = type { i8, i8, i8, i8, [7 x %struct.XXRegisterCombinersPerVariableState] }
+	%struct.XXRegisterCombinersPerPortionState = type { [4 x %struct.XXRegisterCombinersPerVariableState], i8, i8, i8, i8, i16, i16, i16, i16, i16, i16 }
+	%struct.XXRegisterCombinersPerStageState = type { [2 x %struct.XXRegisterCombinersPerPortionState], [2 x %struct.ZZIColor4] }
+	%struct.XXRegisterCombinersPerVariableState = type { i16, i16, i16, i16 }
+	%struct.XXRenderDispatch = type { void (%struct.XXContextRec*, i32, float)*, void (%struct.XXContextRec*, i32)*, i32 (%struct.XXContextRec*, i32, i32, i32, i32, i32, i32, i8*, i32, %struct.XXBBRec*)*, i32 (%struct.XXContextRec*, %struct.XXVertex*, i32, i32, i32, i32, i8*, i32, %struct.XXBBRec*)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32, i32, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32, float, float, i8*, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex**, i32)*, void (%struct.XXContextRec*, %struct.XXVertex**, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex**, i32, i32)*, i8* (%struct.XXContextRec*, i32, i32*)*, void (%struct.XXContextRec*, i32, i32, i32)*, i8* (%struct.XXContextRec*, i32, i32, i32, i32, i32)*, void (%struct.XXContextRec*, i32, i32, i32, i32, i32, i8*)*, void (%struct.XXContextRec*)*, void (%struct.XXContextRec*)*, void (%struct.XXContextRec*)*, void (%struct.XXContextRec*, %struct.XXFenceRec*)*, void (%struct.XXContextRec*, i32, %struct.XXQueryRec*)*, void (%struct.XXContextRec*, %struct.XXQueryRec*)*, i32 (%struct.XXContextRec*, i32, i32, i32, i32, i32, i8*, %struct.ZZIColor4*, %struct.XXCurrent16A*)*, i32 (%struct.XXContextRec*, %struct.XXTextureRec*, i32, i32, i32, i32, i32, i32, i32, i32, i32)*, i32 (%struct.XXContextRec*, %struct.XXTextureRec*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i32, %struct.XXBBRec*)*, i32 (%struct.XXContextRec*, %struct.XXTextureRec*, i32)*, i32 (%struct.XXContextRec*, %struct.XXBBRec*, i32, i32, i8*)*, void (%struct.XXContextRec*, i32)*, void (%struct.XXContextRec*)*, void (%struct.XXContextRec*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)*, i32 (%struct.XXContextRec*, %struct.XXQueryRec*)*, void (%struct.XXContextRec*)* }
+	%struct.XXRenderFeatures = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
+	%struct.XXSWRSurfaceRec = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i8*, [4 x i8*], i32 }
+	%struct.XXScissorTest = type { %struct.XXProgramLimits, i8, i8, i8, i8 }
+	%struct.XXSharedData = type {  }
+	%struct.XXSharedRec = type { %struct.__ZZarrayelementDrawInfoListType, %struct.XXSharedData*, i32, i8, i8, i8, i8 }
+	%struct.XXState = type <{ i16, i16, i16, i16, i32, i32, [256 x %struct.ZZIColor4], [128 x %struct.ZZIColor4], %struct.XXViewport, %struct.XXTransform, %struct.XXLightModel, %struct.XXActiveTextureTargets, %struct.XXAlphaTest, %struct.XXBlendMode, %struct.XXClearColor, %struct.XXColorBB, %struct.XXDepthTest, %struct.XXArrayRange, %struct.XXFogMode, %struct.XXHintMode, %struct.XXLineMode, %struct.XXLogicOp, %struct.XXMaskMode, %struct.XXPixelMode, %struct.XXPointMode, %struct.XXPolygonMode, %struct.XXScissorTest, i32, %struct.XXStencilTest, [8 x %struct.XXTextureMode], [16 x %struct.XXTextureImageMode], %struct.XXArrayRange, [8 x %struct.XXTextureCoordGen], %struct.XXClipPlane, %struct.XXMultisample, %struct.XXRegisterCombiners, %struct.XXArrayRange, %struct.XXArrayRange, [3 x %struct.XXPipelineProgramState], %struct.XXArrayRange, %struct.XXTransformFeedback, i32*, %struct.XXFixedFunction, [1 x i32] }>
+	%struct.XXStencilTest = type { [3 x { i32, i32, i16, i16, i16, i16 }], i32, [4 x i8] }
+	%struct.XXStippleData = type { i32, i16, i16, [32 x [32 x i8]] }
+	%struct.XXTextureCoordGen = type { { i16, i16, %struct.ZZIColor4, %struct.ZZIColor4 }, { i16, i16, %struct.ZZIColor4, %struct.ZZIColor4 }, { i16, i16, %struct.ZZIColor4, %struct.ZZIColor4 }, { i16, i16, %struct.ZZIColor4, %struct.ZZIColor4 }, i8, i8, i8, i8 }
+	%struct.XXTextureGeomState = type { i16, i16, i16, i16, i16, i8, i8, i8, i8, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, [6 x i16], [6 x i16] }
+	%struct.XXTextureImageMode = type { float }
+	%struct.XXTextureLevel = type { i32, i32, i16, i16, i16, i8, i8, i16, i16, i16, i16, i8* }
+	%struct.XXTextureLimits = type { float, float, i16, i16, i16, i16, i16, i16, i16, i16, i16, i8, i8, [16 x i16], i32 }
+	%struct.XXTextureMode = type { %struct.ZZIColor4, i32, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, float, float, i16, i16, i16, i16, i16, i16, [4 x i16], i8, i8, i8, i8, [3 x float], [4 x float], float, float }
+	%struct.XXTextureParamState = type { i16, i16, i16, i16, i16, i16, %struct.ZZIColor4, float, float, float, float, i16, i16, i16, i16, float, i16, i8, i8, i32, i8* }
+	%struct.XXTextureRec = type { [4 x float], %struct.XXTextureState*, %struct.XXMipmaplevel*, %struct.XXMipmaplevel*, float, float, float, float, i8, i8, i8, i8, i16, i16, i16, i16, i32, float, [2 x %struct.PPStreamToken] }
+	%struct.XXTextureState = type { i16, i8, i8, i16, i16, float, i32, %struct.XXSWRSurfaceRec*, %struct.XXTextureParamState, %struct.XXTextureGeomState, i16, i16, i8*, %struct.XXTextureLevel, [1 x [15 x %struct.XXTextureLevel]] }
+	%struct.XXTransform = type <{ [24 x [16 x float]], [24 x [16 x float]], [16 x float], float, float, float, float, float, i8, i8, i8, i8, i32, i32, i32, i16, i16, i8, i8, i8, i8, i32 }>
+	%struct.XXTransformFeedback = type { i8, i8, i8, i8, [0 x i32], [16 x i32], [16 x i32] }
+	%struct.XXVertex = type { %struct.ZZIColor4, %struct.ZZIColor4, %struct.ZZIColor4, %struct.ZZIColor4, %struct.ZZIColor4, %struct.XXPointLineLimits, float, %struct.ZZIColor4, float, i8, i8, i8, i8, float, float, i32, i32, i32, i32, [4 x float], [2 x %struct.XXMaterial*], [2 x i32], [8 x %struct.ZZIColor4] }
+	%struct.XXVertexArrayRec = type opaque
+	%struct.XXVertexDescriptor = type { i8, i8, i8, i8, [0 x i32] }
+	%struct.XXVertexProgramLimits = type { i16, i16, i32, i32 }
+	%struct.XXViewport = type { float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, double, double, i32, i32, i32, i32, float, float, float, float }
+	%struct.ZZGColorTable = type { i32, i32, i32, i8* }
+	%struct.ZZGOperation = type { i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, %struct.ZZGColorTable, %struct.ZZGColorTable, %struct.ZZGColorTable }
+	%struct.ZZGProcessor = type { void (%struct.XXPixelMode*, %struct.ZZGOperation*, %struct._ZZGProcessorData*, %union._ZZGFunctionKey*)*, %struct._YYFunction*, %union._ZZGFunctionKey*, %struct._ZZGProcessorData* }
+	%struct.ZZGTransformKey = type { i32, i32 }
+	%struct.ZZIColor4 = type { float, float, float, float }
+	%struct.ZZSBB = type { i8* }
+	%struct.ZZSDrawable = type { %struct.ZZSWindowRec* }
+	%struct.ZZSWindowRec = type { %struct.ZZGTransformKey, %struct.ZZGTransformKey, i32, i32, %struct.ZZSDrawable, i8*, i8*, i8*, i8*, i8*, [4 x i8*], i32, i16, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8, %struct.XXDrawableWindow, i32, i32, i8*, i8* }
+	%struct.ZZTCoord2 = type { float, float }
+	%struct.YYFPContext = type { float, i32, i32, i32, float, [3 x float] }
+	%struct.YYFragmentAttrib = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, [8 x <4 x float>] }
+	%struct.YYTextures = type { [16 x %struct.XXTextureRec*] }
+	%struct.PPStreamToken = type { { i16, i16, i32 } }
+	%struct._ZZGProcessorData = type { void (i8*, i8*, i32, i32, i32, i32, i32, i32, i32)*, void (i8*, i8*, i32, i32, i32, i32, i32, i32, i32)*, i8* (i32)*, void (i8*)* }
+	%struct._YYConstants = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, float, float, float, float, float, float, float, float, float, float, float, float, [256 x float], [4096 x i8], [8 x float], [48 x float], [128 x float], [528 x i8], { void (i8*, i8*, i32, i8*)*, float (float)*, float (float)*, float (float)*, i32 (float)* } }
+	%struct._YYFunction = type opaque
+	%struct.__ZZarrayelementDrawInfoListType = type { i32, [40 x i8] }
+	%union._ZZGFunctionKey = type opaque
+@llvm.used = appending global [1 x i8*] [ i8* bitcast (void (%struct.XXContextRec*, i32, i32, %struct.YYFragmentAttrib*, %struct.YYFragmentAttrib*, i32)* @t to i8*) ], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define void @t(%struct.XXContextRec* %ctx, i32 %x, i32 %y, %struct.YYFragmentAttrib* %start, %struct.YYFragmentAttrib* %deriv, i32 %num_frags) nounwind {
+entry:
+	%tmp7485.i.i.i = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
+	%tmp8382.i.i.i = extractelement <4 x i32> zeroinitializer, i32 1		; <i32> [#uses=1]
+	%tmp8383.i.i.i = extractelement <4 x i32> zeroinitializer, i32 2		; <i32> [#uses=2]
+	%tmp8384.i.i.i = extractelement <4 x i32> zeroinitializer, i32 3		; <i32> [#uses=2]
+	br label %bb7551.i.i.i
+
+bb4426.i.i.i:		; preds = %bb7551.i.i.i
+	%0 = getelementptr %struct.XXMipmaplevel* null, i32 %tmp8383.i.i.i, i32 3		; <[4 x i32]*> [#uses=1]
+	%1 = bitcast [4 x i32]* %0 to <4 x i32>*		; <<4 x i32>*> [#uses=1]
+	%2 = load <4 x i32>* %1, align 16		; <<4 x i32>> [#uses=1]
+	%3 = getelementptr %struct.XXMipmaplevel* null, i32 %tmp8384.i.i.i, i32 3		; <[4 x i32]*> [#uses=1]
+	%4 = bitcast [4 x i32]* %3 to <4 x i32>*		; <<4 x i32>*> [#uses=1]
+	%5 = load <4 x i32>* %4, align 16		; <<4 x i32>> [#uses=1]
+	%6 = shufflevector <4 x i32> %2, <4 x i32> %5, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x i32>> [#uses=1]
+	%7 = bitcast <4 x i32> %6 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%8 = shufflevector <2 x i64> zeroinitializer, <2 x i64> %7, <2 x i32> < i32 1, i32 3 >		; <<2 x i64>> [#uses=1]
+	%9 = getelementptr %struct.XXMipmaplevel* null, i32 %tmp8382.i.i.i, i32 6		; <float**> [#uses=1]
+	%10 = load float** %9, align 4		; <float*> [#uses=1]
+	%11 = bitcast float* %10 to i8*		; <i8*> [#uses=1]
+	%12 = getelementptr %struct.XXMipmaplevel* null, i32 %tmp8383.i.i.i, i32 6		; <float**> [#uses=1]
+	%13 = load float** %12, align 4		; <float*> [#uses=1]
+	%14 = bitcast float* %13 to i8*		; <i8*> [#uses=1]
+	%15 = getelementptr %struct.XXMipmaplevel* null, i32 %tmp8384.i.i.i, i32 6		; <float**> [#uses=1]
+	%16 = load float** %15, align 4		; <float*> [#uses=1]
+	%17 = bitcast float* %16 to i8*		; <i8*> [#uses=1]
+	%tmp7308.i.i.i = and <2 x i64> zeroinitializer, %8		; <<2 x i64>> [#uses=1]
+	%18 = bitcast <2 x i64> %tmp7308.i.i.i to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%19 = mul <4 x i32> %18, zeroinitializer		; <<4 x i32>> [#uses=1]
+	%20 = add <4 x i32> %19, zeroinitializer		; <<4 x i32>> [#uses=3]
+	%21 = load i32* null, align 4		; <i32> [#uses=0]
+	%22 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> zeroinitializer) nounwind readnone		; <<4 x float>> [#uses=1]
+	%23 = fmul <4 x float> %22, < float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000 >		; <<4 x float>> [#uses=1]
+	%tmp2114.i119.i.i = extractelement <4 x i32> %20, i32 1		; <i32> [#uses=1]
+	%24 = shl i32 %tmp2114.i119.i.i, 2		; <i32> [#uses=1]
+	%25 = getelementptr i8* %11, i32 %24		; <i8*> [#uses=1]
+	%26 = bitcast i8* %25 to i32*		; <i32*> [#uses=1]
+	%27 = load i32* %26, align 4		; <i32> [#uses=1]
+	%28 = or i32 %27, -16777216		; <i32> [#uses=1]
+	%tmp1927.i120.i.i = insertelement <4 x i32> undef, i32 %28, i32 0		; <<4 x i32>> [#uses=1]
+	%29 = bitcast <4 x i32> %tmp1927.i120.i.i to <16 x i8>		; <<16 x i8>> [#uses=1]
+	%30 = shufflevector <16 x i8> %29, <16 x i8> < i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef >, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 >		; <<16 x i8>> [#uses=1]
+	%31 = bitcast <16 x i8> %30 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%32 = shufflevector <8 x i16> %31, <8 x i16> < i16 0, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 undef, i16 undef >, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 >		; <<8 x i16>> [#uses=1]
+	%33 = bitcast <8 x i16> %32 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%34 = shufflevector <4 x i32> %33, <4 x i32> undef, <4 x i32> < i32 2, i32 1, i32 0, i32 3 >		; <<4 x i32>> [#uses=1]
+	%35 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %34) nounwind readnone		; <<4 x float>> [#uses=1]
+	%36 = fmul <4 x float> %35, < float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000 >		; <<4 x float>> [#uses=1]
+	%tmp2113.i124.i.i = extractelement <4 x i32> %20, i32 2		; <i32> [#uses=1]
+	%37 = shl i32 %tmp2113.i124.i.i, 2		; <i32> [#uses=1]
+	%38 = getelementptr i8* %14, i32 %37		; <i8*> [#uses=1]
+	%39 = bitcast i8* %38 to i32*		; <i32*> [#uses=1]
+	%40 = load i32* %39, align 4		; <i32> [#uses=1]
+	%41 = or i32 %40, -16777216		; <i32> [#uses=1]
+	%tmp1963.i125.i.i = insertelement <4 x i32> undef, i32 %41, i32 0		; <<4 x i32>> [#uses=1]
+	%42 = bitcast <4 x i32> %tmp1963.i125.i.i to <16 x i8>		; <<16 x i8>> [#uses=1]
+	%43 = shufflevector <16 x i8> %42, <16 x i8> < i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef >, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 >		; <<16 x i8>> [#uses=1]
+	%44 = bitcast <16 x i8> %43 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%45 = shufflevector <8 x i16> %44, <8 x i16> < i16 0, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 undef, i16 undef >, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 >		; <<8 x i16>> [#uses=1]
+	%46 = bitcast <8 x i16> %45 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%47 = shufflevector <4 x i32> %46, <4 x i32> undef, <4 x i32> < i32 2, i32 1, i32 0, i32 3 >		; <<4 x i32>> [#uses=1]
+	%48 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %47) nounwind readnone		; <<4 x float>> [#uses=1]
+	%49 = fmul <4 x float> %48, < float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000 >		; <<4 x float>> [#uses=1]
+	%tmp2112.i129.i.i = extractelement <4 x i32> %20, i32 3		; <i32> [#uses=1]
+	%50 = shl i32 %tmp2112.i129.i.i, 2		; <i32> [#uses=1]
+	%51 = getelementptr i8* %17, i32 %50		; <i8*> [#uses=1]
+	%52 = bitcast i8* %51 to i32*		; <i32*> [#uses=1]
+	%53 = load i32* %52, align 4		; <i32> [#uses=1]
+	%54 = or i32 %53, -16777216		; <i32> [#uses=1]
+	%tmp1999.i130.i.i = insertelement <4 x i32> undef, i32 %54, i32 0		; <<4 x i32>> [#uses=1]
+	%55 = bitcast <4 x i32> %tmp1999.i130.i.i to <16 x i8>		; <<16 x i8>> [#uses=1]
+	%56 = shufflevector <16 x i8> %55, <16 x i8> < i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef >, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 >		; <<16 x i8>> [#uses=1]
+	%57 = bitcast <16 x i8> %56 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%58 = shufflevector <8 x i16> %57, <8 x i16> < i16 0, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 undef, i16 undef >, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 >		; <<8 x i16>> [#uses=1]
+	%59 = bitcast <8 x i16> %58 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%60 = shufflevector <4 x i32> %59, <4 x i32> undef, <4 x i32> < i32 2, i32 1, i32 0, i32 3 >		; <<4 x i32>> [#uses=1]
+	%61 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %60) nounwind readnone		; <<4 x float>> [#uses=1]
+	%62 = fmul <4 x float> %61, < float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000 >		; <<4 x float>> [#uses=1]
+	%63 = fmul <4 x float> %23, zeroinitializer		; <<4 x float>> [#uses=1]
+	%64 = fadd <4 x float> zeroinitializer, %63		; <<4 x float>> [#uses=1]
+	%65 = fmul <4 x float> %36, zeroinitializer		; <<4 x float>> [#uses=1]
+	%66 = fadd <4 x float> zeroinitializer, %65		; <<4 x float>> [#uses=1]
+	%67 = fmul <4 x float> %49, zeroinitializer		; <<4 x float>> [#uses=1]
+	%68 = fadd <4 x float> zeroinitializer, %67		; <<4 x float>> [#uses=1]
+	%69 = fmul <4 x float> %62, zeroinitializer		; <<4 x float>> [#uses=1]
+	%70 = fadd <4 x float> zeroinitializer, %69		; <<4 x float>> [#uses=1]
+	%tmp7452.i.i.i = bitcast <4 x float> %64 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp7454.i.i.i = and <4 x i32> %tmp7452.i.i.i, zeroinitializer		; <<4 x i32>> [#uses=1]
+	%tmp7459.i.i.i = or <4 x i32> %tmp7454.i.i.i, zeroinitializer		; <<4 x i32>> [#uses=1]
+	%tmp7460.i.i.i = bitcast <4 x i32> %tmp7459.i.i.i to <4 x float>		; <<4 x float>> [#uses=1]
+	%tmp7468.i.i.i = bitcast <4 x float> %66 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp7470.i.i.i = and <4 x i32> %tmp7468.i.i.i, zeroinitializer		; <<4 x i32>> [#uses=1]
+	%tmp7475.i.i.i = or <4 x i32> %tmp7470.i.i.i, zeroinitializer		; <<4 x i32>> [#uses=1]
+	%tmp7476.i.i.i = bitcast <4 x i32> %tmp7475.i.i.i to <4 x float>		; <<4 x float>> [#uses=1]
+	%tmp7479.i.i.i = bitcast <4 x float> %.279.1.i to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp7480.i.i.i = and <4 x i32> zeroinitializer, %tmp7479.i.i.i		; <<4 x i32>> [#uses=1]
+	%tmp7484.i.i.i = bitcast <4 x float> %68 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp7486.i.i.i = and <4 x i32> %tmp7484.i.i.i, %tmp7485.i.i.i		; <<4 x i32>> [#uses=1]
+	%tmp7491.i.i.i = or <4 x i32> %tmp7486.i.i.i, %tmp7480.i.i.i		; <<4 x i32>> [#uses=1]
+	%tmp7492.i.i.i = bitcast <4 x i32> %tmp7491.i.i.i to <4 x float>		; <<4 x float>> [#uses=1]
+	%tmp7495.i.i.i = bitcast <4 x float> %.380.1.i to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp7496.i.i.i = and <4 x i32> zeroinitializer, %tmp7495.i.i.i		; <<4 x i32>> [#uses=1]
+	%tmp7500.i.i.i = bitcast <4 x float> %70 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp7502.i.i.i = and <4 x i32> %tmp7500.i.i.i, zeroinitializer		; <<4 x i32>> [#uses=1]
+	%tmp7507.i.i.i = or <4 x i32> %tmp7502.i.i.i, %tmp7496.i.i.i		; <<4 x i32>> [#uses=1]
+	%tmp7508.i.i.i = bitcast <4 x i32> %tmp7507.i.i.i to <4 x float>		; <<4 x float>> [#uses=1]
+	%indvar.next.i.i.i = add i32 %aniso.0.i.i.i, 1		; <i32> [#uses=1]
+	br label %bb7551.i.i.i
+
+bb7551.i.i.i:		; preds = %bb4426.i.i.i, %entry
+	%.077.1.i = phi <4 x float> [ undef, %entry ], [ %tmp7460.i.i.i, %bb4426.i.i.i ]		; <<4 x float>> [#uses=0]
+	%.178.1.i = phi <4 x float> [ undef, %entry ], [ %tmp7476.i.i.i, %bb4426.i.i.i ]		; <<4 x float>> [#uses=0]
+	%.279.1.i = phi <4 x float> [ undef, %entry ], [ %tmp7492.i.i.i, %bb4426.i.i.i ]		; <<4 x float>> [#uses=1]
+	%.380.1.i = phi <4 x float> [ undef, %entry ], [ %tmp7508.i.i.i, %bb4426.i.i.i ]		; <<4 x float>> [#uses=1]
+	%aniso.0.i.i.i = phi i32 [ 0, %entry ], [ %indvar.next.i.i.i, %bb4426.i.i.i ]		; <i32> [#uses=1]
+	br i1 false, label %glvmInterpretFPTransformFour6.exit, label %bb4426.i.i.i
+
+glvmInterpretFPTransformFour6.exit:		; preds = %bb7551.i.i.i
+	unreachable
+}
+
+declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/X86/2008-12-12-PrivateEHSymbol.ll b/test/CodeGen/X86/2008-12-12-PrivateEHSymbol.ll
new file mode 100644
index 0000000..e97b63d
--- /dev/null
+++ b/test/CodeGen/X86/2008-12-12-PrivateEHSymbol.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu | grep ^.L_Z1fv.eh
+; RUN: llc < %s -march=x86    -mtriple=i686-unknown-linux-gnu | grep ^.L_Z1fv.eh
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin9 | grep ^__Z1fv.eh
+; RUN: llc < %s -march=x86    -mtriple=i386-apple-darwin9 | grep ^__Z1fv.eh
+
+define void @_Z1fv() {
+entry:
+	br label %return
+
+return:
+	ret void
+}
diff --git a/test/CodeGen/X86/2008-12-16-BadShift.ll b/test/CodeGen/X86/2008-12-16-BadShift.ll
new file mode 100644
index 0000000..6c70c5b
--- /dev/null
+++ b/test/CodeGen/X86/2008-12-16-BadShift.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s | not grep shrl
+; Note: this test is really trying to make sure that the shift
+; returns the right result; shrl is most likely wrong,
+; but if CodeGen starts legitimately using an shrl here,
+; please adjust the test appropriately.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+@.str = internal constant [6 x i8] c"%lld\0A\00"		; <[6 x i8]*> [#uses=1]
+
+define i64 @mebbe_shift(i32 %xx, i32 %test) nounwind {
+entry:
+	%conv = zext i32 %xx to i64		; <i64> [#uses=1]
+	%tobool = icmp ne i32 %test, 0		; <i1> [#uses=1]
+	%shl = select i1 %tobool, i64 3, i64 0		; <i64> [#uses=1]
+	%x.0 = shl i64 %conv, %shl		; <i64> [#uses=1]
+	ret i64 %x.0
+}
+
diff --git a/test/CodeGen/X86/2008-12-16-dagcombine-4.ll b/test/CodeGen/X86/2008-12-16-dagcombine-4.ll
new file mode 100644
index 0000000..3080d08
--- /dev/null
+++ b/test/CodeGen/X86/2008-12-16-dagcombine-4.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 | grep "(%esp)" | count 2
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.5"
+; a - a should be found and removed, leaving refs to only L and P
+define i32 @test(i32 %a, i32 %L, i32 %P) nounwind {
+entry:
+        %0 = sub i32 %a, %L
+        %1 = add i32 %P, %0
+	%2 = sub i32 %1, %a
+	br label %return
+
+return:		; preds = %bb3
+	ret i32 %2
+}
diff --git a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
new file mode 100644
index 0000000..a6cabc4
--- /dev/null
+++ b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -asm-verbose=0 | FileCheck %s
+; PR3149
+; Make sure the copy after inline asm is not coalesced away.
+
+; CHECK:         ## InlineAsm End
+; CHECK-NEXT: BB1_2:
+; CHECK-NEXT:    movl	%esi, %eax
+
+
+@"\01LC" = internal constant [7 x i8] c"n0=%d\0A\00"		; <[7 x i8]*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [ i8* bitcast (i32 (i64, i64)* @umoddi3 to i8*) ], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define i32 @umoddi3(i64 %u, i64 %v) nounwind noinline {
+entry:
+	%0 = trunc i64 %v to i32		; <i32> [#uses=2]
+	%1 = trunc i64 %u to i32		; <i32> [#uses=4]
+	%2 = lshr i64 %u, 32		; <i64> [#uses=1]
+	%3 = trunc i64 %2 to i32		; <i32> [#uses=2]
+	%4 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([7 x i8]* @"\01LC", i32 0, i32 0), i32 %1) nounwind		; <i32> [#uses=0]
+	%5 = icmp ult i32 %1, %0		; <i1> [#uses=1]
+	br i1 %5, label %bb2, label %bb
+
+bb:		; preds = %entry
+	%6 = lshr i64 %v, 32		; <i64> [#uses=1]
+	%7 = trunc i64 %6 to i32		; <i32> [#uses=1]
+	%asmtmp = tail call { i32, i32 } asm "subl $5,$1\0A\09sbbl $3,$0", "=r,=&r,0,imr,1,imr,~{dirflag},~{fpsr},~{flags}"(i32 %3, i32 %7, i32 %1, i32 %0) nounwind		; <{ i32, i32 }> [#uses=2]
+	%asmresult = extractvalue { i32, i32 } %asmtmp, 0		; <i32> [#uses=1]
+	%asmresult1 = extractvalue { i32, i32 } %asmtmp, 1		; <i32> [#uses=1]
+	br label %bb2
+
+bb2:		; preds = %bb, %entry
+	%n1.0 = phi i32 [ %asmresult, %bb ], [ %3, %entry ]		; <i32> [#uses=1]
+	%n0.0 = phi i32 [ %asmresult1, %bb ], [ %1, %entry ]		; <i32> [#uses=1]
+	%8 = add i32 %n0.0, %n1.0		; <i32> [#uses=1]
+	ret i32 %8
+}
+
+declare i32 @printf(i8*, ...) nounwind
diff --git a/test/CodeGen/X86/2008-12-22-dagcombine-5.ll b/test/CodeGen/X86/2008-12-22-dagcombine-5.ll
new file mode 100644
index 0000000..75773e0
--- /dev/null
+++ b/test/CodeGen/X86/2008-12-22-dagcombine-5.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 | grep "(%esp)" | count 2
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.5"
+; -(-a) - a should be found and removed, leaving refs to only L and P
+define i32 @test(i32 %a, i32 %L, i32 %P) nounwind {
+entry:
+        %0 = sub i32 %L, %a
+        %1 = sub i32 %P, %0
+	%2 = sub i32 %1, %a
+	br label %return
+
+return:		; preds = %bb3
+	ret i32 %2
+}
diff --git a/test/CodeGen/X86/2008-12-23-crazy-address.ll b/test/CodeGen/X86/2008-12-23-crazy-address.ll
new file mode 100644
index 0000000..2edcaea
--- /dev/null
+++ b/test/CodeGen/X86/2008-12-23-crazy-address.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -march=x86 -relocation-model=static | grep {lea.*X.*esp} | count 2
+
+@X = external global [0 x i32]
+
+define void @foo() nounwind {
+entry:
+	%Y = alloca i32
+	call void @frob(i32* %Y) nounwind
+	%Y3 = bitcast i32* %Y to i8*
+	%ctg2 = getelementptr i8* %Y3, i32 ptrtoint ([0 x i32]* @X to i32)
+	%0 = ptrtoint i8* %ctg2 to i32
+	call void @borf(i32 %0) nounwind
+	ret void
+}
+
+define void @bar(i32 %i) nounwind {
+entry:
+	%Y = alloca [10 x i32]
+	%0 = getelementptr [10 x i32]* %Y, i32 0, i32 0
+	call void @frob(i32* %0) nounwind
+	%1 = getelementptr [0 x i32]* @X, i32 0, i32 %i
+	%2 = getelementptr [10 x i32]* %Y, i32 0, i32 0
+	%3 = ptrtoint i32* %2 to i32
+	%4 = bitcast i32* %1 to i8*
+	%ctg2 = getelementptr i8* %4, i32 %3
+	%5 = ptrtoint i8* %ctg2 to i32
+	call void @borf(i32 %5) nounwind
+	ret void
+}
+
+declare void @frob(i32*)
+
+declare void @borf(i32)
diff --git a/test/CodeGen/X86/2008-12-23-dagcombine-6.ll b/test/CodeGen/X86/2008-12-23-dagcombine-6.ll
new file mode 100644
index 0000000..bae9283
--- /dev/null
+++ b/test/CodeGen/X86/2008-12-23-dagcombine-6.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86 | grep "(%esp)" | count 4
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.5"
+; a - a should be found and removed, leaving refs to only L and P
+define i32 @test(i32 %a, i32 %L, i32 %P) nounwind {
+entry:
+        %0 = add i32 %a, %L
+        %1 = add i32 %P, %0
+	%2 = sub i32 %1, %a
+	br label %return
+
+return:		; preds = %bb3
+	ret i32 %2
+}
+define i32 @test2(i32 %a, i32 %L, i32 %P) nounwind {
+entry:
+        %0 = add i32 %L, %a
+        %1 = add i32 %P, %0
+	%2 = sub i32 %1, %a
+	br label %return
+
+return:		; preds = %bb3
+	ret i32 %2
+}
diff --git a/test/CodeGen/X86/2009-01-12-CoalescerBug.ll b/test/CodeGen/X86/2009-01-12-CoalescerBug.ll
new file mode 100644
index 0000000..27a7113
--- /dev/null
+++ b/test/CodeGen/X86/2009-01-12-CoalescerBug.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | grep movq | count 2
+; PR3311
+
+	%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+	%struct.VEC_basic_block_base = type { i32, i32, [1 x %struct.basic_block_def*] }
+	%struct.VEC_basic_block_gc = type { %struct.VEC_basic_block_base }
+	%struct.VEC_edge_base = type { i32, i32, [1 x %struct.edge_def*] }
+	%struct.VEC_edge_gc = type { %struct.VEC_edge_base }
+	%struct.VEC_rtx_base = type { i32, i32, [1 x %struct.rtx_def*] }
+	%struct.VEC_rtx_gc = type { %struct.VEC_rtx_base }
+	%struct.VEC_temp_slot_p_base = type { i32, i32, [1 x %struct.temp_slot*] }
+	%struct.VEC_temp_slot_p_gc = type { %struct.VEC_temp_slot_p_base }
+	%struct.VEC_tree_base = type { i32, i32, [1 x %struct.tree_node*] }
+	%struct.VEC_tree_gc = type { %struct.VEC_tree_base }
+	%struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
+	%struct.basic_block_def = type { %struct.tree_node*, %struct.VEC_edge_gc*, %struct.VEC_edge_gc*, i8*, %struct.loop*, [2 x %struct.et_node*], %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_il_dependent, %struct.tree_node*, %struct.edge_prediction*, i64, i32, i32, i32, i32 }
+	%struct.basic_block_il_dependent = type { %struct.rtl_bb_info* }
+	%struct.bitmap_element_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, [2 x i64] }
+	%struct.bitmap_head_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, %struct.bitmap_obstack* }
+	%struct.bitmap_obstack = type { %struct.bitmap_element_def*, %struct.bitmap_head_def*, %struct.obstack }
+	%struct.block_symbol = type { [3 x %struct.rtunion], %struct.object_block*, i64 }
+	%struct.c_arg_info = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i8 }
+	%struct.c_language_function = type { %struct.stmt_tree_s }
+	%struct.c_switch = type opaque
+	%struct.control_flow_graph = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.VEC_basic_block_gc*, i32, i32, i32, %struct.VEC_basic_block_gc*, i32 }
+	%struct.edge_def = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.edge_def_insns, i8*, %struct.location_t*, i32, i32, i64, i32 }
+	%struct.edge_def_insns = type { %struct.rtx_def* }
+	%struct.edge_prediction = type opaque
+	%struct.eh_status = type opaque
+	%struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.location_t, i32, i8*, %struct.rtx_def** }
+	%struct.et_node = type opaque
+	%struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
+	%struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.control_flow_graph*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.VEC_temp_slot_p_gc*, %struct.temp_slot*, %struct.var_refs_queue*, i32, i32, i32, i32, %struct.machine_function*, i32, i32, %struct.language_function*, %struct.htab*, %struct.rtx_def*, i32, i32, i32, %struct.location_t, %struct.VEC_tree_gc*, %struct.tree_node*, i8*, i8*, i8*, i8*, i8*, %struct.tree_node*, i8, i8, i8, i8, i8, i8 }
+	%struct.htab = type { i32 (i8*)*, i32 (i8*, i8*)*, void (i8*)*, i8**, i64, i64, i64, i32, i32, i8* (i64, i64)*, void (i8*)*, i8*, i8* (i8*, i64, i64)*, void (i8*, i8*)*, i32 }
+	%struct.initial_value_struct = type opaque
+	%struct.lang_decl = type { i8 }
+	%struct.language_function = type { %struct.c_language_function, %struct.tree_node*, %struct.tree_node*, %struct.c_switch*, %struct.c_arg_info*, i32, i32, i32, i32 }
+	%struct.location_t = type { i8*, i32 }
+	%struct.loop = type opaque
+	%struct.machine_function = type { %struct.stack_local_entry*, i8*, %struct.rtx_def*, i32, i32, [4 x i32], i32, i32, i32 }
+	%struct.object_block = type { %struct.section*, i32, i64, %struct.VEC_rtx_gc*, %struct.VEC_rtx_gc* }
+	%struct.obstack = type { i64, %struct._obstack_chunk*, i8*, i8*, i8*, i64, i32, %struct._obstack_chunk* (i8*, i64)*, void (i8*, %struct._obstack_chunk*)*, i8*, i8 }
+	%struct.omp_clause_subcode = type { i32 }
+	%struct.rtl_bb_info = type { %struct.rtx_def*, %struct.rtx_def*, %struct.bitmap_head_def*, %struct.bitmap_head_def*, %struct.rtx_def*, %struct.rtx_def*, i32 }
+	%struct.rtunion = type { i8* }
+	%struct.rtx_def = type { i16, i8, i8, %struct.u }
+	%struct.section = type { %struct.unnamed_section }
+	%struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* }
+	%struct.stack_local_entry = type opaque
+	%struct.stmt_tree_s = type { %struct.tree_node*, i32 }
+	%struct.temp_slot = type opaque
+	%struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %union.tree_ann_d*, i8, i8, i8, i8, i8 }
+	%struct.tree_decl_common = type { %struct.tree_decl_minimal, %struct.tree_node*, i8, i8, i8, i8, i8, i32, i32, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* }
+	%struct.tree_decl_minimal = type { %struct.tree_common, %struct.location_t, i32, %struct.tree_node*, %struct.tree_node* }
+	%struct.tree_decl_non_common = type { %struct.tree_decl_with_vis, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node* }
+	%struct.tree_decl_with_rtl = type { %struct.tree_decl_common, %struct.rtx_def*, i32 }
+	%struct.tree_decl_with_vis = type { %struct.tree_decl_with_rtl, %struct.tree_node*, %struct.tree_node*, i8, i8, i8, i8 }
+	%struct.tree_function_decl = type { %struct.tree_decl_non_common, i32, i8, i8, i64, %struct.function* }
+	%struct.tree_node = type { %struct.tree_function_decl }
+	%struct.u = type { %struct.block_symbol }
+	%struct.unnamed_section = type { %struct.omp_clause_subcode, void (i8*)*, i8*, %struct.section* }
+	%struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
+	%struct.varasm_status = type opaque
+	%union.tree_ann_d = type opaque
+@.str1 = external constant [31 x i8]		; <[31 x i8]*> [#uses=1]
+@integer_types = external global [11 x %struct.tree_node*]		; <[11 x %struct.tree_node*]*> [#uses=1]
+@__FUNCTION__.31164 = external constant [23 x i8], align 16		; <[23 x i8]*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [ i8* bitcast (i32 (i32, i32)* @c_common_type_for_size to i8*) ], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define i32 @c_common_type_for_size(i32 %bits, i32 %unsignedp) nounwind {
+entry:
+	%0 = load %struct.tree_node** getelementptr ([11 x %struct.tree_node*]* @integer_types, i32 0, i64 5), align 8		; <%struct.tree_node*> [#uses=1]
+	br i1 false, label %bb16, label %bb
+
+bb:		; preds = %entry
+	tail call void @tree_class_check_failed(%struct.tree_node* %0, i32 2, i8* getelementptr ([31 x i8]* @.str1, i32 0, i64 0), i32 1785, i8* getelementptr ([23 x i8]* @__FUNCTION__.31164, i32 0, i32 0)) noreturn nounwind
+	unreachable
+
+bb16:		; preds = %entry
+	%tmp = add i32 %bits, %unsignedp		; <i32> [#uses=1]
+	ret i32 %tmp
+}
+
+declare void @tree_class_check_failed(%struct.tree_node*, i32, i8*, i32, i8*) noreturn
diff --git a/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll b/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll
new file mode 100644
index 0000000..9c71469
--- /dev/null
+++ b/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2  -disable-mmx -enable-legalize-types-checking
+
+declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
+
+define void @__mindd16(<16 x double>* sret %vec.result, <16 x double> %x, double %y) nounwind {
+entry:
+	%tmp3.i = shufflevector <16 x double> zeroinitializer, <16 x double> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >		; <<8 x double>> [#uses=1]
+	%tmp10.i.i = shufflevector <8 x double> %tmp3.i, <8 x double> undef, <4 x i32> < i32 4, i32 5, i32 6, i32 7 >		; <<4 x double>> [#uses=1]
+	%tmp3.i2.i.i = shufflevector <4 x double> %tmp10.i.i, <4 x double> undef, <2 x i32> < i32 0, i32 1 >		; <<2 x double>> [#uses=1]
+	%0 = tail call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> zeroinitializer, <2 x double> %tmp3.i2.i.i) nounwind		; <<2 x double>> [#uses=1]
+	%tmp5.i3.i.i = shufflevector <2 x double> %0, <2 x double> undef, <4 x i32> < i32 0, i32 1, i32 undef, i32 undef >		; <<4 x double>> [#uses=1]
+	%tmp6.i4.i.i = shufflevector <4 x double> zeroinitializer, <4 x double> %tmp5.i3.i.i, <4 x i32> < i32 4, i32 5, i32 2, i32 3 >		; <<4 x double>> [#uses=1]
+	%tmp14.i8.i.i = shufflevector <4 x double> %tmp6.i4.i.i, <4 x double> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 4, i32 5 >		; <<4 x double>> [#uses=1]
+	%tmp13.i.i = shufflevector <4 x double> %tmp14.i8.i.i, <4 x double> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef >		; <<8 x double>> [#uses=1]
+	%tmp14.i.i = shufflevector <8 x double> zeroinitializer, <8 x double> %tmp13.i.i, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11 >		; <<8 x double>> [#uses=1]
+	%tmp5.i = shufflevector <8 x double> %tmp14.i.i, <8 x double> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef >		; <<16 x double>> [#uses=1]
+	%tmp6.i = shufflevector <16 x double> %x, <16 x double> %tmp5.i, <16 x i32> < i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15 >		; <<16 x double>> [#uses=1]
+	%tmp14.i = shufflevector <16 x double> %tmp6.i, <16 x double> zeroinitializer, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23 >		; <<16 x double>> [#uses=1]
+	store <16 x double> %tmp14.i, <16 x double>* %vec.result
+	ret void
+}
diff --git a/test/CodeGen/X86/2009-01-16-SchedulerBug.ll b/test/CodeGen/X86/2009-01-16-SchedulerBug.ll
new file mode 100644
index 0000000..99bef6c
--- /dev/null
+++ b/test/CodeGen/X86/2009-01-16-SchedulerBug.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin
+; rdar://6501631
+
+	%CF = type { %Register }
+	%XXV = type { i32 (...)** }
+	%Register = type { %"struct.XXC::BCFs", i32 }
+	%"struct.XXC::BCFs" = type { i32 }
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind
+
+define fastcc %XXV* @bar(%CF* %call_frame, %XXV** %exception) nounwind {
+prologue:
+	%param_x = load %XXV** null		; <%XXV*> [#uses=1]
+	%unique_1.i = ptrtoint %XXV* %param_x to i1		; <i1> [#uses=1]
+	br i1 %unique_1.i, label %NextVerify42, label %FailedVerify
+
+NextVerify42:		; preds = %prologue
+	%param_y = load %XXV** null		; <%XXV*> [#uses=1]
+	%unique_1.i58 = ptrtoint %XXV* %param_y to i1		; <i1> [#uses=1]
+	br i1 %unique_1.i58, label %function_setup.cont, label %FailedVerify
+
+function_setup.cont:		; preds = %NextVerify42
+	br i1 false, label %label13, label %label
+
+label:		; preds = %function_setup.cont
+	%has_exn = icmp eq %XXV* null, null		; <i1> [#uses=1]
+	br i1 %has_exn, label %kjsNumberLiteral.exit, label %handle_exception
+
+kjsNumberLiteral.exit:		; preds = %label
+	%0 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 0, i32 0)		; <{ i32, i1 }> [#uses=2]
+	%intAdd = extractvalue { i32, i1 } %0, 0		; <i32> [#uses=2]
+	%intAddOverflow = extractvalue { i32, i1 } %0, 1		; <i1> [#uses=1]
+	%toint56 = ashr i32 %intAdd, 1		; <i32> [#uses=1]
+	%toFP57 = sitofp i32 %toint56 to double		; <double> [#uses=1]
+	br i1 %intAddOverflow, label %rematerializeAdd, label %label13
+
+label13:		; preds = %kjsNumberLiteral.exit, %function_setup.cont
+	%var_lr1.0 = phi double [ %toFP57, %kjsNumberLiteral.exit ], [ 0.000000e+00, %function_setup.cont ]		; <double> [#uses=0]
+	unreachable
+
+FailedVerify:		; preds = %NextVerify42, %prologue
+	ret %XXV* null
+
+rematerializeAdd:		; preds = %kjsNumberLiteral.exit
+	%rematerializedInt = sub i32 %intAdd, 0		; <i32> [#uses=0]
+	ret %XXV* null
+
+handle_exception:		; preds = %label
+	ret %XXV* undef
+}
diff --git a/test/CodeGen/X86/2009-01-16-UIntToFP.ll b/test/CodeGen/X86/2009-01-16-UIntToFP.ll
new file mode 100644
index 0000000..2eab5f1
--- /dev/null
+++ b/test/CodeGen/X86/2009-01-16-UIntToFP.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=x86
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+
+define hidden float @__floatundisf(i64 %u) nounwind readnone {
+entry:
+	%0 = icmp ugt i64 %u, 9007199254740991		; <i1> [#uses=1]
+	br i1 %0, label %bb, label %bb2
+
+bb:		; preds = %entry
+	%1 = and i64 %u, 2047		; <i64> [#uses=1]
+	%2 = icmp eq i64 %1, 0		; <i1> [#uses=1]
+	br i1 %2, label %bb2, label %bb1
+
+bb1:		; preds = %bb
+	%3 = or i64 %u, 2048		; <i64> [#uses=1]
+	%4 = and i64 %3, -2048		; <i64> [#uses=1]
+	br label %bb2
+
+bb2:		; preds = %bb1, %bb, %entry
+	%u_addr.0 = phi i64 [ %4, %bb1 ], [ %u, %entry ], [ %u, %bb ]		; <i64> [#uses=2]
+	%5 = lshr i64 %u_addr.0, 32		; <i64> [#uses=1]
+	%6 = trunc i64 %5 to i32		; <i32> [#uses=1]
+	%7 = uitofp i32 %6 to double		; <double> [#uses=1]
+	%8 = fmul double %7, 0x41F0000000000000		; <double> [#uses=1]
+	%9 = trunc i64 %u_addr.0 to i32		; <i32> [#uses=1]
+	%10 = uitofp i32 %9 to double		; <double> [#uses=1]
+	%11 = fadd double %10, %8		; <double> [#uses=1]
+	%12 = fptrunc double %11 to float		; <float> [#uses=1]
+	ret float %12
+}
diff --git a/test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll b/test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll
new file mode 100644
index 0000000..f895336
--- /dev/null
+++ b/test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s
+; rdar://6505632
+; reduced from 483.xalancbmk
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin7"
+	%"struct.std::basic_ostream<char,std::char_traits<char> >.base" = type { i32 (...)** }
+	%"struct.xercesc_2_5::ASCIIRangeFactory" = type { %"struct.std::basic_ostream<char,std::char_traits<char> >.base", i8, i8 }
+@_ZN11xercesc_2_5L17gIdeographicCharsE = external constant [7 x i16]		; <[7 x i16]*> [#uses=3]
+
+define void @_ZN11xercesc_2_515XMLRangeFactory11buildRangesEv(%"struct.xercesc_2_5::ASCIIRangeFactory"* %this) {
+entry:
+	br i1 false, label %bb5, label %return
+
+bb5:		; preds = %entry
+	br label %bb4.i.i
+
+bb4.i.i:		; preds = %bb4.i.i, %bb5
+	br i1 false, label %bb.i51, label %bb4.i.i
+
+bb.i51:		; preds = %bb.i51, %bb4.i.i
+	br i1 false, label %bb4.i.i70, label %bb.i51
+
+bb4.i.i70:		; preds = %bb4.i.i70, %bb.i51
+	br i1 false, label %_ZN11xercesc_2_59XMLString9stringLenEPKt.exit.i73, label %bb4.i.i70
+
+_ZN11xercesc_2_59XMLString9stringLenEPKt.exit.i73:		; preds = %bb4.i.i70
+	%0 = load i16* getelementptr ([7 x i16]* @_ZN11xercesc_2_5L17gIdeographicCharsE, i32 0, i32 add (i32 ashr (i32 sub (i32 ptrtoint (i16* getelementptr ([7 x i16]* @_ZN11xercesc_2_5L17gIdeographicCharsE, i32 0, i32 4) to i32), i32 ptrtoint ([7 x i16]* @_ZN11xercesc_2_5L17gIdeographicCharsE to i32)), i32 1), i32 1)), align 4		; <i16> [#uses=0]
+	br label %bb4.i5.i141
+
+bb4.i5.i141:		; preds = %bb4.i5.i141, %_ZN11xercesc_2_59XMLString9stringLenEPKt.exit.i73
+	br label %bb4.i5.i141
+
+return:		; preds = %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/2009-01-25-NoSSE.ll b/test/CodeGen/X86/2009-01-25-NoSSE.ll
new file mode 100644
index 0000000..0583ef1
--- /dev/null
+++ b/test/CodeGen/X86/2009-01-25-NoSSE.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86-64 -mattr=-sse,-sse2 | not grep xmm
+; PR3402
+target datalayout =
+"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+        %struct.ktermios = type { i32, i32, i32, i32, i8, [19 x i8], i32, i32 }
+
+define void @foo() nounwind {
+entry:
+        %termios = alloca %struct.ktermios, align 8
+        %termios1 = bitcast %struct.ktermios* %termios to i8*
+        call void @llvm.memset.i64(i8* %termios1, i8 0, i64 44, i32 8)
+        call void @bar(%struct.ktermios* %termios) nounwind
+        ret void
+}
+
+declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
+
+declare void @bar(%struct.ktermios*)
+
diff --git a/test/CodeGen/X86/2009-01-26-WrongCheck.ll b/test/CodeGen/X86/2009-01-26-WrongCheck.ll
new file mode 100644
index 0000000..117ff47
--- /dev/null
+++ b/test/CodeGen/X86/2009-01-26-WrongCheck.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=x86 -enable-legalize-types-checking
+; PR3393
+
+define void @foo(i32 inreg %x) {
+	%t709 = select i1 false, i32 0, i32 %x		; <i32> [#uses=1]
+	%t711 = add i32 %t709, 1		; <i32> [#uses=4]
+	%t801 = icmp slt i32 %t711, 0		; <i1> [#uses=1]
+	%t712 = zext i32 %t711 to i64		; <i64> [#uses=1]
+	%t804 = select i1 %t801, i64 0, i64 %t712		; <i64> [#uses=1]
+	store i64 %t804, i64* null
+	%t815 = icmp slt i32 %t711, 0		; <i1> [#uses=1]
+	%t814 = sext i32 %t711 to i64		; <i64> [#uses=1]
+	%t816 = select i1 %t815, i64 0, i64 %t814		; <i64> [#uses=1]
+	store i64 %t816, i64* null
+	unreachable
+}
diff --git a/test/CodeGen/X86/2009-01-27-NullStrings.ll b/test/CodeGen/X86/2009-01-27-NullStrings.ll
new file mode 100644
index 0000000..8684f4a
--- /dev/null
+++ b/test/CodeGen/X86/2009-01-27-NullStrings.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s
+; CHECK: .section __TEXT,__cstring,cstring_literals
+
+@x = internal constant [1 x i8] zeroinitializer		; <[1 x i8]*> [#uses=1]
+
+@y = global [1 x i8]* @x
+
diff --git a/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll b/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll
new file mode 100644
index 0000000..ce3ea82
--- /dev/null
+++ b/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin9.6 -regalloc=local -disable-fp-elim
+; rdar://6538384
+
+	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+	%struct.Lit = type { i32 }
+	%struct.StreamBuffer = type { %struct.FILE*, [1048576 x i8], i32, i32 }
+	%struct.__sFILEX = type opaque
+	%struct.__sbuf = type { i8*, i32 }
+
+declare fastcc i32 @_Z8parseIntI12StreamBufferEiRT_(%struct.StreamBuffer*)
+
+declare i8* @llvm.eh.exception() nounwind
+
+define i32 @main(i32 %argc, i8** nocapture %argv) noreturn {
+entry:
+	%0 = invoke fastcc i32 @_Z8parseIntI12StreamBufferEiRT_(%struct.StreamBuffer* null)
+			to label %bb1.i16.i.i unwind label %lpad.i.i		; <i32> [#uses=0]
+
+bb1.i16.i.i:		; preds = %entry
+	br i1 false, label %bb.i.i.i.i, label %_ZN3vecI3LitE4pushERKS0_.exit.i.i.i
+
+bb.i.i.i.i:		; preds = %bb1.i16.i.i
+	br label %_ZN3vecI3LitE4pushERKS0_.exit.i.i.i
+
+_ZN3vecI3LitE4pushERKS0_.exit.i.i.i:		; preds = %bb.i.i.i.i, %bb1.i16.i.i
+	%lits.i.i.0.0 = phi %struct.Lit* [ null, %bb1.i16.i.i ], [ null, %bb.i.i.i.i ]		; <%struct.Lit*> [#uses=1]
+	%1 = invoke fastcc i32 @_Z8parseIntI12StreamBufferEiRT_(%struct.StreamBuffer* null)
+			to label %.noexc21.i.i unwind label %lpad.i.i		; <i32> [#uses=0]
+
+.noexc21.i.i:		; preds = %_ZN3vecI3LitE4pushERKS0_.exit.i.i.i
+	unreachable
+
+lpad.i.i:		; preds = %_ZN3vecI3LitE4pushERKS0_.exit.i.i.i, %entry
+	%lits.i.i.0.3 = phi %struct.Lit* [ %lits.i.i.0.0, %_ZN3vecI3LitE4pushERKS0_.exit.i.i.i ], [ null, %entry ]		; <%struct.Lit*> [#uses=1]
+	%eh_ptr.i.i = call i8* @llvm.eh.exception()		; <i8*> [#uses=0]
+	free %struct.Lit* %lits.i.i.0.3
+	unreachable
+}
diff --git a/test/CodeGen/X86/2009-01-31-BigShift.ll b/test/CodeGen/X86/2009-01-31-BigShift.ll
new file mode 100644
index 0000000..4eb0ec1
--- /dev/null
+++ b/test/CodeGen/X86/2009-01-31-BigShift.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86 | not grep and
+; PR3401
+
+define void @x(i288 %i) nounwind {
+	call void @add(i288 %i)
+	ret void
+}
+
+declare void @add(i288)
diff --git a/test/CodeGen/X86/2009-01-31-BigShift2.ll b/test/CodeGen/X86/2009-01-31-BigShift2.ll
new file mode 100644
index 0000000..9d24084
--- /dev/null
+++ b/test/CodeGen/X86/2009-01-31-BigShift2.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86 | grep {mov.*56}
+; PR3449
+
+define void @test(<8 x double>* %P, i64* %Q) nounwind {
+	%A = load <8 x double>* %P		; <<8 x double>> [#uses=1]
+	%B = bitcast <8 x double> %A to i512		; <i512> [#uses=1]
+	%C = lshr i512 %B, 448		; <i512> [#uses=1]
+	%D = trunc i512 %C to i64		; <i64> [#uses=1]
+	volatile store i64 %D, i64* %Q
+	ret void
+}
diff --git a/test/CodeGen/X86/2009-01-31-BigShift3.ll b/test/CodeGen/X86/2009-01-31-BigShift3.ll
new file mode 100644
index 0000000..1b531e3
--- /dev/null
+++ b/test/CodeGen/X86/2009-01-31-BigShift3.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=x86
+; PR3450
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin7"
+	%struct.BitMap = type { i8* }
+	%struct.BitMapListStruct = type { %struct.BitMap, %struct.BitMapListStruct*, %struct.BitMapListStruct* }
+	%struct.Material = type { float, float, float, %struct.Material*, %struct.Material* }
+	%struct.ObjPoint = type { double, double, double, double, double, double }
+	%struct.ObjectStruct = type { [57 x i8], %struct.PointListStruct*, %struct.Poly3Struct*, %struct.Poly4Struct*, %struct.Texture*, %struct.Material*, %struct.Point, i32, i32, %struct.Point, %struct.Point, %struct.Point, %struct.ObjectStruct*, %struct.ObjectStruct*, i32, i32, i32, i32, i32, i32, i32, %struct.ObjectStruct*, %struct.ObjectStruct* }
+	%struct.Point = type { double, double, double }
+	%struct.PointListStruct = type { %struct.ObjPoint*, %struct.PointListStruct*, %struct.PointListStruct* }
+	%struct.Poly3Struct = type { [3 x %struct.ObjPoint*], %struct.Material*, %struct.Texture*, %struct.Poly3Struct*, %struct.Poly3Struct* }
+	%struct.Poly4Struct = type { [4 x %struct.ObjPoint*], %struct.Material*, %struct.Texture*, %struct.Poly4Struct*, %struct.Poly4Struct* }
+	%struct.Texture = type { %struct.Point, %struct.BitMapListStruct*, %struct.Point, %struct.Point, %struct.Point, %struct.Texture*, %struct.Texture* }
+
+define fastcc void @ScaleObjectAdd(%struct.ObjectStruct* %o, double %sx, double %sy, double %sz) nounwind {
+entry:
+	%sz101112.ins = or i960 0, 0		; <i960> [#uses=1]
+	br i1 false, label %return, label %bb1.preheader
+
+bb1.preheader:		; preds = %entry
+	%0 = lshr i960 %sz101112.ins, 640		; <i960> [#uses=0]
+	br label %bb1
+
+bb1:		; preds = %bb1, %bb1.preheader
+	br label %bb1
+
+return:		; preds = %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/2009-02-01-LargeMask.ll b/test/CodeGen/X86/2009-02-01-LargeMask.ll
new file mode 100644
index 0000000..c4042e6
--- /dev/null
+++ b/test/CodeGen/X86/2009-02-01-LargeMask.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=x86
+; PR3453
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+	%struct.cl_engine = type { i32, i16, i32, i8**, i8**, i8*, i8*, i8*, i8*, i8*, i8*, i8* }
+	%struct.cl_limits = type { i32, i32, i32, i32, i16, i32 }
+	%struct.cli_ac_alt = type { i8, i8*, i16, i16, %struct.cli_ac_alt* }
+	%struct.cli_ac_node = type { i8, i8, %struct.cli_ac_patt*, %struct.cli_ac_node**, %struct.cli_ac_node* }
+	%struct.cli_ac_patt = type { i16*, i16*, i16, i16, i8, i32, i32, i8*, i8*, i32, i16, i16, i16, i16, %struct.cli_ac_alt**, i8, i16, %struct.cli_ac_patt*, %struct.cli_ac_patt* }
+	%struct.cli_bm_patt = type { i8*, i8*, i16, i16, i8*, i8*, i8, %struct.cli_bm_patt*, i16 }
+	%struct.cli_ctx = type { i8**, i32*, %struct.cli_matcher*, %struct.cl_engine*, %struct.cl_limits*, i32, i32, i32, i32, %struct.cli_dconf* }
+	%struct.cli_dconf = type { i32, i32, i32, i32, i32, i32, i32 }
+	%struct.cli_matcher = type { i16, i8, i8*, %struct.cli_bm_patt**, i32*, i32, i8, i8, %struct.cli_ac_node*, %struct.cli_ac_node**, %struct.cli_ac_patt**, i32, i32, i32 }
+
+define fastcc i32 @cli_scanautoit(i32 %desc, %struct.cli_ctx* %ctx, i32 %offset) nounwind {
+entry:
+	br i1 false, label %bb.i49.i72, label %bb14
+
+bb.i49.i72:		; preds = %bb.i49.i72, %entry
+	%UNP.i1482.0 = phi i288 [ %.ins659, %bb.i49.i72 ], [ undef, %entry ]		; <i288> [#uses=1]
+	%0 = load i32* null, align 4		; <i32> [#uses=1]
+	%1 = xor i32 %0, 17834		; <i32> [#uses=1]
+	%2 = zext i32 %1 to i288		; <i288> [#uses=1]
+	%3 = shl i288 %2, 160		; <i288> [#uses=1]
+	%UNP.i1482.in658.mask = and i288 %UNP.i1482.0, -6277101733925179126504886505003981583386072424808101969921		; <i288> [#uses=1]
+	%.ins659 = or i288 %3, %UNP.i1482.in658.mask		; <i288> [#uses=1]
+	br label %bb.i49.i72
+
+bb14:		; preds = %entry
+	ret i32 -123
+}
diff --git a/test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll b/test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll
new file mode 100644
index 0000000..e75af13
--- /dev/null
+++ b/test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -march=x86
+; PR3411
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+@g_3 = external global i32		; <i32*> [#uses=1]
+
+define void @bar(i64 %p_66) nounwind {
+entry:
+	br i1 false, label %bb, label %bb1
+
+bb:		; preds = %entry
+	unreachable
+
+bb1:		; preds = %entry
+	%0 = load i32* @g_3, align 4		; <i32> [#uses=2]
+	%1 = sext i32 %0 to i64		; <i64> [#uses=1]
+	%2 = or i64 %1, %p_66		; <i64> [#uses=1]
+	%3 = shl i64 %2, 0		; <i64> [#uses=1]
+	%4 = and i64 %3, %p_66		; <i64> [#uses=1]
+	%5 = icmp eq i64 %4, 1		; <i1> [#uses=1]
+	%6 = trunc i64 %p_66 to i32		; <i32> [#uses=2]
+	%7 = or i32 %0, %6		; <i32> [#uses=2]
+	%8 = sub i32 %7, %6		; <i32> [#uses=1]
+	%iftmp.0.0 = select i1 %5, i32 %8, i32 %7		; <i32> [#uses=1]
+	%9 = tail call i32 @foo(i32 %iftmp.0.0) nounwind		; <i32> [#uses=0]
+	ret void
+}
+
+declare i32 @foo(i32)
diff --git a/test/CodeGen/X86/2009-02-04-sext-i64-gep.ll b/test/CodeGen/X86/2009-02-04-sext-i64-gep.ll
new file mode 100644
index 0000000..4880f62
--- /dev/null
+++ b/test/CodeGen/X86/2009-02-04-sext-i64-gep.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s | grep p-92
+; PR3481
+; The offset should print as -92, not +17179869092
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+@p = common global [10 x i32] zeroinitializer, align 4          ; <[10 x i32]*>
+@g = global [1 x i32*] [ i32* bitcast (i8* getelementptr (i8* bitcast
+([10 x i32]* @p to i8*), i64 17179869092) to i32*) ], align 4 
diff --git a/test/CodeGen/X86/2009-02-05-CoalescerBug.ll b/test/CodeGen/X86/2009-02-05-CoalescerBug.ll
new file mode 100644
index 0000000..0ffa8fd
--- /dev/null
+++ b/test/CodeGen/X86/2009-02-05-CoalescerBug.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movss  | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movaps | count 4
+
+define i1 @t([2 x float]* %y, [2 x float]* %w, i32, [2 x float]* %x.pn59, i32 %smax190, i32 %j.1180, <4 x float> %wu.2179, <4 x float> %wr.2178, <4 x float>* %tmp89.out, <4 x float>* %tmp107.out, i32* %indvar.next218.out) nounwind {
+newFuncRoot:
+	%tmp82 = insertelement <4 x float> %wr.2178, float 0.000000e+00, i32 0		; <<4 x float>> [#uses=1]
+	%tmp85 = insertelement <4 x float> %tmp82, float 0.000000e+00, i32 1		; <<4 x float>> [#uses=1]
+	%tmp87 = insertelement <4 x float> %tmp85, float 0.000000e+00, i32 2		; <<4 x float>> [#uses=1]
+	%tmp89 = insertelement <4 x float> %tmp87, float 0.000000e+00, i32 3		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp89, <4 x float>* %tmp89.out
+	ret i1 false
+}
diff --git a/test/CodeGen/X86/2009-02-08-CoalescerBug.ll b/test/CodeGen/X86/2009-02-08-CoalescerBug.ll
new file mode 100644
index 0000000..908cc08
--- /dev/null
+++ b/test/CodeGen/X86/2009-02-08-CoalescerBug.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86
+; PR3486
+
+define i32 @foo(i8 signext %p_26) nounwind {
+entry:
+	%0 = icmp eq i8 %p_26, 0		; <i1> [#uses=2]
+	%or.cond = or i1 false, %0		; <i1> [#uses=2]
+	%iftmp.1.0 = zext i1 %or.cond to i16		; <i16> [#uses=1]
+	br i1 %0, label %bb.i, label %bar.exit
+
+bb.i:		; preds = %entry
+	%1 = zext i1 %or.cond to i32		; <i32> [#uses=1]
+	%2 = sdiv i32 %1, 0		; <i32> [#uses=1]
+	%3 = trunc i32 %2 to i16		; <i16> [#uses=1]
+	br label %bar.exit
+
+bar.exit:		; preds = %bb.i, %entry
+	%4 = phi i16 [ %3, %bb.i ], [ %iftmp.1.0, %entry ]		; <i16> [#uses=1]
+	%5 = trunc i16 %4 to i8		; <i8> [#uses=1]
+	%6 = sext i8 %5 to i32		; <i32> [#uses=1]
+	ret i32 %6
+}
diff --git a/test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll b/test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll
new file mode 100644
index 0000000..1284b0d
--- /dev/null
+++ b/test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s
+; PR3537
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.6"
+	%struct.GetBitContext = type <{ i8*, i8*, i32, i32 }>
+
+define i32 @alac_decode_frame() nounwind {
+entry:
+	%tmp2 = load i8** null		; <i8*> [#uses=2]
+	%tmp34 = getelementptr i8* %tmp2, i32 4		; <i8*> [#uses=2]
+	%tmp5.i424 = bitcast i8* %tmp34 to i8**		; <i8**> [#uses=2]
+	%tmp15.i = getelementptr i8* %tmp2, i32 12		; <i8*> [#uses=1]
+	%0 = bitcast i8* %tmp15.i to i32*		; <i32*> [#uses=1]
+	br i1 false, label %if.then43, label %if.end47
+
+if.then43:		; preds = %entry
+	ret i32 0
+
+if.end47:		; preds = %entry
+	%tmp5.i590 = load i8** %tmp5.i424		; <i8*> [#uses=0]
+	store i32 19, i32* %0
+	%tmp6.i569 = load i8** %tmp5.i424		; <i8*> [#uses=0]
+	%1 = call i32 asm "bswap   $0", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 0) nounwind		; <i32> [#uses=0]
+	br i1 false, label %bb.nph, label %if.then63
+
+if.then63:		; preds = %if.end47
+	unreachable
+
+bb.nph:		; preds = %if.end47
+	%2 = bitcast i8* %tmp34 to %struct.GetBitContext*		; <%struct.GetBitContext*> [#uses=1]
+	%call9.i = call fastcc i32 @decode_scalar(%struct.GetBitContext* %2, i32 0, i32 0, i32 0) nounwind		; <i32> [#uses=0]
+	unreachable
+}
+
+declare fastcc i32 @decode_scalar(%struct.GetBitContext* nocapture, i32, i32, i32) nounwind
diff --git a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
new file mode 100644
index 0000000..72c7ee9
--- /dev/null
+++ b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
@@ -0,0 +1,76 @@
+; RUN: llc < %s
+; RUN: llc < %s -march=x86-64
+; PR3538
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9"
+	%llvm.dbg.anchor.type = type { i32, i32 }
+	%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
+	%llvm.dbg.block.type = type { i32, { }* }
+	%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
+	%llvm.dbg.composite.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, { }*, { }* }
+	%llvm.dbg.subprogram.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1 }
+	%llvm.dbg.subrange.type = type { i32, i64, i64 }
+	%llvm.dbg.variable.type = type { i32, { }*, i8*, { }*, i32, { }* }
+@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
+@.str = internal constant [4 x i8] c"t.c\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
+@.str1 = internal constant [2 x i8] c".\00", section "llvm.metadata"		; <[2 x i8]*> [#uses=1]
+@.str2 = internal constant [6 x i8] c"clang\00", section "llvm.metadata"		; <[6 x i8]*> [#uses=1]
+@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([2 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([6 x i8]* @.str2, i32 0, i32 0), i1 false, i1 false, i8* null }, section "llvm.metadata"		; <%llvm.dbg.compile_unit.type*> [#uses=1]
+@.str3 = internal constant [4 x i8] c"int\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
+@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 458788, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata"		; <%llvm.dbg.basictype.type*> [#uses=1]
+@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 46 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
+@.str4 = internal constant [5 x i8] c"test\00", section "llvm.metadata"		; <[5 x i8]*> [#uses=1]
+@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([5 x i8]* @.str4, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str4, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 3, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i1 false, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
+@.str5 = internal constant [2 x i8] c"X\00", section "llvm.metadata"		; <[2 x i8]*> [#uses=1]
+@llvm.dbg.variable = internal constant %llvm.dbg.variable.type { i32 459009, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*), i8* getelementptr ([2 x i8]* @.str5, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 3, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=1]
+@llvm.dbg.block = internal constant %llvm.dbg.block.type { i32 458763, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*) }, section "llvm.metadata"		; <%llvm.dbg.block.type*> [#uses=1]
+@llvm.dbg.subrange = internal constant %llvm.dbg.subrange.type { i32 458785, i64 0, i64 0 }, section "llvm.metadata"		; <%llvm.dbg.subrange.type*> [#uses=1]
+@llvm.dbg.array = internal constant [1 x { }*] [{ }* bitcast (%llvm.dbg.subrange.type* @llvm.dbg.subrange to { }*)], section "llvm.metadata"		; <[1 x { }*]*> [#uses=1]
+@llvm.dbg.composite = internal constant %llvm.dbg.composite.type { i32 458753, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 0, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), { }* bitcast ([1 x { }*]* @llvm.dbg.array to { }*) }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
+@.str6 = internal constant [2 x i8] c"Y\00", section "llvm.metadata"		; <[2 x i8]*> [#uses=1]
+@llvm.dbg.variable7 = internal constant %llvm.dbg.variable.type { i32 459008, { }* bitcast (%llvm.dbg.block.type* @llvm.dbg.block to { }*), i8* getelementptr ([2 x i8]* @.str6, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 4, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite to { }*) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=1]
+
+define i32 @test(i32 %X) nounwind {
+entry:
+	%retval = alloca i32		; <i32*> [#uses=1]
+	%X.addr = alloca i32		; <i32*> [#uses=3]
+	%saved_stack = alloca i8*		; <i8**> [#uses=2]
+	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
+	store i32 %X, i32* %X.addr
+	%0 = bitcast i32* %X.addr to { }*		; <{ }*> [#uses=1]
+	call void @llvm.dbg.declare({ }* %0, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable to { }*))
+	call void @llvm.dbg.region.start({ }* bitcast (%llvm.dbg.block.type* @llvm.dbg.block to { }*))
+	call void @llvm.dbg.stoppoint(i32 4, i32 3, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
+	%1 = call i8* @llvm.stacksave()		; <i8*> [#uses=1]
+	store i8* %1, i8** %saved_stack
+	%tmp = load i32* %X.addr		; <i32> [#uses=1]
+	%2 = mul i32 4, %tmp		; <i32> [#uses=1]
+	%vla = alloca i8, i32 %2		; <i8*> [#uses=1]
+	%tmp1 = bitcast i8* %vla to i32*		; <i32*> [#uses=1]
+	%3 = bitcast i32* %tmp1 to { }*		; <{ }*> [#uses=1]
+	call void @llvm.dbg.declare({ }* %3, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable7 to { }*))
+	call void @llvm.dbg.stoppoint(i32 5, i32 1, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
+	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.block.type* @llvm.dbg.block to { }*))
+	br label %cleanup
+
+cleanup:		; preds = %entry
+	%tmp2 = load i8** %saved_stack		; <i8*> [#uses=1]
+	call void @llvm.stackrestore(i8* %tmp2)
+	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
+	%4 = load i32* %retval		; <i32> [#uses=1]
+	ret i32 %4
+}
+
+declare void @llvm.dbg.func.start({ }*) nounwind
+
+declare void @llvm.dbg.declare({ }*, { }*) nounwind
+
+declare void @llvm.dbg.region.start({ }*) nounwind
+
+declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
+
+declare i8* @llvm.stacksave() nounwind
+
+declare void @llvm.stackrestore(i8*) nounwind
+
+declare void @llvm.dbg.region.end({ }*) nounwind
diff --git a/test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll b/test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll
new file mode 100644
index 0000000..2e148ad
--- /dev/null
+++ b/test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=x86 | grep {\$-81920} | count 3
+; RUN: llc < %s -march=x86 | grep {\$4294885376} | count 1
+
+; ModuleID = 'shant.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.6"
+
+define void @f() nounwind {
+entry:
+	call void asm sideeffect "foo $0", "n,~{dirflag},~{fpsr},~{flags}"(i32 -81920) nounwind
+	call void asm sideeffect "foo $0", "i,~{dirflag},~{fpsr},~{flags}"(i32 -81920) nounwind
+	call void asm sideeffect "foo $0", "e,~{dirflag},~{fpsr},~{flags}"(i32 -81920) nounwind
+	call void asm sideeffect "foo $0", "Z,~{dirflag},~{fpsr},~{flags}"(i64 4294885376) nounwind
+	br label %return
+
+return:		; preds = %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/2009-02-12-SpillerBug.ll b/test/CodeGen/X86/2009-02-12-SpillerBug.ll
new file mode 100644
index 0000000..4f8a5e7
--- /dev/null
+++ b/test/CodeGen/X86/2009-02-12-SpillerBug.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin8
+; PR3561
+
+define hidden void @__mulxc3({ x86_fp80, x86_fp80 }* noalias nocapture sret %agg.result, x86_fp80 %a, x86_fp80 %b, x86_fp80 %c, x86_fp80 %d) nounwind {
+entry:
+	%0 = fmul x86_fp80 %b, %d		; <x86_fp80> [#uses=1]
+	%1 = fsub x86_fp80 0xK00000000000000000000, %0		; <x86_fp80> [#uses=1]
+	%2 = fadd x86_fp80 0xK00000000000000000000, 0xK00000000000000000000		; <x86_fp80> [#uses=1]
+	%3 = fcmp uno x86_fp80 %1, 0xK00000000000000000000		; <i1> [#uses=1]
+	%4 = fcmp uno x86_fp80 %2, 0xK00000000000000000000		; <i1> [#uses=1]
+	%or.cond = and i1 %3, %4		; <i1> [#uses=1]
+	br i1 %or.cond, label %bb47, label %bb71
+
+bb47:		; preds = %entry
+	%5 = fcmp uno x86_fp80 %a, 0xK00000000000000000000		; <i1> [#uses=1]
+	br i1 %5, label %bb60, label %bb62
+
+bb60:		; preds = %bb47
+	%6 = tail call x86_fp80 @copysignl(x86_fp80 0xK00000000000000000000, x86_fp80 %a) nounwind readnone		; <x86_fp80> [#uses=0]
+	br label %bb62
+
+bb62:		; preds = %bb60, %bb47
+	unreachable
+
+bb71:		; preds = %entry
+	ret void
+}
+
+declare x86_fp80 @copysignl(x86_fp80, x86_fp80) nounwind readnone
diff --git a/test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll b/test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll
new file mode 100644
index 0000000..58a7f9f
--- /dev/null
+++ b/test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll
@@ -0,0 +1,71 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin8 -pre-alloc-split
+
+define i32 @main() nounwind {
+bb4.i.thread:
+	br label %bb5.i4
+
+bb16:		; preds = %bb111.i
+	%phitmp = add i32 %indvar.reg2mem.4, 1		; <i32> [#uses=2]
+	switch i32 %indvar.reg2mem.4, label %bb100.i [
+		i32 0, label %bb5.i4
+		i32 1, label %bb5.i4
+		i32 2, label %bb5.i4
+		i32 5, label %bb.i14.i
+		i32 6, label %bb.i14.i
+		i32 7, label %bb.i14.i
+	]
+
+bb5.i4:		; preds = %bb16, %bb16, %bb16, %bb4.i.thread
+	br i1 false, label %bb102.i, label %bb103.i
+
+bb.i14.i:		; preds = %bb16, %bb16, %bb16
+	%0 = malloc [600 x i32]		; <[600 x i32]*> [#uses=0]
+	%1 = icmp eq i32 %phitmp, 7		; <i1> [#uses=1]
+	%tl.0.i = select i1 %1, float 1.000000e+02, float 1.000000e+00		; <float> [#uses=1]
+	%2 = icmp eq i32 %phitmp, 8		; <i1> [#uses=1]
+	%tu.0.i = select i1 %2, float 1.000000e+02, float 1.000000e+00		; <float> [#uses=1]
+	br label %bb30.i
+
+bb30.i:		; preds = %bb36.i, %bb.i14.i
+	%i.1173.i = phi i32 [ 0, %bb.i14.i ], [ %indvar.next240.i, %bb36.i ]		; <i32> [#uses=3]
+	%3 = icmp eq i32 0, %i.1173.i		; <i1> [#uses=1]
+	br i1 %3, label %bb33.i, label %bb34.i
+
+bb33.i:		; preds = %bb30.i
+	store float %tl.0.i, float* null, align 4
+	br label %bb36.i
+
+bb34.i:		; preds = %bb30.i
+	%4 = icmp eq i32 0, %i.1173.i		; <i1> [#uses=1]
+	br i1 %4, label %bb35.i, label %bb36.i
+
+bb35.i:		; preds = %bb34.i
+	store float %tu.0.i, float* null, align 4
+	br label %bb36.i
+
+bb36.i:		; preds = %bb35.i, %bb34.i, %bb33.i
+	%indvar.next240.i = add i32 %i.1173.i, 1		; <i32> [#uses=1]
+	br label %bb30.i
+
+bb100.i:		; preds = %bb16
+	ret i32 0
+
+bb102.i:		; preds = %bb5.i4
+	br label %bb103.i
+
+bb103.i:		; preds = %bb102.i, %bb5.i4
+	%indvar.reg2mem.4 = phi i32 [ 0, %bb5.i4 ], [ 0, %bb102.i ]		; <i32> [#uses=2]
+	%n.0.reg2mem.1.i = phi i32 [ 0, %bb102.i ], [ 0, %bb5.i4 ]		; <i32> [#uses=1]
+	%5 = icmp eq i32 0, 0		; <i1> [#uses=1]
+	br i1 %5, label %bb111.i, label %bb108.i
+
+bb108.i:		; preds = %bb103.i
+	ret i32 0
+
+bb111.i:		; preds = %bb103.i
+	%6 = icmp sgt i32 %n.0.reg2mem.1.i, 7		; <i1> [#uses=1]
+	br i1 %6, label %bb16, label %bb112.i
+
+bb112.i:		; preds = %bb111.i
+	unreachable
+}
diff --git a/test/CodeGen/X86/2009-02-21-ExtWeakInitializer.ll b/test/CodeGen/X86/2009-02-21-ExtWeakInitializer.ll
new file mode 100644
index 0000000..b3dd13c
--- /dev/null
+++ b/test/CodeGen/X86/2009-02-21-ExtWeakInitializer.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s | grep weak | count 3
+; PR3629
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "x86_64-unknown-freebsd7.1"
+module asm ".ident\09\22$FreeBSD$\22"
+	%struct.anon = type <{ %struct.uart_devinfo* }>
+	%struct.lock_object = type <{ i8*, i32, i32, %struct.witness* }>
+	%struct.mtx = type <{ %struct.lock_object, i64 }>
+	%struct.uart_bas = type <{ i64, i64, i32, i32, i32, i8, i8, i8, i8 }>
+	%struct.uart_class = type opaque
+	%struct.uart_devinfo = type <{ %struct.anon, %struct.uart_ops*, %struct.uart_bas, i32, i32, i32, i32, i32, i8, i8, i8, i8, i32 (%struct.uart_softc*)*, i32 (%struct.uart_softc*)*, i8*, %struct.mtx* }>
+	%struct.uart_ops = type <{ i32 (%struct.uart_bas*)*, void (%struct.uart_bas*, i32, i32, i32, i32)*, void (%struct.uart_bas*)*, void (%struct.uart_bas*, i32)*, i32 (%struct.uart_bas*)*, i32 (%struct.uart_bas*, %struct.mtx*)* }>
+	%struct.uart_softc = type opaque
+	%struct.witness = type opaque
+
+@uart_classes = internal global [3 x %struct.uart_class*] [%struct.uart_class* @uart_ns8250_class, %struct.uart_class* @uart_sab82532_class, %struct.uart_class* @uart_z8530_class], align 8		; <[3 x %struct.uart_class*]*> [#uses=1]
+@uart_ns8250_class = extern_weak global %struct.uart_class		; <%struct.uart_class*> [#uses=1]
+@uart_sab82532_class = extern_weak global %struct.uart_class		; <%struct.uart_class*> [#uses=1]
+@uart_z8530_class = extern_weak global %struct.uart_class		; <%struct.uart_class*> [#uses=1]
diff --git a/test/CodeGen/X86/2009-02-25-CommuteBug.ll b/test/CodeGen/X86/2009-02-25-CommuteBug.ll
new file mode 100644
index 0000000..7ea6998
--- /dev/null
+++ b/test/CodeGen/X86/2009-02-25-CommuteBug.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& not grep commuted
+; rdar://6608609
+
+define <2 x double> @t(<2 x double> %A, <2 x double> %B, <2 x double> %C) nounwind readnone {
+entry:
+	%tmp.i2 = bitcast <2 x double> %B to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%tmp2.i = or <2 x i64> %tmp.i2, <i64 4607632778762754458, i64 4607632778762754458>		; <<2 x i64>> [#uses=1]
+	%tmp3.i = bitcast <2 x i64> %tmp2.i to <2 x double>		; <<2 x double>> [#uses=1]
+	%0 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %A, <2 x double> %tmp3.i) nounwind readnone		; <<2 x double>> [#uses=1]
+	%tmp.i = fadd <2 x double> %0, %C		; <<2 x double>> [#uses=1]
+	ret <2 x double> %tmp.i
+}
+
+declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
new file mode 100644
index 0000000..a4d642b
--- /dev/null
+++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse3 -stats |& not grep {machine-licm}
+; rdar://6627786
+
+target triple = "x86_64-apple-darwin10.0"
+	%struct.Key = type { i64 }
+	%struct.__Rec = type opaque
+	%struct.__vv = type {  }
+
+define %struct.__vv* @t(%struct.Key* %desc, i64 %p) nounwind ssp {
+entry:
+	br label %bb4
+
+bb4:		; preds = %bb.i, %bb26, %bb4, %entry
+	%0 = call i32 (...)* @xxGetOffsetForCode(i32 undef) nounwind		; <i32> [#uses=0]
+	%ins = or i64 %p, 2097152		; <i64> [#uses=1]
+	%1 = call i32 (...)* @xxCalculateMidType(%struct.Key* %desc, i32 0) nounwind		; <i32> [#uses=1]
+	%cond = icmp eq i32 %1, 1		; <i1> [#uses=1]
+	br i1 %cond, label %bb26, label %bb4
+
+bb26:		; preds = %bb4
+	%2 = and i64 %ins, 15728640		; <i64> [#uses=1]
+	%cond.i = icmp eq i64 %2, 1048576		; <i1> [#uses=1]
+	br i1 %cond.i, label %bb.i, label %bb4
+
+bb.i:		; preds = %bb26
+	%3 = load i32* null, align 4		; <i32> [#uses=1]
+	%4 = uitofp i32 %3 to float		; <float> [#uses=1]
+	%.sum13.i = add i64 0, 4		; <i64> [#uses=1]
+	%5 = getelementptr i8* null, i64 %.sum13.i		; <i8*> [#uses=1]
+	%6 = bitcast i8* %5 to i32*		; <i32*> [#uses=1]
+	%7 = load i32* %6, align 4		; <i32> [#uses=1]
+	%8 = uitofp i32 %7 to float		; <float> [#uses=1]
+	%.sum.i = add i64 0, 8		; <i64> [#uses=1]
+	%9 = getelementptr i8* null, i64 %.sum.i		; <i8*> [#uses=1]
+	%10 = bitcast i8* %9 to i32*		; <i32*> [#uses=1]
+	%11 = load i32* %10, align 4		; <i32> [#uses=1]
+	%12 = uitofp i32 %11 to float		; <float> [#uses=1]
+	%13 = insertelement <4 x float> undef, float %4, i32 0		; <<4 x float>> [#uses=1]
+	%14 = insertelement <4 x float> %13, float %8, i32 1		; <<4 x float>> [#uses=1]
+	%15 = insertelement <4 x float> %14, float %12, i32 2		; <<4 x float>> [#uses=1]
+	store <4 x float> %15, <4 x float>* null, align 16
+	br label %bb4
+}
+
+declare i32 @xxGetOffsetForCode(...)
+
+declare i32 @xxCalculateMidType(...)
diff --git a/test/CodeGen/X86/2009-03-03-BTHang.ll b/test/CodeGen/X86/2009-03-03-BTHang.ll
new file mode 100644
index 0000000..bb95925
--- /dev/null
+++ b/test/CodeGen/X86/2009-03-03-BTHang.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=x86
+; rdar://6642541
+
+ 	%struct.HandleBlock = type { [30 x i32], [990 x i8*], %struct.HandleBlockTrailer }
+ 	%struct.HandleBlockTrailer = type { %struct.HandleBlock* }
+
+define hidden zeroext i8 @IsHandleAllocatedFromPool(i8** %h) nounwind optsize {
+entry:
+	%0 = ptrtoint i8** %h to i32		; <i32> [#uses=2]
+	%1 = and i32 %0, -4096		; <i32> [#uses=1]
+	%2 = inttoptr i32 %1 to %struct.HandleBlock*		; <%struct.HandleBlock*> [#uses=3]
+	%3 = getelementptr %struct.HandleBlock* %2, i32 0, i32 0, i32 0		; <i32*> [#uses=1]
+	%4 = load i32* %3, align 4096		; <i32> [#uses=1]
+	%5 = icmp eq i32 %4, 1751280747		; <i1> [#uses=1]
+	br i1 %5, label %bb, label %bb1
+
+bb:		; preds = %entry
+	%6 = getelementptr %struct.HandleBlock* %2, i32 0, i32 1		; <[990 x i8*]*> [#uses=1]
+	%7 = ptrtoint [990 x i8*]* %6 to i32		; <i32> [#uses=1]
+	%8 = sub i32 %0, %7		; <i32> [#uses=2]
+	%9 = lshr i32 %8, 2		; <i32> [#uses=1]
+	%10 = ashr i32 %8, 7		; <i32> [#uses=1]
+	%11 = and i32 %10, 134217727		; <i32> [#uses=1]
+	%12 = getelementptr %struct.HandleBlock* %2, i32 0, i32 0, i32 %11		; <i32*> [#uses=1]
+	%not.i = and i32 %9, 31		; <i32> [#uses=1]
+	%13 = xor i32 %not.i, 31		; <i32> [#uses=1]
+	%14 = shl i32 1, %13		; <i32> [#uses=1]
+	%15 = load i32* %12, align 4		; <i32> [#uses=1]
+	%16 = and i32 %15, %14		; <i32> [#uses=1]
+	%17 = icmp eq i32 %16, 0		; <i1> [#uses=1]
+	%tmp = zext i1 %17 to i8		; <i8> [#uses=1]
+	ret i8 %tmp
+
+bb1:		; preds = %entry
+	ret i8 0
+}
+
diff --git a/test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll b/test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll
new file mode 100644
index 0000000..9deeceb
--- /dev/null
+++ b/test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86
+; PR3686
+; rdar://6661799
+
+define i32 @x(i32 %y) nounwind readnone {
+entry:
+	%tmp14 = zext i32 %y to i80		; <i80> [#uses=1]
+	%tmp15 = bitcast i80 %tmp14 to x86_fp80		; <x86_fp80> [#uses=1]
+	%add = fadd x86_fp80 %tmp15, 0xK3FFF8000000000000000		; <x86_fp80> [#uses=1]
+	%tmp11 = bitcast x86_fp80 %add to i80		; <i80> [#uses=1]
+	%tmp10 = trunc i80 %tmp11 to i32		; <i32> [#uses=1]
+	ret i32 %tmp10
+}
+
diff --git a/test/CodeGen/X86/2009-03-05-burr-list-crash.ll b/test/CodeGen/X86/2009-03-05-burr-list-crash.ll
new file mode 100644
index 0000000..411a0c9
--- /dev/null
+++ b/test/CodeGen/X86/2009-03-05-burr-list-crash.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+external global i32		; <i32*>:0 [#uses=1]
+
+declare i64 @strlen(i8* nocapture) nounwind readonly
+
+define fastcc i8* @1(i8*) nounwind {
+	br i1 false, label %3, label %2
+
+; <label>:2		; preds = %1
+	ret i8* %0
+
+; <label>:3		; preds = %1
+	%4 = call i64 @strlen(i8* %0) nounwind readonly		; <i64> [#uses=1]
+	%5 = trunc i64 %4 to i32		; <i32> [#uses=2]
+	%6 = load i32* @0, align 4		; <i32> [#uses=1]
+	%7 = sub i32 %5, %6		; <i32> [#uses=2]
+	%8 = sext i32 %5 to i64		; <i64> [#uses=1]
+	%9 = sext i32 %7 to i64		; <i64> [#uses=1]
+	%10 = sub i64 %8, %9		; <i64> [#uses=1]
+	%11 = getelementptr i8* %0, i64 %10		; <i8*> [#uses=1]
+	%12 = icmp sgt i32 %7, 0		; <i1> [#uses=1]
+	br i1 %12, label %13, label %14
+
+; <label>:13		; preds = %13, %3
+	br label %13
+
+; <label>:14		; preds = %3
+	%15 = call noalias i8* @make_temp_file(i8* %11) nounwind		; <i8*> [#uses=0]
+	unreachable
+}
+
+declare noalias i8* @make_temp_file(i8*)
diff --git a/test/CodeGen/X86/2009-03-07-FPConstSelect.ll b/test/CodeGen/X86/2009-03-07-FPConstSelect.ll
new file mode 100644
index 0000000..39caddc
--- /dev/null
+++ b/test/CodeGen/X86/2009-03-07-FPConstSelect.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep xmm
+; This should do a single load into the fp stack for the return, not diddle with xmm registers.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin7"
+
+define float @f(i32 %x) nounwind readnone {
+entry:
+	%0 = icmp eq i32 %x, 0		; <i1> [#uses=1]
+	%iftmp.0.0 = select i1 %0, float 4.200000e+01, float 2.300000e+01
+	ret float %iftmp.0.0
+}
diff --git a/test/CodeGen/X86/2009-03-09-APIntCrash.ll b/test/CodeGen/X86/2009-03-09-APIntCrash.ll
new file mode 100644
index 0000000..896c968
--- /dev/null
+++ b/test/CodeGen/X86/2009-03-09-APIntCrash.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86-64
+; PR3763
+	%struct.__block_descriptor = type { i64, i64 }
+
+define %struct.__block_descriptor @evUTCTime() nounwind {
+entry:
+	br i1 false, label %if.then, label %return
+
+if.then:		; preds = %entry
+	%srcval18 = load i128* null, align 8		; <i128> [#uses=1]
+	%tmp15 = lshr i128 %srcval18, 64		; <i128> [#uses=1]
+	%tmp9 = mul i128 %tmp15, 18446744073709551616000		; <i128> [#uses=1]
+	br label %return
+
+return:		; preds = %if.then, %entry
+	%retval.0 = phi i128 [ %tmp9, %if.then ], [ undef, %entry ]		; <i128> [#uses=0]
+	ret %struct.__block_descriptor undef
+}
+
+define i128 @test(i128 %arg) nounwind {
+	%A = shl i128 1, 92
+	%B = sub i128 0, %A
+	%C = mul i128 %arg, %B
+	ret i128 %C  ;; should codegen to neg(shift)
+}
diff --git a/test/CodeGen/X86/2009-03-09-SpillerBug.ll b/test/CodeGen/X86/2009-03-09-SpillerBug.ll
new file mode 100644
index 0000000..4224210
--- /dev/null
+++ b/test/CodeGen/X86/2009-03-09-SpillerBug.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu
+; PR3706
+
+define void @__mulxc3(x86_fp80 %b) nounwind {
+entry:
+	%call = call x86_fp80 @y(x86_fp80* null, x86_fp80* null)		; <x86_fp80> [#uses=0]
+	%cmp = fcmp ord x86_fp80 %b, 0xK00000000000000000000		; <i1> [#uses=1]
+	%sub = fsub x86_fp80 %b, %b		; <x86_fp80> [#uses=1]
+	%cmp7 = fcmp uno x86_fp80 %sub, 0xK00000000000000000000		; <i1> [#uses=1]
+	%and12 = and i1 %cmp7, %cmp		; <i1> [#uses=1]
+	%and = zext i1 %and12 to i32		; <i32> [#uses=1]
+	%conv9 = sitofp i32 %and to x86_fp80		; <x86_fp80> [#uses=1]
+	store x86_fp80 %conv9, x86_fp80* null
+	store x86_fp80 %b, x86_fp80* null
+	ret void
+}
+
+declare x86_fp80 @y(x86_fp80*, x86_fp80*)
diff --git a/test/CodeGen/X86/2009-03-10-CoalescerBug.ll b/test/CodeGen/X86/2009-03-10-CoalescerBug.ll
new file mode 100644
index 0000000..90dff88
--- /dev/null
+++ b/test/CodeGen/X86/2009-03-10-CoalescerBug.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin
+; rdar://r6661945
+
+	%struct.WINDOW = type { i16, i16, i16, i16, i16, i16, i16, i32, i32, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, %struct.ldat*, i16, i16, i32, i32, %struct.WINDOW*, %struct.pdat, i16, %struct.cchar_t }
+	%struct.cchar_t = type { i32, [5 x i32] }
+	%struct.ldat = type { %struct.cchar_t*, i16, i16, i16 }
+	%struct.pdat = type { i16, i16, i16, i16, i16, i16 }
+
+define i32 @pnoutrefresh(%struct.WINDOW* %win, i32 %pminrow, i32 %pmincol, i32 %sminrow, i32 %smincol, i32 %smaxrow, i32 %smaxcol) nounwind optsize ssp {
+entry:
+	%0 = load i16* null, align 4		; <i16> [#uses=2]
+	%1 = icmp sgt i16 0, %0		; <i1> [#uses=1]
+	br i1 %1, label %bb12, label %bb13
+
+bb12:		; preds = %entry
+	%2 = sext i16 %0 to i32		; <i32> [#uses=1]
+	%3 = sub i32 %2, 0		; <i32> [#uses=1]
+	%4 = add i32 %3, %smaxrow		; <i32> [#uses=2]
+	%5 = trunc i32 %4 to i16		; <i16> [#uses=1]
+	%6 = add i16 0, %5		; <i16> [#uses=1]
+	br label %bb13
+
+bb13:		; preds = %bb12, %entry
+	%pmaxrow.0 = phi i16 [ %6, %bb12 ], [ 0, %entry ]		; <i16> [#uses=0]
+	%smaxrow_addr.0 = phi i32 [ %4, %bb12 ], [ %smaxrow, %entry ]		; <i32> [#uses=1]
+	%7 = trunc i32 %smaxrow_addr.0 to i16		; <i16> [#uses=0]
+	ret i32 0
+}
diff --git a/test/CodeGen/X86/2009-03-11-CoalescerBug.ll b/test/CodeGen/X86/2009-03-11-CoalescerBug.ll
new file mode 100644
index 0000000..d5ba93e
--- /dev/null
+++ b/test/CodeGen/X86/2009-03-11-CoalescerBug.ll
@@ -0,0 +1,85 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin9 -stats |& grep regcoalescing | grep commuting
+
+@lookupTable5B = external global [64 x i32], align 32		; <[64 x i32]*> [#uses=1]
+@lookupTable3B = external global [16 x i32], align 32		; <[16 x i32]*> [#uses=1]
+@disparity0 = external global i32		; <i32*> [#uses=5]
+@disparity1 = external global i32		; <i32*> [#uses=3]
+
+define i32 @calc(i32 %theWord, i32 %k) nounwind {
+entry:
+	%0 = lshr i32 %theWord, 3		; <i32> [#uses=1]
+	%1 = and i32 %0, 31		; <i32> [#uses=1]
+	%2 = shl i32 %k, 5		; <i32> [#uses=1]
+	%3 = or i32 %1, %2		; <i32> [#uses=1]
+	%4 = and i32 %theWord, 7		; <i32> [#uses=1]
+	%5 = shl i32 %k, 3		; <i32> [#uses=1]
+	%6 = or i32 %5, %4		; <i32> [#uses=1]
+	%7 = getelementptr [64 x i32]* @lookupTable5B, i32 0, i32 %3		; <i32*> [#uses=1]
+	%8 = load i32* %7, align 4		; <i32> [#uses=5]
+	%9 = getelementptr [16 x i32]* @lookupTable3B, i32 0, i32 %6		; <i32*> [#uses=1]
+	%10 = load i32* %9, align 4		; <i32> [#uses=5]
+	%11 = and i32 %8, 65536		; <i32> [#uses=1]
+	%12 = icmp eq i32 %11, 0		; <i1> [#uses=1]
+	br i1 %12, label %bb1, label %bb
+
+bb:		; preds = %entry
+	%13 = and i32 %8, 994		; <i32> [#uses=1]
+	%14 = load i32* @disparity0, align 4		; <i32> [#uses=2]
+	store i32 %14, i32* @disparity1, align 4
+	br label %bb8
+
+bb1:		; preds = %entry
+	%15 = lshr i32 %8, 18		; <i32> [#uses=1]
+	%16 = and i32 %15, 1		; <i32> [#uses=1]
+	%17 = load i32* @disparity0, align 4		; <i32> [#uses=4]
+	%18 = icmp eq i32 %16, %17		; <i1> [#uses=1]
+	%not = select i1 %18, i32 0, i32 994		; <i32> [#uses=1]
+	%.masked = and i32 %8, 994		; <i32> [#uses=1]
+	%result.1 = xor i32 %not, %.masked		; <i32> [#uses=2]
+	%19 = and i32 %8, 524288		; <i32> [#uses=1]
+	%20 = icmp eq i32 %19, 0		; <i1> [#uses=1]
+	br i1 %20, label %bb7, label %bb6
+
+bb6:		; preds = %bb1
+	%21 = xor i32 %17, 1		; <i32> [#uses=2]
+	store i32 %21, i32* @disparity1, align 4
+	br label %bb8
+
+bb7:		; preds = %bb1
+	store i32 %17, i32* @disparity1, align 4
+	br label %bb8
+
+bb8:		; preds = %bb7, %bb6, %bb
+	%22 = phi i32 [ %17, %bb7 ], [ %21, %bb6 ], [ %14, %bb ]		; <i32> [#uses=4]
+	%result.0 = phi i32 [ %result.1, %bb7 ], [ %result.1, %bb6 ], [ %13, %bb ]		; <i32> [#uses=2]
+	%23 = and i32 %10, 65536		; <i32> [#uses=1]
+	%24 = icmp eq i32 %23, 0		; <i1> [#uses=1]
+	br i1 %24, label %bb10, label %bb9
+
+bb9:		; preds = %bb8
+	%25 = and i32 %10, 29		; <i32> [#uses=1]
+	%26 = or i32 %result.0, %25		; <i32> [#uses=1]
+	store i32 %22, i32* @disparity0, align 4
+	ret i32 %26
+
+bb10:		; preds = %bb8
+	%27 = lshr i32 %10, 18		; <i32> [#uses=1]
+	%28 = and i32 %27, 1		; <i32> [#uses=1]
+	%29 = icmp eq i32 %28, %22		; <i1> [#uses=1]
+	%not13 = select i1 %29, i32 0, i32 29		; <i32> [#uses=1]
+	%.masked20 = and i32 %10, 29		; <i32> [#uses=1]
+	%.pn = xor i32 %not13, %.masked20		; <i32> [#uses=1]
+	%result.3 = or i32 %.pn, %result.0		; <i32> [#uses=2]
+	%30 = and i32 %10, 524288		; <i32> [#uses=1]
+	%31 = icmp eq i32 %30, 0		; <i1> [#uses=1]
+	br i1 %31, label %bb17, label %bb16
+
+bb16:		; preds = %bb10
+	%32 = xor i32 %22, 1		; <i32> [#uses=1]
+	store i32 %32, i32* @disparity0, align 4
+	ret i32 %result.3
+
+bb17:		; preds = %bb10
+	store i32 %22, i32* @disparity0, align 4
+	ret i32 %result.3
+}
diff --git a/test/CodeGen/X86/2009-03-12-CPAlignBug.ll b/test/CodeGen/X86/2009-03-12-CPAlignBug.ll
new file mode 100644
index 0000000..3564f01
--- /dev/null
+++ b/test/CodeGen/X86/2009-03-12-CPAlignBug.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | not grep {.space}
+; rdar://6668548
+
+declare double @llvm.sqrt.f64(double) nounwind readonly
+
+declare double @fabs(double)
+
+declare double @llvm.pow.f64(double, double) nounwind readonly
+
+define void @SolveCubic_bb1(i32* %solutions, double* %x, x86_fp80 %.reload, x86_fp80 %.reload5, x86_fp80 %.reload6, double %.reload8) nounwind {
+newFuncRoot:
+	br label %bb1
+
+bb1.ret.exitStub:		; preds = %bb1
+	ret void
+
+bb1:		; preds = %newFuncRoot
+	store i32 1, i32* %solutions, align 4
+	%0 = tail call double @llvm.sqrt.f64(double %.reload8)		; <double> [#uses=1]
+	%1 = fptrunc x86_fp80 %.reload6 to double		; <double> [#uses=1]
+	%2 = tail call double @fabs(double %1) nounwind readnone		; <double> [#uses=1]
+	%3 = fadd double %0, %2		; <double> [#uses=1]
+	%4 = tail call double @llvm.pow.f64(double %3, double 0x3FD5555555555555)		; <double> [#uses=1]
+	%5 = fpext double %4 to x86_fp80		; <x86_fp80> [#uses=2]
+	%6 = fdiv x86_fp80 %.reload5, %5		; <x86_fp80> [#uses=1]
+	%7 = fadd x86_fp80 %5, %6		; <x86_fp80> [#uses=1]
+	%8 = fptrunc x86_fp80 %7 to double		; <double> [#uses=1]
+	%9 = fcmp olt x86_fp80 %.reload6, 0xK00000000000000000000		; <i1> [#uses=1]
+	%iftmp.6.0 = select i1 %9, double 1.000000e+00, double -1.000000e+00		; <double> [#uses=1]
+	%10 = fmul double %8, %iftmp.6.0		; <double> [#uses=1]
+	%11 = fpext double %10 to x86_fp80		; <x86_fp80> [#uses=1]
+	%12 = fdiv x86_fp80 %.reload, 0xKC000C000000000000000		; <x86_fp80> [#uses=1]
+	%13 = fadd x86_fp80 %11, %12		; <x86_fp80> [#uses=1]
+	%14 = fptrunc x86_fp80 %13 to double		; <double> [#uses=1]
+	store double %14, double* %x, align 1
+	br label %bb1.ret.exitStub
+}
diff --git a/test/CodeGen/X86/2009-03-13-PHIElimBug.ll b/test/CodeGen/X86/2009-03-13-PHIElimBug.ll
new file mode 100644
index 0000000..ad7f9f7
--- /dev/null
+++ b/test/CodeGen/X86/2009-03-13-PHIElimBug.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+; Check the register copy comes after the call to f and before the call to g
+; PR3784
+
+declare i32 @f()
+
+declare i32 @g()
+
+define i32 @phi() {
+entry:
+	%a = call i32 @f()		; <i32> [#uses=1]
+	%b = invoke i32 @g()
+			to label %cont unwind label %lpad		; <i32> [#uses=1]
+
+cont:		; preds = %entry
+	%x = phi i32 [ %b, %entry ]		; <i32> [#uses=0]
+	%aa = call i32 @g()		; <i32> [#uses=1]
+	%bb = invoke i32 @g()
+			to label %cont2 unwind label %lpad		; <i32> [#uses=1]
+
+cont2:		; preds = %cont
+	%xx = phi i32 [ %bb, %cont ]		; <i32> [#uses=1]
+	ret i32 %xx
+
+lpad:		; preds = %cont, %entry
+	%y = phi i32 [ %a, %entry ], [ %aa, %cont ]		; <i32> [#uses=1]
+	ret i32 %y
+}
+
+; CHECK: call{{.*}}f
+; CHECK-NEXT: Llabel1:
+; CHECK-NEXT: movl %eax, %esi
diff --git a/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll b/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll
new file mode 100644
index 0000000..11c4101
--- /dev/null
+++ b/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86 -asm-verbose | FileCheck %s
+; Check that register copies in the landing pad come after the EH_LABEL
+
+declare i32 @f()
+
+define i32 @phi(i32 %x) {
+entry:
+	%a = invoke i32 @f()
+			to label %cont unwind label %lpad		; <i32> [#uses=1]
+
+cont:		; preds = %entry
+	%b = invoke i32 @f()
+			to label %cont2 unwind label %lpad		; <i32> [#uses=1]
+
+cont2:		; preds = %cont
+	ret i32 %b
+
+lpad:		; preds = %cont, %entry
+	%v = phi i32 [ %x, %entry ], [ %a, %cont ]		; <i32> [#uses=1]
+	ret i32 %v
+}
+
+; CHECK: lpad
+; CHECK-NEXT: Llabel
diff --git a/test/CodeGen/X86/2009-03-16-SpillerBug.ll b/test/CodeGen/X86/2009-03-16-SpillerBug.ll
new file mode 100644
index 0000000..80e7639
--- /dev/null
+++ b/test/CodeGen/X86/2009-03-16-SpillerBug.ll
@@ -0,0 +1,167 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -stats |& grep virtregrewriter | not grep {stores unfolded}
+; rdar://6682365
+
+; Do not clobber a register if another spill slot is available in it and it's marked "do not clobber".
+
+	%struct.CAST_KEY = type { [32 x i32], i32 }
+@CAST_S_table0 = constant [2 x i32] [i32 821772500, i32 -1616838901], align 32		; <[2 x i32]*> [#uses=0]
+@CAST_S_table4 = constant [2 x i32] [i32 2127105028, i32 745436345], align 32		; <[2 x i32]*> [#uses=6]
+@CAST_S_table5 = constant [2 x i32] [i32 -151351395, i32 749497569], align 32		; <[2 x i32]*> [#uses=5]
+@CAST_S_table6 = constant [2 x i32] [i32 -2048901095, i32 858518887], align 32		; <[2 x i32]*> [#uses=4]
+@CAST_S_table7 = constant [2 x i32] [i32 -501862387, i32 -1143078916], align 32		; <[2 x i32]*> [#uses=5]
+@CAST_S_table1 = constant [2 x i32] [i32 522195092, i32 -284448933], align 32		; <[2 x i32]*> [#uses=0]
+@CAST_S_table2 = constant [2 x i32] [i32 -1913667008, i32 637164959], align 32		; <[2 x i32]*> [#uses=0]
+@CAST_S_table3 = constant [2 x i32] [i32 -1649212384, i32 532081118], align 32		; <[2 x i32]*> [#uses=0]
+
+define void @CAST_set_key(%struct.CAST_KEY* nocapture %key, i32 %len, i8* nocapture %data) nounwind ssp {
+bb1.thread:
+	%0 = getelementptr [16 x i32]* null, i32 0, i32 5		; <i32*> [#uses=1]
+	%1 = getelementptr [16 x i32]* null, i32 0, i32 8		; <i32*> [#uses=1]
+	%2 = load i32* null, align 4		; <i32> [#uses=1]
+	%3 = shl i32 %2, 24		; <i32> [#uses=1]
+	%4 = load i32* null, align 4		; <i32> [#uses=1]
+	%5 = shl i32 %4, 16		; <i32> [#uses=1]
+	%6 = load i32* null, align 4		; <i32> [#uses=1]
+	%7 = or i32 %5, %3		; <i32> [#uses=1]
+	%8 = or i32 %7, %6		; <i32> [#uses=1]
+	%9 = or i32 %8, 0		; <i32> [#uses=1]
+	%10 = load i32* null, align 4		; <i32> [#uses=1]
+	%11 = shl i32 %10, 24		; <i32> [#uses=1]
+	%12 = load i32* %0, align 4		; <i32> [#uses=1]
+	%13 = shl i32 %12, 16		; <i32> [#uses=1]
+	%14 = load i32* null, align 4		; <i32> [#uses=1]
+	%15 = or i32 %13, %11		; <i32> [#uses=1]
+	%16 = or i32 %15, %14		; <i32> [#uses=1]
+	%17 = or i32 %16, 0		; <i32> [#uses=1]
+	br label %bb11
+
+bb11:		; preds = %bb11, %bb1.thread
+	%18 = phi i32 [ %110, %bb11 ], [ 0, %bb1.thread ]		; <i32> [#uses=1]
+	%19 = phi i32 [ %112, %bb11 ], [ 0, %bb1.thread ]		; <i32> [#uses=0]
+	%20 = phi i32 [ 0, %bb11 ], [ 0, %bb1.thread ]		; <i32> [#uses=0]
+	%21 = phi i32 [ %113, %bb11 ], [ 0, %bb1.thread ]		; <i32> [#uses=1]
+	%X.0.0 = phi i32 [ %9, %bb1.thread ], [ %92, %bb11 ]		; <i32> [#uses=0]
+	%X.1.0 = phi i32 [ %17, %bb1.thread ], [ 0, %bb11 ]		; <i32> [#uses=0]
+	%22 = getelementptr [2 x i32]* @CAST_S_table6, i32 0, i32 %21		; <i32*> [#uses=0]
+	%23 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 %18		; <i32*> [#uses=0]
+	%24 = load i32* null, align 4		; <i32> [#uses=1]
+	%25 = xor i32 0, %24		; <i32> [#uses=1]
+	%26 = xor i32 %25, 0		; <i32> [#uses=1]
+	%27 = xor i32 %26, 0		; <i32> [#uses=4]
+	%28 = and i32 %27, 255		; <i32> [#uses=2]
+	%29 = lshr i32 %27, 8		; <i32> [#uses=1]
+	%30 = and i32 %29, 255		; <i32> [#uses=2]
+	%31 = lshr i32 %27, 16		; <i32> [#uses=1]
+	%32 = and i32 %31, 255		; <i32> [#uses=1]
+	%33 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 %28		; <i32*> [#uses=1]
+	%34 = load i32* %33, align 4		; <i32> [#uses=2]
+	%35 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 %30		; <i32*> [#uses=1]
+	%36 = load i32* %35, align 4		; <i32> [#uses=2]
+	%37 = xor i32 %34, 0		; <i32> [#uses=1]
+	%38 = xor i32 %37, %36		; <i32> [#uses=1]
+	%39 = xor i32 %38, 0		; <i32> [#uses=1]
+	%40 = xor i32 %39, 0		; <i32> [#uses=1]
+	%41 = xor i32 %40, 0		; <i32> [#uses=3]
+	%42 = lshr i32 %41, 8		; <i32> [#uses=1]
+	%43 = and i32 %42, 255		; <i32> [#uses=2]
+	%44 = lshr i32 %41, 16		; <i32> [#uses=1]
+	%45 = and i32 %44, 255		; <i32> [#uses=1]
+	%46 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 %43		; <i32*> [#uses=1]
+	%47 = load i32* %46, align 4		; <i32> [#uses=1]
+	%48 = load i32* null, align 4		; <i32> [#uses=1]
+	%49 = xor i32 %47, 0		; <i32> [#uses=1]
+	%50 = xor i32 %49, %48		; <i32> [#uses=1]
+	%51 = xor i32 %50, 0		; <i32> [#uses=1]
+	%52 = xor i32 %51, 0		; <i32> [#uses=1]
+	%53 = xor i32 %52, 0		; <i32> [#uses=2]
+	%54 = and i32 %53, 255		; <i32> [#uses=1]
+	%55 = lshr i32 %53, 24		; <i32> [#uses=1]
+	%56 = getelementptr [2 x i32]* @CAST_S_table6, i32 0, i32 %55		; <i32*> [#uses=1]
+	%57 = load i32* %56, align 4		; <i32> [#uses=1]
+	%58 = xor i32 0, %57		; <i32> [#uses=1]
+	%59 = xor i32 %58, 0		; <i32> [#uses=1]
+	%60 = xor i32 %59, 0		; <i32> [#uses=1]
+	store i32 %60, i32* null, align 4
+	%61 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 0		; <i32*> [#uses=1]
+	%62 = load i32* %61, align 4		; <i32> [#uses=1]
+	%63 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 %54		; <i32*> [#uses=1]
+	%64 = load i32* %63, align 4		; <i32> [#uses=1]
+	%65 = xor i32 0, %64		; <i32> [#uses=1]
+	%66 = xor i32 %65, 0		; <i32> [#uses=1]
+	store i32 %66, i32* null, align 4
+	%67 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 %45		; <i32*> [#uses=1]
+	%68 = load i32* %67, align 4		; <i32> [#uses=1]
+	%69 = xor i32 %36, %34		; <i32> [#uses=1]
+	%70 = xor i32 %69, 0		; <i32> [#uses=1]
+	%71 = xor i32 %70, %68		; <i32> [#uses=1]
+	%72 = xor i32 %71, 0		; <i32> [#uses=1]
+	store i32 %72, i32* null, align 4
+	%73 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 %32		; <i32*> [#uses=1]
+	%74 = load i32* %73, align 4		; <i32> [#uses=2]
+	%75 = load i32* null, align 4		; <i32> [#uses=1]
+	%76 = getelementptr [2 x i32]* @CAST_S_table6, i32 0, i32 %43		; <i32*> [#uses=1]
+	%77 = load i32* %76, align 4		; <i32> [#uses=1]
+	%78 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 0		; <i32*> [#uses=1]
+	%79 = load i32* %78, align 4		; <i32> [#uses=1]
+	%80 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 %30		; <i32*> [#uses=1]
+	%81 = load i32* %80, align 4		; <i32> [#uses=2]
+	%82 = xor i32 %75, %74		; <i32> [#uses=1]
+	%83 = xor i32 %82, %77		; <i32> [#uses=1]
+	%84 = xor i32 %83, %79		; <i32> [#uses=1]
+	%85 = xor i32 %84, %81		; <i32> [#uses=1]
+	store i32 %85, i32* null, align 4
+	%86 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 %28		; <i32*> [#uses=1]
+	%87 = load i32* %86, align 4		; <i32> [#uses=1]
+	%88 = xor i32 %74, %41		; <i32> [#uses=1]
+	%89 = xor i32 %88, %87		; <i32> [#uses=1]
+	%90 = xor i32 %89, 0		; <i32> [#uses=1]
+	%91 = xor i32 %90, %81		; <i32> [#uses=1]
+	%92 = xor i32 %91, 0		; <i32> [#uses=3]
+	%93 = lshr i32 %92, 16		; <i32> [#uses=1]
+	%94 = and i32 %93, 255		; <i32> [#uses=1]
+	store i32 %94, i32* null, align 4
+	%95 = lshr i32 %92, 24		; <i32> [#uses=2]
+	%96 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 %95		; <i32*> [#uses=1]
+	%97 = load i32* %96, align 4		; <i32> [#uses=1]
+	%98 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 0		; <i32*> [#uses=1]
+	%99 = load i32* %98, align 4		; <i32> [#uses=1]
+	%100 = load i32* null, align 4		; <i32> [#uses=0]
+	%101 = xor i32 %97, 0		; <i32> [#uses=1]
+	%102 = xor i32 %101, %99		; <i32> [#uses=1]
+	%103 = xor i32 %102, 0		; <i32> [#uses=1]
+	%104 = xor i32 %103, 0		; <i32> [#uses=0]
+	store i32 0, i32* null, align 4
+	%105 = xor i32 0, %27		; <i32> [#uses=1]
+	%106 = xor i32 %105, 0		; <i32> [#uses=1]
+	%107 = xor i32 %106, 0		; <i32> [#uses=1]
+	%108 = xor i32 %107, 0		; <i32> [#uses=1]
+	%109 = xor i32 %108, %62		; <i32> [#uses=3]
+	%110 = and i32 %109, 255		; <i32> [#uses=1]
+	%111 = lshr i32 %109, 16		; <i32> [#uses=1]
+	%112 = and i32 %111, 255		; <i32> [#uses=1]
+	%113 = lshr i32 %109, 24		; <i32> [#uses=3]
+	store i32 %113, i32* %1, align 4
+	%114 = load i32* null, align 4		; <i32> [#uses=1]
+	%115 = xor i32 0, %114		; <i32> [#uses=1]
+	%116 = xor i32 %115, 0		; <i32> [#uses=1]
+	%117 = xor i32 %116, 0		; <i32> [#uses=1]
+	%K.0.sum42 = or i32 0, 12		; <i32> [#uses=1]
+	%118 = getelementptr [32 x i32]* null, i32 0, i32 %K.0.sum42		; <i32*> [#uses=1]
+	store i32 %117, i32* %118, align 4
+	%119 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 0		; <i32*> [#uses=0]
+	store i32 0, i32* null, align 4
+	%120 = getelementptr [2 x i32]* @CAST_S_table6, i32 0, i32 %113		; <i32*> [#uses=1]
+	%121 = load i32* %120, align 4		; <i32> [#uses=1]
+	%122 = xor i32 0, %121		; <i32> [#uses=1]
+	store i32 %122, i32* null, align 4
+	%123 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 0		; <i32*> [#uses=1]
+	%124 = load i32* %123, align 4		; <i32> [#uses=1]
+	%125 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 %95		; <i32*> [#uses=1]
+	%126 = load i32* %125, align 4		; <i32> [#uses=1]
+	%127 = xor i32 0, %124		; <i32> [#uses=1]
+	%128 = xor i32 %127, 0		; <i32> [#uses=1]
+	%129 = xor i32 %128, %126		; <i32> [#uses=1]
+	%130 = xor i32 %129, 0		; <i32> [#uses=1]
+	store i32 %130, i32* null, align 4
+	br label %bb11
+}
diff --git a/test/CodeGen/X86/2009-03-23-LinearScanBug.ll b/test/CodeGen/X86/2009-03-23-LinearScanBug.ll
new file mode 100644
index 0000000..06dfdc0
--- /dev/null
+++ b/test/CodeGen/X86/2009-03-23-LinearScanBug.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -O0
+
+define fastcc void @optimize_bit_field() nounwind {
+bb4:
+        %a = load i32* null             ; <i32> [#uses=1]
+        %s = load i32* getelementptr (i32* null, i32 1)         ; <i32> [#uses=1]
+        %z = load i32* getelementptr (i32* null, i32 2)         ; <i32> [#uses=1]
+        %r = bitcast i32 0 to i32          ; <i32> [#uses=1]
+        %q = trunc i32 %z to i8            ; <i8> [#uses=1]
+        %b = icmp eq i8 0, %q              ; <i1> [#uses=1]
+        br i1 %b, label %bb73, label %bb72
+
+bb72:      ; preds = %bb4
+        %f = tail call fastcc i32 @gen_lowpart(i32 %r, i32 %a) nounwind              ; <i32> [#uses=1]
+        br label %bb73
+
+bb73:         ; preds = %bb72, %bb4
+        %y = phi i32 [ %f, %bb72 ], [ %s, %bb4 ]          ; <i32> [#uses=1]
+        store i32 %y, i32* getelementptr (i32* null, i32 3)
+        unreachable
+}
+
+declare fastcc i32 @gen_lowpart(i32, i32) nounwind
diff --git a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
new file mode 100644
index 0000000..b5873ba
--- /dev/null
+++ b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
@@ -0,0 +1,242 @@
+; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static -stats -info-output-file - > %t
+; RUN: not grep spill %t
+; RUN: not grep {%rsp} %t
+; RUN: not grep {%rbp} %t
+
+; The register-pressure scheduler should be able to schedule this in a
+; way that does not require spills.
+
+@X = external global i64		; <i64*> [#uses=25]
+
+define fastcc i64 @foo() nounwind {
+	%tmp = volatile load i64* @X		; <i64> [#uses=7]
+	%tmp1 = volatile load i64* @X		; <i64> [#uses=5]
+	%tmp2 = volatile load i64* @X		; <i64> [#uses=3]
+	%tmp3 = volatile load i64* @X		; <i64> [#uses=1]
+	%tmp4 = volatile load i64* @X		; <i64> [#uses=5]
+	%tmp5 = volatile load i64* @X		; <i64> [#uses=3]
+	%tmp6 = volatile load i64* @X		; <i64> [#uses=2]
+	%tmp7 = volatile load i64* @X		; <i64> [#uses=1]
+	%tmp8 = volatile load i64* @X		; <i64> [#uses=1]
+	%tmp9 = volatile load i64* @X		; <i64> [#uses=1]
+	%tmp10 = volatile load i64* @X		; <i64> [#uses=1]
+	%tmp11 = volatile load i64* @X		; <i64> [#uses=1]
+	%tmp12 = volatile load i64* @X		; <i64> [#uses=1]
+	%tmp13 = volatile load i64* @X		; <i64> [#uses=1]
+	%tmp14 = volatile load i64* @X		; <i64> [#uses=1]
+	%tmp15 = volatile load i64* @X		; <i64> [#uses=1]
+	%tmp16 = volatile load i64* @X		; <i64> [#uses=1]
+	%tmp17 = volatile load i64* @X		; <i64> [#uses=1]
+	%tmp18 = volatile load i64* @X		; <i64> [#uses=1]
+	%tmp19 = volatile load i64* @X		; <i64> [#uses=1]
+	%tmp20 = volatile load i64* @X		; <i64> [#uses=1]
+	%tmp21 = volatile load i64* @X		; <i64> [#uses=1]
+	%tmp22 = volatile load i64* @X		; <i64> [#uses=1]
+	%tmp23 = volatile load i64* @X		; <i64> [#uses=1]
+	%tmp24 = call i64 @llvm.bswap.i64(i64 %tmp8)		; <i64> [#uses=1]
+	%tmp25 = add i64 %tmp6, %tmp5		; <i64> [#uses=1]
+	%tmp26 = add i64 %tmp25, %tmp4		; <i64> [#uses=1]
+	%tmp27 = add i64 %tmp7, %tmp4		; <i64> [#uses=1]
+	%tmp28 = add i64 %tmp27, %tmp26		; <i64> [#uses=1]
+	%tmp29 = add i64 %tmp28, %tmp24		; <i64> [#uses=2]
+	%tmp30 = add i64 %tmp2, %tmp1		; <i64> [#uses=1]
+	%tmp31 = add i64 %tmp30, %tmp		; <i64> [#uses=1]
+	%tmp32 = add i64 %tmp2, %tmp1		; <i64> [#uses=1]
+	%tmp33 = add i64 %tmp31, %tmp32		; <i64> [#uses=1]
+	%tmp34 = add i64 %tmp29, %tmp3		; <i64> [#uses=5]
+	%tmp35 = add i64 %tmp33, %tmp		; <i64> [#uses=1]
+	%tmp36 = add i64 %tmp35, %tmp29		; <i64> [#uses=7]
+	%tmp37 = call i64 @llvm.bswap.i64(i64 %tmp9)		; <i64> [#uses=1]
+	%tmp38 = add i64 %tmp4, %tmp5		; <i64> [#uses=1]
+	%tmp39 = add i64 %tmp38, %tmp34		; <i64> [#uses=1]
+	%tmp40 = add i64 %tmp6, %tmp37		; <i64> [#uses=1]
+	%tmp41 = add i64 %tmp40, %tmp39		; <i64> [#uses=1]
+	%tmp42 = add i64 %tmp41, %tmp34		; <i64> [#uses=2]
+	%tmp43 = add i64 %tmp1, %tmp		; <i64> [#uses=1]
+	%tmp44 = add i64 %tmp36, %tmp43		; <i64> [#uses=1]
+	%tmp45 = add i64 %tmp1, %tmp		; <i64> [#uses=1]
+	%tmp46 = add i64 %tmp44, %tmp45		; <i64> [#uses=1]
+	%tmp47 = add i64 %tmp42, %tmp2		; <i64> [#uses=5]
+	%tmp48 = add i64 %tmp36, %tmp46		; <i64> [#uses=1]
+	%tmp49 = add i64 %tmp48, %tmp42		; <i64> [#uses=7]
+	%tmp50 = call i64 @llvm.bswap.i64(i64 %tmp10)		; <i64> [#uses=1]
+	%tmp51 = add i64 %tmp34, %tmp4		; <i64> [#uses=1]
+	%tmp52 = add i64 %tmp51, %tmp47		; <i64> [#uses=1]
+	%tmp53 = add i64 %tmp5, %tmp50		; <i64> [#uses=1]
+	%tmp54 = add i64 %tmp53, %tmp52		; <i64> [#uses=1]
+	%tmp55 = add i64 %tmp54, %tmp47		; <i64> [#uses=2]
+	%tmp56 = add i64 %tmp36, %tmp		; <i64> [#uses=1]
+	%tmp57 = add i64 %tmp49, %tmp56		; <i64> [#uses=1]
+	%tmp58 = add i64 %tmp36, %tmp		; <i64> [#uses=1]
+	%tmp59 = add i64 %tmp57, %tmp58		; <i64> [#uses=1]
+	%tmp60 = add i64 %tmp55, %tmp1		; <i64> [#uses=5]
+	%tmp61 = add i64 %tmp49, %tmp59		; <i64> [#uses=1]
+	%tmp62 = add i64 %tmp61, %tmp55		; <i64> [#uses=7]
+	%tmp63 = call i64 @llvm.bswap.i64(i64 %tmp11)		; <i64> [#uses=1]
+	%tmp64 = add i64 %tmp47, %tmp34		; <i64> [#uses=1]
+	%tmp65 = add i64 %tmp64, %tmp60		; <i64> [#uses=1]
+	%tmp66 = add i64 %tmp4, %tmp63		; <i64> [#uses=1]
+	%tmp67 = add i64 %tmp66, %tmp65		; <i64> [#uses=1]
+	%tmp68 = add i64 %tmp67, %tmp60		; <i64> [#uses=2]
+	%tmp69 = add i64 %tmp49, %tmp36		; <i64> [#uses=1]
+	%tmp70 = add i64 %tmp62, %tmp69		; <i64> [#uses=1]
+	%tmp71 = add i64 %tmp49, %tmp36		; <i64> [#uses=1]
+	%tmp72 = add i64 %tmp70, %tmp71		; <i64> [#uses=1]
+	%tmp73 = add i64 %tmp68, %tmp		; <i64> [#uses=5]
+	%tmp74 = add i64 %tmp62, %tmp72		; <i64> [#uses=1]
+	%tmp75 = add i64 %tmp74, %tmp68		; <i64> [#uses=7]
+	%tmp76 = call i64 @llvm.bswap.i64(i64 %tmp12)		; <i64> [#uses=1]
+	%tmp77 = add i64 %tmp60, %tmp47		; <i64> [#uses=1]
+	%tmp78 = add i64 %tmp77, %tmp73		; <i64> [#uses=1]
+	%tmp79 = add i64 %tmp34, %tmp76		; <i64> [#uses=1]
+	%tmp80 = add i64 %tmp79, %tmp78		; <i64> [#uses=1]
+	%tmp81 = add i64 %tmp80, %tmp73		; <i64> [#uses=2]
+	%tmp82 = add i64 %tmp62, %tmp49		; <i64> [#uses=1]
+	%tmp83 = add i64 %tmp75, %tmp82		; <i64> [#uses=1]
+	%tmp84 = add i64 %tmp62, %tmp49		; <i64> [#uses=1]
+	%tmp85 = add i64 %tmp83, %tmp84		; <i64> [#uses=1]
+	%tmp86 = add i64 %tmp81, %tmp36		; <i64> [#uses=5]
+	%tmp87 = add i64 %tmp75, %tmp85		; <i64> [#uses=1]
+	%tmp88 = add i64 %tmp87, %tmp81		; <i64> [#uses=7]
+	%tmp89 = call i64 @llvm.bswap.i64(i64 %tmp13)		; <i64> [#uses=1]
+	%tmp90 = add i64 %tmp73, %tmp60		; <i64> [#uses=1]
+	%tmp91 = add i64 %tmp90, %tmp86		; <i64> [#uses=1]
+	%tmp92 = add i64 %tmp47, %tmp89		; <i64> [#uses=1]
+	%tmp93 = add i64 %tmp92, %tmp91		; <i64> [#uses=1]
+	%tmp94 = add i64 %tmp93, %tmp86		; <i64> [#uses=2]
+	%tmp95 = add i64 %tmp75, %tmp62		; <i64> [#uses=1]
+	%tmp96 = add i64 %tmp88, %tmp95		; <i64> [#uses=1]
+	%tmp97 = add i64 %tmp75, %tmp62		; <i64> [#uses=1]
+	%tmp98 = add i64 %tmp96, %tmp97		; <i64> [#uses=1]
+	%tmp99 = add i64 %tmp94, %tmp49		; <i64> [#uses=5]
+	%tmp100 = add i64 %tmp88, %tmp98		; <i64> [#uses=1]
+	%tmp101 = add i64 %tmp100, %tmp94		; <i64> [#uses=7]
+	%tmp102 = call i64 @llvm.bswap.i64(i64 %tmp14)		; <i64> [#uses=1]
+	%tmp103 = add i64 %tmp86, %tmp73		; <i64> [#uses=1]
+	%tmp104 = add i64 %tmp103, %tmp99		; <i64> [#uses=1]
+	%tmp105 = add i64 %tmp102, %tmp60		; <i64> [#uses=1]
+	%tmp106 = add i64 %tmp105, %tmp104		; <i64> [#uses=1]
+	%tmp107 = add i64 %tmp106, %tmp99		; <i64> [#uses=2]
+	%tmp108 = add i64 %tmp88, %tmp75		; <i64> [#uses=1]
+	%tmp109 = add i64 %tmp101, %tmp108		; <i64> [#uses=1]
+	%tmp110 = add i64 %tmp88, %tmp75		; <i64> [#uses=1]
+	%tmp111 = add i64 %tmp109, %tmp110		; <i64> [#uses=1]
+	%tmp112 = add i64 %tmp107, %tmp62		; <i64> [#uses=5]
+	%tmp113 = add i64 %tmp101, %tmp111		; <i64> [#uses=1]
+	%tmp114 = add i64 %tmp113, %tmp107		; <i64> [#uses=7]
+	%tmp115 = call i64 @llvm.bswap.i64(i64 %tmp15)		; <i64> [#uses=1]
+	%tmp116 = add i64 %tmp99, %tmp86		; <i64> [#uses=1]
+	%tmp117 = add i64 %tmp116, %tmp112		; <i64> [#uses=1]
+	%tmp118 = add i64 %tmp115, %tmp73		; <i64> [#uses=1]
+	%tmp119 = add i64 %tmp118, %tmp117		; <i64> [#uses=1]
+	%tmp120 = add i64 %tmp119, %tmp112		; <i64> [#uses=2]
+	%tmp121 = add i64 %tmp101, %tmp88		; <i64> [#uses=1]
+	%tmp122 = add i64 %tmp114, %tmp121		; <i64> [#uses=1]
+	%tmp123 = add i64 %tmp101, %tmp88		; <i64> [#uses=1]
+	%tmp124 = add i64 %tmp122, %tmp123		; <i64> [#uses=1]
+	%tmp125 = add i64 %tmp120, %tmp75		; <i64> [#uses=5]
+	%tmp126 = add i64 %tmp114, %tmp124		; <i64> [#uses=1]
+	%tmp127 = add i64 %tmp126, %tmp120		; <i64> [#uses=7]
+	%tmp128 = call i64 @llvm.bswap.i64(i64 %tmp16)		; <i64> [#uses=1]
+	%tmp129 = add i64 %tmp112, %tmp99		; <i64> [#uses=1]
+	%tmp130 = add i64 %tmp129, %tmp125		; <i64> [#uses=1]
+	%tmp131 = add i64 %tmp128, %tmp86		; <i64> [#uses=1]
+	%tmp132 = add i64 %tmp131, %tmp130		; <i64> [#uses=1]
+	%tmp133 = add i64 %tmp132, %tmp125		; <i64> [#uses=2]
+	%tmp134 = add i64 %tmp114, %tmp101		; <i64> [#uses=1]
+	%tmp135 = add i64 %tmp127, %tmp134		; <i64> [#uses=1]
+	%tmp136 = add i64 %tmp114, %tmp101		; <i64> [#uses=1]
+	%tmp137 = add i64 %tmp135, %tmp136		; <i64> [#uses=1]
+	%tmp138 = add i64 %tmp133, %tmp88		; <i64> [#uses=5]
+	%tmp139 = add i64 %tmp127, %tmp137		; <i64> [#uses=1]
+	%tmp140 = add i64 %tmp139, %tmp133		; <i64> [#uses=7]
+	%tmp141 = call i64 @llvm.bswap.i64(i64 %tmp17)		; <i64> [#uses=1]
+	%tmp142 = add i64 %tmp125, %tmp112		; <i64> [#uses=1]
+	%tmp143 = add i64 %tmp142, %tmp138		; <i64> [#uses=1]
+	%tmp144 = add i64 %tmp141, %tmp99		; <i64> [#uses=1]
+	%tmp145 = add i64 %tmp144, %tmp143		; <i64> [#uses=1]
+	%tmp146 = add i64 %tmp145, %tmp138		; <i64> [#uses=2]
+	%tmp147 = add i64 %tmp127, %tmp114		; <i64> [#uses=1]
+	%tmp148 = add i64 %tmp140, %tmp147		; <i64> [#uses=1]
+	%tmp149 = add i64 %tmp127, %tmp114		; <i64> [#uses=1]
+	%tmp150 = add i64 %tmp148, %tmp149		; <i64> [#uses=1]
+	%tmp151 = add i64 %tmp146, %tmp101		; <i64> [#uses=5]
+	%tmp152 = add i64 %tmp140, %tmp150		; <i64> [#uses=1]
+	%tmp153 = add i64 %tmp152, %tmp146		; <i64> [#uses=7]
+	%tmp154 = call i64 @llvm.bswap.i64(i64 %tmp18)		; <i64> [#uses=1]
+	%tmp155 = add i64 %tmp138, %tmp125		; <i64> [#uses=1]
+	%tmp156 = add i64 %tmp155, %tmp151		; <i64> [#uses=1]
+	%tmp157 = add i64 %tmp154, %tmp112		; <i64> [#uses=1]
+	%tmp158 = add i64 %tmp157, %tmp156		; <i64> [#uses=1]
+	%tmp159 = add i64 %tmp158, %tmp151		; <i64> [#uses=2]
+	%tmp160 = add i64 %tmp140, %tmp127		; <i64> [#uses=1]
+	%tmp161 = add i64 %tmp153, %tmp160		; <i64> [#uses=1]
+	%tmp162 = add i64 %tmp140, %tmp127		; <i64> [#uses=1]
+	%tmp163 = add i64 %tmp161, %tmp162		; <i64> [#uses=1]
+	%tmp164 = add i64 %tmp159, %tmp114		; <i64> [#uses=5]
+	%tmp165 = add i64 %tmp153, %tmp163		; <i64> [#uses=1]
+	%tmp166 = add i64 %tmp165, %tmp159		; <i64> [#uses=7]
+	%tmp167 = call i64 @llvm.bswap.i64(i64 %tmp19)		; <i64> [#uses=1]
+	%tmp168 = add i64 %tmp151, %tmp138		; <i64> [#uses=1]
+	%tmp169 = add i64 %tmp168, %tmp164		; <i64> [#uses=1]
+	%tmp170 = add i64 %tmp167, %tmp125		; <i64> [#uses=1]
+	%tmp171 = add i64 %tmp170, %tmp169		; <i64> [#uses=1]
+	%tmp172 = add i64 %tmp171, %tmp164		; <i64> [#uses=2]
+	%tmp173 = add i64 %tmp153, %tmp140		; <i64> [#uses=1]
+	%tmp174 = add i64 %tmp166, %tmp173		; <i64> [#uses=1]
+	%tmp175 = add i64 %tmp153, %tmp140		; <i64> [#uses=1]
+	%tmp176 = add i64 %tmp174, %tmp175		; <i64> [#uses=1]
+	%tmp177 = add i64 %tmp172, %tmp127		; <i64> [#uses=5]
+	%tmp178 = add i64 %tmp166, %tmp176		; <i64> [#uses=1]
+	%tmp179 = add i64 %tmp178, %tmp172		; <i64> [#uses=6]
+	%tmp180 = call i64 @llvm.bswap.i64(i64 %tmp20)		; <i64> [#uses=1]
+	%tmp181 = add i64 %tmp164, %tmp151		; <i64> [#uses=1]
+	%tmp182 = add i64 %tmp181, %tmp177		; <i64> [#uses=1]
+	%tmp183 = add i64 %tmp180, %tmp138		; <i64> [#uses=1]
+	%tmp184 = add i64 %tmp183, %tmp182		; <i64> [#uses=1]
+	%tmp185 = add i64 %tmp184, %tmp177		; <i64> [#uses=2]
+	%tmp186 = add i64 %tmp166, %tmp153		; <i64> [#uses=1]
+	%tmp187 = add i64 %tmp179, %tmp186		; <i64> [#uses=1]
+	%tmp188 = add i64 %tmp166, %tmp153		; <i64> [#uses=1]
+	%tmp189 = add i64 %tmp187, %tmp188		; <i64> [#uses=1]
+	%tmp190 = add i64 %tmp185, %tmp140		; <i64> [#uses=4]
+	%tmp191 = add i64 %tmp179, %tmp189		; <i64> [#uses=1]
+	%tmp192 = add i64 %tmp191, %tmp185		; <i64> [#uses=4]
+	%tmp193 = call i64 @llvm.bswap.i64(i64 %tmp21)		; <i64> [#uses=1]
+	%tmp194 = add i64 %tmp177, %tmp164		; <i64> [#uses=1]
+	%tmp195 = add i64 %tmp194, %tmp190		; <i64> [#uses=1]
+	%tmp196 = add i64 %tmp193, %tmp151		; <i64> [#uses=1]
+	%tmp197 = add i64 %tmp196, %tmp195		; <i64> [#uses=1]
+	%tmp198 = add i64 %tmp197, %tmp190		; <i64> [#uses=2]
+	%tmp199 = add i64 %tmp179, %tmp166		; <i64> [#uses=1]
+	%tmp200 = add i64 %tmp192, %tmp199		; <i64> [#uses=1]
+	%tmp201 = add i64 %tmp179, %tmp166		; <i64> [#uses=1]
+	%tmp202 = add i64 %tmp200, %tmp201		; <i64> [#uses=1]
+	%tmp203 = add i64 %tmp198, %tmp153		; <i64> [#uses=3]
+	%tmp204 = add i64 %tmp192, %tmp202		; <i64> [#uses=1]
+	%tmp205 = add i64 %tmp204, %tmp198		; <i64> [#uses=2]
+	%tmp206 = call i64 @llvm.bswap.i64(i64 %tmp22)		; <i64> [#uses=1]
+	%tmp207 = add i64 %tmp190, %tmp177		; <i64> [#uses=1]
+	%tmp208 = add i64 %tmp207, %tmp203		; <i64> [#uses=1]
+	%tmp209 = add i64 %tmp206, %tmp164		; <i64> [#uses=1]
+	%tmp210 = add i64 %tmp209, %tmp208		; <i64> [#uses=1]
+	%tmp211 = add i64 %tmp210, %tmp203		; <i64> [#uses=2]
+	%tmp212 = add i64 %tmp192, %tmp179		; <i64> [#uses=1]
+	%tmp213 = add i64 %tmp205, %tmp212		; <i64> [#uses=1]
+	%tmp214 = add i64 %tmp192, %tmp179		; <i64> [#uses=1]
+	%tmp215 = add i64 %tmp213, %tmp214		; <i64> [#uses=1]
+	%tmp216 = add i64 %tmp211, %tmp166		; <i64> [#uses=2]
+	%tmp217 = add i64 %tmp205, %tmp215		; <i64> [#uses=1]
+	%tmp218 = add i64 %tmp217, %tmp211		; <i64> [#uses=1]
+	%tmp219 = call i64 @llvm.bswap.i64(i64 %tmp23)		; <i64> [#uses=2]
+	volatile store i64 %tmp219, i64* @X, align 8
+	%tmp220 = add i64 %tmp203, %tmp190		; <i64> [#uses=1]
+	%tmp221 = add i64 %tmp220, %tmp216		; <i64> [#uses=1]
+	%tmp222 = add i64 %tmp219, %tmp177		; <i64> [#uses=1]
+	%tmp223 = add i64 %tmp222, %tmp221		; <i64> [#uses=1]
+	%tmp224 = add i64 %tmp223, %tmp216		; <i64> [#uses=1]
+	%tmp225 = add i64 %tmp224, %tmp218		; <i64> [#uses=1]
+	ret i64 %tmp225
+}
+
+declare i64 @llvm.bswap.i64(i64) nounwind readnone
diff --git a/test/CodeGen/X86/2009-03-23-i80-fp80.ll b/test/CodeGen/X86/2009-03-23-i80-fp80.ll
new file mode 100644
index 0000000..e542325
--- /dev/null
+++ b/test/CodeGen/X86/2009-03-23-i80-fp80.ll
@@ -0,0 +1,14 @@
+; RUN: opt < %s -instcombine -S | grep 302245289961712575840256
+; RUN: opt < %s -instcombine -S | grep K40018000000000000000
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin9"
+
+define i80 @from() {
+  %tmp = bitcast x86_fp80 0xK4000C000000000000000 to i80
+  ret i80 %tmp
+}
+
+define x86_fp80 @to() {
+  %tmp = bitcast i80 302259125019767858003968 to x86_fp80
+  ret x86_fp80 %tmp
+}
diff --git a/test/CodeGen/X86/2009-03-25-TestBug.ll b/test/CodeGen/X86/2009-03-25-TestBug.ll
new file mode 100644
index 0000000..f40fddc
--- /dev/null
+++ b/test/CodeGen/X86/2009-03-25-TestBug.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86 -o %t
+; RUN: not grep and %t
+; RUN: not grep shr %t
+; rdar://6661955
+
+@hello = internal constant [7 x i8] c"hello\0A\00"
+@world = internal constant [7 x i8] c"world\0A\00"
+
+define void @func(i32* %b) nounwind {
+bb1579.i.i:		; preds = %bb1514.i.i, %bb191.i.i
+	%tmp176 = load i32* %b, align 4
+	%tmp177 = and i32 %tmp176, 2
+	%tmp178 = icmp eq i32 %tmp177, 0
+        br i1 %tmp178, label %hello, label %world
+
+hello:
+	%h = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([7 x i8]* @hello, i32 0, i32 0))
+        ret void
+
+world:
+	%w = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([7 x i8]* @world, i32 0, i32 0))
+        ret void
+}
+
+declare i32 @printf(i8*, ...) nounwind
diff --git a/test/CodeGen/X86/2009-03-26-NoImplicitFPBug.ll b/test/CodeGen/X86/2009-03-26-NoImplicitFPBug.ll
new file mode 100644
index 0000000..f486479
--- /dev/null
+++ b/test/CodeGen/X86/2009-03-26-NoImplicitFPBug.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2
+
+define double @t(double %x) nounwind ssp noimplicitfloat {
+entry:
+	br i1 false, label %return, label %bb3
+
+bb3:		; preds = %entry
+	ret double 0.000000e+00
+
+return:		; preds = %entry
+	ret double undef
+}
diff --git a/test/CodeGen/X86/2009-04-09-InlineAsmCrash.ll b/test/CodeGen/X86/2009-04-09-InlineAsmCrash.ll
new file mode 100644
index 0000000..97bbd93
--- /dev/null
+++ b/test/CodeGen/X86/2009-04-09-InlineAsmCrash.ll
@@ -0,0 +1,165 @@
+; RUN: llc < %s 
+; rdar://6774324
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin10.0"
+	type <{ i32, %1 }>		; type %0
+	type <{ [216 x i8] }>		; type %1
+	type <{ %3, %4*, %28*, i64, i32, %6, %6, i32, i32, i32, i32, void (i8*, i32)*, i8*, %29*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [2 x i8*], i32, %30, i32, %24, %4*, %4*, i64, i64, i32, i32, void (i32, %2*)*, i32, i32, i32, i32, i32, i32, i32, i32, %24, i64, i64, i64, i64, i64, %21, i32, i32, %21, i32, %31*, %3, %33, %34, %9*, i32, i32, %3, %3, %35, %41*, %42*, %11, i32, i32, i32, i8, i8, i8, i8, %69*, %69, %9*, %9*, [11 x %61], %3, i8*, i32, i64, i64, i32, i32, i32, i64 }>		; type %2
+	type <{ %3*, %3* }>		; type %3
+	type <{ %3, i32, %2*, %2*, %2*, %5*, i32, i32, %21, i64, i64, i64, i32, %22, %9*, %6, %4*, %23 }>		; type %4
+	type <{ %3, %3, %4*, %4*, i32, %6, %9*, %9*, %5*, %20* }>		; type %5
+	type <{ %7, i16, i8, i8, %8 }>		; type %6
+	type <{ i32 }>		; type %7
+	type <{ i8*, i8*, [2 x i32], i16, i8, i8, i8*, i8, i8, i8, i8, i8* }>		; type %8
+	type <{ %10, %13, %15, i32, i32, i32, i32, %9*, %9*, %16*, i32, %17*, i64, i32 }>		; type %9
+	type <{ i32, i32, %11 }>		; type %10
+	type <{ %12 }>		; type %11
+	type <{ [12 x i8] }>		; type %12
+	type <{ %14 }>		; type %13
+	type <{ [40 x i8] }>		; type %14
+	type <{ [4 x i8] }>		; type %15
+	type <{ %15, %15 }>		; type %16
+	type <{ %17*, %17*, %9*, i32, %18*, %19* }>		; type %17
+	type opaque		; type %18
+	type <{ i32, i32, %9*, %9*, i32, i32 }>		; type %19
+	type <{ %5*, %20*, %20*, %20* }>		; type %20
+	type <{ %3, %3*, void (i8*, i8*)*, i8*, i8*, i64 }>		; type %21
+	type <{ i32, [4 x i32], i32, i32, [128 x %3] }>		; type %22
+	type <{ %24, %24, %24, %24*, %24*, %24*, %25, %26, %27, i32, i32, i8* }>		; type %23
+	type <{ i64, i32, i32, i32 }>		; type %24
+	type <{ i32, i32 }>		; type %25
+	type <{ i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, i32 }>		; type %26
+	type <{ [16 x %17*], i32 }>		; type %27
+	type <{ i8, i8, i8, i8, %7, %3 }>		; type %28
+	type <{ i32, %11*, i8*, i8*, %11* }>		; type %29
+	type <{ i32, i32, i32, i32, i64 }>		; type %30
+	type <{ %32*, %3, %3, i32, i32, i32, %5* }>		; type %31
+	type opaque		; type %32
+	type <{ [44 x i8] }>		; type %33
+	type <{ %17* }>		; type %34
+	type <{ %36, %36*, i32, [4 x %40], i32, i32, i64, i32 }>		; type %35
+	type <{ i8*, %0*, %37*, i64, %39, i32, %39, %6, i64, i64, i8*, i32 }>		; type %36
+	type <{ i32, i32, i8, i8, i8, i8, i8, i8, i8, i8, %38 }>		; type %37
+	type <{ i16, i16, i8, i8, i16, i32, i16, i16, i32, i16, i16, i32, i32, [8 x [8 x i16]], [8 x [16 x i16]], [96 x i8] }>		; type %38
+	type <{ i8, i8, i8, i8, i8, i8, i8, i8 }>		; type %39
+	type <{ i64 }>		; type %40
+	type <{ %11, i32, i32, i32, %42*, %3, i8*, %3, %5*, %32*, i32, i32, i32, i32, i32, i32, i32, %59, %60, i64, i64, i32, %11, %9*, %9*, %9*, [11 x %61], %9*, %9*, %9*, %9*, %9*, [3 x %9*], %62*, %3, %3, i32, i32, %9*, %9*, i32, %67*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, %68*, [2 x i32], i64, i64, i32 }>		; type %41
+	type <{ %43, %44, %47*, i64, i64, i64, i32, %11, %54, %46*, %46*, i32, i32, i32, i32, i32, i32, i32 }>		; type %42
+	type <{ i16, i8, i8, i32, i32 }>		; type %43
+	type <{ %45, i32, i32 }>		; type %44
+	type <{ %46*, %46*, i64, i64 }>		; type %45
+	type <{ %45, %15, i64, i8, i8, i8, i8, i16, i16 }>		; type %46
+	type <{ i64*, i64, %48*, i32, i32, i32, %6, %53, i32, i64, i64*, i64*, %48*, %48*, %48*, i32 }>		; type %47
+	type <{ %3, %43, i64, %49*, i32, i32, i32, i32, %48*, %48*, i64, %50*, i64, %52*, i32, i16, i16, i8, i8, i8, i8, %3, %3, i64, i32, i32, i32, i8*, i32, i8, i8, i8, i8, %3 }>		; type %48
+	type <{ %3, %3, %49*, %48*, i64, i8, i8, i8, i8, i32, i8, i8, i8, i8 }>		; type %49
+	type <{ i32, %51* }>		; type %50
+	type <{ void (%50*)*, void (%50*)*, i32 (%50*, %52*, i32)*, i32 (%50*)*, i32 (%50*, i64, i32, i32, i32*)*, i32 (%50*, i64, i32, i64*, i32*, i32, i32, i32)*, i32 (%50*, i64, i32)*, i32 (%50*, i64, i64, i32)*, i32 (%50*, i64, i64, i32)*, i32 (%50*, i32)*, i32 (%50*)*, i8* }>		; type %51
+	type <{ i32, %48* }>		; type %52
+	type <{ i32, i32, i32 }>		; type %53
+	type <{ %11, %55*, i32, %53, i64 }>		; type %54
+	type <{ %3, i32, i32, i32, i32, i32, [64 x i8], %56 }>		; type %55
+	type <{ %57, %58, %58 }>		; type %56
+	type <{ i64, i64, i64, i64, i64 }>		; type %57
+	type <{ i64, i64, i64, i64, i64, i64, i64, i64 }>		; type %58
+	type <{ [2 x i32] }>		; type %59
+	type <{ [8 x i32] }>		; type %60
+	type <{ %9*, i32, i32, i32 }>		; type %61
+	type <{ %11, i32, %11, i32, i32, %63*, i32, %64*, %65, i32, i32, i32, i32, %41* }>		; type %62
+	type <{ %10*, i32, %15, %15 }>		; type %63
+	type opaque		; type %64
+	type <{ i32, %66*, %66*, %66**, %66*, %66** }>		; type %65
+	type <{ %63, i32, %62*, %66*, %66* }>		; type %66
+	type <{ i32, i32, [0 x %39] }>		; type %67
+	type opaque		; type %68
+	type <{ %69*, void (%69*, %2*)* }>		; type %69
+	type <{ %70*, %2*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i32, i32, i32, i32, i32, i32, i32, %71, i32, i32, i64, i64, i64, %72, i8*, i8*, %73, %4*, %79*, %81*, %39*, %84, i32, i32, i32, i8*, i32, i32, i32, i32, i32, i32, i32, i64*, i32, i64*, i8*, i32, [256 x i32], i64, i64, %86, %77*, i64, i64, %88*, %2*, %2* }>		; type %70
+	type <{ %3, i64, i32, i32 }>		; type %71
+	type <{ i64, i64, i64 }>		; type %72
+	type <{ %73*, %73*, %73*, %73*, %74*, %75*, %76*, %70*, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, [3 x %78*], i8*, i8* }>		; type %73
+	type <{ %74*, %74*, %75*, %76*, %73*, i32, i32, i32, i32, i32, i8*, i8* }>		; type %74
+	type <{ %75*, %73*, %74*, %76*, i32, i32, i32, i32, %78*, i8*, i8* }>		; type %75
+	type <{ %76*, %73*, %74*, %75*, i32, i32, i32, i32, i8*, i8*, %77* }>		; type %76
+	type opaque		; type %77
+	type <{ %78*, %75*, i8, i8, i8, i8, i16, i16, i16, i8, i8, i32, [0 x %73*] }>		; type %78
+	type <{ i32, i32, i32, [20 x %80] }>		; type %79
+	type <{ i64*, i8* }>		; type %80
+	type <{ [256 x %39], [19 x %39], i8, i8, i8, i8, i8, i8, i8, i8, %82, i8, i8, i8, i8, i8, i8, i8, i8, %82, %83 }>		; type %81
+	type <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, i16 }>		; type %82
+	type <{ [16 x i64], i64 }>		; type %83
+	type <{ %82*, %85, %85, %39*, i32 }>		; type %84
+	type <{ i16, %39* }>		; type %85
+	type <{ %87, i8* }>		; type %86
+	type <{ i32, i32, i32, i8, i8, i16, i32, i32, i32, i32, i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }>		; type %87
+	type <{ i64, i64, i32, i32, i32, i32 }>		; type %88
+	type <{ i32, i32, i32, i32, i32, i32, i32 }>		; type %89
+@kernel_stack_size = external global i32		; <i32*> [#uses=1]
+
+define void @test(%0*) nounwind {
+	%2 = tail call %2* asm sideeffect "mov %gs:${1:P},$0", "=r,i,~{dirflag},~{fpsr},~{flags}"(i32 ptrtoint (%2** getelementptr (%70* null, i32 0, i32 1) to i32)) nounwind		; <%2*> [#uses=1]
+	%3 = getelementptr %2* %2, i32 0, i32 15		; <i32*> [#uses=1]
+	%4 = load i32* %3		; <i32> [#uses=2]
+	%5 = icmp eq i32 %4, 0		; <i1> [#uses=1]
+	br i1 %5, label %47, label %6
+
+; <label>:6		; preds = %1
+	%7 = load i32* @kernel_stack_size		; <i32> [#uses=1]
+	%8 = add i32 %7, %4		; <i32> [#uses=1]
+	%9 = inttoptr i32 %8 to %89*		; <%89*> [#uses=12]
+	%10 = tail call %2* asm sideeffect "mov %gs:${1:P},$0", "=r,i,~{dirflag},~{fpsr},~{flags}"(i32 ptrtoint (%2** getelementptr (%70* null, i32 0, i32 1) to i32)) nounwind		; <%2*> [#uses=1]
+	%11 = getelementptr %2* %10, i32 0, i32 65, i32 1		; <%36**> [#uses=1]
+	%12 = load %36** %11		; <%36*> [#uses=1]
+	%13 = getelementptr %36* %12, i32 0, i32 1		; <%0**> [#uses=1]
+	%14 = load %0** %13		; <%0*> [#uses=1]
+	%15 = icmp eq %0* %14, %0		; <i1> [#uses=1]
+	br i1 %15, label %40, label %16
+
+; <label>:16		; preds = %6
+	%17 = getelementptr %0* %0, i32 0, i32 1		; <%1*> [#uses=1]
+	%18 = getelementptr %89* %9, i32 -1, i32 0		; <i32*> [#uses=1]
+	%19 = getelementptr %0* %0, i32 0, i32 1, i32 0, i32 32		; <i8*> [#uses=1]
+	%20 = bitcast i8* %19 to i32*		; <i32*> [#uses=1]
+	%21 = load i32* %20		; <i32> [#uses=1]
+	store i32 %21, i32* %18
+	%22 = getelementptr %89* %9, i32 -1, i32 1		; <i32*> [#uses=1]
+	%23 = ptrtoint %1* %17 to i32		; <i32> [#uses=1]
+	store i32 %23, i32* %22
+	%24 = getelementptr %89* %9, i32 -1, i32 2		; <i32*> [#uses=1]
+	%25 = getelementptr %0* %0, i32 0, i32 1, i32 0, i32 24		; <i8*> [#uses=1]
+	%26 = bitcast i8* %25 to i32*		; <i32*> [#uses=1]
+	%27 = load i32* %26		; <i32> [#uses=1]
+	store i32 %27, i32* %24
+	%28 = getelementptr %89* %9, i32 -1, i32 3		; <i32*> [#uses=1]
+	%29 = getelementptr %0* %0, i32 0, i32 1, i32 0, i32 16		; <i8*> [#uses=1]
+	%30 = bitcast i8* %29 to i32*		; <i32*> [#uses=1]
+	%31 = load i32* %30		; <i32> [#uses=1]
+	store i32 %31, i32* %28
+	%32 = getelementptr %89* %9, i32 -1, i32 4		; <i32*> [#uses=1]
+	%33 = getelementptr %0* %0, i32 0, i32 1, i32 0, i32 20		; <i8*> [#uses=1]
+	%34 = bitcast i8* %33 to i32*		; <i32*> [#uses=1]
+	%35 = load i32* %34		; <i32> [#uses=1]
+	store i32 %35, i32* %32
+	%36 = getelementptr %89* %9, i32 -1, i32 5		; <i32*> [#uses=1]
+	%37 = getelementptr %0* %0, i32 0, i32 1, i32 0, i32 56		; <i8*> [#uses=1]
+	%38 = bitcast i8* %37 to i32*		; <i32*> [#uses=1]
+	%39 = load i32* %38		; <i32> [#uses=1]
+	store i32 %39, i32* %36
+	ret void
+
+; <label>:40		; preds = %6
+	%41 = getelementptr %89* %9, i32 -1, i32 0		; <i32*> [#uses=1]
+	tail call void asm sideeffect "movl %ebx, $0", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* %41) nounwind
+	%42 = getelementptr %89* %9, i32 -1, i32 1		; <i32*> [#uses=1]
+	tail call void asm sideeffect "movl %esp, $0", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* %42) nounwind
+	%43 = getelementptr %89* %9, i32 -1, i32 2		; <i32*> [#uses=1]
+	tail call void asm sideeffect "movl %ebp, $0", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* %43) nounwind
+	%44 = getelementptr %89* %9, i32 -1, i32 3		; <i32*> [#uses=1]
+	tail call void asm sideeffect "movl %edi, $0", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* %44) nounwind
+	%45 = getelementptr %89* %9, i32 -1, i32 4		; <i32*> [#uses=1]
+	tail call void asm sideeffect "movl %esi, $0", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* %45) nounwind
+	%46 = getelementptr %89* %9, i32 -1, i32 5		; <i32*> [#uses=1]
+	tail call void asm sideeffect "movl $$1f, $0\0A1:", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* %46) nounwind
+	ret void
+
+; <label>:47		; preds = %1
+	ret void
+}
diff --git a/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll b/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll
new file mode 100644
index 0000000..27f11cf
--- /dev/null
+++ b/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -fast-isel
+; rdar://6772169
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin10"
+	type { i32, i1 }		; type %0
+
+declare %0 @llvm.sadd.with.overflow.i32(i32, i32) nounwind
+
+define fastcc i32 @test() nounwind {
+entry:
+	%tmp1 = call %0 @llvm.sadd.with.overflow.i32(i32 1, i32 0)
+	%tmp2 = extractvalue %0 %tmp1, 1
+	br i1 %tmp2, label %.backedge, label %BB3
+
+BB3:
+	%tmp4 = extractvalue %0 %tmp1, 0
+	br label %.backedge
+
+.backedge:
+	ret i32 0
+}
diff --git a/test/CodeGen/X86/2009-04-12-picrel.ll b/test/CodeGen/X86/2009-04-12-picrel.ll
new file mode 100644
index 0000000..f1942801
--- /dev/null
+++ b/test/CodeGen/X86/2009-04-12-picrel.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small > %t
+; RUN: grep leaq %t | count 1
+
+@dst = external global [131072 x i32]
+@ptr = external global i32*
+
+define void @off01(i64 %i) nounwind {
+entry:
+	%.sum = add i64 %i, 16
+	%0 = getelementptr [131072 x i32]* @dst, i64 0, i64 %.sum
+	store i32* %0, i32** @ptr, align 8
+	ret void
+}
diff --git a/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll b/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll
new file mode 100644
index 0000000..ff8cf0a
--- /dev/null
+++ b/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin
+; rdar://6781755
+; PR3934
+
+	type { i32, i32 }		; type %0
+
+define void @bn_sqr_comba8(i32* nocapture %r, i32* %a) nounwind {
+entry:
+	%asmtmp23 = tail call %0 asm "mulq $3", "={ax},={dx},{ax},*m,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 0, i32* %a) nounwind		; <%0> [#uses=1]
+	%asmresult25 = extractvalue %0 %asmtmp23, 1		; <i32> [#uses=1]
+	%asmtmp26 = tail call %0 asm "addq $0,$0; adcq $2,$1", "={dx},=r,imr,0,1,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 0, i32 %asmresult25, i32 0) nounwind		; <%0> [#uses=1]
+	%asmresult27 = extractvalue %0 %asmtmp26, 0		; <i32> [#uses=1]
+	%asmtmp29 = tail call %0 asm "addq $0,$0; adcq $2,$1", "={ax},={dx},imr,0,1,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 0, i32 0, i32 %asmresult27) nounwind		; <%0> [#uses=0]
+	ret void
+}
diff --git a/test/CodeGen/X86/2009-04-13-2AddrAssert.ll b/test/CodeGen/X86/2009-04-13-2AddrAssert.ll
new file mode 100644
index 0000000..4362ba4
--- /dev/null
+++ b/test/CodeGen/X86/2009-04-13-2AddrAssert.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s
+; rdar://6781755
+; PR3934
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "x86_64-undermydesk-freebsd8.0"
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
+entry:
+        %call = tail call i32 (...)* @getpid()          ; <i32> [#uses=1]
+        %conv = trunc i32 %call to i16          ; <i16> [#uses=1]
+        %0 = tail call i16 asm "xchgb ${0:h}, ${0:b}","=Q,0,~{dirflag},~{fpsr},~{flags}"(i16 %conv) nounwind           ; <i16> [#uses=0]
+        ret i32 undef
+}
+
+declare i32 @getpid(...)
diff --git a/test/CodeGen/X86/2009-04-14-IllegalRegs.ll b/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
new file mode 100644
index 0000000..bfa3eaa
--- /dev/null
+++ b/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -O0 -regalloc=local | not grep sil
+; rdar://6787136
+
+	%struct.X = type { i8, [32 x i8] }
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 ()* @z to i8*)], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define i32 @z() nounwind ssp {
+entry:
+	%retval = alloca i32		; <i32*> [#uses=2]
+	%xxx = alloca %struct.X		; <%struct.X*> [#uses=6]
+	%0 = alloca i32		; <i32*> [#uses=2]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	%1 = getelementptr %struct.X* %xxx, i32 0, i32 1		; <[32 x i8]*> [#uses=1]
+	%2 = getelementptr [32 x i8]* %1, i32 0, i32 31		; <i8*> [#uses=1]
+	store i8 48, i8* %2, align 1
+	%3 = getelementptr %struct.X* %xxx, i32 0, i32 1		; <[32 x i8]*> [#uses=1]
+	%4 = getelementptr [32 x i8]* %3, i32 0, i32 31		; <i8*> [#uses=1]
+	%5 = load i8* %4, align 1		; <i8> [#uses=1]
+	%6 = getelementptr %struct.X* %xxx, i32 0, i32 1		; <[32 x i8]*> [#uses=1]
+	%7 = getelementptr [32 x i8]* %6, i32 0, i32 0		; <i8*> [#uses=1]
+	store i8 %5, i8* %7, align 1
+	%8 = getelementptr %struct.X* %xxx, i32 0, i32 0		; <i8*> [#uses=1]
+	store i8 15, i8* %8, align 1
+	%9 = call i32 (...)* bitcast (i32 (%struct.X*, %struct.X*)* @f to i32 (...)*)(%struct.X* byval align 4 %xxx, %struct.X* byval align 4 %xxx) nounwind		; <i32> [#uses=1]
+	store i32 %9, i32* %0, align 4
+	%10 = load i32* %0, align 4		; <i32> [#uses=1]
+	store i32 %10, i32* %retval, align 4
+	br label %return
+
+return:		; preds = %entry
+	%retval1 = load i32* %retval		; <i32> [#uses=1]
+	ret i32 %retval1
+}
+
+declare i32 @f(%struct.X* byval align 4, %struct.X* byval align 4) nounwind ssp
diff --git a/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
new file mode 100644
index 0000000..f46eed4
--- /dev/null
+++ b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
@@ -0,0 +1,141 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of modref unfolded}
+; XFAIL: *
+; 69408 removed the opportunity for this optimization to work
+
+	%struct.SHA512_CTX = type { [8 x i64], i64, i64, %struct.anon, i32, i32 }
+	%struct.anon = type { [16 x i64] }
+@K512 = external constant [80 x i64], align 32		; <[80 x i64]*> [#uses=2]
+
+define fastcc void @sha512_block_data_order(%struct.SHA512_CTX* nocapture %ctx, i8* nocapture %in, i64 %num) nounwind ssp {
+entry:
+	br label %bb349
+
+bb349:		; preds = %bb349, %entry
+	%e.0489 = phi i64 [ 0, %entry ], [ %e.0, %bb349 ]		; <i64> [#uses=3]
+	%b.0472 = phi i64 [ 0, %entry ], [ %87, %bb349 ]		; <i64> [#uses=2]
+	%asmtmp356 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 41, i64 %e.0489) nounwind		; <i64> [#uses=1]
+	%0 = xor i64 0, %asmtmp356		; <i64> [#uses=1]
+	%1 = add i64 0, %0		; <i64> [#uses=1]
+	%2 = add i64 %1, 0		; <i64> [#uses=1]
+	%3 = add i64 %2, 0		; <i64> [#uses=1]
+	%4 = add i64 %3, 0		; <i64> [#uses=5]
+	%asmtmp372 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 34, i64 %4) nounwind		; <i64> [#uses=1]
+	%asmtmp373 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 39, i64 %4) nounwind		; <i64> [#uses=0]
+	%5 = xor i64 %asmtmp372, 0		; <i64> [#uses=0]
+	%6 = xor i64 0, %b.0472		; <i64> [#uses=1]
+	%7 = and i64 %4, %6		; <i64> [#uses=1]
+	%8 = xor i64 %7, 0		; <i64> [#uses=1]
+	%9 = add i64 0, %8		; <i64> [#uses=1]
+	%10 = add i64 %9, 0		; <i64> [#uses=2]
+	%asmtmp377 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 61, i64 0) nounwind		; <i64> [#uses=1]
+	%11 = xor i64 0, %asmtmp377		; <i64> [#uses=1]
+	%12 = add i64 0, %11		; <i64> [#uses=1]
+	%13 = add i64 %12, 0		; <i64> [#uses=1]
+	%not381 = xor i64 0, -1		; <i64> [#uses=1]
+	%14 = and i64 %e.0489, %not381		; <i64> [#uses=1]
+	%15 = xor i64 0, %14		; <i64> [#uses=1]
+	%16 = add i64 %15, 0		; <i64> [#uses=1]
+	%17 = add i64 %16, %13		; <i64> [#uses=1]
+	%18 = add i64 %17, 0		; <i64> [#uses=1]
+	%19 = add i64 %18, 0		; <i64> [#uses=2]
+	%20 = add i64 %19, %b.0472		; <i64> [#uses=3]
+	%21 = add i64 %19, 0		; <i64> [#uses=1]
+	%22 = add i64 %21, 0		; <i64> [#uses=1]
+	%23 = add i32 0, 12		; <i32> [#uses=1]
+	%24 = and i32 %23, 12		; <i32> [#uses=1]
+	%25 = zext i32 %24 to i64		; <i64> [#uses=1]
+	%26 = getelementptr [16 x i64]* null, i64 0, i64 %25		; <i64*> [#uses=0]
+	%27 = add i64 0, %e.0489		; <i64> [#uses=1]
+	%28 = add i64 %27, 0		; <i64> [#uses=1]
+	%29 = add i64 %28, 0		; <i64> [#uses=1]
+	%30 = add i64 %29, 0		; <i64> [#uses=2]
+	%31 = and i64 %10, %4		; <i64> [#uses=1]
+	%32 = xor i64 0, %31		; <i64> [#uses=1]
+	%33 = add i64 %30, 0		; <i64> [#uses=3]
+	%34 = add i64 %30, %32		; <i64> [#uses=1]
+	%35 = add i64 %34, 0		; <i64> [#uses=1]
+	%36 = and i64 %33, %20		; <i64> [#uses=1]
+	%37 = xor i64 %36, 0		; <i64> [#uses=1]
+	%38 = add i64 %37, 0		; <i64> [#uses=1]
+	%39 = add i64 %38, 0		; <i64> [#uses=1]
+	%40 = add i64 %39, 0		; <i64> [#uses=1]
+	%41 = add i64 %40, 0		; <i64> [#uses=1]
+	%42 = add i64 %41, %4		; <i64> [#uses=3]
+	%43 = or i32 0, 6		; <i32> [#uses=1]
+	%44 = and i32 %43, 14		; <i32> [#uses=1]
+	%45 = zext i32 %44 to i64		; <i64> [#uses=1]
+	%46 = getelementptr [16 x i64]* null, i64 0, i64 %45		; <i64*> [#uses=1]
+	%not417 = xor i64 %42, -1		; <i64> [#uses=1]
+	%47 = and i64 %20, %not417		; <i64> [#uses=1]
+	%48 = xor i64 0, %47		; <i64> [#uses=1]
+	%49 = getelementptr [80 x i64]* @K512, i64 0, i64 0		; <i64*> [#uses=1]
+	%50 = load i64* %49, align 8		; <i64> [#uses=1]
+	%51 = add i64 %48, 0		; <i64> [#uses=1]
+	%52 = add i64 %51, 0		; <i64> [#uses=1]
+	%53 = add i64 %52, 0		; <i64> [#uses=1]
+	%54 = add i64 %53, %50		; <i64> [#uses=2]
+	%asmtmp420 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 34, i64 0) nounwind		; <i64> [#uses=1]
+	%asmtmp421 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 39, i64 0) nounwind		; <i64> [#uses=1]
+	%55 = xor i64 %asmtmp420, 0		; <i64> [#uses=1]
+	%56 = xor i64 %55, %asmtmp421		; <i64> [#uses=1]
+	%57 = add i64 %54, %10		; <i64> [#uses=5]
+	%58 = add i64 %54, 0		; <i64> [#uses=1]
+	%59 = add i64 %58, %56		; <i64> [#uses=2]
+	%60 = or i32 0, 7		; <i32> [#uses=1]
+	%61 = and i32 %60, 15		; <i32> [#uses=1]
+	%62 = zext i32 %61 to i64		; <i64> [#uses=1]
+	%63 = getelementptr [16 x i64]* null, i64 0, i64 %62		; <i64*> [#uses=2]
+	%64 = load i64* null, align 8		; <i64> [#uses=1]
+	%65 = lshr i64 %64, 6		; <i64> [#uses=1]
+	%66 = xor i64 0, %65		; <i64> [#uses=1]
+	%67 = xor i64 %66, 0		; <i64> [#uses=1]
+	%68 = load i64* %46, align 8		; <i64> [#uses=1]
+	%69 = load i64* null, align 8		; <i64> [#uses=1]
+	%70 = add i64 %68, 0		; <i64> [#uses=1]
+	%71 = add i64 %70, %67		; <i64> [#uses=1]
+	%72 = add i64 %71, %69		; <i64> [#uses=1]
+	%asmtmp427 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 18, i64 %57) nounwind		; <i64> [#uses=1]
+	%asmtmp428 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 41, i64 %57) nounwind		; <i64> [#uses=1]
+	%73 = xor i64 %asmtmp427, 0		; <i64> [#uses=1]
+	%74 = xor i64 %73, %asmtmp428		; <i64> [#uses=1]
+	%75 = and i64 %57, %42		; <i64> [#uses=1]
+	%not429 = xor i64 %57, -1		; <i64> [#uses=1]
+	%76 = and i64 %33, %not429		; <i64> [#uses=1]
+	%77 = xor i64 %75, %76		; <i64> [#uses=1]
+	%78 = getelementptr [80 x i64]* @K512, i64 0, i64 0		; <i64*> [#uses=1]
+	%79 = load i64* %78, align 16		; <i64> [#uses=1]
+	%80 = add i64 %77, %20		; <i64> [#uses=1]
+	%81 = add i64 %80, %72		; <i64> [#uses=1]
+	%82 = add i64 %81, %74		; <i64> [#uses=1]
+	%83 = add i64 %82, %79		; <i64> [#uses=1]
+	%asmtmp432 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 34, i64 %59) nounwind		; <i64> [#uses=1]
+	%asmtmp433 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 39, i64 %59) nounwind		; <i64> [#uses=1]
+	%84 = xor i64 %asmtmp432, 0		; <i64> [#uses=1]
+	%85 = xor i64 %84, %asmtmp433		; <i64> [#uses=1]
+	%86 = add i64 %83, %22		; <i64> [#uses=2]
+	%87 = add i64 0, %85		; <i64> [#uses=1]
+	%asmtmp435 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 8, i64 0) nounwind		; <i64> [#uses=1]
+	%88 = xor i64 0, %asmtmp435		; <i64> [#uses=1]
+	%89 = load i64* null, align 8		; <i64> [#uses=3]
+	%asmtmp436 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 19, i64 %89) nounwind		; <i64> [#uses=1]
+	%asmtmp437 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 61, i64 %89) nounwind		; <i64> [#uses=1]
+	%90 = lshr i64 %89, 6		; <i64> [#uses=1]
+	%91 = xor i64 %asmtmp436, %90		; <i64> [#uses=1]
+	%92 = xor i64 %91, %asmtmp437		; <i64> [#uses=1]
+	%93 = load i64* %63, align 8		; <i64> [#uses=1]
+	%94 = load i64* null, align 8		; <i64> [#uses=1]
+	%95 = add i64 %93, %88		; <i64> [#uses=1]
+	%96 = add i64 %95, %92		; <i64> [#uses=1]
+	%97 = add i64 %96, %94		; <i64> [#uses=2]
+	store i64 %97, i64* %63, align 8
+	%98 = and i64 %86, %57		; <i64> [#uses=1]
+	%not441 = xor i64 %86, -1		; <i64> [#uses=1]
+	%99 = and i64 %42, %not441		; <i64> [#uses=1]
+	%100 = xor i64 %98, %99		; <i64> [#uses=1]
+	%101 = add i64 %100, %33		; <i64> [#uses=1]
+	%102 = add i64 %101, %97		; <i64> [#uses=1]
+	%103 = add i64 %102, 0		; <i64> [#uses=1]
+	%104 = add i64 %103, 0		; <i64> [#uses=1]
+	%e.0 = add i64 %104, %35		; <i64> [#uses=1]
+	br label %bb349
+}
diff --git a/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll b/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll
new file mode 100644
index 0000000..d7b9463
--- /dev/null
+++ b/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll
@@ -0,0 +1,121 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep asm-printer | grep 83
+; rdar://6802189
+
+; Test if linearscan is unfavoring registers for allocation to allow more reuse
+; of reloads from stack slots.
+
+	%struct.SHA_CTX = type { i32, i32, i32, i32, i32, i32, i32, [16 x i32], i32 }
+
+define fastcc void @sha1_block_data_order(%struct.SHA_CTX* nocapture %c, i8* %p, i64 %num) nounwind {
+entry:
+	br label %bb
+
+bb:		; preds = %bb, %entry
+	%asmtmp511 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 0) nounwind		; <i32> [#uses=3]
+	%asmtmp513 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 0) nounwind		; <i32> [#uses=2]
+	%asmtmp516 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 0) nounwind		; <i32> [#uses=1]
+	%asmtmp517 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 0) nounwind		; <i32> [#uses=2]
+	%0 = xor i32 0, %asmtmp513		; <i32> [#uses=0]
+	%1 = add i32 0, %asmtmp517		; <i32> [#uses=1]
+	%2 = add i32 %1, 0		; <i32> [#uses=1]
+	%3 = add i32 %2, 0		; <i32> [#uses=1]
+	%asmtmp519 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 0) nounwind		; <i32> [#uses=1]
+	%4 = xor i32 0, %asmtmp511		; <i32> [#uses=1]
+	%asmtmp520 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %4) nounwind		; <i32> [#uses=2]
+	%5 = xor i32 0, %asmtmp516		; <i32> [#uses=1]
+	%6 = xor i32 %5, %asmtmp519		; <i32> [#uses=1]
+	%7 = add i32 %asmtmp513, -899497514		; <i32> [#uses=1]
+	%8 = add i32 %7, %asmtmp520		; <i32> [#uses=1]
+	%9 = add i32 %8, %6		; <i32> [#uses=1]
+	%10 = add i32 %9, 0		; <i32> [#uses=1]
+	%asmtmp523 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 0) nounwind		; <i32> [#uses=1]
+	%asmtmp525 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %3) nounwind		; <i32> [#uses=2]
+	%11 = xor i32 0, %asmtmp525		; <i32> [#uses=1]
+	%12 = add i32 0, %11		; <i32> [#uses=1]
+	%13 = add i32 %12, 0		; <i32> [#uses=2]
+	%asmtmp528 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %10) nounwind		; <i32> [#uses=1]
+	%14 = xor i32 0, %asmtmp520		; <i32> [#uses=1]
+	%asmtmp529 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %14) nounwind		; <i32> [#uses=1]
+	%asmtmp530 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 5, i32 %13) nounwind		; <i32> [#uses=1]
+	%15 = add i32 0, %asmtmp530		; <i32> [#uses=1]
+	%16 = xor i32 0, %asmtmp523		; <i32> [#uses=1]
+	%asmtmp532 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %16) nounwind		; <i32> [#uses=2]
+	%asmtmp533 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 5, i32 %15) nounwind		; <i32> [#uses=1]
+	%17 = xor i32 %13, %asmtmp528		; <i32> [#uses=1]
+	%18 = xor i32 %17, 0		; <i32> [#uses=1]
+	%19 = add i32 %asmtmp525, -899497514		; <i32> [#uses=1]
+	%20 = add i32 %19, %asmtmp532		; <i32> [#uses=1]
+	%21 = add i32 %20, %18		; <i32> [#uses=1]
+	%22 = add i32 %21, %asmtmp533		; <i32> [#uses=1]
+	%23 = xor i32 0, %asmtmp511		; <i32> [#uses=1]
+	%24 = xor i32 %23, 0		; <i32> [#uses=1]
+	%asmtmp535 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %24) nounwind		; <i32> [#uses=3]
+	%25 = add i32 0, %asmtmp535		; <i32> [#uses=1]
+	%26 = add i32 %25, 0		; <i32> [#uses=1]
+	%27 = add i32 %26, 0		; <i32> [#uses=1]
+	%28 = xor i32 0, %asmtmp529		; <i32> [#uses=0]
+	%29 = xor i32 %22, 0		; <i32> [#uses=1]
+	%30 = xor i32 %29, 0		; <i32> [#uses=1]
+	%31 = add i32 0, %30		; <i32> [#uses=1]
+	%32 = add i32 %31, 0		; <i32> [#uses=3]
+	%asmtmp541 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 0) nounwind		; <i32> [#uses=2]
+	%asmtmp542 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 5, i32 %32) nounwind		; <i32> [#uses=1]
+	%33 = add i32 0, %asmtmp541		; <i32> [#uses=1]
+	%34 = add i32 %33, 0		; <i32> [#uses=1]
+	%35 = add i32 %34, %asmtmp542		; <i32> [#uses=1]
+	%asmtmp543 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %27) nounwind		; <i32> [#uses=2]
+	%36 = xor i32 0, %asmtmp535		; <i32> [#uses=0]
+	%37 = xor i32 %32, 0		; <i32> [#uses=1]
+	%38 = xor i32 %37, %asmtmp543		; <i32> [#uses=1]
+	%39 = add i32 0, %38		; <i32> [#uses=1]
+	%40 = add i32 %39, 0		; <i32> [#uses=2]
+	%asmtmp546 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %32) nounwind		; <i32> [#uses=1]
+	%asmtmp547 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 0) nounwind		; <i32> [#uses=2]
+	%41 = add i32 0, -899497514		; <i32> [#uses=1]
+	%42 = add i32 %41, %asmtmp547		; <i32> [#uses=1]
+	%43 = add i32 %42, 0		; <i32> [#uses=1]
+	%44 = add i32 %43, 0		; <i32> [#uses=3]
+	%asmtmp549 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %35) nounwind		; <i32> [#uses=2]
+	%45 = xor i32 0, %asmtmp541		; <i32> [#uses=1]
+	%asmtmp550 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %45) nounwind		; <i32> [#uses=2]
+	%asmtmp551 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 5, i32 %44) nounwind		; <i32> [#uses=1]
+	%46 = xor i32 %40, %asmtmp546		; <i32> [#uses=1]
+	%47 = xor i32 %46, %asmtmp549		; <i32> [#uses=1]
+	%48 = add i32 %asmtmp543, -899497514		; <i32> [#uses=1]
+	%49 = add i32 %48, %asmtmp550		; <i32> [#uses=1]
+	%50 = add i32 %49, %47		; <i32> [#uses=1]
+	%51 = add i32 %50, %asmtmp551		; <i32> [#uses=1]
+	%asmtmp552 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %40) nounwind		; <i32> [#uses=2]
+	%52 = xor i32 %44, %asmtmp549		; <i32> [#uses=1]
+	%53 = xor i32 %52, %asmtmp552		; <i32> [#uses=1]
+	%54 = add i32 0, %53		; <i32> [#uses=1]
+	%55 = add i32 %54, 0		; <i32> [#uses=2]
+	%asmtmp555 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %44) nounwind		; <i32> [#uses=2]
+	%56 = xor i32 0, %asmtmp532		; <i32> [#uses=1]
+	%57 = xor i32 %56, %asmtmp547		; <i32> [#uses=1]
+	%asmtmp556 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %57) nounwind		; <i32> [#uses=1]
+	%58 = add i32 0, %asmtmp556		; <i32> [#uses=1]
+	%59 = add i32 %58, 0		; <i32> [#uses=1]
+	%60 = add i32 %59, 0		; <i32> [#uses=1]
+	%asmtmp558 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %51) nounwind		; <i32> [#uses=1]
+	%61 = xor i32 %asmtmp517, %asmtmp511		; <i32> [#uses=1]
+	%62 = xor i32 %61, %asmtmp535		; <i32> [#uses=1]
+	%63 = xor i32 %62, %asmtmp550		; <i32> [#uses=1]
+	%asmtmp559 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %63) nounwind		; <i32> [#uses=1]
+	%64 = xor i32 %55, %asmtmp555		; <i32> [#uses=1]
+	%65 = xor i32 %64, %asmtmp558		; <i32> [#uses=1]
+	%asmtmp561 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %55) nounwind		; <i32> [#uses=1]
+	%66 = add i32 %asmtmp552, -899497514		; <i32> [#uses=1]
+	%67 = add i32 %66, %65		; <i32> [#uses=1]
+	%68 = add i32 %67, %asmtmp559		; <i32> [#uses=1]
+	%69 = add i32 %68, 0		; <i32> [#uses=1]
+	%70 = add i32 %69, 0		; <i32> [#uses=1]
+	store i32 %70, i32* null, align 4
+	%71 = add i32 0, %60		; <i32> [#uses=1]
+	store i32 %71, i32* null, align 4
+	%72 = add i32 0, %asmtmp561		; <i32> [#uses=1]
+	store i32 %72, i32* null, align 4
+	%73 = add i32 0, %asmtmp555		; <i32> [#uses=1]
+	store i32 %73, i32* null, align 4
+	br label %bb
+}
diff --git a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
new file mode 100644
index 0000000..abbe97a
--- /dev/null
+++ b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
@@ -0,0 +1,31 @@
+; RUN: llc -mtriple=i386-apple-darwin10.0 -relocation-model=pic -asm-verbose=false \
+; RUN:     -disable-fp-elim -mattr=-sse41,-sse3,+sse2 -post-RA-scheduler=false < %s | \
+; RUN:   FileCheck %s
+; rdar://6808032
+
+; CHECK: pextrw $14
+; CHECK-NEXT: movzbl
+; CHECK-NEXT: (%ebp)
+; CHECK-NEXT: pinsrw
+
+define void @update(i8** %args_list) nounwind {
+entry:
+	%cmp.i = icmp eq i32 0, 0		; <i1> [#uses=1]
+	br i1 %cmp.i, label %if.then.i, label %test_cl.exit
+
+if.then.i:		; preds = %entry
+	%val = load <16 x i8> addrspace(1)* null		; <<16 x i8>> [#uses=8]
+	%tmp10.i = shufflevector <16 x i8> <i8 0, i8 0, i8 0, i8 undef, i8 0, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef>, <16 x i8> %val, <16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 4, i32 undef, i32 6, i32 undef, i32 29, i32 undef, i32 10, i32 11, i32 12, i32 undef, i32 undef, i32 undef>		; <<16 x i8>> [#uses=1]
+	%tmp17.i = shufflevector <16 x i8> %tmp10.i, <16 x i8> %val, <16 x i32> <i32 0, i32 1, i32 2, i32 18, i32 4, i32 undef, i32 6, i32 undef, i32 8, i32 undef, i32 10, i32 11, i32 12, i32 undef, i32 undef, i32 undef>		; <<16 x i8>> [#uses=1]
+	%tmp24.i = shufflevector <16 x i8> %tmp17.i, <16 x i8> %val, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 24, i32 6, i32 undef, i32 8, i32 undef, i32 10, i32 11, i32 12, i32 undef, i32 undef, i32 undef>		; <<16 x i8>> [#uses=1]
+	%tmp31.i = shufflevector <16 x i8> %tmp24.i, <16 x i8> %val, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 undef, i32 8, i32 undef, i32 10, i32 11, i32 12, i32 21, i32 undef, i32 undef>		; <<16 x i8>> [#uses=1]
+	%tmp38.i = shufflevector <16 x i8> %tmp31.i, <16 x i8> %val, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 27, i32 8, i32 undef, i32 10, i32 11, i32 12, i32 13, i32 undef, i32 undef>		; <<16 x i8>> [#uses=1]
+	%tmp45.i = shufflevector <16 x i8> %tmp38.i, <16 x i8> %val, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 undef, i32 10, i32 11, i32 12, i32 13, i32 29, i32 undef>		; <<16 x i8>> [#uses=1]
+	%tmp52.i = shufflevector <16 x i8> %tmp45.i, <16 x i8> %val, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 21, i32 10, i32 11, i32 12, i32 13, i32 14, i32 undef>		; <<16 x i8>> [#uses=1]
+	%tmp59.i = shufflevector <16 x i8> %tmp52.i, <16 x i8> %val, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 20>		; <<16 x i8>> [#uses=1]
+	store <16 x i8> %tmp59.i, <16 x i8> addrspace(1)* null
+	ret void
+
+test_cl.exit:		; preds = %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/2009-04-24.ll b/test/CodeGen/X86/2009-04-24.ll
new file mode 100644
index 0000000..c1ec45f
--- /dev/null
+++ b/test/CodeGen/X86/2009-04-24.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -regalloc=local -relocation-model=pic > %t
+; RUN: grep {leal.*TLSGD.*___tls_get_addr} %t
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -regalloc=local -relocation-model=pic > %t2
+; RUN: grep {leaq.*TLSGD.*__tls_get_addr} %t2
+; PR4004
+
+@i = thread_local global i32 15
+
+define i32 @f() {
+entry:
+	%tmp1 = load i32* @i
+	ret i32 %tmp1
+}
diff --git a/test/CodeGen/X86/2009-04-25-CoalescerBug.ll b/test/CodeGen/X86/2009-04-25-CoalescerBug.ll
new file mode 100644
index 0000000..94d3eb2
--- /dev/null
+++ b/test/CodeGen/X86/2009-04-25-CoalescerBug.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86-64 | grep mov | count 2
+; rdar://6806252
+
+define i64 @test(i32* %tmp13) nounwind {
+entry:
+	br label %while.cond
+
+while.cond:		; preds = %while.cond, %entry
+	%tmp15 = load i32* %tmp13		; <i32> [#uses=2]
+	%bf.lo = lshr i32 %tmp15, 1		; <i32> [#uses=1]
+	%bf.lo.cleared = and i32 %bf.lo, 2147483647		; <i32> [#uses=1]
+	%conv = zext i32 %bf.lo.cleared to i64		; <i64> [#uses=1]
+	%bf.lo.cleared25 = and i32 %tmp15, 1		; <i32> [#uses=1]
+	%tobool = icmp ne i32 %bf.lo.cleared25, 0		; <i1> [#uses=1]
+	br i1 %tobool, label %while.cond, label %while.end
+
+while.end:		; preds = %while.cond
+	ret i64 %conv
+}
diff --git a/test/CodeGen/X86/2009-04-27-CoalescerAssert.ll b/test/CodeGen/X86/2009-04-27-CoalescerAssert.ll
new file mode 100644
index 0000000..7981a52
--- /dev/null
+++ b/test/CodeGen/X86/2009-04-27-CoalescerAssert.ll
@@ -0,0 +1,1457 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu
+; PR4034
+
+	%struct.BiContextType = type { i16, i8 }
+	%struct.Bitstream = type { i32, i32, i32, i32, i8*, i32 }
+	%struct.DataPartition = type { %struct.Bitstream*, %struct.DecodingEnvironment, i32 (%struct.SyntaxElement*, %struct.ImageParameters*, %struct.DataPartition*)* }
+	%struct.DecRefPicMarking_t = type { i32, i32, i32, i32, i32, %struct.DecRefPicMarking_t* }
+	%struct.DecodingEnvironment = type { i32, i32, i32, i32, i32, i8*, i32* }
+	%struct.ImageParameters = type { i32, i32, i32, i32, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [16 x [16 x i16]], [6 x [32 x i32]], [16 x [16 x i32]], [4 x [12 x [4 x [4 x i32]]]], [16 x i32], i8**, i32*, i32***, i32**, i32, i32, i32, i32, %struct.Slice*, %struct.Macroblock*, i32, i32, i32, i32, i32, i32, %struct.DecRefPicMarking_t*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32***, i32***, i32****, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x [2 x i32]], [3 x [2 x i32]], i32, i32, i64, i64, %struct.timeb, %struct.timeb, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+	%struct.Macroblock = type { i32, [2 x i32], i32, i32, %struct.Macroblock*, %struct.Macroblock*, i32, [2 x [4 x [4 x [2 x i32]]]], i32, i64, i64, i32, i32, [4 x i8], [4 x i8], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+	%struct.MotionInfoContexts = type { [4 x [11 x %struct.BiContextType]], [2 x [9 x %struct.BiContextType]], [2 x [10 x %struct.BiContextType]], [2 x [6 x %struct.BiContextType]], [4 x %struct.BiContextType], [4 x %struct.BiContextType], [3 x %struct.BiContextType] }
+	%struct.PixelPos = type { i32, i32, i32, i32, i32, i32 }
+	%struct.Slice = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.DataPartition*, %struct.MotionInfoContexts*, %struct.TextureInfoContexts*, i32, i32*, i32*, i32*, i32, i32*, i32*, i32*, i32 (%struct.ImageParameters*, %struct.inp_par*)*, i32, i32, i32, i32 }
+	%struct.SyntaxElement = type { i32, i32, i32, i32, i32, i32, i32, i32, void (i32, i32, i32*, i32*)*, void (%struct.SyntaxElement*, %struct.ImageParameters*, %struct.DecodingEnvironment*)* }
+	%struct.TextureInfoContexts = type { [2 x %struct.BiContextType], [4 x %struct.BiContextType], [3 x [4 x %struct.BiContextType]], [10 x [4 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [5 x %struct.BiContextType]], [10 x [5 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]], [10 x [15 x %struct.BiContextType]] }
+	%struct.inp_par = type { [1000 x i8], [1000 x i8], [1000 x i8], i32, i32, i32, i32, i32, i32, i32, i32 }
+	%struct.timeb = type { i64, i16, i16, i16 }
+@get_mb_block_pos = external global void (i32, i32*, i32*)*		; <void (i32, i32*, i32*)**> [#uses=1]
+@img = external global %struct.ImageParameters*		; <%struct.ImageParameters**> [#uses=14]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (void (i32, i32, i32, i32, %struct.PixelPos*)* @getAffNeighbour to i8*)], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define void @getAffNeighbour(i32 %curr_mb_nr, i32 %xN, i32 %yN, i32 %is_chroma, %struct.PixelPos* %pix) nounwind {
+entry:
+	%Opq.sa.calc = add i32 0, 2		; <i32> [#uses=2]
+	%0 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=3]
+	%1 = getelementptr %struct.ImageParameters* %0, i64 0, i32 39		; <%struct.Macroblock**> [#uses=1]
+	%2 = load %struct.Macroblock** %1, align 8		; <%struct.Macroblock*> [#uses=24]
+	%3 = zext i32 %curr_mb_nr to i64		; <i64> [#uses=24]
+	%4 = sext i32 %is_chroma to i64		; <i64> [#uses=8]
+	br label %meshBB392
+
+entry.fragment:		; preds = %meshBB392
+	%Opq.sa.calc747 = add i32 %Opq.sa.calc921, 70		; <i32> [#uses=0]
+	%5 = getelementptr %struct.ImageParameters* %0, i64 0, i32 119, i64 %4, i64 0		; <i32*> [#uses=1]
+	%6 = load i32* %5, align 4		; <i32> [#uses=2]
+	%7 = getelementptr %struct.ImageParameters* %0, i64 0, i32 119, i64 %4, i64 1		; <i32*> [#uses=1]
+	%8 = load i32* %7, align 4		; <i32> [#uses=5]
+	br label %entry.fragment181
+
+entry.fragment181:		; preds = %entry.fragment
+	%Opq.sa.calc863 = add i32 %Opq.sa.calc921, -50		; <i32> [#uses=4]
+	%9 = getelementptr %struct.PixelPos* %pix, i64 0, i32 0		; <i32*> [#uses=4]
+	store i32 0, i32* %9, align 4
+	%10 = add i32 %8, -1		; <i32> [#uses=6]
+	%11 = icmp slt i32 %10, %yN		; <i1> [#uses=1]
+	br i1 %11, label %meshBB448, label %bb
+
+bb:		; preds = %entry.fragment181
+	%Opq.sa.calc460 = add i32 %Opq.sa.calc863, 50		; <i32> [#uses=0]
+	%12 = add i32 %6, -1		; <i32> [#uses=5]
+	%13 = icmp slt i32 %12, %xN		; <i1> [#uses=1]
+	br label %bb.fragment
+
+bb.fragment:		; preds = %bb
+	%Opq.sa.calc976 = add i32 %Opq.sa.calc863, 13		; <i32> [#uses=3]
+	%.not8 = icmp sgt i32 %yN, -1		; <i1> [#uses=1]
+	%14 = icmp sgt i32 %8, %yN		; <i1> [#uses=1]
+	%or.cond.not = and i1 %14, %.not8		; <i1> [#uses=3]
+	%or.cond1 = and i1 %or.cond.not, %13		; <i1> [#uses=1]
+	br i1 %or.cond1, label %meshBB396, label %bb3
+
+bb3:		; preds = %bb.fragment
+	%Opq.sa.calc462 = sub i32 %Opq.sa.calc976, -152		; <i32> [#uses=5]
+	%Opq.sa.calc461 = sub i32 %Opq.sa.calc462, 168		; <i32> [#uses=2]
+	%15 = icmp slt i32 %xN, 0		; <i1> [#uses=1]
+	br i1 %15, label %bb4, label %meshBB404
+
+bb4:		; preds = %bb3
+	%Opq.sa.calc467 = xor i32 %Opq.sa.calc462, 171		; <i32> [#uses=2]
+	%Opq.sa.calc465 = sub i32 %Opq.sa.calc467, %Opq.sa.calc462		; <i32> [#uses=1]
+	%Opq.sa.calc466 = xor i32 %Opq.sa.calc465, -164		; <i32> [#uses=1]
+	%16 = icmp slt i32 %yN, 0		; <i1> [#uses=1]
+	br i1 %16, label %meshBB428, label %meshBB392
+
+bb5:		; preds = %meshBB428
+	%Opq.sa.calc470 = sub i32 %Opq.sa.calc897, -49		; <i32> [#uses=1]
+	%17 = getelementptr %struct.Macroblock* %2, i64 %3, i32 20		; <i32*> [#uses=1]
+	%18 = load i32* %17, align 4		; <i32> [#uses=1]
+	br label %bb5.fragment
+
+bb5.fragment:		; preds = %bb5
+	%Opq.sa.calc873 = sub i32 %Opq.sa.calc470, 169		; <i32> [#uses=7]
+	%19 = icmp eq i32 %18, 0		; <i1> [#uses=1]
+	%20 = and i32 %curr_mb_nr, 1		; <i32> [#uses=1]
+	%21 = icmp eq i32 %20, 0		; <i1> [#uses=2]
+	br i1 %19, label %bb6, label %bb13
+
+bb6:		; preds = %bb5.fragment
+	%Opq.sa.calc473 = xor i32 %Opq.sa.calc873, 81		; <i32> [#uses=1]
+	br i1 %21, label %bb7, label %meshBB348
+
+bb7:		; preds = %bb6
+	%Opq.sa.calc476 = add i32 %Opq.sa.calc873, -58		; <i32> [#uses=1]
+	%22 = getelementptr %struct.Macroblock* %2, i64 %3, i32 25		; <i32*> [#uses=1]
+	%23 = load i32* %22, align 8		; <i32> [#uses=1]
+	%24 = add i32 %23, 1		; <i32> [#uses=1]
+	%25 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1		; <i32*> [#uses=1]
+	br label %meshBB388
+
+bb7.fragment:		; preds = %meshBB388
+	%Opq.sa.calc709 = sub i32 %Opq.sa.calc886, 143		; <i32> [#uses=1]
+	%Opq.sa.calc707 = add i32 %Opq.sa.calc709, %Opq.sa.calc886		; <i32> [#uses=1]
+	%Opq.sa.calc708 = xor i32 %Opq.sa.calc707, 474		; <i32> [#uses=0]
+	store i32 %.SV194.phi, i32* %.SV196.phi, align 4
+	%26 = getelementptr %struct.Macroblock* %.load17.SV.phi, i64 %.load36.SV.phi, i32 29		; <i32*> [#uses=1]
+	%27 = load i32* %26, align 8		; <i32> [#uses=2]
+	store i32 %27, i32* %.load67.SV.phi, align 4
+	br label %bb96
+
+bb8:		; preds = %meshBB348
+	%Opq.sa.calc479 = sub i32 %Opq.sa.calc805, 141		; <i32> [#uses=1]
+	%28 = getelementptr %struct.Macroblock* %2, i64 %3, i32 22		; <i32*> [#uses=2]
+	%29 = load i32* %28, align 4		; <i32> [#uses=2]
+	%30 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1		; <i32*> [#uses=2]
+	br label %meshBB368
+
+bb8.fragment:		; preds = %meshBB368
+	%Opq.sa.calc765 = sub i32 %Opq.sa.calc768, -115		; <i32> [#uses=2]
+	store i32 %.SV198.phi, i32* %.SV200.phi, align 4
+	%31 = getelementptr %struct.Macroblock* %.load16.SV.phi, i64 %.load35.SV.phi, i32 26		; <i32*> [#uses=2]
+	%32 = load i32* %31, align 4		; <i32> [#uses=4]
+	store i32 %32, i32* %.load66.SV.phi, align 4
+	%33 = load i32* %31, align 4		; <i32> [#uses=1]
+	%34 = icmp eq i32 %33, 0		; <i1> [#uses=1]
+	br i1 %34, label %bb96, label %bb9
+
+bb9:		; preds = %bb8.fragment
+	%Opq.sa.calc482 = xor i32 %Opq.sa.calc765, 163		; <i32> [#uses=0]
+	%35 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
+	%36 = getelementptr %struct.ImageParameters* %35, i64 0, i32 39		; <%struct.Macroblock**> [#uses=1]
+	%37 = load %struct.Macroblock** %36, align 8		; <%struct.Macroblock*> [#uses=1]
+	%38 = load i32* %.SV76.phi, align 4		; <i32> [#uses=1]
+	br label %bb9.fragment
+
+bb9.fragment:		; preds = %bb9
+	%Opq.sa.calc999 = add i32 %Opq.sa.calc765, -44		; <i32> [#uses=1]
+	%39 = sext i32 %38 to i64		; <i64> [#uses=1]
+	%40 = getelementptr %struct.Macroblock* %37, i64 %39, i32 20		; <i32*> [#uses=1]
+	%41 = load i32* %40, align 4		; <i32> [#uses=1]
+	%42 = icmp eq i32 %41, 0		; <i1> [#uses=1]
+	br i1 %42, label %bb96, label %bb11
+
+bb11:		; preds = %bb9.fragment
+	%Opq.sa.calc485 = sub i32 %Opq.sa.calc999, 200		; <i32> [#uses=2]
+	%43 = add i32 %.SV78.phi, 1		; <i32> [#uses=1]
+	br label %meshBB332
+
+bb11.fragment:		; preds = %meshBB332
+	%Opq.sa.calc954 = xor i32 %Opq.link.mask859, 233		; <i32> [#uses=0]
+	store i32 %.SV206.phi, i32* %.load81.SV.phi, align 4
+	%44 = add i32 %.load50.SV.phi, %yN		; <i32> [#uses=1]
+	%45 = ashr i32 %44, 1		; <i32> [#uses=1]
+	br label %bb96
+
+bb13:		; preds = %bb5.fragment
+	%Opq.sa.calc490 = xor i32 %Opq.sa.calc873, 175		; <i32> [#uses=1]
+	%Opq.sa.calc488 = sub i32 %Opq.sa.calc490, %Opq.sa.calc873		; <i32> [#uses=1]
+	%Opq.sa.calc489 = sub i32 %Opq.sa.calc488, 133		; <i32> [#uses=1]
+	%46 = getelementptr %struct.Macroblock* %2, i64 %3, i32 25		; <i32*> [#uses=1]
+	br label %meshBB360
+
+bb13.fragment:		; preds = %meshBB360
+	%Opq.sa.calc870 = add i32 %Opq.sa.calc866, -129		; <i32> [#uses=3]
+	%47 = load i32* %.SV208.phi, align 8		; <i32> [#uses=3]
+	br i1 %.load74.SV.phi, label %bb14, label %meshBB412
+
+bb14:		; preds = %bb13.fragment
+	%Opq.sa.calc493 = add i32 %Opq.sa.calc870, 103		; <i32> [#uses=1]
+	%48 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1		; <i32*> [#uses=2]
+	store i32 %47, i32* %48, align 4
+	%49 = getelementptr %struct.Macroblock* %2, i64 %3, i32 29		; <i32*> [#uses=2]
+	br label %bb14.fragment
+
+bb14.fragment:		; preds = %bb14
+	%Opq.sa.calc723 = sub i32 %Opq.sa.calc493, 117		; <i32> [#uses=4]
+	%50 = load i32* %49, align 8		; <i32> [#uses=4]
+	store i32 %50, i32* %.SV52.phi1113, align 4
+	%51 = load i32* %49, align 8		; <i32> [#uses=1]
+	%52 = icmp eq i32 %51, 0		; <i1> [#uses=1]
+	br i1 %52, label %meshBB, label %bb15
+
+bb15:		; preds = %bb14.fragment
+	%Opq.sa.calc496 = sub i32 %Opq.sa.calc723, -8		; <i32> [#uses=1]
+	%53 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
+	%54 = getelementptr %struct.ImageParameters* %53, i64 0, i32 39		; <%struct.Macroblock**> [#uses=1]
+	%55 = load %struct.Macroblock** %54, align 8		; <%struct.Macroblock*> [#uses=1]
+	%56 = load i32* %.SV208.phi, align 8		; <i32> [#uses=1]
+	br label %meshBB324
+
+bb15.fragment:		; preds = %meshBB324
+	%Opq.sa.calc925 = xor i32 %Opq.sa.calc750, 215		; <i32> [#uses=2]
+	%57 = sext i32 %.SV214.phi to i64		; <i64> [#uses=1]
+	%58 = getelementptr %struct.Macroblock* %.SV212.phi, i64 %57, i32 20		; <i32*> [#uses=1]
+	%59 = load i32* %58, align 4		; <i32> [#uses=1]
+	%60 = icmp eq i32 %59, 0		; <i1> [#uses=1]
+	br i1 %60, label %bb16, label %bb96
+
+bb16:		; preds = %bb15.fragment
+	%Opq.sa.calc499 = sub i32 %Opq.sa.calc925, -140		; <i32> [#uses=0]
+	%61 = add i32 %.SV87.phi, 1		; <i32> [#uses=1]
+	br label %bb16.fragment
+
+bb16.fragment:		; preds = %bb16
+	%Opq.sa.calc968 = add i32 %Opq.sa.calc925, 129		; <i32> [#uses=0]
+	store i32 %61, i32* %.SV91.phi, align 4
+	%62 = shl i32 %yN, 1		; <i32> [#uses=1]
+	br label %bb96
+
+bb19:		; preds = %meshBB412
+	%Opq.sa.calc502 = sub i32 %Opq.sa.calc932, -94		; <i32> [#uses=0]
+	%63 = add i32 %.SV87.phi1030, 1		; <i32> [#uses=1]
+	%64 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1		; <i32*> [#uses=1]
+	br label %bb19.fragment
+
+bb19.fragment:		; preds = %bb19
+	%Opq.sa.calc880 = xor i32 %Opq.sa.calc932, 246		; <i32> [#uses=0]
+	store i32 %63, i32* %64, align 4
+	%65 = getelementptr %struct.Macroblock* %2, i64 %3, i32 29		; <i32*> [#uses=1]
+	%66 = load i32* %65, align 8		; <i32> [#uses=2]
+	store i32 %66, i32* %.SV52.phi1186, align 4
+	br label %bb96
+
+bb21:		; preds = %meshBB392
+	%Opq.sa.calc505 = add i32 %Opq.sa.calc921, -40		; <i32> [#uses=2]
+	br i1 %or.cond.not.SV.phi, label %meshBB360, label %bb97
+
+bb23:		; preds = %meshBB360
+	%Opq.sa.calc509 = xor i32 %Opq.sa.calc866, 70		; <i32> [#uses=1]
+	%Opq.sa.calc508 = sub i32 %Opq.sa.calc509, -19		; <i32> [#uses=0]
+	%67 = getelementptr %struct.Macroblock* %2, i64 %3, i32 20		; <i32*> [#uses=1]
+	%68 = load i32* %67, align 4		; <i32> [#uses=1]
+	%69 = icmp eq i32 %68, 0		; <i1> [#uses=1]
+	%70 = and i32 %curr_mb_nr, 1		; <i32> [#uses=1]
+	%71 = icmp eq i32 %70, 0		; <i1> [#uses=2]
+	br label %bb23.fragment
+
+bb23.fragment:		; preds = %bb23
+	%Opq.sa.calc847 = sub i32 %Opq.sa.calc866, -9		; <i32> [#uses=2]
+	%72 = getelementptr %struct.Macroblock* %2, i64 %3, i32 22		; <i32*> [#uses=3]
+	%73 = load i32* %72, align 4		; <i32> [#uses=3]
+	%74 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1		; <i32*> [#uses=3]
+	store i32 %73, i32* %74, align 4
+	br label %bb23.fragment182
+
+bb23.fragment182:		; preds = %bb23.fragment
+	%Opq.sa.calc744 = xor i32 %Opq.sa.calc847, 152		; <i32> [#uses=4]
+	%Opq.sa.calc742 = add i32 %Opq.sa.calc744, %Opq.sa.calc847		; <i32> [#uses=1]
+	%Opq.sa.calc743 = add i32 %Opq.sa.calc742, -149		; <i32> [#uses=2]
+	%75 = getelementptr %struct.Macroblock* %2, i64 %3, i32 26		; <i32*> [#uses=2]
+	%76 = load i32* %75, align 4		; <i32> [#uses=3]
+	store i32 %76, i32* %.SV52.phi1113, align 4
+	%77 = load i32* %75, align 4		; <i32> [#uses=1]
+	%78 = icmp ne i32 %77, 0		; <i1> [#uses=2]
+	br i1 %69, label %meshBB344, label %meshBB432
+
+bb24:		; preds = %meshBB344
+	%Opq.sa.calc512 = add i32 %Opq.sa.calc716, -55		; <i32> [#uses=3]
+	br i1 %.SV96.phi, label %bb25, label %bb32
+
+bb25:		; preds = %bb24
+	%Opq.sa.calc515 = sub i32 %Opq.sa.calc716, 18		; <i32> [#uses=1]
+	br i1 %.SV135.phi, label %bb26, label %bb96
+
+bb26:		; preds = %bb25
+	%Opq.sa.calc519 = xor i32 %Opq.sa.calc515, 23		; <i32> [#uses=2]
+	%Opq.sa.calc518 = xor i32 %Opq.sa.calc519, 84		; <i32> [#uses=1]
+	%79 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
+	%80 = getelementptr %struct.ImageParameters* %79, i64 0, i32 39		; <%struct.Macroblock**> [#uses=1]
+	%81 = load %struct.Macroblock** %80, align 8		; <%struct.Macroblock*> [#uses=1]
+	%82 = load i32* %.SV99.phi, align 4		; <i32> [#uses=1]
+	br label %meshBB340
+
+bb26.fragment:		; preds = %meshBB340
+	%Opq.sa.calc918 = xor i32 %Opq.sa.calc754, 228		; <i32> [#uses=4]
+	%Opq.sa.calc916 = add i32 %Opq.sa.calc918, %Opq.sa.calc754		; <i32> [#uses=1]
+	%Opq.sa.calc917 = add i32 %Opq.sa.calc916, -237		; <i32> [#uses=1]
+	%83 = sext i32 %.SV230.phi to i64		; <i64> [#uses=1]
+	%84 = getelementptr %struct.Macroblock* %.SV228.phi, i64 %83, i32 20		; <i32*> [#uses=1]
+	%85 = load i32* %84, align 4		; <i32> [#uses=1]
+	%86 = icmp eq i32 %85, 0		; <i1> [#uses=1]
+	br i1 %86, label %meshBB420, label %meshBB356
+
+bb28:		; preds = %meshBB356
+	%Opq.sa.calc522 = xor i32 %Opq.sa.calc983, 107		; <i32> [#uses=2]
+	%87 = and i32 %yN, 1		; <i32> [#uses=1]
+	%88 = icmp eq i32 %87, 0		; <i1> [#uses=1]
+	br i1 %88, label %bb29, label %bb30
+
+bb29:		; preds = %bb28
+	%Opq.sa.calc525 = xor i32 %Opq.sa.calc522, 151		; <i32> [#uses=2]
+	%89 = ashr i32 %yN, 1		; <i32> [#uses=1]
+	br label %meshBB340
+
+bb30:		; preds = %bb28
+	%Opq.sa.calc528 = sub i32 %Opq.sa.calc522, -64		; <i32> [#uses=1]
+	%90 = add i32 %.SV104.phi1160, 1		; <i32> [#uses=1]
+	br label %bb30.fragment
+
+bb30.fragment:		; preds = %bb30
+	%Opq.sa.calc791 = add i32 %Opq.sa.calc528, -14		; <i32> [#uses=0]
+	store i32 %90, i32* %.SV111.phi1159, align 4
+	%91 = ashr i32 %yN, 1		; <i32> [#uses=1]
+	br label %bb96
+
+bb32:		; preds = %bb24
+	%Opq.sa.calc531 = xor i32 %Opq.sa.calc512, 50		; <i32> [#uses=1]
+	br i1 %.SV135.phi, label %bb33, label %meshBB324
+
+bb33:		; preds = %bb32
+	%Opq.sa.calc534 = sub i32 %Opq.sa.calc512, -75		; <i32> [#uses=2]
+	%92 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
+	%93 = getelementptr %struct.ImageParameters* %92, i64 0, i32 39		; <%struct.Macroblock**> [#uses=1]
+	%94 = load %struct.Macroblock** %93, align 8		; <%struct.Macroblock*> [#uses=1]
+	%95 = load i32* %.SV99.phi, align 4		; <i32> [#uses=1]
+	br label %bb33.fragment
+
+bb33.fragment:		; preds = %bb33
+	%Opq.sa.calc712 = add i32 %Opq.sa.calc534, -109		; <i32> [#uses=3]
+	%96 = sext i32 %95 to i64		; <i64> [#uses=1]
+	%97 = getelementptr %struct.Macroblock* %94, i64 %96, i32 20		; <i32*> [#uses=1]
+	%98 = load i32* %97, align 4		; <i32> [#uses=1]
+	%99 = icmp eq i32 %98, 0		; <i1> [#uses=1]
+	br i1 %99, label %bb34, label %meshBB
+
+bb34:		; preds = %bb33.fragment
+	%Opq.sa.calc537 = add i32 %Opq.sa.calc712, 8		; <i32> [#uses=1]
+	%100 = add i32 %.SV104.phi, 1		; <i32> [#uses=1]
+	br label %meshBB328
+
+bb34.fragment:		; preds = %meshBB328
+	%Opq.sa.calc965 = xor i32 %Opq.sa.calc787, 251		; <i32> [#uses=0]
+	store i32 %.SV238.phi, i32* %.load116.SV.phi, align 4
+	br label %bb96
+
+bb35:		; preds = %meshBB
+	%Opq.sa.calc541 = add i32 %Opq.sa.calc828, -112		; <i32> [#uses=3]
+	%Opq.sa.calc540 = xor i32 %Opq.sa.calc541, 3		; <i32> [#uses=1]
+	%101 = and i32 %yN, 1		; <i32> [#uses=1]
+	%102 = icmp eq i32 %101, 0		; <i1> [#uses=1]
+	br i1 %102, label %meshBB372, label %meshBB448
+
+bb36:		; preds = %meshBB372
+	%Opq.sa.calc544 = sub i32 %Opq.sa.calc812, -10		; <i32> [#uses=0]
+	%103 = add i32 %.SV43.phi1015, %yN		; <i32> [#uses=1]
+	br label %bb36.fragment
+
+bb36.fragment:		; preds = %bb36
+	%Opq.sa.calc762 = add i32 %Opq.sa.calc812, -69		; <i32> [#uses=0]
+	%104 = ashr i32 %103, 1		; <i32> [#uses=1]
+	br label %bb96
+
+bb37:		; preds = %meshBB448
+	%Opq.sa.calc547 = add i32 %Opq.sa.calc958, -49		; <i32> [#uses=1]
+	%105 = add i32 %.SV104.phi1157, 1		; <i32> [#uses=1]
+	br label %meshBB348
+
+bb37.fragment:		; preds = %meshBB348
+	%Opq.sa.calc728 = add i32 %Opq.sa.calc805, -5		; <i32> [#uses=0]
+	store i32 %.SV242.phi, i32* %.load115.SV.phi, align 4
+	%106 = add i32 %.load48.SV.phi, %yN		; <i32> [#uses=1]
+	%107 = ashr i32 %106, 1		; <i32> [#uses=1]
+	br label %bb96
+
+bb39:		; preds = %meshBB432
+	%Opq.sa.calc550 = sub i32 %Opq.sa.calc798, -214		; <i32> [#uses=0]
+	br i1 %.SV96.phi1038, label %bb40, label %bb48
+
+bb40:		; preds = %bb39
+	%Opq.sa.calc554 = xor i32 %Opq.sa.calc798, 14		; <i32> [#uses=4]
+	%Opq.sa.calc553 = sub i32 %Opq.sa.calc554, 7		; <i32> [#uses=1]
+	br i1 %.SV135.phi1039, label %meshBB336, label %meshBB444
+
+bb41:		; preds = %meshBB336
+	%Opq.sa.calc557 = sub i32 %Opq.sa.calc979, 143		; <i32> [#uses=1]
+	%108 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
+	%109 = getelementptr %struct.ImageParameters* %108, i64 0, i32 39		; <%struct.Macroblock**> [#uses=1]
+	%110 = load %struct.Macroblock** %109, align 8		; <%struct.Macroblock*> [#uses=1]
+	%111 = load i32* %.SV99.phi1128, align 4		; <i32> [#uses=1]
+	br label %bb41.fragment
+
+bb41.fragment:		; preds = %bb41
+	%Opq.sa.calc987 = xor i32 %Opq.sa.calc557, 213		; <i32> [#uses=4]
+	%112 = sext i32 %111 to i64		; <i64> [#uses=1]
+	%113 = getelementptr %struct.Macroblock* %110, i64 %112, i32 20		; <i32*> [#uses=1]
+	%114 = load i32* %113, align 4		; <i32> [#uses=1]
+	%115 = icmp eq i32 %114, 0		; <i1> [#uses=1]
+	br i1 %115, label %bb42, label %bb96
+
+bb42:		; preds = %bb41.fragment
+	%Opq.sa.calc560 = add i32 %Opq.sa.calc987, -221		; <i32> [#uses=1]
+	%116 = ashr i32 %.SV43.phi1230, 1		; <i32> [#uses=1]
+	%117 = icmp sgt i32 %116, %yN		; <i1> [#uses=1]
+	br i1 %117, label %meshBB432, label %bb44
+
+bb43:		; preds = %meshBB432
+	%Opq.sa.calc563 = xor i32 %Opq.sa.calc798, 31		; <i32> [#uses=0]
+	%118 = shl i32 %yN, 1		; <i32> [#uses=1]
+	br label %bb96
+
+bb44:		; preds = %bb42
+	%Opq.sa.calc566 = sub i32 %Opq.sa.calc987, 217		; <i32> [#uses=1]
+	%119 = add i32 %.SV104.phi1127, 1		; <i32> [#uses=1]
+	br label %meshBB332
+
+bb44.fragment:		; preds = %meshBB332
+	%Opq.sa.calc894 = add i32 %Opq.sa.calc856, -200		; <i32> [#uses=1]
+	store i32 %.SV248.phi, i32* %.load114.SV.phi, align 4
+	%120 = shl i32 %yN, 1		; <i32> [#uses=1]
+	%121 = sub i32 %120, %.load46.SV.phi		; <i32> [#uses=1]
+	br label %meshBB376
+
+bb48:		; preds = %bb39
+	%Opq.sa.calc569 = sub i32 %Opq.sa.calc798, -110		; <i32> [#uses=1]
+	br i1 %.SV135.phi1039, label %bb49, label %bb96
+
+bb49:		; preds = %bb48
+	%Opq.sa.calc572 = add i32 %Opq.sa.calc798, 84		; <i32> [#uses=0]
+	%122 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
+	%123 = getelementptr %struct.ImageParameters* %122, i64 0, i32 39		; <%struct.Macroblock**> [#uses=1]
+	%124 = load %struct.Macroblock** %123, align 8		; <%struct.Macroblock*> [#uses=1]
+	%125 = load i32* %.SV99.phi1037, align 4		; <i32> [#uses=1]
+	br label %bb49.fragment
+
+bb49.fragment:		; preds = %bb49
+	%Opq.sa.calc860 = sub i32 %Opq.sa.calc569, 114		; <i32> [#uses=5]
+	%126 = sext i32 %125 to i64		; <i64> [#uses=1]
+	%127 = getelementptr %struct.Macroblock* %124, i64 %126, i32 20		; <i32*> [#uses=1]
+	%128 = load i32* %127, align 4		; <i32> [#uses=1]
+	%129 = icmp eq i32 %128, 0		; <i1> [#uses=1]
+	br i1 %129, label %bb50, label %meshBB380
+
+bb50:		; preds = %bb49.fragment
+	%Opq.sa.calc577 = add i32 %Opq.sa.calc860, 12		; <i32> [#uses=2]
+	%130 = ashr i32 %.SV43.phi1178, 1		; <i32> [#uses=1]
+	%131 = icmp sgt i32 %130, %yN		; <i1> [#uses=1]
+	br i1 %131, label %meshBB328, label %bb52
+
+bb51:		; preds = %meshBB328
+	%Opq.sa.calc580 = xor i32 %Opq.sa.calc787, 194		; <i32> [#uses=0]
+	%132 = shl i32 %yN, 1		; <i32> [#uses=1]
+	%133 = or i32 %132, 1		; <i32> [#uses=1]
+	br label %bb96
+
+bb52:		; preds = %bb50
+	%Opq.sa.calc584 = sub i32 %Opq.sa.calc860, -65		; <i32> [#uses=2]
+	%Opq.sa.calc583 = sub i32 %Opq.sa.calc584, 50		; <i32> [#uses=1]
+	%134 = add i32 %.SV104.phi1036, 1		; <i32> [#uses=1]
+	store i32 %134, i32* %.SV111.phi1035, align 4
+	br label %meshBB384
+
+bb52.fragment:		; preds = %meshBB384
+	%Opq.sa.calc844 = add i32 %Opq.sa.calc901, -214		; <i32> [#uses=1]
+	%135 = shl i32 %yN, 1		; <i32> [#uses=1]
+	%136 = or i32 %135, 1		; <i32> [#uses=1]
+	%137 = sub i32 %136, %.load44.SV.phi		; <i32> [#uses=1]
+	br label %meshBB388
+
+bb54:		; preds = %meshBB380
+	%Opq.sa.calc589 = add i32 %Opq.sa.calc946, 108		; <i32> [#uses=1]
+	%138 = add i32 %.SV104.phi1124, 1		; <i32> [#uses=1]
+	br label %bb54.fragment
+
+bb54.fragment:		; preds = %bb54
+	%Opq.sa.calc883 = xor i32 %Opq.sa.calc589, 119		; <i32> [#uses=2]
+	store i32 %138, i32* %.SV111.phi1123, align 4
+	br label %meshBB440
+
+bb56:		; preds = %meshBB404
+	%Opq.sa.calc592 = sub i32 %Opq.sa.calc939, 87		; <i32> [#uses=2]
+	%.not4 = icmp sgt i32 %xN, -1		; <i1> [#uses=1]
+	%139 = icmp sgt i32 %.SV40.phi, %xN		; <i1> [#uses=1]
+	br label %meshBB364
+
+bb56.fragment:		; preds = %meshBB364
+	%Opq.sa.calc1002 = xor i32 %Opq.link.mask737, 77		; <i32> [#uses=6]
+	%or.cond5 = and i1 %.SV256.phi, %.not4.SV.phi		; <i1> [#uses=1]
+	%140 = icmp slt i32 %yN, 0		; <i1> [#uses=2]
+	br i1 %or.cond5, label %bb58, label %bb83
+
+bb58:		; preds = %bb56.fragment
+	%Opq.sa.calc596 = xor i32 %Opq.sa.calc1002, 73		; <i32> [#uses=1]
+	%Opq.sa.calc595 = add i32 %Opq.sa.calc596, 147		; <i32> [#uses=0]
+	br i1 %140, label %bb59, label %bb76
+
+bb59:		; preds = %bb58
+	%Opq.sa.calc599 = add i32 %Opq.sa.calc1002, 151		; <i32> [#uses=0]
+	%141 = getelementptr %struct.Macroblock* %2, i64 %3, i32 20		; <i32*> [#uses=1]
+	%142 = load i32* %141, align 4		; <i32> [#uses=1]
+	br label %bb59.fragment
+
+bb59.fragment:		; preds = %bb59
+	%Opq.sa.calc731 = sub i32 %Opq.sa.calc1002, -161		; <i32> [#uses=3]
+	%143 = icmp eq i32 %142, 0		; <i1> [#uses=1]
+	%144 = and i32 %curr_mb_nr, 1		; <i32> [#uses=1]
+	%145 = icmp eq i32 %144, 0		; <i1> [#uses=2]
+	br i1 %143, label %bb60, label %bb68
+
+bb60:		; preds = %bb59.fragment
+	%Opq.sa.calc602 = xor i32 %Opq.sa.calc731, 1		; <i32> [#uses=2]
+	br i1 %145, label %bb61, label %bb66
+
+bb61:		; preds = %bb60
+	%Opq.sa.calc605 = xor i32 %Opq.sa.calc731, 57		; <i32> [#uses=1]
+	%146 = getelementptr %struct.Macroblock* %2, i64 %3, i32 23		; <i32*> [#uses=2]
+	%147 = load i32* %146, align 8		; <i32> [#uses=3]
+	%148 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1		; <i32*> [#uses=3]
+	br label %bb61.fragment
+
+bb61.fragment:		; preds = %bb61
+	%Opq.sa.calc700 = sub i32 %Opq.sa.calc605, 108		; <i32> [#uses=3]
+	store i32 %147, i32* %148, align 4
+	%149 = getelementptr %struct.Macroblock* %2, i64 %3, i32 27		; <i32*> [#uses=4]
+	%150 = load i32* %149, align 8		; <i32> [#uses=1]
+	%151 = icmp eq i32 %150, 0		; <i1> [#uses=1]
+	br i1 %151, label %bb65, label %bb62
+
+bb62:		; preds = %bb61.fragment
+	%Opq.sa.calc608 = add i32 %Opq.sa.calc700, -94		; <i32> [#uses=1]
+	%152 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=2]
+	%153 = getelementptr %struct.ImageParameters* %152, i64 0, i32 45		; <i32*> [#uses=1]
+	%154 = load i32* %153, align 4		; <i32> [#uses=1]
+	%155 = icmp eq i32 %154, 1		; <i1> [#uses=1]
+	br i1 %155, label %bb63, label %bb64
+
+bb63:		; preds = %bb62
+	%Opq.sa.calc611 = add i32 %Opq.sa.calc700, -101		; <i32> [#uses=2]
+	%156 = getelementptr %struct.ImageParameters* %152, i64 0, i32 39		; <%struct.Macroblock**> [#uses=1]
+	%157 = load %struct.Macroblock** %156, align 8		; <%struct.Macroblock*> [#uses=1]
+	%158 = load i32* %146, align 8		; <i32> [#uses=1]
+	br label %meshBB452
+
+bb63.fragment:		; preds = %meshBB452
+	%Opq.sa.calc891 = add i32 %Opq.link.mask823, 18		; <i32> [#uses=2]
+	%Opq.sa.calc890 = add i32 %Opq.sa.calc891, -3		; <i32> [#uses=2]
+	%159 = sext i32 %.SV266.phi to i64		; <i64> [#uses=1]
+	%160 = getelementptr %struct.Macroblock* %.SV264.phi, i64 %159, i32 20		; <i32*> [#uses=1]
+	%161 = load i32* %160, align 4		; <i32> [#uses=1]
+	%162 = icmp eq i32 %161, 0		; <i1> [#uses=1]
+	br i1 %162, label %bb64, label %meshBB456
+
+bb64:		; preds = %bb63.fragment, %bb62
+	%.SV38.phi1132 = phi i64 [ %.SV38.phi1110, %bb63.fragment ], [ %.SV38.phi1098, %bb62 ]		; <i64> [#uses=1]
+	%.SV52.phi1131 = phi i32* [ %.SV52.phi1109, %bb63.fragment ], [ %.SV52.phi1097, %bb62 ]		; <i32*> [#uses=1]
+	%.SV68.phi1130 = phi i32 [ %.SV68.phi1108, %bb63.fragment ], [ %.SV68.phi1096, %bb62 ]		; <i32> [#uses=1]
+	%.SV70.phi1129 = phi i32 [ %.SV70.phi1107, %bb63.fragment ], [ %.SV70.phi1095, %bb62 ]		; <i32> [#uses=1]
+	%Opq.link.SV615.phi = phi i32 [ %Opq.sa.calc890, %bb63.fragment ], [ %Opq.sa.calc608, %bb62 ]		; <i32> [#uses=1]
+	%.SV150.phi = phi i32* [ %.SV150.phi1060, %bb63.fragment ], [ %148, %bb62 ]		; <i32*> [#uses=1]
+	%.SV152.phi = phi i32* [ %.SV152.phi1059, %bb63.fragment ], [ %149, %bb62 ]		; <i32*> [#uses=1]
+	%.SV148.phi = phi i32 [ %.SV148.phi1057, %bb63.fragment ], [ %147, %bb62 ]		; <i32> [#uses=1]
+	%Opq.link.mask = and i32 %Opq.link.SV615.phi, 1		; <i32> [#uses=1]
+	%Opq.sa.calc614 = add i32 %Opq.link.mask, 189		; <i32> [#uses=1]
+	%163 = add i32 %.SV148.phi, 1		; <i32> [#uses=1]
+	store i32 %163, i32* %.SV150.phi, align 4
+	br label %bb65
+
+bb65:		; preds = %meshBB456, %bb64, %bb61.fragment
+	%.SV38.phi1144 = phi i64 [ %.SV38.phi1137, %meshBB456 ], [ %.SV38.phi1098, %bb61.fragment ], [ %.SV38.phi1132, %bb64 ]		; <i64> [#uses=1]
+	%.SV52.phi1143 = phi i32* [ %.SV52.phi1136, %meshBB456 ], [ %.SV52.phi1097, %bb61.fragment ], [ %.SV52.phi1131, %bb64 ]		; <i32*> [#uses=1]
+	%.SV68.phi1142 = phi i32 [ %.SV68.phi1135, %meshBB456 ], [ %.SV68.phi1096, %bb61.fragment ], [ %.SV68.phi1130, %bb64 ]		; <i32> [#uses=1]
+	%.SV70.phi1141 = phi i32 [ %.SV70.phi1134, %meshBB456 ], [ %.SV70.phi1095, %bb61.fragment ], [ %.SV70.phi1129, %bb64 ]		; <i32> [#uses=1]
+	%.SV152.phi1058 = phi i32* [ %.SV152.phi1133, %meshBB456 ], [ %149, %bb61.fragment ], [ %.SV152.phi, %bb64 ]		; <i32*> [#uses=1]
+	%Opq.link.SV618.phi = phi i32 [ %Opq.sa.calc816, %meshBB456 ], [ %Opq.sa.calc700, %bb61.fragment ], [ %Opq.sa.calc614, %bb64 ]		; <i32> [#uses=1]
+	%Opq.link.mask620 = and i32 %Opq.link.SV618.phi, 40		; <i32> [#uses=1]
+	%Opq.sa.calc617 = add i32 %Opq.link.mask620, -35		; <i32> [#uses=2]
+	%164 = load i32* %.SV152.phi1058, align 8		; <i32> [#uses=1]
+	br label %meshBB436
+
+bb65.fragment:		; preds = %meshBB436
+	%Opq.sa.calc832 = add i32 %Opq.link.mask706, 1		; <i32> [#uses=2]
+	store i32 %.SV268.phi, i32* %.load62.SV.phi, align 4
+	br label %meshBB364
+
+bb66:		; preds = %bb60
+	%Opq.sa.calc621 = add i32 %Opq.sa.calc602, -217		; <i32> [#uses=1]
+	%165 = add i32 %curr_mb_nr, -1		; <i32> [#uses=1]
+	%166 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1		; <i32*> [#uses=1]
+	br label %meshBB420
+
+bb66.fragment:		; preds = %meshBB420
+	%Opq.sa.calc795 = xor i32 %Opq.sa.calc837, 105		; <i32> [#uses=2]
+	%Opq.sa.calc794 = sub i32 %Opq.sa.calc795, 167		; <i32> [#uses=1]
+	store i32 %.SV270.phi, i32* %.SV272.phi, align 4
+	store i32 1, i32* %.load61.SV.phi, align 4
+	br label %meshBB444
+
+bb68:		; preds = %bb59.fragment
+	%Opq.sa.calc624 = sub i32 %Opq.sa.calc731, 229		; <i32> [#uses=3]
+	%167 = getelementptr %struct.Macroblock* %2, i64 %3, i32 23		; <i32*> [#uses=1]
+	br label %meshBB344
+
+bb68.fragment:		; preds = %meshBB344
+	%Opq.sa.calc784 = sub i32 %Opq.link.mask722, 3		; <i32> [#uses=5]
+	%168 = load i32* %.SV274.phi, align 8		; <i32> [#uses=3]
+	br i1 %.load144.SV.phi, label %bb69, label %meshBB412
+
+bb69:		; preds = %bb68.fragment
+	%Opq.sa.calc627 = add i32 %Opq.sa.calc784, 163		; <i32> [#uses=0]
+	%169 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1		; <i32*> [#uses=2]
+	store i32 %168, i32* %169, align 4
+	%170 = getelementptr %struct.Macroblock* %2, i64 %3, i32 27		; <i32*> [#uses=2]
+	br label %bb69.fragment
+
+bb69.fragment:		; preds = %bb69
+	%Opq.sa.calc996 = sub i32 %Opq.sa.calc784, -9		; <i32> [#uses=3]
+	%Opq.sa.calc994 = sub i32 %Opq.sa.calc996, %Opq.sa.calc784		; <i32> [#uses=1]
+	%Opq.sa.calc995 = sub i32 %Opq.sa.calc994, 3		; <i32> [#uses=2]
+	%171 = load i32* %170, align 8		; <i32> [#uses=3]
+	store i32 %171, i32* %.SV52.phi1170, align 4
+	%172 = load i32* %170, align 8		; <i32> [#uses=1]
+	%173 = icmp eq i32 %172, 0		; <i1> [#uses=1]
+	br i1 %173, label %meshBB396, label %meshBB400
+
+bb70:		; preds = %meshBB400
+	%Opq.sa.calc630 = add i32 %Opq.sa.calc824, -203		; <i32> [#uses=2]
+	%174 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
+	%175 = getelementptr %struct.ImageParameters* %174, i64 0, i32 39		; <%struct.Macroblock**> [#uses=1]
+	%176 = load %struct.Macroblock** %175, align 8		; <%struct.Macroblock*> [#uses=1]
+	%177 = load i32* %.SV156.phi, align 8		; <i32> [#uses=1]
+	br label %meshBB428
+
+bb70.fragment:		; preds = %meshBB428
+	%Opq.sa.calc739 = xor i32 %Opq.sa.calc897, 213		; <i32> [#uses=2]
+	%Opq.sa.calc738 = sub i32 %Opq.sa.calc739, 1		; <i32> [#uses=2]
+	%178 = sext i32 %.SV280.phi to i64		; <i64> [#uses=1]
+	%179 = getelementptr %struct.Macroblock* %.SV278.phi, i64 %178, i32 20		; <i32*> [#uses=1]
+	%180 = load i32* %179, align 4		; <i32> [#uses=1]
+	%181 = icmp eq i32 %180, 0		; <i1> [#uses=1]
+	br i1 %181, label %meshBB452, label %meshBB356
+
+bb71:		; preds = %meshBB452
+	%Opq.sa.calc633 = xor i32 %Opq.sa.calc820, 118		; <i32> [#uses=1]
+	%182 = add i32 %.SV158.phi1106, 1		; <i32> [#uses=1]
+	br label %meshBB352
+
+bb71.fragment:		; preds = %meshBB352
+	%Opq.sa.calc809 = sub i32 %Opq.sa.calc876, 17		; <i32> [#uses=2]
+	store i32 %.SV282.phi, i32* %.load163.SV.phi, align 4
+	%183 = shl i32 %yN, 1		; <i32> [#uses=1]
+	br label %meshBB436
+
+bb74:		; preds = %meshBB412
+	%Opq.sa.calc636 = xor i32 %Opq.sa.calc932, 233		; <i32> [#uses=1]
+	%184 = add i32 %.SV158.phi1063, 1		; <i32> [#uses=1]
+	%185 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1		; <i32*> [#uses=1]
+	br label %bb74.fragment
+
+bb74.fragment:		; preds = %bb74
+	%Opq.sa.calc1011 = sub i32 %Opq.sa.calc636, -19		; <i32> [#uses=0]
+	store i32 %184, i32* %185, align 4
+	%186 = getelementptr %struct.Macroblock* %2, i64 %3, i32 27		; <i32*> [#uses=1]
+	%187 = load i32* %186, align 8		; <i32> [#uses=2]
+	store i32 %187, i32* %.SV52.phi1186, align 4
+	br label %bb96
+
+bb76:		; preds = %bb58
+	%Opq.sa.calc640 = xor i32 %Opq.sa.calc1002, 71		; <i32> [#uses=4]
+	%Opq.sa.calc639 = xor i32 %Opq.sa.calc640, 219		; <i32> [#uses=0]
+	%188 = icmp eq i32 %yN, 0		; <i1> [#uses=1]
+	br i1 %188, label %bb77, label %bb79
+
+bb77:		; preds = %bb76
+	%Opq.sa.calc643 = add i32 %Opq.sa.calc640, 2		; <i32> [#uses=2]
+	%189 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
+	%190 = getelementptr %struct.ImageParameters* %189, i64 0, i32 45		; <i32*> [#uses=1]
+	%191 = load i32* %190, align 4		; <i32> [#uses=1]
+	%192 = icmp eq i32 %191, 2		; <i1> [#uses=1]
+	br i1 %192, label %meshBB416, label %bb79
+
+bb78:		; preds = %meshBB416
+	%Opq.sa.calc647 = xor i32 %Opq.sa.calc971, 25		; <i32> [#uses=2]
+	%Opq.sa.calc646 = sub i32 %Opq.sa.calc647, 29		; <i32> [#uses=0]
+	%193 = getelementptr %struct.Macroblock* %2, i64 %3, i32 23		; <i32*> [#uses=1]
+	%194 = load i32* %193, align 8		; <i32> [#uses=1]
+	%195 = add i32 %194, 1		; <i32> [#uses=1]
+	br label %bb78.fragment
+
+bb78.fragment:		; preds = %bb78
+	%Opq.sa.calc850 = sub i32 %Opq.sa.calc647, -93		; <i32> [#uses=0]
+	%196 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1		; <i32*> [#uses=1]
+	store i32 %195, i32* %196, align 4
+	store i32 1, i32* %.SV52.phi1200, align 4
+	%197 = add i32 %yN, -1		; <i32> [#uses=1]
+	br label %bb98
+
+bb79:		; preds = %bb77, %bb76
+	%Opq.link.SV652.phi = phi i32 [ %Opq.sa.calc643, %bb77 ], [ %Opq.sa.calc640, %bb76 ]		; <i32> [#uses=1]
+	%Opq.link.mask654 = and i32 %Opq.link.SV652.phi, 8		; <i32> [#uses=1]
+	%Opq.sa.calc651 = sub i32 %Opq.link.mask654, -2		; <i32> [#uses=3]
+	%Opq.sa.calc650 = xor i32 %Opq.sa.calc651, 1		; <i32> [#uses=2]
+	br i1 %or.cond.not.SV.phi1094, label %meshBB456, label %meshBB352
+
+bb81:		; preds = %meshBB456
+	%Opq.sa.calc655 = add i32 %Opq.sa.calc816, 56		; <i32> [#uses=0]
+	%198 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1		; <i32*> [#uses=1]
+	store i32 %curr_mb_nr, i32* %198, align 4
+	store i32 1, i32* %.SV52.phi1136, align 4
+	br label %bb98
+
+bb83:		; preds = %bb56.fragment
+	%Opq.sa.calc658 = sub i32 %Opq.sa.calc1002, 73		; <i32> [#uses=3]
+	br i1 %140, label %bb84, label %meshBB424
+
+bb84:		; preds = %bb83
+	%Opq.sa.calc661 = xor i32 %Opq.sa.calc658, 22		; <i32> [#uses=1]
+	%199 = getelementptr %struct.Macroblock* %2, i64 %3, i32 20		; <i32*> [#uses=1]
+	%200 = load i32* %199, align 4		; <i32> [#uses=1]
+	br label %meshBB400
+
+bb84.fragment:		; preds = %meshBB400
+	%Opq.sa.calc802 = xor i32 %Opq.sa.calc824, 240		; <i32> [#uses=3]
+	%201 = icmp eq i32 %.SV290.phi, 0		; <i1> [#uses=1]
+	%202 = and i32 %curr_mb_nr, 1		; <i32> [#uses=1]
+	%203 = icmp eq i32 %202, 0		; <i1> [#uses=2]
+	br i1 %201, label %meshBB372, label %bb89
+
+bb85:		; preds = %meshBB372
+	%Opq.sa.calc667 = sub i32 %Opq.sa.calc812, 20		; <i32> [#uses=3]
+	%Opq.sa.calc666 = sub i32 %Opq.sa.calc667, 84		; <i32> [#uses=2]
+	%Opq.sa.calc664 = add i32 %Opq.sa.calc666, %Opq.sa.calc667		; <i32> [#uses=1]
+	%Opq.sa.calc665 = add i32 %Opq.sa.calc664, -112		; <i32> [#uses=2]
+	br i1 %.SV167.phi, label %meshBB336, label %meshBB440
+
+bb86:		; preds = %meshBB336
+	%Opq.sa.calc670 = sub i32 %Opq.sa.calc979, 35		; <i32> [#uses=1]
+	%204 = getelementptr %struct.Macroblock* %2, i64 %3, i32 24		; <i32*> [#uses=1]
+	%205 = load i32* %204, align 4		; <i32> [#uses=1]
+	%206 = add i32 %205, 1		; <i32> [#uses=1]
+	%207 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1		; <i32*> [#uses=1]
+	br label %bb86.fragment
+
+bb86.fragment:		; preds = %bb86
+	%Opq.sa.calc943 = xor i32 %Opq.sa.calc670, 123		; <i32> [#uses=2]
+	store i32 %206, i32* %207, align 4
+	%208 = getelementptr %struct.Macroblock* %2, i64 %3, i32 28		; <i32*> [#uses=1]
+	%209 = load i32* %208, align 4		; <i32> [#uses=2]
+	store i32 %209, i32* %.SV52.phi1234, align 4
+	br label %meshBB424
+
+bb87:		; preds = %meshBB440
+	%Opq.sa.calc674 = xor i32 %Opq.sa.calc990, 44		; <i32> [#uses=1]
+	%Opq.sa.calc673 = xor i32 %Opq.sa.calc674, 160		; <i32> [#uses=1]
+	store i32 0, i32* %.SV52.phi1235, align 4
+	br label %meshBB408
+
+bb89:		; preds = %bb84.fragment
+	%Opq.sa.calc677 = sub i32 %Opq.sa.calc802, -183		; <i32> [#uses=1]
+	%210 = getelementptr %struct.Macroblock* %2, i64 %3, i32 24		; <i32*> [#uses=2]
+	br label %bb89.fragment
+
+bb89.fragment:		; preds = %bb89
+	%Opq.sa.calc962 = add i32 %Opq.sa.calc677, -188		; <i32> [#uses=3]
+	%211 = load i32* %210, align 4		; <i32> [#uses=3]
+	br i1 %203, label %bb90, label %meshBB408
+
+bb90:		; preds = %bb89.fragment
+	%Opq.sa.calc680 = xor i32 %Opq.sa.calc962, 92		; <i32> [#uses=1]
+	%212 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1		; <i32*> [#uses=2]
+	store i32 %211, i32* %212, align 4
+	%213 = getelementptr %struct.Macroblock* %2, i64 %3, i32 28		; <i32*> [#uses=2]
+	br label %bb90.fragment
+
+bb90.fragment:		; preds = %bb90
+	%Opq.sa.calc773 = sub i32 %Opq.sa.calc680, 60		; <i32> [#uses=3]
+	%Opq.sa.calc772 = add i32 %Opq.sa.calc773, -25		; <i32> [#uses=2]
+	%214 = load i32* %213, align 4		; <i32> [#uses=3]
+	store i32 %214, i32* %.SV52.phi1190, align 4
+	%215 = load i32* %213, align 4		; <i32> [#uses=1]
+	%216 = icmp eq i32 %215, 0		; <i1> [#uses=1]
+	br i1 %216, label %meshBB416, label %meshBB368
+
+bb91:		; preds = %meshBB368
+	%Opq.sa.calc683 = sub i32 %Opq.sa.calc768, -7		; <i32> [#uses=0]
+	%217 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
+	%218 = getelementptr %struct.ImageParameters* %217, i64 0, i32 39		; <%struct.Macroblock**> [#uses=1]
+	%219 = load %struct.Macroblock** %218, align 8		; <%struct.Macroblock*> [#uses=1]
+	%220 = load i32* %.SV170.phi, align 4		; <i32> [#uses=1]
+	br label %bb91.fragment
+
+bb91.fragment:		; preds = %bb91
+	%Opq.sa.calc853 = xor i32 %Opq.sa.calc768, 8		; <i32> [#uses=1]
+	%221 = sext i32 %220 to i64		; <i64> [#uses=1]
+	%222 = getelementptr %struct.Macroblock* %219, i64 %221, i32 20		; <i32*> [#uses=1]
+	%223 = load i32* %222, align 4		; <i32> [#uses=1]
+	%224 = icmp eq i32 %223, 0		; <i1> [#uses=1]
+	br i1 %224, label %bb92, label %bb96
+
+bb92:		; preds = %bb91.fragment
+	%Opq.sa.calc686 = xor i32 %Opq.sa.calc853, 2		; <i32> [#uses=1]
+	%225 = add i32 %.SV172.phi, 1		; <i32> [#uses=1]
+	br label %bb92.fragment
+
+bb92.fragment:		; preds = %bb92
+	%Opq.sa.calc1005 = xor i32 %Opq.sa.calc686, 130		; <i32> [#uses=2]
+	store i32 %225, i32* %.SV176.phi, align 4
+	%226 = shl i32 %yN, 1		; <i32> [#uses=1]
+	br label %meshBB380
+
+bb95:		; preds = %meshBB408
+	%Opq.sa.calc689 = xor i32 %Opq.sa.calc912, 207		; <i32> [#uses=3]
+	%227 = add i32 %.SV172.phi1074, 1		; <i32> [#uses=1]
+	%228 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1		; <i32*> [#uses=1]
+	br label %meshBB384
+
+bb95.fragment:		; preds = %meshBB384
+	%Opq.sa.calc841 = sub i32 %Opq.sa.calc901, 76		; <i32> [#uses=0]
+	store i32 %.SV306.phi, i32* %.SV308.phi, align 4
+	%229 = getelementptr %struct.Macroblock* %.load.SV.phi, i64 %.load20.SV.phi, i32 28		; <i32*> [#uses=1]
+	%230 = load i32* %229, align 4		; <i32> [#uses=2]
+	store i32 %230, i32* %.load53.SV.phi, align 4
+	br label %bb96
+
+bb96:		; preds = %meshBB444, %meshBB440, %meshBB436, %meshBB424, %meshBB420, %meshBB416, %meshBB396, %meshBB388, %meshBB380, %meshBB376, %meshBB364, %meshBB356, %meshBB340, %meshBB324, %meshBB, %bb95.fragment, %bb91.fragment, %bb74.fragment, %bb51, %bb48, %bb43, %bb41.fragment, %bb37.fragment, %bb36.fragment, %bb34.fragment, %bb30.fragment, %bb25, %bb19.fragment, %bb16.fragment, %bb15.fragment, %bb11.fragment, %bb9.fragment, %bb8.fragment, %bb7.fragment
+	%.SV38.phi1087 = phi i64 [ %.SV38.phi1224, %meshBB444 ], [ %.SV38.phi1210, %meshBB440 ], [ %.SV38.phi1147, %meshBB436 ], [ %.SV38.phi1197, %meshBB424 ], [ %.SV38.phi1194, %meshBB420 ], [ %.SV38.phi1201, %meshBB416 ], [ %.SV38.phi, %meshBB396 ], [ %.SV38.phi1118, %meshBB388 ], [ %.SV38.phi1207, %meshBB380 ], [ %.SV38.phi1153, %meshBB376 ], [ %.SV38.phi1098, %meshBB364 ], [ %.SV38.phi1121, %meshBB356 ], [ %.SV38.phi1167, %meshBB340 ], [ %.SV38.phi1175, %meshBB324 ], [ %.SV38.phi1183, %meshBB ], [ %.SV38.phi1164, %bb91.fragment ], [ %.SV38.phi1179, %bb48 ], [ %.SV38.phi1231, %bb41.fragment ], [ %.SV38.phi1172, %bb25 ], [ %.SV38.phi1175, %bb15.fragment ], [ %.SV38.phi1164, %bb9.fragment ], [ %.SV38.phi1164, %bb8.fragment ], [ %.SV38.phi1221, %bb95.fragment ], [ %.SV38.phi1187, %bb74.fragment ], [ %.SV38.phi1227, %bb51 ], [ %.SV38.phi1179, %bb43 ], [ %.SV38.phi1103, %bb37.fragment ], [ %.SV38.phi1214, %bb36.fragment ], [ %.SV38.phi1227, %bb34.fragment ], [ %.SV38.phi1121, %bb30.fragment ], [ %.SV38.phi1187, %bb19.fragment ], [ %.SV38.phi1175, %bb16.fragment ], [ %.SV38.phi1204, %bb11.fragment ], [ %.SV38.phi1118, %bb7.fragment ]		; <i64> [#uses=2]
+	%.SV68.phi1086 = phi i32 [ %.SV68.phi1223, %meshBB444 ], [ %.SV68.phi1209, %meshBB440 ], [ %.SV68.phi1146, %meshBB436 ], [ %.SV68.phi1196, %meshBB424 ], [ %.SV68.phi1193, %meshBB420 ], [ %.SV68.phi1199, %meshBB416 ], [ %.SV68.phi, %meshBB396 ], [ %.SV68.phi1117, %meshBB388 ], [ %.SV68.phi1206, %meshBB380 ], [ %.SV68.phi1152, %meshBB376 ], [ %.SV68.phi1096, %meshBB364 ], [ %.SV68.phi1120, %meshBB356 ], [ %.SV68.phi1166, %meshBB340 ], [ %.SV68.phi1174, %meshBB324 ], [ %.SV68.phi1181, %meshBB ], [ %.SV68.phi1162, %bb91.fragment ], [ %.SV68.phi1177, %bb48 ], [ %.SV68.phi1229, %bb41.fragment ], [ %.SV68.phi1169, %bb25 ], [ %.SV68.phi1174, %bb15.fragment ], [ %.SV68.phi1162, %bb9.fragment ], [ %.SV68.phi1162, %bb8.fragment ], [ %.SV68.phi1220, %bb95.fragment ], [ %.SV68.phi1185, %bb74.fragment ], [ %.SV68.phi1226, %bb51 ], [ %.SV68.phi1177, %bb43 ], [ %.SV68.phi1100, %bb37.fragment ], [ %.SV68.phi1212, %bb36.fragment ], [ %.SV68.phi1226, %bb34.fragment ], [ %.SV68.phi1120, %bb30.fragment ], [ %.SV68.phi1185, %bb19.fragment ], [ %.SV68.phi1174, %bb16.fragment ], [ %.SV68.phi1203, %bb11.fragment ], [ %.SV68.phi1117, %bb7.fragment ]		; <i32> [#uses=2]
+	%.SV70.phi1085 = phi i32 [ %.SV70.phi1222, %meshBB444 ], [ %.SV70.phi1208, %meshBB440 ], [ %.SV70.phi1145, %meshBB436 ], [ %.SV70.phi1195, %meshBB424 ], [ %.SV70.phi1192, %meshBB420 ], [ %.SV70.phi1198, %meshBB416 ], [ %.SV70.phi, %meshBB396 ], [ %.SV70.phi1116, %meshBB388 ], [ %.SV70.phi1205, %meshBB380 ], [ %.SV70.phi1151, %meshBB376 ], [ %.SV70.phi1095, %meshBB364 ], [ %.SV70.phi1119, %meshBB356 ], [ %.SV70.phi1165, %meshBB340 ], [ %.SV70.phi1173, %meshBB324 ], [ %.SV70.phi1180, %meshBB ], [ %.SV70.phi1161, %bb91.fragment ], [ %.SV70.phi1176, %bb48 ], [ %.SV70.phi1228, %bb41.fragment ], [ %.SV70.phi1168, %bb25 ], [ %.SV70.phi1173, %bb15.fragment ], [ %.SV70.phi1161, %bb9.fragment ], [ %.SV70.phi1161, %bb8.fragment ], [ %.SV70.phi1219, %bb95.fragment ], [ %.SV70.phi1184, %bb74.fragment ], [ %.SV70.phi1225, %bb51 ], [ %.SV70.phi1176, %bb43 ], [ %.SV70.phi1099, %bb37.fragment ], [ %.SV70.phi1211, %bb36.fragment ], [ %.SV70.phi1225, %bb34.fragment ], [ %.SV70.phi1119, %bb30.fragment ], [ %.SV70.phi1184, %bb19.fragment ], [ %.SV70.phi1173, %bb16.fragment ], [ %.SV70.phi1202, %bb11.fragment ], [ %.SV70.phi1116, %bb7.fragment ]		; <i32> [#uses=2]
+	%.SV.phi = phi i32 [ %.SV.phi1048, %meshBB444 ], [ %.SV.phi1056, %meshBB440 ], [ %.SV.phi1067, %meshBB436 ], [ %.SV.phi1072, %meshBB424 ], [ %.SV.phi1044, %meshBB420 ], [ %.SV.phi1076, %meshBB416 ], [ %.SV.phi1065, %meshBB396 ], [ %.SV.phi1054, %meshBB388 ], [ %.SV.phi1052, %meshBB380 ], [ %.SV.phi1050, %meshBB376 ], [ %.SV.phi1062, %meshBB364 ], [ %.SV.phi1046, %meshBB356 ], [ %.SV.phi1042, %meshBB340 ], [ %.SV.phi1032, %meshBB324 ], [ %.SV.phi1034, %meshBB ], [ %.SV178.phi, %bb91.fragment ], [ %.SV118.phi1040, %bb48 ], [ %.SV118.phi1125, %bb41.fragment ], [ %.SV118.phi, %bb25 ], [ %.load94.SV.phi, %bb15.fragment ], [ %32, %bb9.fragment ], [ %32, %bb8.fragment ], [ %230, %bb95.fragment ], [ %187, %bb74.fragment ], [ %.SV118.phi1081, %bb51 ], [ %.SV118.phi1040, %bb43 ], [ %.load131.SV.phi, %bb37.fragment ], [ %.SV118.phi1154, %bb36.fragment ], [ %.load129.SV.phi, %bb34.fragment ], [ %.SV118.phi1158, %bb30.fragment ], [ %66, %bb19.fragment ], [ %.SV93.phi, %bb16.fragment ], [ %.load84.SV.phi, %bb11.fragment ], [ %27, %bb7.fragment ]		; <i32> [#uses=1]
+	%yM.0.SV.phi = phi i32 [ -1, %meshBB444 ], [ %yN, %meshBB440 ], [ %yM.0.SV.phi1066, %meshBB436 ], [ %yN, %meshBB424 ], [ %yN, %meshBB420 ], [ -1, %meshBB416 ], [ -1, %meshBB396 ], [ %yM.0.SV.phi1053, %meshBB388 ], [ %yM.0.SV.phi1051, %meshBB380 ], [ %yM.0.SV.phi1049, %meshBB376 ], [ %yN, %meshBB364 ], [ %yN, %meshBB356 ], [ %yM.0.SV.phi1041, %meshBB340 ], [ -1, %meshBB324 ], [ -1, %meshBB ], [ %yN, %bb91.fragment ], [ -1, %bb48 ], [ %yN, %bb41.fragment ], [ -1, %bb25 ], [ %yN, %bb15.fragment ], [ %yN, %bb9.fragment ], [ -1, %bb8.fragment ], [ %yN, %bb95.fragment ], [ %yN, %bb74.fragment ], [ %133, %bb51 ], [ %118, %bb43 ], [ %107, %bb37.fragment ], [ %104, %bb36.fragment ], [ %yN, %bb34.fragment ], [ %91, %bb30.fragment ], [ %yN, %bb19.fragment ], [ %62, %bb16.fragment ], [ %45, %bb11.fragment ], [ %yN, %bb7.fragment ]		; <i32> [#uses=2]
+	%Opq.sa.calc693 = add i32 0, 15		; <i32> [#uses=2]
+	%Opq.sa.calc692 = xor i32 %Opq.sa.calc693, 8		; <i32> [#uses=1]
+	%231 = icmp eq i32 %.SV.phi, 0		; <i1> [#uses=1]
+	br i1 %231, label %bb97, label %meshBB404
+
+bb97:		; preds = %meshBB424, %meshBB408, %meshBB352, %bb96, %bb21
+	%.SV38.phi1150 = phi i64 [ %.SV38.phi1197, %meshBB424 ], [ %.SV38.phi1218, %meshBB408 ], [ %.SV38.phi1140, %meshBB352 ], [ %.SV38.phi1087, %bb96 ], [ %4, %bb21 ]		; <i64> [#uses=1]
+	%.SV68.phi1149 = phi i32 [ %.SV68.phi1196, %meshBB424 ], [ %.SV68.phi1216, %meshBB408 ], [ %.SV68.phi1139, %meshBB352 ], [ %.SV68.phi1086, %bb96 ], [ %.SV68.phi1021, %bb21 ]		; <i32> [#uses=1]
+	%.SV70.phi1148 = phi i32 [ %.SV70.phi1195, %meshBB424 ], [ %.SV70.phi1215, %meshBB408 ], [ %.SV70.phi1138, %meshBB352 ], [ %.SV70.phi1085, %bb96 ], [ %.SV70.phi1027, %bb21 ]		; <i32> [#uses=1]
+	%yM.0.reg2mem.0.SV.phi = phi i32 [ -1, %meshBB424 ], [ -1, %meshBB408 ], [ -1, %meshBB352 ], [ %yM.0.SV.phi, %bb96 ], [ -1, %bb21 ]		; <i32> [#uses=1]
+	%Opq.sa.calc694 = xor i32 0, 243		; <i32> [#uses=1]
+	%232 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
+	%233 = getelementptr %struct.ImageParameters* %232, i64 0, i32 45		; <i32*> [#uses=1]
+	br label %bb97.fragment
+
+bb97.fragment:		; preds = %bb97
+	%Opq.sa.calc928 = xor i32 %Opq.sa.calc694, 128		; <i32> [#uses=1]
+	%234 = load i32* %233, align 4		; <i32> [#uses=1]
+	%235 = icmp eq i32 %234, 0		; <i1> [#uses=1]
+	br i1 %235, label %return, label %bb98
+
+bb98:		; preds = %meshBB444, %meshBB404, %bb97.fragment, %bb81, %bb78.fragment
+	%.SV38.phi1093 = phi i64 [ %.SV38.phi1224, %meshBB444 ], [ %.SV38.phi1017, %meshBB404 ], [ %.SV38.phi1150, %bb97.fragment ], [ %.SV38.phi1137, %bb81 ], [ %.SV38.phi1201, %bb78.fragment ]		; <i64> [#uses=2]
+	%.SV68.phi1092 = phi i32 [ %.SV68.phi1223, %meshBB444 ], [ %.SV68.phi1023, %meshBB404 ], [ %.SV68.phi1149, %bb97.fragment ], [ %.SV68.phi1135, %bb81 ], [ %.SV68.phi1199, %bb78.fragment ]		; <i32> [#uses=2]
+	%.SV70.phi1091 = phi i32 [ %.SV70.phi1222, %meshBB444 ], [ %.SV70.phi1028, %meshBB404 ], [ %.SV70.phi1148, %bb97.fragment ], [ %.SV70.phi1134, %bb81 ], [ %.SV70.phi1198, %bb78.fragment ]		; <i32> [#uses=2]
+	%yM.0.reg2mem.1.SV.phi1068 = phi i32 [ %yN, %meshBB444 ], [ %yM.0.reg2mem.1.SV.phi1077, %meshBB404 ], [ %yM.0.reg2mem.0.SV.phi, %bb97.fragment ], [ %yN, %bb81 ], [ %197, %bb78.fragment ]		; <i32> [#uses=1]
+	%Opq.sa.calc695 = xor i32 0, 23		; <i32> [#uses=2]
+	%236 = and i32 %.SV70.phi1091, %xN		; <i32> [#uses=1]
+	%237 = getelementptr %struct.PixelPos* %pix, i64 0, i32 2		; <i32*> [#uses=2]
+	store i32 %236, i32* %237, align 4
+	%238 = and i32 %yM.0.reg2mem.1.SV.phi1068, %.SV68.phi1092		; <i32> [#uses=1]
+	%239 = getelementptr %struct.PixelPos* %pix, i64 0, i32 3		; <i32*> [#uses=2]
+	store i32 %238, i32* %239, align 4
+	%240 = getelementptr %struct.PixelPos* %pix, i64 0, i32 5		; <i32*> [#uses=1]
+	br label %meshBB376
+
+bb98.fragment:		; preds = %meshBB376
+	%Opq.sa.calc1008 = sub i32 %Opq.link.mask911, 13		; <i32> [#uses=1]
+	%241 = getelementptr %struct.PixelPos* %pix, i64 0, i32 4		; <i32*> [#uses=4]
+	%242 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1		; <i32*> [#uses=1]
+	%243 = load i32* %242, align 4		; <i32> [#uses=1]
+	%244 = load void (i32, i32*, i32*)** @get_mb_block_pos, align 8		; <void (i32, i32*, i32*)*> [#uses=1]
+	tail call void %244(i32 %243, i32* %241, i32* %.SV317.phi) nounwind
+	%245 = load i32* %241, align 4		; <i32> [#uses=1]
+	%246 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
+	%247 = getelementptr %struct.ImageParameters* %246, i64 0, i32 119, i64 %.load39.SV.phi, i64 0		; <i32*> [#uses=1]
+	%248 = load i32* %247, align 4		; <i32> [#uses=1]
+	%249 = mul i32 %248, %245		; <i32> [#uses=2]
+	store i32 %249, i32* %241, align 4
+	br label %bb98.fragment183
+
+bb98.fragment183:		; preds = %bb98.fragment
+	%Opq.sa.calc777 = sub i32 %Opq.sa.calc1008, -158		; <i32> [#uses=1]
+	%Opq.sa.calc776 = sub i32 %Opq.sa.calc777, 46		; <i32> [#uses=0]
+	%250 = load i32* %.SV317.phi, align 4		; <i32> [#uses=1]
+	%251 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
+	%252 = getelementptr %struct.ImageParameters* %251, i64 0, i32 119, i64 %.load39.SV.phi, i64 1		; <i32*> [#uses=1]
+	%253 = load i32* %252, align 4		; <i32> [#uses=1]
+	%254 = mul i32 %253, %250		; <i32> [#uses=1]
+	%255 = load i32* %.SV313.phi, align 4		; <i32> [#uses=1]
+	%256 = add i32 %255, %249		; <i32> [#uses=1]
+	store i32 %256, i32* %241, align 4
+	%257 = load i32* %.SV315.phi, align 4		; <i32> [#uses=1]
+	%258 = add i32 %257, %254		; <i32> [#uses=1]
+	store i32 %258, i32* %.SV317.phi, align 4
+	ret void
+
+return:		; preds = %meshBB448, %meshBB396, %bb97.fragment
+	%Opq.link.SV697.phi = phi i32 [ %Opq.sa.calc957, %meshBB448 ], [ %Opq.sa.calc758, %meshBB396 ], [ %Opq.sa.calc928, %bb97.fragment ]		; <i32> [#uses=1]
+	%Opq.link.mask699 = and i32 %Opq.link.SV697.phi, 0		; <i32> [#uses=1]
+	%Opq.sa.calc696 = add i32 %Opq.link.mask699, 238		; <i32> [#uses=0]
+	ret void
+
+meshBB:		; preds = %bb33.fragment, %bb14.fragment
+	%.SV38.phi1183 = phi i64 [ %.SV38.phi1115, %bb14.fragment ], [ %.SV38.phi1172, %bb33.fragment ]		; <i64> [#uses=3]
+	%.SV68.phi1181 = phi i32 [ %.SV68.phi1112, %bb14.fragment ], [ %.SV68.phi1169, %bb33.fragment ]		; <i32> [#uses=3]
+	%.SV70.phi1180 = phi i32 [ %.SV70.phi1111, %bb14.fragment ], [ %.SV70.phi1168, %bb33.fragment ]		; <i32> [#uses=3]
+	%.SV104.phi1084 = phi i32 [ undef, %bb14.fragment ], [ %.SV104.phi, %bb33.fragment ]		; <i32> [#uses=1]
+	%.SV111.phi1083 = phi i32* [ undef, %bb14.fragment ], [ %.SV111.phi, %bb33.fragment ]		; <i32*> [#uses=1]
+	%.SV118.phi1082 = phi i32 [ undef, %bb14.fragment ], [ %.SV118.phi, %bb33.fragment ]		; <i32> [#uses=2]
+	%.SV.phi1034 = phi i32 [ %50, %bb14.fragment ], [ undef, %bb33.fragment ]		; <i32> [#uses=1]
+	%meshStackVariable.phi = phi i32 [ %Opq.sa.calc723, %bb14.fragment ], [ %Opq.sa.calc712, %bb33.fragment ]		; <i32> [#uses=1]
+	%Opq.link.SV829.phi = phi i32 [ %Opq.sa.calc723, %bb14.fragment ], [ %Opq.sa.calc534, %bb33.fragment ]		; <i32> [#uses=1]
+	%Opq.link.mask831 = and i32 %Opq.link.SV829.phi, 0		; <i32> [#uses=1]
+	%Opq.sa.calc828 = sub i32 %Opq.link.mask831, -117		; <i32> [#uses=2]
+	%meshCmp = icmp eq i32 %meshStackVariable.phi, 3		; <i1> [#uses=1]
+	br i1 %meshCmp, label %bb35, label %bb96
+
+meshBB324:		; preds = %bb32, %bb15
+	%.SV38.phi1175 = phi i64 [ %.SV38.phi1172, %bb32 ], [ %.SV38.phi1115, %bb15 ]		; <i64> [#uses=3]
+	%.SV68.phi1174 = phi i32 [ %.SV68.phi1169, %bb32 ], [ %.SV68.phi1112, %bb15 ]		; <i32> [#uses=3]
+	%.SV70.phi1173 = phi i32 [ %.SV70.phi1168, %bb32 ], [ %.SV70.phi1111, %bb15 ]		; <i32> [#uses=3]
+	%.load94.SV.phi = phi i32 [ undef, %bb32 ], [ %50, %bb15 ]		; <i32> [#uses=1]
+	%.SV212.phi = phi %struct.Macroblock* [ undef, %bb32 ], [ %55, %bb15 ]		; <%struct.Macroblock*> [#uses=1]
+	%.SV214.phi = phi i32 [ undef, %bb32 ], [ %56, %bb15 ]		; <i32> [#uses=1]
+	%meshStackVariable325.phi = phi i32 [ %Opq.sa.calc531, %bb32 ], [ %Opq.sa.calc496, %bb15 ]		; <i32> [#uses=1]
+	%Opq.link.SV751.phi = phi i32 [ %Opq.sa.calc512, %bb32 ], [ %Opq.sa.calc723, %bb15 ]		; <i32> [#uses=1]
+	%.SV.phi1032 = phi i32 [ %.SV118.phi, %bb32 ], [ undef, %bb15 ]		; <i32> [#uses=1]
+	%.SV93.phi = phi i32 [ undef, %bb32 ], [ %50, %bb15 ]		; <i32> [#uses=1]
+	%.SV91.phi = phi i32* [ undef, %bb32 ], [ %48, %bb15 ]		; <i32*> [#uses=1]
+	%.SV87.phi = phi i32 [ undef, %bb32 ], [ %47, %bb15 ]		; <i32> [#uses=1]
+	%Opq.link.mask753 = and i32 %Opq.link.SV751.phi, 4		; <i32> [#uses=1]
+	%Opq.sa.calc750 = add i32 %Opq.link.mask753, 203		; <i32> [#uses=1]
+	%meshCmp327 = icmp eq i32 %meshStackVariable325.phi, 14		; <i1> [#uses=1]
+	br i1 %meshCmp327, label %bb15.fragment, label %bb96
+
+meshBB328:		; preds = %bb50, %bb34
+	%.SV38.phi1227 = phi i64 [ %.SV38.phi1179, %bb50 ], [ %.SV38.phi1172, %bb34 ]		; <i64> [#uses=2]
+	%.SV68.phi1226 = phi i32 [ %.SV68.phi1177, %bb50 ], [ %.SV68.phi1169, %bb34 ]		; <i32> [#uses=2]
+	%.SV70.phi1225 = phi i32 [ %.SV70.phi1176, %bb50 ], [ %.SV70.phi1168, %bb34 ]		; <i32> [#uses=2]
+	%.SV118.phi1081 = phi i32 [ %.SV118.phi1040, %bb50 ], [ %.SV118.phi, %bb34 ]		; <i32> [#uses=1]
+	%.load129.SV.phi = phi i32 [ undef, %bb50 ], [ %.SV118.phi, %bb34 ]		; <i32> [#uses=1]
+	%.load116.SV.phi = phi i32* [ undef, %bb50 ], [ %.SV111.phi, %bb34 ]		; <i32*> [#uses=1]
+	%.SV238.phi = phi i32 [ undef, %bb50 ], [ %100, %bb34 ]		; <i32> [#uses=1]
+	%meshStackVariable329.phi = phi i32 [ %Opq.sa.calc577, %bb50 ], [ %Opq.sa.calc537, %bb34 ]		; <i32> [#uses=1]
+	%Opq.link.SV788.phi = phi i32 [ %Opq.sa.calc577, %bb50 ], [ %Opq.sa.calc712, %bb34 ]		; <i32> [#uses=1]
+	%Opq.link.mask790 = and i32 %Opq.link.SV788.phi, 1		; <i32> [#uses=1]
+	%Opq.sa.calc787 = sub i32 %Opq.link.mask790, -227		; <i32> [#uses=2]
+	%meshCmp331 = icmp eq i32 %meshStackVariable329.phi, 11		; <i1> [#uses=1]
+	br i1 %meshCmp331, label %bb34.fragment, label %bb51
+
+meshBB332:		; preds = %bb44, %bb11
+	%.SV38.phi1204 = phi i64 [ %.SV38.phi1231, %bb44 ], [ %.SV38.phi1164, %bb11 ]		; <i64> [#uses=2]
+	%.SV68.phi1203 = phi i32 [ %.SV68.phi1229, %bb44 ], [ %.SV68.phi1162, %bb11 ]		; <i32> [#uses=2]
+	%.SV70.phi1202 = phi i32 [ %.SV70.phi1228, %bb44 ], [ %.SV70.phi1161, %bb11 ]		; <i32> [#uses=2]
+	%.load127.SV.phi = phi i32 [ %.SV118.phi1125, %bb44 ], [ undef, %bb11 ]		; <i32> [#uses=1]
+	%.load114.SV.phi = phi i32* [ %.SV111.phi1126, %bb44 ], [ undef, %bb11 ]		; <i32*> [#uses=1]
+	%.load46.SV.phi = phi i32 [ %.SV43.phi1230, %bb44 ], [ undef, %bb11 ]		; <i32> [#uses=1]
+	%.SV248.phi = phi i32 [ %119, %bb44 ], [ undef, %bb11 ]		; <i32> [#uses=1]
+	%.load84.SV.phi = phi i32 [ undef, %bb44 ], [ %32, %bb11 ]		; <i32> [#uses=1]
+	%.load81.SV.phi = phi i32* [ undef, %bb44 ], [ %.SV80.phi, %bb11 ]		; <i32*> [#uses=1]
+	%.load50.SV.phi = phi i32 [ undef, %bb44 ], [ %.SV43.phi1163, %bb11 ]		; <i32> [#uses=1]
+	%.SV206.phi = phi i32 [ undef, %bb44 ], [ %43, %bb11 ]		; <i32> [#uses=1]
+	%meshStackVariable333.phi = phi i32 [ %Opq.sa.calc566, %bb44 ], [ %Opq.sa.calc485, %bb11 ]		; <i32> [#uses=1]
+	%Opq.link.SV857.phi = phi i32 [ %Opq.sa.calc987, %bb44 ], [ %Opq.sa.calc485, %bb11 ]		; <i32> [#uses=1]
+	%Opq.link.mask859 = and i32 %Opq.link.SV857.phi, 4		; <i32> [#uses=2]
+	%Opq.sa.calc856 = add i32 %Opq.link.mask859, 204		; <i32> [#uses=2]
+	%meshCmp335 = icmp eq i32 %meshStackVariable333.phi, 4		; <i1> [#uses=1]
+	br i1 %meshCmp335, label %bb11.fragment, label %bb44.fragment
+
+meshBB336:		; preds = %bb85, %bb40
+	%.SV52.phi1234 = phi i32* [ %.SV52.phi1213, %bb85 ], [ undef, %bb40 ]		; <i32*> [#uses=1]
+	%.SV38.phi1231 = phi i64 [ %.SV38.phi1214, %bb85 ], [ %.SV38.phi1179, %bb40 ]		; <i64> [#uses=4]
+	%.SV43.phi1230 = phi i32 [ undef, %bb85 ], [ %.SV43.phi1178, %bb40 ]		; <i32> [#uses=3]
+	%.SV68.phi1229 = phi i32 [ %.SV68.phi1212, %bb85 ], [ %.SV68.phi1177, %bb40 ]		; <i32> [#uses=4]
+	%.SV70.phi1228 = phi i32 [ %.SV70.phi1211, %bb85 ], [ %.SV70.phi1176, %bb40 ]		; <i32> [#uses=4]
+	%.SV99.phi1128 = phi i32* [ undef, %bb85 ], [ %.SV99.phi1037, %bb40 ]		; <i32*> [#uses=1]
+	%.SV104.phi1127 = phi i32 [ undef, %bb85 ], [ %.SV104.phi1036, %bb40 ]		; <i32> [#uses=2]
+	%.SV111.phi1126 = phi i32* [ undef, %bb85 ], [ %.SV111.phi1035, %bb40 ]		; <i32*> [#uses=2]
+	%.SV118.phi1125 = phi i32 [ undef, %bb85 ], [ %.SV118.phi1040, %bb40 ]		; <i32> [#uses=3]
+	%meshStackVariable337.phi = phi i32 [ %Opq.sa.calc665, %bb85 ], [ %Opq.sa.calc553, %bb40 ]		; <i32> [#uses=1]
+	%Opq.link.SV980.phi = phi i32 [ %Opq.sa.calc667, %bb85 ], [ %Opq.sa.calc554, %bb40 ]		; <i32> [#uses=1]
+	%Opq.link.mask982 = and i32 %Opq.link.SV980.phi, 1		; <i32> [#uses=1]
+	%Opq.sa.calc979 = sub i32 %Opq.link.mask982, -153		; <i32> [#uses=2]
+	%meshCmp339 = icmp eq i32 %meshStackVariable337.phi, 4		; <i1> [#uses=1]
+	br i1 %meshCmp339, label %bb41, label %bb86
+
+meshBB340:		; preds = %bb29, %bb26
+	%.SV38.phi1167 = phi i64 [ %.SV38.phi1121, %bb29 ], [ %.SV38.phi1172, %bb26 ]		; <i64> [#uses=3]
+	%.SV68.phi1166 = phi i32 [ %.SV68.phi1120, %bb29 ], [ %.SV68.phi1169, %bb26 ]		; <i32> [#uses=3]
+	%.SV70.phi1165 = phi i32 [ %.SV70.phi1119, %bb29 ], [ %.SV70.phi1168, %bb26 ]		; <i32> [#uses=3]
+	%.SV104.phi1080 = phi i32 [ undef, %bb29 ], [ %.SV104.phi, %bb26 ]		; <i32> [#uses=1]
+	%.SV111.phi1079 = phi i32* [ undef, %bb29 ], [ %.SV111.phi, %bb26 ]		; <i32*> [#uses=1]
+	%.SV118.phi1078 = phi i32 [ %.SV118.phi1158, %bb29 ], [ %.SV118.phi, %bb26 ]		; <i32> [#uses=1]
+	%.load123.SV.phi = phi i32 [ undef, %bb29 ], [ %.SV118.phi, %bb26 ]		; <i32> [#uses=2]
+	%.SV228.phi = phi %struct.Macroblock* [ undef, %bb29 ], [ %81, %bb26 ]		; <%struct.Macroblock*> [#uses=1]
+	%.SV230.phi = phi i32 [ undef, %bb29 ], [ %82, %bb26 ]		; <i32> [#uses=1]
+	%meshStackVariable341.phi = phi i32 [ %Opq.sa.calc525, %bb29 ], [ %Opq.sa.calc518, %bb26 ]		; <i32> [#uses=1]
+	%Opq.link.SV755.phi = phi i32 [ %Opq.sa.calc525, %bb29 ], [ %Opq.sa.calc519, %bb26 ]		; <i32> [#uses=1]
+	%.SV.phi1042 = phi i32 [ %.SV118.phi1158, %bb29 ], [ undef, %bb26 ]		; <i32> [#uses=1]
+	%yM.0.SV.phi1041 = phi i32 [ %89, %bb29 ], [ undef, %bb26 ]		; <i32> [#uses=1]
+	%Opq.link.mask757 = and i32 %Opq.link.SV755.phi, 12		; <i32> [#uses=1]
+	%Opq.sa.calc754 = add i32 %Opq.link.mask757, 225		; <i32> [#uses=2]
+	%meshCmp343 = icmp eq i32 %meshStackVariable341.phi, 9		; <i1> [#uses=1]
+	br i1 %meshCmp343, label %bb26.fragment, label %bb96
+
+meshBB344:		; preds = %bb68, %bb23.fragment182
+	%.SV38.phi1172 = phi i64 [ %.SV38.phi1115, %bb23.fragment182 ], [ %.SV38.phi1098, %bb68 ]		; <i64> [#uses=8]
+	%.SV52.phi1170 = phi i32* [ undef, %bb23.fragment182 ], [ %.SV52.phi1097, %bb68 ]		; <i32*> [#uses=2]
+	%.SV68.phi1169 = phi i32 [ %.SV68.phi1112, %bb23.fragment182 ], [ %.SV68.phi1096, %bb68 ]		; <i32> [#uses=8]
+	%.SV70.phi1168 = phi i32 [ %.SV70.phi1111, %bb23.fragment182 ], [ %.SV70.phi1095, %bb68 ]		; <i32> [#uses=8]
+	%.load144.SV.phi = phi i1 [ undef, %bb23.fragment182 ], [ %145, %bb68 ]		; <i1> [#uses=1]
+	%.SV274.phi = phi i32* [ undef, %bb23.fragment182 ], [ %167, %bb68 ]		; <i32*> [#uses=2]
+	%.SV118.phi = phi i32 [ %76, %bb23.fragment182 ], [ undef, %bb68 ]		; <i32> [#uses=7]
+	%.SV135.phi = phi i1 [ %78, %bb23.fragment182 ], [ undef, %bb68 ]		; <i1> [#uses=2]
+	%meshStackVariable345.phi = phi i32 [ %Opq.sa.calc743, %bb23.fragment182 ], [ %Opq.sa.calc624, %bb68 ]		; <i32> [#uses=1]
+	%Opq.link.SV717.phi = phi i32 [ %Opq.sa.calc744, %bb23.fragment182 ], [ %Opq.sa.calc624, %bb68 ]		; <i32> [#uses=1]
+	%Opq.link.SV720.phi = phi i32 [ %Opq.sa.calc743, %bb23.fragment182 ], [ %Opq.sa.calc624, %bb68 ]		; <i32> [#uses=1]
+	%.SV96.phi = phi i1 [ %71, %bb23.fragment182 ], [ undef, %bb68 ]		; <i1> [#uses=1]
+	%.SV99.phi = phi i32* [ %72, %bb23.fragment182 ], [ undef, %bb68 ]		; <i32*> [#uses=2]
+	%.SV104.phi = phi i32 [ %73, %bb23.fragment182 ], [ undef, %bb68 ]		; <i32> [#uses=3]
+	%.SV111.phi = phi i32* [ %74, %bb23.fragment182 ], [ undef, %bb68 ]		; <i32*> [#uses=3]
+	%Opq.link.mask722 = and i32 %Opq.link.SV720.phi, 9		; <i32> [#uses=3]
+	%Opq.link.mask719 = and i32 %Opq.link.SV717.phi, 0		; <i32> [#uses=1]
+	%Opq.sa.calc715 = sub i32 %Opq.link.mask719, %Opq.link.mask722		; <i32> [#uses=1]
+	%Opq.sa.calc716 = sub i32 %Opq.sa.calc715, -101		; <i32> [#uses=2]
+	%meshCmp347 = icmp eq i32 %meshStackVariable345.phi, 9		; <i1> [#uses=1]
+	br i1 %meshCmp347, label %bb68.fragment, label %bb24
+
+meshBB348:		; preds = %bb37, %bb6
+	%.SV38.phi1103 = phi i64 [ %.SV38.phi1014, %bb6 ], [ %.SV38.phi1019, %bb37 ]		; <i64> [#uses=2]
+	%.SV43.phi1102 = phi i32 [ %.SV43.phi, %bb6 ], [ %.SV43.phi1018, %bb37 ]		; <i32> [#uses=1]
+	%.SV52.phi1101 = phi i32* [ %.SV52.phi, %bb6 ], [ undef, %bb37 ]		; <i32*> [#uses=1]
+	%.SV68.phi1100 = phi i32 [ %.SV68.phi1020, %bb6 ], [ %.SV68.phi1025, %bb37 ]		; <i32> [#uses=2]
+	%.SV70.phi1099 = phi i32 [ %.SV70.phi1026, %bb6 ], [ %.SV70.phi1233, %bb37 ]		; <i32> [#uses=2]
+	%.load131.SV.phi = phi i32 [ undef, %bb6 ], [ %.SV118.phi1155, %bb37 ]		; <i32> [#uses=1]
+	%.load115.SV.phi = phi i32* [ undef, %bb6 ], [ %.SV111.phi1156, %bb37 ]		; <i32*> [#uses=1]
+	%.load48.SV.phi = phi i32 [ undef, %bb6 ], [ %.SV43.phi1018, %bb37 ]		; <i32> [#uses=1]
+	%.SV242.phi = phi i32 [ undef, %bb6 ], [ %105, %bb37 ]		; <i32> [#uses=1]
+	%meshStackVariable349.phi = phi i32 [ %Opq.sa.calc473, %bb6 ], [ %Opq.sa.calc547, %bb37 ]		; <i32> [#uses=1]
+	%Opq.link.SV806.phi = phi i32 [ %Opq.sa.calc873, %bb6 ], [ %Opq.sa.calc958, %bb37 ]		; <i32> [#uses=1]
+	%Opq.link.mask808 = and i32 %Opq.link.SV806.phi, 12		; <i32> [#uses=1]
+	%Opq.sa.calc805 = sub i32 %Opq.link.mask808, -147		; <i32> [#uses=3]
+	%meshCmp351 = icmp eq i32 %meshStackVariable349.phi, 13		; <i1> [#uses=1]
+	br i1 %meshCmp351, label %bb37.fragment, label %bb8
+
+meshBB352:		; preds = %bb79, %bb71
+	%.SV38.phi1140 = phi i64 [ %.SV38.phi1110, %bb71 ], [ %.SV38.phi1098, %bb79 ]		; <i64> [#uses=2]
+	%.SV68.phi1139 = phi i32 [ %.SV68.phi1108, %bb71 ], [ %.SV68.phi1096, %bb79 ]		; <i32> [#uses=2]
+	%.SV70.phi1138 = phi i32 [ %.SV70.phi1107, %bb71 ], [ %.SV70.phi1095, %bb79 ]		; <i32> [#uses=2]
+	%.load166.SV.phi = phi i32 [ %.SV164.phi1104, %bb71 ], [ undef, %bb79 ]		; <i32> [#uses=1]
+	%.load163.SV.phi = phi i32* [ %.SV162.phi1105, %bb71 ], [ undef, %bb79 ]		; <i32*> [#uses=1]
+	%.SV282.phi = phi i32 [ %182, %bb71 ], [ undef, %bb79 ]		; <i32> [#uses=1]
+	%meshStackVariable353.phi = phi i32 [ %Opq.sa.calc633, %bb71 ], [ %Opq.sa.calc650, %bb79 ]		; <i32> [#uses=1]
+	%Opq.link.SV877.phi = phi i32 [ %Opq.sa.calc820, %bb71 ], [ %Opq.sa.calc650, %bb79 ]		; <i32> [#uses=1]
+	%Opq.link.mask879 = and i32 %Opq.link.SV877.phi, 1		; <i32> [#uses=1]
+	%Opq.sa.calc876 = add i32 %Opq.link.mask879, 18		; <i32> [#uses=1]
+	%meshCmp355 = icmp eq i32 %meshStackVariable353.phi, 11		; <i1> [#uses=1]
+	br i1 %meshCmp355, label %bb97, label %bb71.fragment
+
+meshBB356:		; preds = %bb70.fragment, %bb26.fragment
+	%.SV104.phi1160 = phi i32 [ undef, %bb70.fragment ], [ %.SV104.phi1080, %bb26.fragment ]		; <i32> [#uses=1]
+	%.SV111.phi1159 = phi i32* [ undef, %bb70.fragment ], [ %.SV111.phi1079, %bb26.fragment ]		; <i32*> [#uses=1]
+	%.SV118.phi1158 = phi i32 [ undef, %bb70.fragment ], [ %.SV118.phi1078, %bb26.fragment ]		; <i32> [#uses=3]
+	%.SV38.phi1121 = phi i64 [ %.SV38.phi1014, %bb70.fragment ], [ %.SV38.phi1167, %bb26.fragment ]		; <i64> [#uses=3]
+	%.SV68.phi1120 = phi i32 [ %.SV68.phi1020, %bb70.fragment ], [ %.SV68.phi1166, %bb26.fragment ]		; <i32> [#uses=3]
+	%.SV70.phi1119 = phi i32 [ %.SV70.phi1026, %bb70.fragment ], [ %.SV70.phi1165, %bb26.fragment ]		; <i32> [#uses=3]
+	%.SV.phi1046 = phi i32 [ %.load165.SV.phi, %bb70.fragment ], [ %.load123.SV.phi, %bb26.fragment ]		; <i32> [#uses=1]
+	%meshStackVariable357.phi = phi i32 [ %Opq.sa.calc738, %bb70.fragment ], [ %Opq.sa.calc917, %bb26.fragment ]		; <i32> [#uses=1]
+	%Opq.link.SV984.phi = phi i32 [ %Opq.sa.calc738, %bb70.fragment ], [ %Opq.sa.calc918, %bb26.fragment ]		; <i32> [#uses=1]
+	%Opq.link.mask986 = and i32 %Opq.link.SV984.phi, 9		; <i32> [#uses=1]
+	%Opq.sa.calc983 = xor i32 %Opq.link.mask986, 251		; <i32> [#uses=1]
+	%meshCmp359 = icmp eq i32 %meshStackVariable357.phi, 9		; <i1> [#uses=1]
+	br i1 %meshCmp359, label %bb28, label %bb96
+
+meshBB360:		; preds = %bb21, %bb13
+	%.SV38.phi1115 = phi i64 [ %4, %bb21 ], [ %.SV38.phi1014, %bb13 ]		; <i64> [#uses=5]
+	%.SV52.phi1113 = phi i32* [ %.SV52.phi1022, %bb21 ], [ %.SV52.phi, %bb13 ]		; <i32*> [#uses=3]
+	%.SV68.phi1112 = phi i32 [ %.SV68.phi1021, %bb21 ], [ %.SV68.phi1020, %bb13 ]		; <i32> [#uses=5]
+	%.SV70.phi1111 = phi i32 [ %.SV70.phi1027, %bb21 ], [ %.SV70.phi1026, %bb13 ]		; <i32> [#uses=5]
+	%.load74.SV.phi = phi i1 [ undef, %bb21 ], [ %21, %bb13 ]		; <i1> [#uses=1]
+	%.SV208.phi = phi i32* [ undef, %bb21 ], [ %46, %bb13 ]		; <i32*> [#uses=2]
+	%meshStackVariable361.phi = phi i32 [ %Opq.sa.calc505, %bb21 ], [ %Opq.sa.calc489, %bb13 ]		; <i32> [#uses=1]
+	%Opq.link.SV867.phi = phi i32 [ %Opq.sa.calc505, %bb21 ], [ %Opq.sa.calc873, %bb13 ]		; <i32> [#uses=1]
+	%Opq.link.mask869 = and i32 %Opq.link.SV867.phi, 1		; <i32> [#uses=1]
+	%Opq.sa.calc866 = add i32 %Opq.link.mask869, 148		; <i32> [#uses=4]
+	%meshCmp363 = icmp eq i32 %meshStackVariable361.phi, 16		; <i1> [#uses=1]
+	br i1 %meshCmp363, label %bb13.fragment, label %bb23
+
+meshBB364:		; preds = %bb65.fragment, %bb56
+	%.SV38.phi1098 = phi i64 [ %.SV38.phi1017, %bb56 ], [ %.SV38.phi1147, %bb65.fragment ]		; <i64> [#uses=11]
+	%.SV52.phi1097 = phi i32* [ %.SV52.phi1024, %bb56 ], [ undef, %bb65.fragment ]		; <i32*> [#uses=8]
+	%.SV68.phi1096 = phi i32 [ %.SV68.phi1023, %bb56 ], [ %.SV68.phi1146, %bb65.fragment ]		; <i32> [#uses=11]
+	%.SV70.phi1095 = phi i32 [ %.SV70.phi1028, %bb56 ], [ %.SV70.phi1145, %bb65.fragment ]		; <i32> [#uses=11]
+	%or.cond.not.SV.phi1094 = phi i1 [ %or.cond.not.SV.phi1029, %bb56 ], [ undef, %bb65.fragment ]		; <i1> [#uses=1]
+	%.SV.phi1062 = phi i32 [ undef, %bb56 ], [ %.SV268.phi, %bb65.fragment ]		; <i32> [#uses=1]
+	%.not4.SV.phi = phi i1 [ %.not4, %bb56 ], [ undef, %bb65.fragment ]		; <i1> [#uses=1]
+	%.SV256.phi = phi i1 [ %139, %bb56 ], [ undef, %bb65.fragment ]		; <i1> [#uses=1]
+	%meshStackVariable365.phi = phi i32 [ %Opq.sa.calc592, %bb56 ], [ %Opq.sa.calc832, %bb65.fragment ]		; <i32> [#uses=1]
+	%Opq.link.SV735.phi = phi i32 [ %Opq.sa.calc592, %bb56 ], [ %Opq.sa.calc832, %bb65.fragment ]		; <i32> [#uses=1]
+	%Opq.link.mask737 = and i32 %Opq.link.SV735.phi, 0		; <i32> [#uses=2]
+	%Opq.sa.calc734 = sub i32 %Opq.link.mask737, -242		; <i32> [#uses=0]
+	%meshCmp367 = icmp eq i32 %meshStackVariable365.phi, 1		; <i1> [#uses=1]
+	br i1 %meshCmp367, label %bb96, label %bb56.fragment
+
+meshBB368:		; preds = %bb90.fragment, %bb8
+	%.SV38.phi1164 = phi i64 [ %.SV38.phi1103, %bb8 ], [ %.SV38.phi1191, %bb90.fragment ]		; <i64> [#uses=5]
+	%.SV43.phi1163 = phi i32 [ %.SV43.phi1102, %bb8 ], [ undef, %bb90.fragment ]		; <i32> [#uses=1]
+	%.SV68.phi1162 = phi i32 [ %.SV68.phi1100, %bb8 ], [ %.SV68.phi1189, %bb90.fragment ]		; <i32> [#uses=5]
+	%.SV70.phi1161 = phi i32 [ %.SV70.phi1099, %bb8 ], [ %.SV70.phi1188, %bb90.fragment ]		; <i32> [#uses=5]
+	%.SV178.phi = phi i32 [ undef, %bb8 ], [ %214, %bb90.fragment ]		; <i32> [#uses=2]
+	%.SV176.phi = phi i32* [ undef, %bb8 ], [ %212, %bb90.fragment ]		; <i32*> [#uses=1]
+	%.SV170.phi = phi i32* [ undef, %bb8 ], [ %210, %bb90.fragment ]		; <i32*> [#uses=1]
+	%.SV172.phi = phi i32 [ undef, %bb8 ], [ %211, %bb90.fragment ]		; <i32> [#uses=1]
+	%.SV76.phi = phi i32* [ %28, %bb8 ], [ undef, %bb90.fragment ]		; <i32*> [#uses=1]
+	%.SV78.phi = phi i32 [ %29, %bb8 ], [ undef, %bb90.fragment ]		; <i32> [#uses=1]
+	%.SV80.phi = phi i32* [ %30, %bb8 ], [ undef, %bb90.fragment ]		; <i32*> [#uses=1]
+	%.load66.SV.phi = phi i32* [ %.SV52.phi1101, %bb8 ], [ undef, %bb90.fragment ]		; <i32*> [#uses=1]
+	%.load35.SV.phi = phi i64 [ %3, %bb8 ], [ undef, %bb90.fragment ]		; <i64> [#uses=1]
+	%.load16.SV.phi = phi %struct.Macroblock* [ %2, %bb8 ], [ undef, %bb90.fragment ]		; <%struct.Macroblock*> [#uses=1]
+	%.SV198.phi = phi i32 [ %29, %bb8 ], [ undef, %bb90.fragment ]		; <i32> [#uses=1]
+	%.SV200.phi = phi i32* [ %30, %bb8 ], [ undef, %bb90.fragment ]		; <i32*> [#uses=1]
+	%meshStackVariable369.phi = phi i32 [ %Opq.sa.calc479, %bb8 ], [ %Opq.sa.calc772, %bb90.fragment ]		; <i32> [#uses=1]
+	%Opq.link.SV769.phi = phi i32 [ %Opq.sa.calc805, %bb8 ], [ %Opq.sa.calc772, %bb90.fragment ]		; <i32> [#uses=1]
+	%Opq.link.mask771 = and i32 %Opq.link.SV769.phi, 2		; <i32> [#uses=1]
+	%Opq.sa.calc768 = xor i32 %Opq.link.mask771, 135		; <i32> [#uses=3]
+	%meshCmp371 = icmp eq i32 %meshStackVariable369.phi, 2		; <i1> [#uses=1]
+	br i1 %meshCmp371, label %bb91, label %bb8.fragment
+
+meshBB372:		; preds = %bb84.fragment, %bb35
+	%.SV38.phi1214 = phi i64 [ %.SV38.phi1191, %bb84.fragment ], [ %.SV38.phi1183, %bb35 ]		; <i64> [#uses=3]
+	%.SV52.phi1213 = phi i32* [ %.SV52.phi1190, %bb84.fragment ], [ undef, %bb35 ]		; <i32*> [#uses=2]
+	%.SV68.phi1212 = phi i32 [ %.SV68.phi1189, %bb84.fragment ], [ %.SV68.phi1181, %bb35 ]		; <i32> [#uses=3]
+	%.SV70.phi1211 = phi i32 [ %.SV70.phi1188, %bb84.fragment ], [ %.SV70.phi1180, %bb35 ]		; <i32> [#uses=3]
+	%.SV118.phi1154 = phi i32 [ undef, %bb84.fragment ], [ %.SV118.phi1082, %bb35 ]		; <i32> [#uses=1]
+	%.SV167.phi = phi i1 [ %203, %bb84.fragment ], [ undef, %bb35 ]		; <i1> [#uses=1]
+	%meshStackVariable373.phi = phi i32 [ %Opq.sa.calc802, %bb84.fragment ], [ %Opq.sa.calc540, %bb35 ]		; <i32> [#uses=1]
+	%Opq.link.SV813.phi = phi i32 [ %Opq.sa.calc802, %bb84.fragment ], [ %Opq.sa.calc541, %bb35 ]		; <i32> [#uses=1]
+	%Opq.link.mask815 = and i32 %Opq.link.SV813.phi, 0		; <i32> [#uses=1]
+	%Opq.sa.calc812 = sub i32 %Opq.link.mask815, -121		; <i32> [#uses=3]
+	%meshCmp375 = icmp eq i32 %meshStackVariable373.phi, 6		; <i1> [#uses=1]
+	br i1 %meshCmp375, label %bb36, label %bb85
+
+meshBB376:		; preds = %bb98, %bb44.fragment
+	%.SV38.phi1153 = phi i64 [ %.SV38.phi1093, %bb98 ], [ %.SV38.phi1204, %bb44.fragment ]		; <i64> [#uses=1]
+	%.SV68.phi1152 = phi i32 [ %.SV68.phi1092, %bb98 ], [ %.SV68.phi1203, %bb44.fragment ]		; <i32> [#uses=1]
+	%.SV70.phi1151 = phi i32 [ %.SV70.phi1091, %bb98 ], [ %.SV70.phi1202, %bb44.fragment ]		; <i32> [#uses=1]
+	%.load39.SV.phi = phi i64 [ %.SV38.phi1093, %bb98 ], [ undef, %bb44.fragment ]		; <i64> [#uses=2]
+	%.SV313.phi = phi i32* [ %237, %bb98 ], [ undef, %bb44.fragment ]		; <i32*> [#uses=1]
+	%.SV315.phi = phi i32* [ %239, %bb98 ], [ undef, %bb44.fragment ]		; <i32*> [#uses=1]
+	%.SV317.phi = phi i32* [ %240, %bb98 ], [ undef, %bb44.fragment ]		; <i32*> [#uses=3]
+	%.SV.phi1050 = phi i32 [ undef, %bb98 ], [ %.load127.SV.phi, %bb44.fragment ]		; <i32> [#uses=1]
+	%yM.0.SV.phi1049 = phi i32 [ undef, %bb98 ], [ %121, %bb44.fragment ]		; <i32> [#uses=1]
+	%meshStackVariable377.phi = phi i32 [ %Opq.sa.calc695, %bb98 ], [ %Opq.sa.calc894, %bb44.fragment ]		; <i32> [#uses=1]
+	%Opq.link.SV909.phi = phi i32 [ %Opq.sa.calc695, %bb98 ], [ %Opq.sa.calc856, %bb44.fragment ]		; <i32> [#uses=1]
+	%Opq.link.mask911 = and i32 %Opq.link.SV909.phi, 16		; <i32> [#uses=2]
+	%Opq.sa.calc908 = add i32 %Opq.link.mask911, -11		; <i32> [#uses=0]
+	%meshCmp379 = icmp eq i32 %meshStackVariable377.phi, 8		; <i1> [#uses=1]
+	br i1 %meshCmp379, label %bb96, label %bb98.fragment
+
+meshBB380:		; preds = %bb92.fragment, %bb49.fragment
+	%.SV38.phi1207 = phi i64 [ %.SV38.phi1164, %bb92.fragment ], [ %.SV38.phi1179, %bb49.fragment ]		; <i64> [#uses=2]
+	%.SV68.phi1206 = phi i32 [ %.SV68.phi1162, %bb92.fragment ], [ %.SV68.phi1177, %bb49.fragment ]		; <i32> [#uses=2]
+	%.SV70.phi1205 = phi i32 [ %.SV70.phi1161, %bb92.fragment ], [ %.SV70.phi1176, %bb49.fragment ]		; <i32> [#uses=2]
+	%.SV104.phi1124 = phi i32 [ undef, %bb92.fragment ], [ %.SV104.phi1036, %bb49.fragment ]		; <i32> [#uses=1]
+	%.SV111.phi1123 = phi i32* [ undef, %bb92.fragment ], [ %.SV111.phi1035, %bb49.fragment ]		; <i32*> [#uses=1]
+	%.SV118.phi1122 = phi i32 [ undef, %bb92.fragment ], [ %.SV118.phi1040, %bb49.fragment ]		; <i32> [#uses=1]
+	%meshStackVariable381.phi = phi i32 [ %Opq.sa.calc1005, %bb92.fragment ], [ %Opq.sa.calc860, %bb49.fragment ]		; <i32> [#uses=1]
+	%Opq.link.SV947.phi = phi i32 [ %Opq.sa.calc1005, %bb92.fragment ], [ %Opq.sa.calc860, %bb49.fragment ]		; <i32> [#uses=1]
+	%.SV.phi1052 = phi i32 [ %.SV178.phi, %bb92.fragment ], [ undef, %bb49.fragment ]		; <i32> [#uses=1]
+	%yM.0.SV.phi1051 = phi i32 [ %226, %bb92.fragment ], [ undef, %bb49.fragment ]		; <i32> [#uses=1]
+	%Opq.link.mask949 = and i32 %Opq.link.SV947.phi, 1		; <i32> [#uses=1]
+	%Opq.sa.calc946 = sub i32 %Opq.link.mask949, -4		; <i32> [#uses=1]
+	%meshCmp383 = icmp eq i32 %meshStackVariable381.phi, 1		; <i1> [#uses=1]
+	br i1 %meshCmp383, label %bb54, label %bb96
+
+meshBB384:		; preds = %bb95, %bb52
+	%.SV38.phi1221 = phi i64 [ %.SV38.phi1179, %bb52 ], [ %.SV38.phi1218, %bb95 ]		; <i64> [#uses=2]
+	%.SV68.phi1220 = phi i32 [ %.SV68.phi1177, %bb52 ], [ %.SV68.phi1216, %bb95 ]		; <i32> [#uses=2]
+	%.SV70.phi1219 = phi i32 [ %.SV70.phi1176, %bb52 ], [ %.SV70.phi1215, %bb95 ]		; <i32> [#uses=2]
+	%.load53.SV.phi = phi i32* [ undef, %bb52 ], [ %.SV52.phi1217, %bb95 ]		; <i32*> [#uses=1]
+	%.load20.SV.phi = phi i64 [ undef, %bb52 ], [ %3, %bb95 ]		; <i64> [#uses=1]
+	%.load.SV.phi = phi %struct.Macroblock* [ undef, %bb52 ], [ %2, %bb95 ]		; <%struct.Macroblock*> [#uses=1]
+	%.SV306.phi = phi i32 [ undef, %bb52 ], [ %227, %bb95 ]		; <i32> [#uses=1]
+	%.SV308.phi = phi i32* [ undef, %bb52 ], [ %228, %bb95 ]		; <i32*> [#uses=1]
+	%.load126.SV.phi = phi i32 [ %.SV118.phi1040, %bb52 ], [ undef, %bb95 ]		; <i32> [#uses=1]
+	%.load44.SV.phi = phi i32 [ %.SV43.phi1178, %bb52 ], [ undef, %bb95 ]		; <i32> [#uses=1]
+	%meshStackVariable385.phi = phi i32 [ %Opq.sa.calc583, %bb52 ], [ %Opq.sa.calc689, %bb95 ]		; <i32> [#uses=1]
+	%Opq.link.SV902.phi = phi i32 [ %Opq.sa.calc860, %bb52 ], [ %Opq.sa.calc689, %bb95 ]		; <i32> [#uses=1]
+	%Opq.link.SV905.phi = phi i32 [ %Opq.sa.calc584, %bb52 ], [ %Opq.sa.calc689, %bb95 ]		; <i32> [#uses=1]
+	%Opq.link.mask907 = and i32 %Opq.link.SV905.phi, 0		; <i32> [#uses=0]
+	%Opq.link.mask904 = and i32 %Opq.link.SV902.phi, 1		; <i32> [#uses=1]
+	%Opq.sa.calc901 = xor i32 %Opq.link.mask904, 227		; <i32> [#uses=3]
+	%meshCmp387 = icmp eq i32 %meshStackVariable385.phi, 5		; <i1> [#uses=1]
+	br i1 %meshCmp387, label %bb95.fragment, label %bb52.fragment
+
+meshBB388:		; preds = %bb52.fragment, %bb7
+	%.SV38.phi1118 = phi i64 [ %.SV38.phi1014, %bb7 ], [ %.SV38.phi1221, %bb52.fragment ]		; <i64> [#uses=2]
+	%.SV68.phi1117 = phi i32 [ %.SV68.phi1020, %bb7 ], [ %.SV68.phi1220, %bb52.fragment ]		; <i32> [#uses=2]
+	%.SV70.phi1116 = phi i32 [ %.SV70.phi1026, %bb7 ], [ %.SV70.phi1219, %bb52.fragment ]		; <i32> [#uses=2]
+	%.SV.phi1054 = phi i32 [ undef, %bb7 ], [ %.load126.SV.phi, %bb52.fragment ]		; <i32> [#uses=1]
+	%yM.0.SV.phi1053 = phi i32 [ undef, %bb7 ], [ %137, %bb52.fragment ]		; <i32> [#uses=1]
+	%.load67.SV.phi = phi i32* [ %.SV52.phi, %bb7 ], [ undef, %bb52.fragment ]		; <i32*> [#uses=1]
+	%.load36.SV.phi = phi i64 [ %3, %bb7 ], [ undef, %bb52.fragment ]		; <i64> [#uses=1]
+	%.load17.SV.phi = phi %struct.Macroblock* [ %2, %bb7 ], [ undef, %bb52.fragment ]		; <%struct.Macroblock*> [#uses=1]
+	%.SV194.phi = phi i32 [ %24, %bb7 ], [ undef, %bb52.fragment ]		; <i32> [#uses=1]
+	%.SV196.phi = phi i32* [ %25, %bb7 ], [ undef, %bb52.fragment ]		; <i32*> [#uses=1]
+	%meshStackVariable389.phi = phi i32 [ %Opq.sa.calc476, %bb7 ], [ %Opq.sa.calc844, %bb52.fragment ]		; <i32> [#uses=1]
+	%Opq.link.SV887.phi = phi i32 [ %Opq.sa.calc873, %bb7 ], [ %Opq.sa.calc901, %bb52.fragment ]		; <i32> [#uses=1]
+	%Opq.link.mask889 = and i32 %Opq.link.SV887.phi, 64		; <i32> [#uses=1]
+	%Opq.sa.calc886 = sub i32 %Opq.link.mask889, -170		; <i32> [#uses=2]
+	%meshCmp391 = icmp eq i32 %meshStackVariable389.phi, 12		; <i1> [#uses=1]
+	br i1 %meshCmp391, label %bb96, label %bb7.fragment
+
+meshBB392:		; preds = %bb4, %entry
+	%meshStackVariable393.phi = phi i32 [ %Opq.sa.calc466, %bb4 ], [ %Opq.sa.calc, %entry ]		; <i32> [#uses=1]
+	%Opq.link.SV922.phi = phi i32 [ %Opq.sa.calc462, %bb4 ], [ %Opq.sa.calc, %entry ]		; <i32> [#uses=1]
+	%or.cond.not.SV.phi = phi i1 [ %or.cond.not, %bb4 ], [ undef, %entry ]		; <i1> [#uses=1]
+	%.SV70.phi1027 = phi i32 [ %12, %bb4 ], [ undef, %entry ]		; <i32> [#uses=2]
+	%.SV52.phi1022 = phi i32* [ %9, %bb4 ], [ undef, %entry ]		; <i32*> [#uses=1]
+	%.SV68.phi1021 = phi i32 [ %10, %bb4 ], [ undef, %entry ]		; <i32> [#uses=2]
+	%.SV43.phi1015 = phi i32 [ %8, %bb4 ], [ undef, %entry ]		; <i32> [#uses=3]
+	%Opq.link.mask924 = and i32 %Opq.link.SV922.phi, 2		; <i32> [#uses=1]
+	%Opq.sa.calc921 = add i32 %Opq.link.mask924, 57		; <i32> [#uses=3]
+	%meshCmp395 = icmp eq i32 %meshStackVariable393.phi, 2		; <i1> [#uses=1]
+	br i1 %meshCmp395, label %entry.fragment, label %bb21
+
+meshBB396:		; preds = %bb69.fragment, %bb.fragment
+	%.SV.phi1065 = phi i32 [ undef, %bb.fragment ], [ %171, %bb69.fragment ]		; <i32> [#uses=1]
+	%meshStackVariable397.phi = phi i32 [ %Opq.sa.calc976, %bb.fragment ], [ %Opq.sa.calc995, %bb69.fragment ]		; <i32> [#uses=1]
+	%Opq.link.SV759.phi = phi i32 [ %Opq.sa.calc976, %bb.fragment ], [ %Opq.sa.calc995, %bb69.fragment ]		; <i32> [#uses=1]
+	%.SV70.phi = phi i32 [ %12, %bb.fragment ], [ %.SV70.phi1168, %bb69.fragment ]		; <i32> [#uses=1]
+	%.SV68.phi = phi i32 [ %10, %bb.fragment ], [ %.SV68.phi1169, %bb69.fragment ]		; <i32> [#uses=1]
+	%.SV38.phi = phi i64 [ %4, %bb.fragment ], [ %.SV38.phi1172, %bb69.fragment ]		; <i64> [#uses=1]
+	%Opq.link.mask761 = and i32 %Opq.link.SV759.phi, 6		; <i32> [#uses=1]
+	%Opq.sa.calc758 = add i32 %Opq.link.mask761, 53		; <i32> [#uses=1]
+	%meshCmp399 = icmp eq i32 %meshStackVariable397.phi, 6		; <i1> [#uses=1]
+	br i1 %meshCmp399, label %bb96, label %return
+
+meshBB400:		; preds = %bb84, %bb69.fragment
+	%.SV38.phi1191 = phi i64 [ %.SV38.phi1098, %bb84 ], [ %.SV38.phi1172, %bb69.fragment ]		; <i64> [#uses=5]
+	%.SV52.phi1190 = phi i32* [ %.SV52.phi1097, %bb84 ], [ undef, %bb69.fragment ]		; <i32*> [#uses=3]
+	%.SV68.phi1189 = phi i32 [ %.SV68.phi1096, %bb84 ], [ %.SV68.phi1169, %bb69.fragment ]		; <i32> [#uses=5]
+	%.SV70.phi1188 = phi i32 [ %.SV70.phi1095, %bb84 ], [ %.SV70.phi1168, %bb69.fragment ]		; <i32> [#uses=5]
+	%.SV290.phi = phi i32 [ %200, %bb84 ], [ undef, %bb69.fragment ]		; <i32> [#uses=1]
+	%.SV164.phi = phi i32 [ undef, %bb84 ], [ %171, %bb69.fragment ]		; <i32> [#uses=2]
+	%meshStackVariable401.phi = phi i32 [ %Opq.sa.calc661, %bb84 ], [ %Opq.sa.calc996, %bb69.fragment ]		; <i32> [#uses=1]
+	%Opq.link.SV825.phi = phi i32 [ %Opq.sa.calc658, %bb84 ], [ %Opq.sa.calc996, %bb69.fragment ]		; <i32> [#uses=1]
+	%.SV162.phi = phi i32* [ undef, %bb84 ], [ %169, %bb69.fragment ]		; <i32*> [#uses=1]
+	%.SV156.phi = phi i32* [ undef, %bb84 ], [ %.SV274.phi, %bb69.fragment ]		; <i32*> [#uses=1]
+	%.SV158.phi = phi i32 [ undef, %bb84 ], [ %168, %bb69.fragment ]		; <i32> [#uses=1]
+	%Opq.link.mask827 = and i32 %Opq.link.SV825.phi, 4		; <i32> [#uses=1]
+	%Opq.sa.calc824 = xor i32 %Opq.link.mask827, 228		; <i32> [#uses=2]
+	%meshCmp403 = icmp eq i32 %meshStackVariable401.phi, 15		; <i1> [#uses=1]
+	br i1 %meshCmp403, label %bb70, label %bb84.fragment
+
+meshBB404:		; preds = %bb96, %bb3
+	%yM.0.reg2mem.1.SV.phi1077 = phi i32 [ %yM.0.SV.phi, %bb96 ], [ undef, %bb3 ]		; <i32> [#uses=1]
+	%meshStackVariable405.phi = phi i32 [ %Opq.sa.calc692, %bb96 ], [ %Opq.sa.calc461, %bb3 ]		; <i32> [#uses=1]
+	%Opq.link.SV940.phi = phi i32 [ %Opq.sa.calc693, %bb96 ], [ %Opq.sa.calc461, %bb3 ]		; <i32> [#uses=1]
+	%or.cond.not.SV.phi1029 = phi i1 [ undef, %bb96 ], [ %or.cond.not, %bb3 ]		; <i1> [#uses=1]
+	%.SV70.phi1028 = phi i32 [ %.SV70.phi1085, %bb96 ], [ %12, %bb3 ]		; <i32> [#uses=2]
+	%.SV52.phi1024 = phi i32* [ undef, %bb96 ], [ %9, %bb3 ]		; <i32*> [#uses=1]
+	%.SV68.phi1023 = phi i32 [ %.SV68.phi1086, %bb96 ], [ %10, %bb3 ]		; <i32> [#uses=2]
+	%.SV38.phi1017 = phi i64 [ %.SV38.phi1087, %bb96 ], [ %4, %bb3 ]		; <i64> [#uses=2]
+	%.SV40.phi = phi i32 [ undef, %bb96 ], [ %6, %bb3 ]		; <i32> [#uses=1]
+	%Opq.link.mask942 = and i32 %Opq.link.SV940.phi, 6		; <i32> [#uses=1]
+	%Opq.sa.calc939 = sub i32 %Opq.link.mask942, -87		; <i32> [#uses=1]
+	%meshCmp407 = icmp eq i32 %meshStackVariable405.phi, 6		; <i1> [#uses=1]
+	br i1 %meshCmp407, label %bb56, label %bb98
+
+meshBB408:		; preds = %bb89.fragment, %bb87
+	%.SV38.phi1218 = phi i64 [ %.SV38.phi1191, %bb89.fragment ], [ %.SV38.phi1210, %bb87 ]		; <i64> [#uses=2]
+	%.SV52.phi1217 = phi i32* [ %.SV52.phi1190, %bb89.fragment ], [ %.SV52.phi1235, %bb87 ]		; <i32*> [#uses=1]
+	%.SV68.phi1216 = phi i32 [ %.SV68.phi1189, %bb89.fragment ], [ %.SV68.phi1209, %bb87 ]		; <i32> [#uses=2]
+	%.SV70.phi1215 = phi i32 [ %.SV70.phi1188, %bb89.fragment ], [ %.SV70.phi1208, %bb87 ]		; <i32> [#uses=2]
+	%.SV172.phi1074 = phi i32 [ %211, %bb89.fragment ], [ undef, %bb87 ]		; <i32> [#uses=1]
+	%meshStackVariable409.phi = phi i32 [ %Opq.sa.calc962, %bb89.fragment ], [ %Opq.sa.calc673, %bb87 ]		; <i32> [#uses=1]
+	%Opq.link.SV913.phi = phi i32 [ %Opq.sa.calc962, %bb89.fragment ], [ %Opq.sa.calc990, %bb87 ]		; <i32> [#uses=1]
+	%Opq.link.mask915 = and i32 %Opq.link.SV913.phi, 9		; <i32> [#uses=1]
+	%Opq.sa.calc912 = xor i32 %Opq.link.mask915, 195		; <i32> [#uses=1]
+	%meshCmp411 = icmp eq i32 %meshStackVariable409.phi, 1		; <i1> [#uses=1]
+	br i1 %meshCmp411, label %bb97, label %bb95
+
+meshBB412:		; preds = %bb68.fragment, %bb13.fragment
+	%.SV38.phi1187 = phi i64 [ %.SV38.phi1115, %bb13.fragment ], [ %.SV38.phi1172, %bb68.fragment ]		; <i64> [#uses=2]
+	%.SV52.phi1186 = phi i32* [ %.SV52.phi1113, %bb13.fragment ], [ %.SV52.phi1170, %bb68.fragment ]		; <i32*> [#uses=2]
+	%.SV68.phi1185 = phi i32 [ %.SV68.phi1112, %bb13.fragment ], [ %.SV68.phi1169, %bb68.fragment ]		; <i32> [#uses=2]
+	%.SV70.phi1184 = phi i32 [ %.SV70.phi1111, %bb13.fragment ], [ %.SV70.phi1168, %bb68.fragment ]		; <i32> [#uses=2]
+	%.SV158.phi1063 = phi i32 [ undef, %bb13.fragment ], [ %168, %bb68.fragment ]		; <i32> [#uses=1]
+	%.SV87.phi1030 = phi i32 [ %47, %bb13.fragment ], [ undef, %bb68.fragment ]		; <i32> [#uses=1]
+	%meshStackVariable413.phi = phi i32 [ %Opq.sa.calc870, %bb13.fragment ], [ %Opq.sa.calc784, %bb68.fragment ]		; <i32> [#uses=1]
+	%Opq.link.SV933.phi = phi i32 [ %Opq.sa.calc870, %bb13.fragment ], [ %Opq.link.mask722, %bb68.fragment ]		; <i32> [#uses=1]
+	%Opq.link.SV936.phi = phi i32 [ %Opq.sa.calc866, %bb13.fragment ], [ %Opq.sa.calc784, %bb68.fragment ]		; <i32> [#uses=1]
+	%Opq.link.mask938 = and i32 %Opq.link.SV936.phi, 4		; <i32> [#uses=1]
+	%Opq.link.mask935 = and i32 %Opq.link.SV933.phi, 0		; <i32> [#uses=1]
+	%Opq.sa.calc931 = sub i32 %Opq.link.mask935, %Opq.link.mask938		; <i32> [#uses=1]
+	%Opq.sa.calc932 = xor i32 %Opq.sa.calc931, -51		; <i32> [#uses=3]
+	%meshCmp415 = icmp eq i32 %meshStackVariable413.phi, 6		; <i1> [#uses=1]
+	br i1 %meshCmp415, label %bb74, label %bb19
+
+meshBB416:		; preds = %bb90.fragment, %bb77
+	%.SV38.phi1201 = phi i64 [ %.SV38.phi1191, %bb90.fragment ], [ %.SV38.phi1098, %bb77 ]		; <i64> [#uses=2]
+	%.SV52.phi1200 = phi i32* [ undef, %bb90.fragment ], [ %.SV52.phi1097, %bb77 ]		; <i32*> [#uses=1]
+	%.SV68.phi1199 = phi i32 [ %.SV68.phi1189, %bb90.fragment ], [ %.SV68.phi1096, %bb77 ]		; <i32> [#uses=2]
+	%.SV70.phi1198 = phi i32 [ %.SV70.phi1188, %bb90.fragment ], [ %.SV70.phi1095, %bb77 ]		; <i32> [#uses=2]
+	%.SV.phi1076 = phi i32 [ %214, %bb90.fragment ], [ undef, %bb77 ]		; <i32> [#uses=1]
+	%meshStackVariable417.phi = phi i32 [ %Opq.sa.calc773, %bb90.fragment ], [ %Opq.sa.calc643, %bb77 ]		; <i32> [#uses=1]
+	%Opq.link.SV973.phi = phi i32 [ %Opq.sa.calc773, %bb90.fragment ], [ %Opq.sa.calc640, %bb77 ]		; <i32> [#uses=1]
+	%Opq.link.mask975 = and i32 %Opq.link.SV973.phi, 10		; <i32> [#uses=1]
+	%Opq.sa.calc972 = xor i32 %Opq.link.mask975, 110		; <i32> [#uses=1]
+	%Opq.sa.calc971 = add i32 %Opq.sa.calc972, -19		; <i32> [#uses=1]
+	%meshCmp419 = icmp eq i32 %meshStackVariable417.phi, 12		; <i1> [#uses=1]
+	br i1 %meshCmp419, label %bb78, label %bb96
+
+meshBB420:		; preds = %bb66, %bb26.fragment
+	%.SV38.phi1194 = phi i64 [ %.SV38.phi1098, %bb66 ], [ %.SV38.phi1167, %bb26.fragment ]		; <i64> [#uses=2]
+	%.SV68.phi1193 = phi i32 [ %.SV68.phi1096, %bb66 ], [ %.SV68.phi1166, %bb26.fragment ]		; <i32> [#uses=2]
+	%.SV70.phi1192 = phi i32 [ %.SV70.phi1095, %bb66 ], [ %.SV70.phi1165, %bb26.fragment ]		; <i32> [#uses=2]
+	%.load61.SV.phi = phi i32* [ %.SV52.phi1097, %bb66 ], [ undef, %bb26.fragment ]		; <i32*> [#uses=1]
+	%.SV270.phi = phi i32 [ %165, %bb66 ], [ undef, %bb26.fragment ]		; <i32> [#uses=1]
+	%.SV272.phi = phi i32* [ %166, %bb66 ], [ undef, %bb26.fragment ]		; <i32*> [#uses=1]
+	%.SV.phi1044 = phi i32 [ undef, %bb66 ], [ %.load123.SV.phi, %bb26.fragment ]		; <i32> [#uses=1]
+	%meshStackVariable421.phi = phi i32 [ %Opq.sa.calc621, %bb66 ], [ %Opq.sa.calc918, %bb26.fragment ]		; <i32> [#uses=1]
+	%Opq.link.SV838.phi = phi i32 [ %Opq.sa.calc602, %bb66 ], [ %Opq.sa.calc918, %bb26.fragment ]		; <i32> [#uses=1]
+	%Opq.link.mask840 = and i32 %Opq.link.SV838.phi, 9		; <i32> [#uses=2]
+	%Opq.sa.calc837 = sub i32 %Opq.link.mask840, -202		; <i32> [#uses=2]
+	%Opq.sa.calc835 = sub i32 %Opq.sa.calc837, %Opq.link.mask840		; <i32> [#uses=1]
+	%Opq.sa.calc836 = xor i32 %Opq.sa.calc835, 176		; <i32> [#uses=0]
+	%meshCmp423 = icmp eq i32 %meshStackVariable421.phi, 9		; <i1> [#uses=1]
+	br i1 %meshCmp423, label %bb96, label %bb66.fragment
+
+meshBB424:		; preds = %bb86.fragment, %bb83
+	%.SV38.phi1197 = phi i64 [ %.SV38.phi1231, %bb86.fragment ], [ %.SV38.phi1098, %bb83 ]		; <i64> [#uses=2]
+	%.SV68.phi1196 = phi i32 [ %.SV68.phi1229, %bb86.fragment ], [ %.SV68.phi1096, %bb83 ]		; <i32> [#uses=2]
+	%.SV70.phi1195 = phi i32 [ %.SV70.phi1228, %bb86.fragment ], [ %.SV70.phi1095, %bb83 ]		; <i32> [#uses=2]
+	%.SV.phi1072 = phi i32 [ %209, %bb86.fragment ], [ undef, %bb83 ]		; <i32> [#uses=1]
+	%meshStackVariable425.phi = phi i32 [ %Opq.sa.calc943, %bb86.fragment ], [ %Opq.sa.calc658, %bb83 ]		; <i32> [#uses=1]
+	%Opq.link.SV951.phi = phi i32 [ %Opq.sa.calc943, %bb86.fragment ], [ %Opq.sa.calc1002, %bb83 ]		; <i32> [#uses=1]
+	%Opq.link.mask953 = and i32 %Opq.link.SV951.phi, 12		; <i32> [#uses=1]
+	%Opq.sa.calc950 = sub i32 %Opq.link.mask953, -208		; <i32> [#uses=0]
+	%meshCmp427 = icmp eq i32 %meshStackVariable425.phi, 4		; <i1> [#uses=1]
+	br i1 %meshCmp427, label %bb97, label %bb96
+
+meshBB428:		; preds = %bb70, %bb4
+	%.SV158.phi1090 = phi i32 [ %.SV158.phi, %bb70 ], [ undef, %bb4 ]		; <i32> [#uses=1]
+	%.SV162.phi1089 = phi i32* [ %.SV162.phi, %bb70 ], [ undef, %bb4 ]		; <i32*> [#uses=1]
+	%.SV164.phi1088 = phi i32 [ %.SV164.phi, %bb70 ], [ undef, %bb4 ]		; <i32> [#uses=1]
+	%.load165.SV.phi = phi i32 [ %.SV164.phi, %bb70 ], [ undef, %bb4 ]		; <i32> [#uses=1]
+	%.SV278.phi = phi %struct.Macroblock* [ %176, %bb70 ], [ undef, %bb4 ]		; <%struct.Macroblock*> [#uses=1]
+	%.SV280.phi = phi i32 [ %177, %bb70 ], [ undef, %bb4 ]		; <i32> [#uses=1]
+	%meshStackVariable429.phi = phi i32 [ %Opq.sa.calc630, %bb70 ], [ %Opq.sa.calc467, %bb4 ]		; <i32> [#uses=1]
+	%Opq.link.SV898.phi = phi i32 [ %Opq.sa.calc630, %bb70 ], [ %Opq.sa.calc462, %bb4 ]		; <i32> [#uses=1]
+	%.SV70.phi1026 = phi i32 [ %.SV70.phi1188, %bb70 ], [ %12, %bb4 ]		; <i32> [#uses=5]
+	%.SV52.phi = phi i32* [ undef, %bb70 ], [ %9, %bb4 ]		; <i32*> [#uses=3]
+	%.SV68.phi1020 = phi i32 [ %.SV68.phi1189, %bb70 ], [ %10, %bb4 ]		; <i32> [#uses=5]
+	%.SV38.phi1014 = phi i64 [ %.SV38.phi1191, %bb70 ], [ %4, %bb4 ]		; <i64> [#uses=5]
+	%.SV43.phi = phi i32 [ undef, %bb70 ], [ %8, %bb4 ]		; <i32> [#uses=1]
+	%Opq.link.mask900 = and i32 %Opq.link.SV898.phi, 4		; <i32> [#uses=1]
+	%Opq.sa.calc897 = xor i32 %Opq.link.mask900, 193		; <i32> [#uses=3]
+	%meshCmp431 = icmp eq i32 %meshStackVariable429.phi, 5		; <i1> [#uses=1]
+	br i1 %meshCmp431, label %bb5, label %bb70.fragment
+
+meshBB432:		; preds = %bb42, %bb23.fragment182
+	%.SV38.phi1179 = phi i64 [ %.SV38.phi1115, %bb23.fragment182 ], [ %.SV38.phi1231, %bb42 ]		; <i64> [#uses=7]
+	%.SV43.phi1178 = phi i32 [ %.SV43.phi1015, %bb23.fragment182 ], [ %.SV43.phi1230, %bb42 ]		; <i32> [#uses=3]
+	%.SV68.phi1177 = phi i32 [ %.SV68.phi1112, %bb23.fragment182 ], [ %.SV68.phi1229, %bb42 ]		; <i32> [#uses=7]
+	%.SV70.phi1176 = phi i32 [ %.SV70.phi1111, %bb23.fragment182 ], [ %.SV70.phi1228, %bb42 ]		; <i32> [#uses=7]
+	%.SV118.phi1040 = phi i32 [ %76, %bb23.fragment182 ], [ %.SV118.phi1125, %bb42 ]		; <i32> [#uses=7]
+	%.SV135.phi1039 = phi i1 [ %78, %bb23.fragment182 ], [ undef, %bb42 ]		; <i1> [#uses=2]
+	%meshStackVariable433.phi = phi i32 [ %Opq.sa.calc744, %bb23.fragment182 ], [ %Opq.sa.calc560, %bb42 ]		; <i32> [#uses=1]
+	%Opq.link.SV799.phi = phi i32 [ %Opq.sa.calc744, %bb23.fragment182 ], [ %Opq.sa.calc987, %bb42 ]		; <i32> [#uses=1]
+	%.SV96.phi1038 = phi i1 [ %71, %bb23.fragment182 ], [ undef, %bb42 ]		; <i1> [#uses=1]
+	%.SV99.phi1037 = phi i32* [ %72, %bb23.fragment182 ], [ undef, %bb42 ]		; <i32*> [#uses=2]
+	%.SV104.phi1036 = phi i32 [ %73, %bb23.fragment182 ], [ %.SV104.phi1127, %bb42 ]		; <i32> [#uses=3]
+	%.SV111.phi1035 = phi i32* [ %74, %bb23.fragment182 ], [ %.SV111.phi1126, %bb42 ]		; <i32*> [#uses=3]
+	%Opq.link.mask801 = and i32 %Opq.link.SV799.phi, 6		; <i32> [#uses=1]
+	%Opq.sa.calc798 = xor i32 %Opq.link.mask801, 3		; <i32> [#uses=5]
+	%meshCmp435 = icmp eq i32 %meshStackVariable433.phi, 1		; <i1> [#uses=1]
+	br i1 %meshCmp435, label %bb43, label %bb39
+
+meshBB436:		; preds = %bb71.fragment, %bb65
+	%.SV38.phi1147 = phi i64 [ %.SV38.phi1144, %bb65 ], [ %.SV38.phi1140, %bb71.fragment ]		; <i64> [#uses=2]
+	%.SV68.phi1146 = phi i32 [ %.SV68.phi1142, %bb65 ], [ %.SV68.phi1139, %bb71.fragment ]		; <i32> [#uses=2]
+	%.SV70.phi1145 = phi i32 [ %.SV70.phi1141, %bb65 ], [ %.SV70.phi1138, %bb71.fragment ]		; <i32> [#uses=2]
+	%.SV.phi1067 = phi i32 [ undef, %bb65 ], [ %.load166.SV.phi, %bb71.fragment ]		; <i32> [#uses=1]
+	%yM.0.SV.phi1066 = phi i32 [ undef, %bb65 ], [ %183, %bb71.fragment ]		; <i32> [#uses=1]
+	%.load62.SV.phi = phi i32* [ %.SV52.phi1143, %bb65 ], [ undef, %bb71.fragment ]		; <i32*> [#uses=1]
+	%.SV268.phi = phi i32 [ %164, %bb65 ], [ undef, %bb71.fragment ]		; <i32> [#uses=2]
+	%meshStackVariable437.phi = phi i32 [ %Opq.sa.calc617, %bb65 ], [ %Opq.sa.calc809, %bb71.fragment ]		; <i32> [#uses=1]
+	%Opq.link.SV704.phi = phi i32 [ %Opq.sa.calc617, %bb65 ], [ %Opq.sa.calc809, %bb71.fragment ]		; <i32> [#uses=1]
+	%Opq.link.mask706 = and i32 %Opq.link.SV704.phi, 0		; <i32> [#uses=2]
+	%Opq.sa.calc703 = add i32 %Opq.link.mask706, 216		; <i32> [#uses=0]
+	%meshCmp439 = icmp eq i32 %meshStackVariable437.phi, 2		; <i1> [#uses=1]
+	br i1 %meshCmp439, label %bb96, label %bb65.fragment
+
+meshBB440:		; preds = %bb85, %bb54.fragment
+	%.SV52.phi1235 = phi i32* [ %.SV52.phi1213, %bb85 ], [ undef, %bb54.fragment ]		; <i32*> [#uses=2]
+	%.SV38.phi1210 = phi i64 [ %.SV38.phi1214, %bb85 ], [ %.SV38.phi1207, %bb54.fragment ]		; <i64> [#uses=2]
+	%.SV68.phi1209 = phi i32 [ %.SV68.phi1212, %bb85 ], [ %.SV68.phi1206, %bb54.fragment ]		; <i32> [#uses=2]
+	%.SV70.phi1208 = phi i32 [ %.SV70.phi1211, %bb85 ], [ %.SV70.phi1205, %bb54.fragment ]		; <i32> [#uses=2]
+	%.SV.phi1056 = phi i32 [ undef, %bb85 ], [ %.SV118.phi1122, %bb54.fragment ]		; <i32> [#uses=1]
+	%meshStackVariable441.phi = phi i32 [ %Opq.sa.calc666, %bb85 ], [ %Opq.sa.calc883, %bb54.fragment ]		; <i32> [#uses=1]
+	%Opq.link.SV991.phi = phi i32 [ %Opq.sa.calc665, %bb85 ], [ %Opq.sa.calc883, %bb54.fragment ]		; <i32> [#uses=1]
+	%Opq.link.mask993 = and i32 %Opq.link.SV991.phi, 6		; <i32> [#uses=1]
+	%Opq.sa.calc990 = xor i32 %Opq.link.mask993, 139		; <i32> [#uses=2]
+	%meshCmp443 = icmp eq i32 %meshStackVariable441.phi, 6		; <i1> [#uses=1]
+	br i1 %meshCmp443, label %bb96, label %bb87
+
+meshBB444:		; preds = %bb66.fragment, %bb40
+	%.SV38.phi1224 = phi i64 [ %.SV38.phi1194, %bb66.fragment ], [ %.SV38.phi1179, %bb40 ]		; <i64> [#uses=2]
+	%.SV68.phi1223 = phi i32 [ %.SV68.phi1193, %bb66.fragment ], [ %.SV68.phi1177, %bb40 ]		; <i32> [#uses=2]
+	%.SV70.phi1222 = phi i32 [ %.SV70.phi1192, %bb66.fragment ], [ %.SV70.phi1176, %bb40 ]		; <i32> [#uses=2]
+	%.SV.phi1048 = phi i32 [ undef, %bb66.fragment ], [ %.SV118.phi1040, %bb40 ]		; <i32> [#uses=1]
+	%meshStackVariable445.phi = phi i32 [ %Opq.sa.calc794, %bb66.fragment ], [ %Opq.sa.calc554, %bb40 ]		; <i32> [#uses=1]
+	%Opq.link.SV781.phi = phi i32 [ %Opq.sa.calc795, %bb66.fragment ], [ %Opq.sa.calc554, %bb40 ]		; <i32> [#uses=1]
+	%Opq.link.mask783 = and i32 %Opq.link.SV781.phi, 10		; <i32> [#uses=1]
+	%Opq.sa.calc780 = add i32 %Opq.link.mask783, 1		; <i32> [#uses=0]
+	%meshCmp447 = icmp eq i32 %meshStackVariable445.phi, 11		; <i1> [#uses=1]
+	br i1 %meshCmp447, label %bb96, label %bb98
+
+meshBB448:		; preds = %bb35, %entry.fragment181
+	%.SV70.phi1233 = phi i32 [ undef, %entry.fragment181 ], [ %.SV70.phi1180, %bb35 ]		; <i32> [#uses=1]
+	%.SV104.phi1157 = phi i32 [ undef, %entry.fragment181 ], [ %.SV104.phi1084, %bb35 ]		; <i32> [#uses=1]
+	%.SV111.phi1156 = phi i32* [ undef, %entry.fragment181 ], [ %.SV111.phi1083, %bb35 ]		; <i32*> [#uses=1]
+	%.SV118.phi1155 = phi i32 [ undef, %entry.fragment181 ], [ %.SV118.phi1082, %bb35 ]		; <i32> [#uses=1]
+	%.SV68.phi1025 = phi i32 [ %10, %entry.fragment181 ], [ %.SV68.phi1181, %bb35 ]		; <i32> [#uses=1]
+	%meshStackVariable449.phi = phi i32 [ %Opq.sa.calc863, %entry.fragment181 ], [ %Opq.sa.calc541, %bb35 ]		; <i32> [#uses=1]
+	%Opq.link.SV959.phi = phi i32 [ %Opq.sa.calc863, %entry.fragment181 ], [ %Opq.sa.calc828, %bb35 ]		; <i32> [#uses=1]
+	%.SV38.phi1019 = phi i64 [ %4, %entry.fragment181 ], [ %.SV38.phi1183, %bb35 ]		; <i64> [#uses=1]
+	%.SV43.phi1018 = phi i32 [ %8, %entry.fragment181 ], [ %.SV43.phi1015, %bb35 ]		; <i32> [#uses=2]
+	%Opq.link.mask961 = and i32 %Opq.link.SV959.phi, 1		; <i32> [#uses=1]
+	%Opq.sa.calc958 = xor i32 %Opq.link.mask961, 63		; <i32> [#uses=3]
+	%Opq.sa.calc957 = xor i32 %Opq.sa.calc958, 126		; <i32> [#uses=1]
+	%meshCmp451 = icmp eq i32 %meshStackVariable449.phi, 5		; <i1> [#uses=1]
+	br i1 %meshCmp451, label %bb37, label %return
+
+meshBB452:		; preds = %bb70.fragment, %bb63
+	%.SV38.phi1110 = phi i64 [ %.SV38.phi1014, %bb70.fragment ], [ %.SV38.phi1098, %bb63 ]		; <i64> [#uses=3]
+	%.SV52.phi1109 = phi i32* [ undef, %bb70.fragment ], [ %.SV52.phi1097, %bb63 ]		; <i32*> [#uses=2]
+	%.SV68.phi1108 = phi i32 [ %.SV68.phi1020, %bb70.fragment ], [ %.SV68.phi1096, %bb63 ]		; <i32> [#uses=3]
+	%.SV70.phi1107 = phi i32 [ %.SV70.phi1026, %bb70.fragment ], [ %.SV70.phi1095, %bb63 ]		; <i32> [#uses=3]
+	%.SV158.phi1106 = phi i32 [ %.SV158.phi1090, %bb70.fragment ], [ undef, %bb63 ]		; <i32> [#uses=1]
+	%.SV162.phi1105 = phi i32* [ %.SV162.phi1089, %bb70.fragment ], [ undef, %bb63 ]		; <i32*> [#uses=1]
+	%.SV164.phi1104 = phi i32 [ %.SV164.phi1088, %bb70.fragment ], [ undef, %bb63 ]		; <i32> [#uses=1]
+	%.SV264.phi = phi %struct.Macroblock* [ undef, %bb70.fragment ], [ %157, %bb63 ]		; <%struct.Macroblock*> [#uses=1]
+	%.SV266.phi = phi i32 [ undef, %bb70.fragment ], [ %158, %bb63 ]		; <i32> [#uses=1]
+	%meshStackVariable453.phi = phi i32 [ %Opq.sa.calc739, %bb70.fragment ], [ %Opq.sa.calc611, %bb63 ]		; <i32> [#uses=1]
+	%Opq.link.SV821.phi = phi i32 [ %Opq.sa.calc897, %bb70.fragment ], [ %Opq.sa.calc611, %bb63 ]		; <i32> [#uses=1]
+	%.SV150.phi1060 = phi i32* [ undef, %bb70.fragment ], [ %148, %bb63 ]		; <i32*> [#uses=1]
+	%.SV152.phi1059 = phi i32* [ undef, %bb70.fragment ], [ %149, %bb63 ]		; <i32*> [#uses=2]
+	%.SV148.phi1057 = phi i32 [ undef, %bb70.fragment ], [ %147, %bb63 ]		; <i32> [#uses=1]
+	%Opq.link.mask823 = and i32 %Opq.link.SV821.phi, 4		; <i32> [#uses=2]
+	%Opq.sa.calc820 = sub i32 %Opq.link.mask823, -97		; <i32> [#uses=2]
+	%meshCmp455 = icmp eq i32 %meshStackVariable453.phi, 6		; <i1> [#uses=1]
+	br i1 %meshCmp455, label %bb63.fragment, label %bb71
+
+meshBB456:		; preds = %bb79, %bb63.fragment
+	%.SV38.phi1137 = phi i64 [ %.SV38.phi1110, %bb63.fragment ], [ %.SV38.phi1098, %bb79 ]		; <i64> [#uses=2]
+	%.SV52.phi1136 = phi i32* [ %.SV52.phi1109, %bb63.fragment ], [ %.SV52.phi1097, %bb79 ]		; <i32*> [#uses=2]
+	%.SV68.phi1135 = phi i32 [ %.SV68.phi1108, %bb63.fragment ], [ %.SV68.phi1096, %bb79 ]		; <i32> [#uses=2]
+	%.SV70.phi1134 = phi i32 [ %.SV70.phi1107, %bb63.fragment ], [ %.SV70.phi1095, %bb79 ]		; <i32> [#uses=2]
+	%.SV152.phi1133 = phi i32* [ %.SV152.phi1059, %bb63.fragment ], [ undef, %bb79 ]		; <i32*> [#uses=1]
+	%meshStackVariable457.phi = phi i32 [ %Opq.sa.calc890, %bb63.fragment ], [ %Opq.sa.calc651, %bb79 ]		; <i32> [#uses=1]
+	%Opq.link.SV817.phi = phi i32 [ %Opq.sa.calc891, %bb63.fragment ], [ %Opq.sa.calc651, %bb79 ]		; <i32> [#uses=1]
+	%Opq.link.mask819 = and i32 %Opq.link.SV817.phi, 2		; <i32> [#uses=1]
+	%Opq.sa.calc816 = add i32 %Opq.link.mask819, 186		; <i32> [#uses=2]
+	%meshCmp459 = icmp eq i32 %meshStackVariable457.phi, 10		; <i1> [#uses=1]
+	br i1 %meshCmp459, label %bb81, label %bb65
+}
diff --git a/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll b/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll
new file mode 100644
index 0000000..d77e528
--- /dev/null
+++ b/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin9
+; PR4056
+
+define void @int163(i32 %p_4, i32 %p_5) nounwind {
+entry:
+	%0 = tail call i32 @bar(i32 1) nounwind		; <i32> [#uses=2]
+	%1 = icmp sgt i32 %0, 7		; <i1> [#uses=1]
+	br i1 %1, label %foo.exit, label %bb.i
+
+bb.i:		; preds = %entry
+	%2 = lshr i32 1, %0		; <i32> [#uses=1]
+	%3 = icmp eq i32 %2, 0		; <i1> [#uses=1]
+	%4 = zext i1 %3 to i32		; <i32> [#uses=1]
+	%.p_5 = shl i32 %p_5, %4		; <i32> [#uses=1]
+	br label %foo.exit
+
+foo.exit:		; preds = %bb.i, %entry
+	%5 = phi i32 [ %.p_5, %bb.i ], [ %p_5, %entry ]		; <i32> [#uses=1]
+	%6 = icmp eq i32 %5, 0		; <i1> [#uses=0]
+	%7 = tail call i32 @bar(i32 %p_5) nounwind		; <i32> [#uses=0]
+	ret void
+}
+
+declare i32 @bar(i32)
diff --git a/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert2.ll b/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert2.ll
new file mode 100644
index 0000000..f025654
--- /dev/null
+++ b/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert2.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin9
+; PR4051
+
+define void @int163(i32 %p_4, i32 %p_5) nounwind {
+entry:
+	%0 = tail call i32 @foo(i32 1) nounwind		; <i32> [#uses=2]
+	%1 = icmp eq i32 %0, 0		; <i1> [#uses=1]
+	br i1 %1, label %bb.i, label %bar.exit
+
+bb.i:		; preds = %entry
+	%2 = lshr i32 1, %0		; <i32> [#uses=1]
+	%3 = icmp eq i32 %2, 0		; <i1> [#uses=1]
+	%retval.i = select i1 %3, i32 1, i32 %p_5		; <i32> [#uses=1]
+	br label %bar.exit
+
+bar.exit:		; preds = %bb.i, %entry
+	%4 = phi i32 [ %retval.i, %bb.i ], [ %p_5, %entry ]		; <i32> [#uses=1]
+	%5 = icmp eq i32 %4, 0		; <i1> [#uses=0]
+	%6 = tail call i32 @foo(i32 %p_5) nounwind		; <i32> [#uses=0]
+	ret void
+}
+
+declare i32 @foo(i32)
diff --git a/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll b/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll
new file mode 100644
index 0000000..0a2fcdb
--- /dev/null
+++ b/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll
@@ -0,0 +1,165 @@
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | grep cmpxchgl | not grep eax
+; PR4076
+
+	type { i8, i8, i8 }		; type %0
+	type { i32, i8** }		; type %1
+	type { %3* }		; type %2
+	type { %4 }		; type %3
+	type { %5 }		; type %4
+	type { %6, i32, %7 }		; type %5
+	type { i8* }		; type %6
+	type { i32, [12 x i8] }		; type %7
+	type { %9 }		; type %8
+	type { %10, %11*, i8 }		; type %9
+	type { %11* }		; type %10
+	type { i32, %6, i8*, %12, %13*, i8, i32, %28, %29, i32, %30, i32, i32, i32, i8*, i8*, i8, i8 }		; type %11
+	type { %13* }		; type %12
+	type { %14, i32, %13*, %21 }		; type %13
+	type { %15, %16 }		; type %14
+	type { i32 (...)** }		; type %15
+	type { %17, i8* (i32)*, void (i8*)*, i8 }		; type %16
+	type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %18 }		; type %17
+	type { %19* }		; type %18
+	type { i32, %20**, i32, %20**, i8** }		; type %19
+	type { i32 (...)**, i32 }		; type %20
+	type { %22, %25*, i8, i8, %17*, %26*, %27*, %27* }		; type %21
+	type { i32 (...)**, i32, i32, i32, i32, i32, %23*, %24, [8 x %24], i32, %24*, %18 }		; type %22
+	type { %23*, void (i32, %22*, i32)*, i32, i32 }		; type %23
+	type { i8*, i32 }		; type %24
+	type { i32 (...)**, %21 }		; type %25
+	type { %20, i32*, i8, i32*, i32*, i16*, i8, [256 x i8], [256 x i8], i8 }		; type %26
+	type { %20 }		; type %27
+	type { void (%9*)*, i32 }		; type %28
+	type { %15* }		; type %29
+	type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8* }		; type %30
+@AtomicOps_Internalx86CPUFeatures = external global %0		; <%0*> [#uses=1]
+internal constant [19 x i8] c"xxxxxxxxxxxxxxxxxx\00"		; <[19 x i8]*>:0 [#uses=1]
+internal constant [47 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\00"		; <[47 x i8]*>:1 [#uses=1]
+
+define i8** @func6(i8 zeroext, i32, i32, %1*) nounwind {
+; <label>:4
+	%5 = alloca i32, align 4		; <i32*> [#uses=2]
+	%6 = alloca i32, align 4		; <i32*> [#uses=2]
+	%7 = alloca %2, align 8		; <%2*> [#uses=3]
+	%8 = alloca %8, align 8		; <%8*> [#uses=2]
+	br label %17
+
+; <label>:9		; preds = %17
+	%10 = getelementptr %1* %3, i32 %19, i32 0		; <i32*> [#uses=1]
+	%11 = load i32* %10, align 4		; <i32> [#uses=1]
+	%12 = icmp eq i32 %11, %2		; <i1> [#uses=1]
+	br i1 %12, label %13, label %16
+
+; <label>:13		; preds = %9
+	%14 = getelementptr %1* %3, i32 %19, i32 1		; <i8***> [#uses=1]
+	%15 = load i8*** %14, align 4		; <i8**> [#uses=1]
+	ret i8** %15
+
+; <label>:16		; preds = %9
+	%indvar.next13 = add i32 %18, 1		; <i32> [#uses=1]
+	br label %17
+
+; <label>:17		; preds = %16, %4
+	%18 = phi i32 [ 0, %4 ], [ %indvar.next13, %16 ]		; <i32> [#uses=2]
+	%19 = add i32 %18, %1		; <i32> [#uses=3]
+	%20 = icmp sgt i32 %19, 3		; <i1> [#uses=1]
+	br i1 %20, label %21, label %9
+
+; <label>:21		; preds = %17
+	call void @func5()
+	%22 = getelementptr %1* %3, i32 0, i32 0		; <i32*> [#uses=1]
+	%23 = load i32* %22, align 4		; <i32> [#uses=1]
+	%24 = icmp eq i32 %23, 0		; <i1> [#uses=1]
+	br i1 %24, label %._crit_edge, label %._crit_edge1
+
+._crit_edge1:		; preds = %._crit_edge1, %21
+	%25 = phi i32 [ 0, %21 ], [ %26, %._crit_edge1 ]		; <i32> [#uses=1]
+	%26 = add i32 %25, 1		; <i32> [#uses=4]
+	%27 = getelementptr %1* %3, i32 %26, i32 0		; <i32*> [#uses=1]
+	%28 = load i32* %27, align 4		; <i32> [#uses=1]
+	%29 = icmp ne i32 %28, 0		; <i1> [#uses=1]
+	%30 = icmp ne i32 %26, 4		; <i1> [#uses=1]
+	%31 = and i1 %29, %30		; <i1> [#uses=1]
+	br i1 %31, label %._crit_edge1, label %._crit_edge
+
+._crit_edge:		; preds = %._crit_edge1, %21
+	%32 = phi i32 [ 0, %21 ], [ %26, %._crit_edge1 ]		; <i32> [#uses=3]
+	%33 = call i8* @pthread_getspecific(i32 0) nounwind		; <i8*> [#uses=2]
+	%34 = icmp ne i8* %33, null		; <i1> [#uses=1]
+	%35 = icmp eq i8 %0, 0		; <i1> [#uses=1]
+	%36 = or i1 %34, %35		; <i1> [#uses=1]
+	br i1 %36, label %._crit_edge4, label %37
+
+; <label>:37		; preds = %._crit_edge
+	%38 = call i8* @func2(i32 2048)		; <i8*> [#uses=4]
+	call void @llvm.memset.i32(i8* %38, i8 0, i32 2048, i32 4)
+	%39 = call i32 @pthread_setspecific(i32 0, i8* %38) nounwind		; <i32> [#uses=2]
+	store i32 %39, i32* %5
+	store i32 0, i32* %6
+	%40 = icmp eq i32 %39, 0		; <i1> [#uses=1]
+	br i1 %40, label %41, label %43
+
+; <label>:41		; preds = %37
+	%42 = getelementptr %2* %7, i32 0, i32 0		; <%3**> [#uses=1]
+	store %3* null, %3** %42, align 8
+	br label %._crit_edge4
+
+; <label>:43		; preds = %37
+	%44 = call %3* @func1(i32* %5, i32* %6, i8* getelementptr ([47 x i8]* @1, i32 0, i32 0))		; <%3*> [#uses=2]
+	%45 = getelementptr %2* %7, i32 0, i32 0		; <%3**> [#uses=1]
+	store %3* %44, %3** %45, align 8
+	%46 = icmp eq %3* %44, null		; <i1> [#uses=1]
+	br i1 %46, label %._crit_edge4, label %47
+
+; <label>:47		; preds = %43
+	call void @func4(%8* %8, i8* getelementptr ([19 x i8]* @0, i32 0, i32 0), i32 165, %2* %7)
+	call void @func3(%8* %8) noreturn
+	unreachable
+
+._crit_edge4:		; preds = %43, %41, %._crit_edge
+	%48 = phi i8* [ %38, %41 ], [ %33, %._crit_edge ], [ %38, %43 ]		; <i8*> [#uses=2]
+	%49 = bitcast i8* %48 to i8**		; <i8**> [#uses=3]
+	%50 = icmp ne i8* %48, null		; <i1> [#uses=1]
+	%51 = icmp slt i32 %32, 4		; <i1> [#uses=1]
+	%52 = and i1 %50, %51		; <i1> [#uses=1]
+	br i1 %52, label %53, label %._crit_edge6
+
+; <label>:53		; preds = %._crit_edge4
+	%54 = getelementptr %1* %3, i32 %32, i32 0		; <i32*> [#uses=1]
+	%55 = call i32 asm sideeffect "lock; cmpxchgl $1,$2", "={ax},q,*m,0,~{dirflag},~{fpsr},~{flags},~{memory}"(i32 %2, i32* %54, i32 0) nounwind		; <i32> [#uses=1]
+	%56 = load i8* getelementptr (%0* @AtomicOps_Internalx86CPUFeatures, i32 0, i32 0), align 8		; <i8> [#uses=1]
+	%57 = icmp eq i8 %56, 0		; <i1> [#uses=1]
+	br i1 %57, label %._crit_edge7, label %58
+
+; <label>:58		; preds = %53
+	call void asm sideeffect "lfence", "~{dirflag},~{fpsr},~{flags},~{memory}"() nounwind
+	br label %._crit_edge7
+
+._crit_edge7:		; preds = %58, %53
+	%59 = icmp eq i32 %55, 0		; <i1> [#uses=1]
+	br i1 %59, label %60, label %._crit_edge6
+
+._crit_edge6:		; preds = %._crit_edge7, %._crit_edge4
+	ret i8** %49
+
+; <label>:60		; preds = %._crit_edge7
+	%61 = getelementptr %1* %3, i32 %32, i32 1		; <i8***> [#uses=1]
+	store i8** %49, i8*** %61, align 4
+	ret i8** %49
+}
+
+declare %3* @func1(i32* nocapture, i32* nocapture, i8*)
+
+declare void @func5()
+
+declare void @func4(%8*, i8*, i32, %2*)
+
+declare void @func3(%8*) noreturn
+
+declare i8* @pthread_getspecific(i32) nounwind
+
+declare i8* @func2(i32)
+
+declare void @llvm.memset.i32(i8* nocapture, i8, i32, i32) nounwind
+
+declare i32 @pthread_setspecific(i32, i8*) nounwind
diff --git a/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll b/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
new file mode 100644
index 0000000..a2fd2e4
--- /dev/null
+++ b/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s | grep {movl.*%ebx, 8(%esi)}
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.0"
+
+define void @cpuid(i32* %data) nounwind {
+entry:
+	%arrayidx = getelementptr i32* %data, i32 1		; <i32*> [#uses=1]
+	%arrayidx2 = getelementptr i32* %data, i32 2		; <i32*> [#uses=1]
+	%arrayidx4 = getelementptr i32* %data, i32 3		; <i32*> [#uses=1]
+	%arrayidx6 = getelementptr i32* %data, i32 4		; <i32*> [#uses=1]
+	%arrayidx8 = getelementptr i32* %data, i32 5		; <i32*> [#uses=1]
+	%tmp9 = load i32* %arrayidx8		; <i32> [#uses=1]
+	%arrayidx11 = getelementptr i32* %data, i32 6		; <i32*> [#uses=1]
+	%tmp12 = load i32* %arrayidx11		; <i32> [#uses=1]
+	%arrayidx14 = getelementptr i32* %data, i32 7		; <i32*> [#uses=1]
+	%tmp15 = load i32* %arrayidx14		; <i32> [#uses=1]
+	%arrayidx17 = getelementptr i32* %data, i32 8		; <i32*> [#uses=1]
+	%tmp18 = load i32* %arrayidx17		; <i32> [#uses=1]
+	%0 = call i32 asm "cpuid", "={ax},=*{bx},=*{cx},=*{dx},{ax},{bx},{cx},{dx},~{dirflag},~{fpsr},~{flags}"(i32* %arrayidx2, i32* %arrayidx4, i32* %arrayidx6, i32 %tmp9, i32 %tmp12, i32 %tmp15, i32 %tmp18) nounwind		; <i32> [#uses=1]
+	store i32 %0, i32* %arrayidx
+	ret void
+}
diff --git a/test/CodeGen/X86/2009-04-29-LinearScanBug.ll b/test/CodeGen/X86/2009-04-29-LinearScanBug.ll
new file mode 100644
index 0000000..6843723
--- /dev/null
+++ b/test/CodeGen/X86/2009-04-29-LinearScanBug.ll
@@ -0,0 +1,215 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10
+; rdar://6837009
+
+	type { %struct.pf_state*, %struct.pf_state*, %struct.pf_state*, i32 }		; type %0
+	type { %2 }		; type %1
+	type { %struct.pf_addr, %struct.pf_addr }		; type %2
+	type { %struct.in6_addr }		; type %3
+	type { [4 x i32] }		; type %4
+	type { %struct.pfi_dynaddr*, [4 x i8] }		; type %5
+	type { %struct.pfi_dynaddr*, %struct.pfi_dynaddr** }		; type %6
+	type { %struct.pfr_ktable*, %struct.pfr_ktable*, %struct.pfr_ktable*, i32 }		; type %7
+	type { %struct.pfr_ktable* }		; type %8
+	type { i8* }		; type %9
+	type { %11 }		; type %10
+	type { i8*, i8*, %struct.radix_node* }		; type %11
+	type { [2 x %struct.pf_rulequeue], %13, %13 }		; type %12
+	type { %struct.pf_rulequeue*, %struct.pf_rule**, i32, i32, i32 }		; type %13
+	type { %struct.pf_anchor*, %struct.pf_anchor*, %struct.pf_anchor*, i32 }		; type %14
+	type { %struct.pfi_kif*, %struct.pfi_kif*, %struct.pfi_kif*, i32 }		; type %15
+	type { %struct.ifnet*, %struct.ifnet** }		; type %16
+	type { %18 }		; type %17
+	type { %struct.pkthdr, %19 }		; type %18
+	type { %struct.m_ext, [176 x i8] }		; type %19
+	type { %struct.ifmultiaddr*, %struct.ifmultiaddr** }		; type %20
+	type { i32, %22 }		; type %21
+	type { i8*, [4 x i8] }		; type %22
+	type { %struct.tcphdr* }		; type %23
+	type { %struct.pf_ike_state }		; type %24
+	type { %struct.pf_state_key*, %struct.pf_state_key*, %struct.pf_state_key*, i32 }		; type %25
+	type { %struct.pf_src_node*, %struct.pf_src_node*, %struct.pf_src_node*, i32 }		; type %26
+	%struct.anon = type { %struct.pf_state*, %struct.pf_state** }
+	%struct.au_mask_t = type { i32, i32 }
+	%struct.bpf_if = type opaque
+	%struct.dlil_threading_info = type opaque
+	%struct.ether_header = type { [6 x i8], [6 x i8], i16 }
+	%struct.ext_refsq = type { %struct.ext_refsq*, %struct.ext_refsq* }
+	%struct.hook_desc = type { %struct.hook_desc_head, void (i8*)*, i8* }
+	%struct.hook_desc_head = type { %struct.hook_desc*, %struct.hook_desc** }
+	%struct.if_data_internal = type { i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i32, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, i32, %struct.au_mask_t, i32, i32, i32 }
+	%struct.ifaddr = type { %struct.sockaddr*, %struct.sockaddr*, %struct.sockaddr*, %struct.ifnet*, %struct.ifaddrhead, void (i32, %struct.rtentry*, %struct.sockaddr*)*, i32, i32, i32, void (%struct.ifaddr*)*, void (%struct.ifaddr*, i32)*, i32 }
+	%struct.ifaddrhead = type { %struct.ifaddr*, %struct.ifaddr** }
+	%struct.ifmultiaddr = type { %20, %struct.sockaddr*, %struct.ifmultiaddr*, %struct.ifnet*, i32, i8*, i32, void (i8*)* }
+	%struct.ifmultihead = type { %struct.ifmultiaddr* }
+	%struct.ifnet = type { i8*, i8*, %16, %struct.ifaddrhead, i32, i32 (%struct.ifnet*, %struct.sockaddr*)*, i32, %struct.bpf_if*, i16, i16, i16, i16, i32, i8*, i32, %struct.if_data_internal, i32, i32 (%struct.ifnet*, %struct.mbuf*)*, i32 (%struct.ifnet*, i32, i8*)*, i32 (%struct.ifnet*, i32, i32 (%struct.ifnet*, %struct.mbuf*)*)*, void (%struct.ifnet*)*, i32 (%struct.ifnet*, %struct.mbuf*, i8*, i32*)*, void (%struct.ifnet*, %struct.kev_msg*)*, i32 (%struct.ifnet*, %struct.mbuf**, %struct.sockaddr*, i8*, i8*)*, i32, %struct.ifnet_filter_head, i32, i8*, i32, %struct.ifmultihead, i32, i32 (%struct.ifnet*, i32, %struct.ifnet_demux_desc*, i32)*, i32 (%struct.ifnet*, i32)*, %struct.proto_hash_entry*, i8*, %struct.dlil_threading_info*, i8*, %struct.ifqueue, [1 x i32], i32, %struct.ifprefixhead, %struct.lck_rw_t*, %21, i32, %struct.thread*, %struct.pfi_kif*, %struct.lck_mtx_t*, %struct.route }
+	%struct.ifnet_demux_desc = type { i32, i8*, i32 }
+	%struct.ifnet_filter = type opaque
+	%struct.ifnet_filter_head = type { %struct.ifnet_filter*, %struct.ifnet_filter** }
+	%struct.ifprefix = type { %struct.sockaddr*, %struct.ifnet*, %struct.ifprefixhead, i8, i8 }
+	%struct.ifprefixhead = type { %struct.ifprefix*, %struct.ifprefix** }
+	%struct.ifqueue = type { i8*, i8*, i32, i32, i32 }
+	%struct.in6_addr = type { %4 }
+	%struct.in_addr = type { i32 }
+	%struct.kev_d_vectors = type { i32, i8* }
+	%struct.kev_msg = type { i32, i32, i32, i32, [5 x %struct.kev_d_vectors] }
+	%struct.lck_mtx_t = type { [3 x i32] }
+	%struct.lck_rw_t = type <{ [3 x i32] }>
+	%struct.m_ext = type { i8*, void (i8*, i32, i8*)*, i32, i8*, %struct.ext_refsq, %struct.au_mask_t* }
+	%struct.m_hdr = type { %struct.mbuf*, %struct.mbuf*, i32, i8*, i16, i16 }
+	%struct.m_tag = type { %struct.packet_tags, i16, i16, i32 }
+	%struct.mbuf = type { %struct.m_hdr, %17 }
+	%struct.packet_tags = type { %struct.m_tag* }
+	%struct.pf_addr = type { %3 }
+	%struct.pf_addr_wrap = type <{ %1, %5, i8, i8, [6 x i8] }>
+	%struct.pf_anchor = type { %14, %14, %struct.pf_anchor*, %struct.pf_anchor_node, [64 x i8], [1024 x i8], %struct.pf_ruleset, i32, i32 }
+	%struct.pf_anchor_node = type { %struct.pf_anchor* }
+	%struct.pf_app_state = type { void (%struct.pf_state*, i32, i32, %struct.pf_pdesc*, %struct.pfi_kif*)*, i32 (%struct.pf_app_state*, %struct.pf_app_state*)*, i32 (%struct.pf_app_state*, %struct.pf_app_state*)*, %24 }
+	%struct.pf_ike_state = type { i64 }
+	%struct.pf_mtag = type { i8*, i32, i32, i16, i8, i8 }
+	%struct.pf_palist = type { %struct.pf_pooladdr*, %struct.pf_pooladdr** }
+	%struct.pf_pdesc = type { %struct.pf_threshold, i64, %23, %struct.pf_addr, %struct.pf_addr, %struct.pf_rule*, %struct.pf_addr*, %struct.pf_addr*, %struct.ether_header*, %struct.mbuf*, i32, %struct.pf_mtag*, i16*, i32, i16, i8, i8, i8, i8 }
+	%struct.pf_pool = type { %struct.pf_palist, [2 x i32], %struct.pf_pooladdr*, [4 x i8], %struct.in6_addr, %struct.pf_addr, i32, [2 x i16], i8, i8, [1 x i32] }
+	%struct.pf_pooladdr = type <{ %struct.pf_addr_wrap, %struct.pf_palist, [2 x i32], [16 x i8], %struct.pfi_kif*, [1 x i32] }>
+	%struct.pf_rule = type <{ %struct.pf_rule_addr, %struct.pf_rule_addr, [8 x %struct.pf_rule_ptr], [64 x i8], [16 x i8], [64 x i8], [64 x i8], [64 x i8], [64 x i8], [32 x i8], %struct.pf_rulequeue, [2 x i32], %struct.pf_pool, i64, [2 x i64], [2 x i64], %struct.pfi_kif*, [4 x i8], %struct.pf_anchor*, [4 x i8], %struct.pfr_ktable*, [4 x i8], i32, i32, [26 x i32], i32, i32, i32, i32, i32, i32, %struct.au_mask_t, i32, i32, i32, i32, i32, i32, i32, i16, i16, i16, i16, i16, [2 x i8], %struct.pf_rule_gid, %struct.pf_rule_gid, i32, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, [2 x i8] }>
+	%struct.pf_rule_addr = type <{ %struct.pf_addr_wrap, %struct.pf_rule_xport, i8, [7 x i8] }>
+	%struct.pf_rule_gid = type { [2 x i32], i8, [3 x i8] }
+	%struct.pf_rule_ptr = type { %struct.pf_rule*, [4 x i8] }
+	%struct.pf_rule_xport = type { i32, [4 x i8] }
+	%struct.pf_rulequeue = type { %struct.pf_rule*, %struct.pf_rule** }
+	%struct.pf_ruleset = type { [5 x %12], %struct.pf_anchor*, i32, i32, i32 }
+	%struct.pf_src_node = type <{ %26, %struct.pf_addr, %struct.pf_addr, %struct.pf_rule_ptr, %struct.pfi_kif*, [2 x i64], [2 x i64], i32, i32, %struct.pf_threshold, i64, i64, i8, i8, [2 x i8] }>
+	%struct.pf_state = type <{ i64, i32, i32, %struct.anon, %struct.anon, %0, %struct.pf_state_peer, %struct.pf_state_peer, %struct.pf_rule_ptr, %struct.pf_rule_ptr, %struct.pf_rule_ptr, %struct.pf_addr, %struct.hook_desc_head, %struct.pf_state_key*, %struct.pfi_kif*, %struct.pfi_kif*, %struct.pf_src_node*, %struct.pf_src_node*, [2 x i64], [2 x i64], i64, i64, i64, i16, i8, i8, i8, i8, [6 x i8] }>
+	%struct.pf_state_host = type { %struct.pf_addr, %struct.in_addr }
+	%struct.pf_state_key = type { %struct.pf_state_host, %struct.pf_state_host, %struct.pf_state_host, i8, i8, i8, i8, %struct.pf_app_state*, %25, %25, %struct.anon, i16 }
+	%struct.pf_state_peer = type { i32, i32, i32, i16, i8, i8, i16, i8, %struct.pf_state_scrub*, [3 x i8] }
+	%struct.pf_state_scrub = type { %struct.au_mask_t, i32, i32, i32, i16, i8, i8, i32 }
+	%struct.pf_threshold = type { i32, i32, i32, i32 }
+	%struct.pfi_dynaddr = type { %6, %struct.pf_addr, %struct.pf_addr, %struct.pf_addr, %struct.pf_addr, %struct.pfr_ktable*, %struct.pfi_kif*, i8*, i32, i32, i32, i8, i8 }
+	%struct.pfi_kif = type { [16 x i8], %15, [2 x [2 x [2 x i64]]], [2 x [2 x [2 x i64]]], i64, i32, i8*, %struct.ifnet*, i32, i32, %6 }
+	%struct.pfr_ktable = type { %struct.pfr_tstats, %7, %8, %struct.radix_node_head*, %struct.radix_node_head*, %struct.pfr_ktable*, %struct.pfr_ktable*, %struct.pf_ruleset*, i64, i32 }
+	%struct.pfr_table = type { [1024 x i8], [32 x i8], i32, i8 }
+	%struct.pfr_tstats = type { %struct.pfr_table, [2 x [3 x i64]], [2 x [3 x i64]], i64, i64, i64, i32, [2 x i32] }
+	%struct.pkthdr = type { i32, %struct.ifnet*, i8*, i32, i32, i32, i16, i16, %struct.packet_tags }
+	%struct.proto_hash_entry = type opaque
+	%struct.radix_mask = type { i16, i8, i8, %struct.radix_mask*, %9, i32 }
+	%struct.radix_node = type { %struct.radix_mask*, %struct.radix_node*, i16, i8, i8, %10 }
+	%struct.radix_node_head = type { %struct.radix_node*, i32, i32, %struct.radix_node* (i8*, i8*, %struct.radix_node_head*, %struct.radix_node*)*, %struct.radix_node* (i8*, i8*, %struct.radix_node_head*, %struct.radix_node*)*, %struct.radix_node* (i8*, i8*, %struct.radix_node_head*)*, %struct.radix_node* (i8*, i8*, %struct.radix_node_head*)*, %struct.radix_node* (i8*, %struct.radix_node_head*)*, %struct.radix_node* (i8*, %struct.radix_node_head*, i32 (%struct.radix_node*, i8*)*, i8*)*, %struct.radix_node* (i8*, i8*, %struct.radix_node_head*)*, %struct.radix_node* (i8*, i8*, %struct.radix_node_head*, i32 (%struct.radix_node*, i8*)*, i8*)*, %struct.radix_node* (i8*, %struct.radix_node_head*)*, i32 (%struct.radix_node_head*, i32 (%struct.radix_node*, i8*)*, i8*)*, i32 (%struct.radix_node_head*, i8*, i8*, i32 (%struct.radix_node*, i8*)*, i8*)*, void (%struct.radix_node*, %struct.radix_node_head*)*, [3 x %struct.radix_node], i32 }
+	%struct.route = type { %struct.rtentry*, i32, %struct.sockaddr }
+	%struct.rt_metrics = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [4 x i32] }
+	%struct.rtentry = type { [2 x %struct.radix_node], %struct.sockaddr*, i32, i32, %struct.ifnet*, %struct.ifaddr*, %struct.sockaddr*, i8*, void (i8*)*, %struct.rt_metrics, %struct.rtentry*, %struct.rtentry*, i32, %struct.lck_mtx_t }
+	%struct.sockaddr = type { i8, i8, [14 x i8] }
+	%struct.tcphdr = type { i16, i16, i32, i32, i8, i8, i16, i16, i16 }
+	%struct.thread = type opaque
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (%struct.pf_state_key*, %struct.pf_state_key*)* @pf_state_compare_ext_gwy to i8*)], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define fastcc i32 @pf_state_compare_ext_gwy(%struct.pf_state_key* nocapture %a, %struct.pf_state_key* nocapture %b) nounwind optsize ssp {
+entry:
+	%0 = zext i8 0 to i32		; <i32> [#uses=2]
+	%1 = load i8* null, align 1		; <i8> [#uses=2]
+	%2 = zext i8 %1 to i32		; <i32> [#uses=1]
+	%3 = sub i32 %0, %2		; <i32> [#uses=1]
+	%4 = icmp eq i8 0, %1		; <i1> [#uses=1]
+	br i1 %4, label %bb1, label %bb79
+
+bb1:		; preds = %entry
+	%5 = load i8* null, align 4		; <i8> [#uses=2]
+	%6 = zext i8 %5 to i32		; <i32> [#uses=2]
+	%7 = getelementptr %struct.pf_state_key* %b, i32 0, i32 3		; <i8*> [#uses=1]
+	%8 = load i8* %7, align 4		; <i8> [#uses=2]
+	%9 = zext i8 %8 to i32		; <i32> [#uses=1]
+	%10 = sub i32 %6, %9		; <i32> [#uses=1]
+	%11 = icmp eq i8 %5, %8		; <i1> [#uses=1]
+	br i1 %11, label %bb3, label %bb79
+
+bb3:		; preds = %bb1
+	switch i32 %0, label %bb23 [
+		i32 1, label %bb4
+		i32 6, label %bb6
+		i32 17, label %bb10
+		i32 47, label %bb17
+		i32 50, label %bb21
+		i32 58, label %bb4
+	]
+
+bb4:		; preds = %bb3, %bb3
+	%12 = load i16* null, align 4		; <i16> [#uses=1]
+	%13 = zext i16 %12 to i32		; <i32> [#uses=1]
+	%14 = sub i32 0, %13		; <i32> [#uses=1]
+	br i1 false, label %bb23, label %bb79
+
+bb6:		; preds = %bb3
+	%15 = load i16* null, align 4		; <i16> [#uses=1]
+	%16 = zext i16 %15 to i32		; <i32> [#uses=1]
+	%17 = sub i32 0, %16		; <i32> [#uses=1]
+	ret i32 %17
+
+bb10:		; preds = %bb3
+	%18 = load i8* null, align 1		; <i8> [#uses=2]
+	%19 = zext i8 %18 to i32		; <i32> [#uses=1]
+	%20 = sub i32 0, %19		; <i32> [#uses=1]
+	%21 = icmp eq i8 0, %18		; <i1> [#uses=1]
+	br i1 %21, label %bb12, label %bb79
+
+bb12:		; preds = %bb10
+	%22 = load i16* null, align 4		; <i16> [#uses=1]
+	%23 = zext i16 %22 to i32		; <i32> [#uses=1]
+	%24 = sub i32 0, %23		; <i32> [#uses=1]
+	ret i32 %24
+
+bb17:		; preds = %bb3
+	%25 = load i8* null, align 1		; <i8> [#uses=2]
+	%26 = icmp eq i8 %25, 1		; <i1> [#uses=1]
+	br i1 %26, label %bb18, label %bb23
+
+bb18:		; preds = %bb17
+	%27 = icmp eq i8 %25, 0		; <i1> [#uses=1]
+	br i1 %27, label %bb19, label %bb23
+
+bb19:		; preds = %bb18
+	%28 = load i16* null, align 4		; <i16> [#uses=1]
+	%29 = zext i16 %28 to i32		; <i32> [#uses=1]
+	%30 = sub i32 0, %29		; <i32> [#uses=1]
+	br i1 false, label %bb23, label %bb79
+
+bb21:		; preds = %bb3
+	%31 = getelementptr %struct.pf_state_key* %a, i32 0, i32 1, i32 1, i32 0		; <i32*> [#uses=1]
+	%32 = load i32* %31, align 4		; <i32> [#uses=2]
+	%33 = getelementptr %struct.pf_state_key* %b, i32 0, i32 1, i32 1, i32 0		; <i32*> [#uses=1]
+	%34 = load i32* %33, align 4		; <i32> [#uses=2]
+	%35 = sub i32 %32, %34		; <i32> [#uses=1]
+	%36 = icmp eq i32 %32, %34		; <i1> [#uses=1]
+	br i1 %36, label %bb23, label %bb79
+
+bb23:		; preds = %bb21, %bb19, %bb18, %bb17, %bb4, %bb3
+	%cond = icmp eq i32 %6, 2		; <i1> [#uses=1]
+	br i1 %cond, label %bb24, label %bb70
+
+bb24:		; preds = %bb23
+	ret i32 1
+
+bb70:		; preds = %bb23
+	%37 = load i32 (%struct.pf_app_state*, %struct.pf_app_state*)** null, align 4		; <i32 (%struct.pf_app_state*, %struct.pf_app_state*)*> [#uses=3]
+	br i1 false, label %bb78, label %bb73
+
+bb73:		; preds = %bb70
+	%38 = load i32 (%struct.pf_app_state*, %struct.pf_app_state*)** null, align 4		; <i32 (%struct.pf_app_state*, %struct.pf_app_state*)*> [#uses=2]
+	%39 = icmp eq i32 (%struct.pf_app_state*, %struct.pf_app_state*)* %38, null		; <i1> [#uses=1]
+	br i1 %39, label %bb78, label %bb74
+
+bb74:		; preds = %bb73
+	%40 = ptrtoint i32 (%struct.pf_app_state*, %struct.pf_app_state*)* %37 to i32		; <i32> [#uses=1]
+	%41 = sub i32 0, %40		; <i32> [#uses=1]
+	%42 = icmp eq i32 (%struct.pf_app_state*, %struct.pf_app_state*)* %38, %37		; <i1> [#uses=1]
+	br i1 %42, label %bb76, label %bb79
+
+bb76:		; preds = %bb74
+	%43 = tail call i32 %37(%struct.pf_app_state* null, %struct.pf_app_state* null) nounwind		; <i32> [#uses=1]
+	ret i32 %43
+
+bb78:		; preds = %bb73, %bb70
+	ret i32 0
+
+bb79:		; preds = %bb74, %bb21, %bb19, %bb10, %bb4, %bb1, %entry
+	%.0 = phi i32 [ %3, %entry ], [ %10, %bb1 ], [ %14, %bb4 ], [ %20, %bb10 ], [ %30, %bb19 ], [ %35, %bb21 ], [ %41, %bb74 ]		; <i32> [#uses=1]
+	ret i32 %.0
+}
diff --git a/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll b/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll
new file mode 100644
index 0000000..d1f9cf8
--- /dev/null
+++ b/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll
@@ -0,0 +1,117 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -disable-fp-elim -relocation-model=pic
+; PR4099
+
+	type { [62 x %struct.Bitvec*] }		; type %0
+	type { i8* }		; type %1
+	type { double }		; type %2
+	%struct..5sPragmaType = type { i8*, i32 }
+	%struct.AggInfo = type { i8, i8, i32, %struct.ExprList*, i32, %struct.AggInfo_col*, i32, i32, i32, %struct.AggInfo_func*, i32, i32 }
+	%struct.AggInfo_col = type { %struct.Table*, i32, i32, i32, i32, %struct.Expr* }
+	%struct.AggInfo_func = type { %struct.Expr*, %struct.FuncDef*, i32, i32 }
+	%struct.AuxData = type { i8*, void (i8*)* }
+	%struct.Bitvec = type { i32, i32, i32, %0 }
+	%struct.BtCursor = type { %struct.Btree*, %struct.BtShared*, %struct.BtCursor*, %struct.BtCursor*, i32 (i8*, i32, i8*, i32, i8*)*, i8*, i32, %struct.MemPage*, i32, %struct.CellInfo, i8, i8, i8*, i64, i32, i8, i32* }
+	%struct.BtLock = type { %struct.Btree*, i32, i8, %struct.BtLock* }
+	%struct.BtShared = type { %struct.Pager*, %struct.sqlite3*, %struct.BtCursor*, %struct.MemPage*, i8, i8, i8, i8, i8, i8, i8, i8, i32, i16, i16, i32, i32, i32, i32, i8, i32, i8*, void (i8*)*, %struct.sqlite3_mutex*, %struct.BusyHandler, i32, %struct.BtShared*, %struct.BtLock*, %struct.Btree* }
+	%struct.Btree = type { %struct.sqlite3*, %struct.BtShared*, i8, i8, i8, i32, %struct.Btree*, %struct.Btree* }
+	%struct.BtreeMutexArray = type { i32, [11 x %struct.Btree*] }
+	%struct.BusyHandler = type { i32 (i8*, i32)*, i8*, i32 }
+	%struct.CellInfo = type { i8*, i64, i32, i32, i16, i16, i16, i16 }
+	%struct.CollSeq = type { i8*, i8, i8, i8*, i32 (i8*, i32, i8*, i32, i8*)*, void (i8*)* }
+	%struct.Column = type { i8*, %struct.Expr*, i8*, i8*, i8, i8, i8, i8 }
+	%struct.Context = type { i64, i32, %struct.Fifo }
+	%struct.CountCtx = type { i64 }
+	%struct.Cursor = type { %struct.BtCursor*, i32, i64, i64, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i64, %struct.Btree*, i32, i8*, i64, i8*, %struct.KeyInfo*, i32, i64, %struct.sqlite3_vtab_cursor*, %struct.sqlite3_module*, i32, i32, i32*, i32*, i8* }
+	%struct.Db = type { i8*, %struct.Btree*, i8, i8, i8*, void (i8*)*, %struct.Schema* }
+	%struct.DbPage = type { %struct.Pager*, i32, %struct.DbPage*, %struct.DbPage*, %struct.PagerLruLink, %struct.DbPage*, i8, i8, i8, i8, i8, i16, %struct.DbPage*, %struct.DbPage*, i8* }
+	%struct.Expr = type { i8, i8, i16, %struct.CollSeq*, %struct.Expr*, %struct.Expr*, %struct.ExprList*, %struct..5sPragmaType, %struct..5sPragmaType, i32, i32, %struct.AggInfo*, i32, i32, %struct.Select*, %struct.Table*, i32 }
+	%struct.ExprList = type { i32, i32, i32, %struct.ExprList_item* }
+	%struct.ExprList_item = type { %struct.Expr*, i8*, i8, i8, i8 }
+	%struct.FKey = type { %struct.Table*, %struct.FKey*, i8*, %struct.FKey*, i32, %struct.sColMap*, i8, i8, i8, i8 }
+	%struct.Fifo = type { i32, %struct.FifoPage*, %struct.FifoPage* }
+	%struct.FifoPage = type { i32, i32, i32, %struct.FifoPage*, [1 x i64] }
+	%struct.FuncDef = type { i16, i8, i8, i8, i8*, %struct.FuncDef*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*)*, [1 x i8] }
+	%struct.Hash = type { i8, i8, i32, i32, %struct.HashElem*, %struct._ht* }
+	%struct.HashElem = type { %struct.HashElem*, %struct.HashElem*, i8*, i8*, i32 }
+	%struct.IdList = type { %struct..5sPragmaType*, i32, i32 }
+	%struct.Index = type { i8*, i32, i32*, i32*, %struct.Table*, i32, i8, i8, i8*, %struct.Index*, %struct.Schema*, i8*, i8** }
+	%struct.KeyInfo = type { %struct.sqlite3*, i8, i8, i8, i32, i8*, [1 x %struct.CollSeq*] }
+	%struct.Mem = type { %struct.CountCtx, double, %struct.sqlite3*, i8*, i32, i16, i8, i8, void (i8*)* }
+	%struct.MemPage = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i16, i16, i16, i16, i16, i16, [5 x %struct._OvflCell], %struct.BtShared*, i8*, %struct.DbPage*, i32, %struct.MemPage* }
+	%struct.Module = type { %struct.sqlite3_module*, i8*, i8*, void (i8*)* }
+	%struct.Op = type { i8, i8, i8, i8, i32, i32, i32, %1 }
+	%struct.Pager = type { %struct.sqlite3_vfs*, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.Bitvec*, %struct.Bitvec*, i8*, i8*, i8*, i8*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.BusyHandler*, %struct.PagerLruList, %struct.DbPage*, %struct.DbPage*, %struct.DbPage*, i64, i64, i64, i64, i64, i32, void (%struct.DbPage*, i32)*, void (%struct.DbPage*, i32)*, i32, %struct.DbPage**, i8*, [16 x i8] }
+	%struct.PagerLruLink = type { %struct.DbPage*, %struct.DbPage* }
+	%struct.PagerLruList = type { %struct.DbPage*, %struct.DbPage*, %struct.DbPage* }
+	%struct.Schema = type { i32, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Table*, i8, i8, i16, i32, %struct.sqlite3* }
+	%struct.Select = type { %struct.ExprList*, i8, i8, i8, i8, i8, i8, i8, %struct.SrcList*, %struct.Expr*, %struct.ExprList*, %struct.Expr*, %struct.ExprList*, %struct.Select*, %struct.Select*, %struct.Select*, %struct.Expr*, %struct.Expr*, i32, i32, [3 x i32] }
+	%struct.SrcList = type { i16, i16, [1 x %struct.SrcList_item] }
+	%struct.SrcList_item = type { i8*, i8*, i8*, %struct.Table*, %struct.Select*, i8, i8, i32, %struct.Expr*, %struct.IdList*, i64 }
+	%struct.Table = type { i8*, i32, %struct.Column*, i32, %struct.Index*, i32, %struct.Select*, i32, %struct.Trigger*, %struct.FKey*, i8*, %struct.Expr*, i32, i8, i8, i8, i8, i8, i8, i8, %struct.Module*, %struct.sqlite3_vtab*, i32, i8**, %struct.Schema* }
+	%struct.Trigger = type { i8*, i8*, i8, i8, %struct.Expr*, %struct.IdList*, %struct..5sPragmaType, %struct.Schema*, %struct.Schema*, %struct.TriggerStep*, %struct.Trigger* }
+	%struct.TriggerStep = type { i32, i32, %struct.Trigger*, %struct.Select*, %struct..5sPragmaType, %struct.Expr*, %struct.ExprList*, %struct.IdList*, %struct.TriggerStep*, %struct.TriggerStep* }
+	%struct.Vdbe = type { %struct.sqlite3*, %struct.Vdbe*, %struct.Vdbe*, i32, i32, %struct.Op*, i32, i32, i32*, %struct.Mem**, %struct.Mem*, i32, %struct.Cursor**, i32, %struct.Mem*, i8**, i32, i32, i32, %struct.Mem*, i32, i32, %struct.Fifo, i32, i32, %struct.Context*, i32, i32, i32, i32, i32, [25 x i32], i32, i32, i8**, i8*, %struct.Mem*, i8, i8, i8, i8, i8, i8, i32, i64, i32, %struct.BtreeMutexArray, i32, i8*, i32 }
+	%struct.VdbeFunc = type { %struct.FuncDef*, i32, [1 x %struct.AuxData] }
+	%struct._OvflCell = type { i8*, i16 }
+	%struct._ht = type { i32, %struct.HashElem* }
+	%struct.sColMap = type { i32, i8* }
+	%struct.sqlite3 = type { %struct.sqlite3_vfs*, i32, %struct.Db*, i32, i32, i32, i32, i8, i8, i8, i8, i32, %struct.CollSeq*, i64, i64, i32, i32, i32, %struct.sqlite3_mutex*, %struct.sqlite3InitInfo, i32, i8**, %struct.Vdbe*, i32, void (i8*, i8*)*, i8*, void (i8*, i8*, i64)*, i8*, i8*, i32 (i8*)*, i8*, void (i8*)*, i8*, void (i8*, i32, i8*, i8*, i64)*, void (i8*, %struct.sqlite3*, i32, i8*)*, void (i8*, %struct.sqlite3*, i32, i8*)*, i8*, %struct.Mem*, i8*, i8*, %2, i32 (i8*, i32, i8*, i8*, i8*, i8*)*, i8*, i32 (i8*)*, i8*, i32, %struct.Hash, %struct.Table*, %struct.sqlite3_vtab**, i32, %struct.Hash, %struct.Hash, %struct.BusyHandler, i32, [2 x %struct.Db], i8 }
+	%struct.sqlite3InitInfo = type { i32, i32, i8 }
+	%struct.sqlite3_context = type { %struct.FuncDef*, %struct.VdbeFunc*, %struct.Mem, %struct.Mem*, i32, %struct.CollSeq* }
+	%struct.sqlite3_file = type { %struct.sqlite3_io_methods* }
+	%struct.sqlite3_index_constraint = type { i32, i8, i8, i32 }
+	%struct.sqlite3_index_constraint_usage = type { i32, i8 }
+	%struct.sqlite3_index_info = type { i32, %struct.sqlite3_index_constraint*, i32, %struct.sqlite3_index_constraint_usage*, %struct.sqlite3_index_constraint_usage*, i32, i8*, i32, i32, double }
+	%struct.sqlite3_io_methods = type { i32, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i64)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i64*)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i32, i8*)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*)* }
+	%struct.sqlite3_module = type { i32, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_index_info*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_vtab_cursor**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, i32, i8*, i32, %struct.Mem**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, %struct.sqlite3_context*, i32)*, i32 (%struct.sqlite3_vtab_cursor*, i64*)*, i32 (%struct.sqlite3_vtab*, i32, %struct.Mem**, i64*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, i32, i8*, void (%struct.sqlite3_context*, i32, %struct.Mem**)**, i8**)*, i32 (%struct.sqlite3_vtab*, i8*)* }
+	%struct.sqlite3_mutex = type opaque
+	%struct.sqlite3_vfs = type { i32, i32, i32, %struct.sqlite3_vfs*, i8*, i8*, i32 (%struct.sqlite3_vfs*, i8*, %struct.sqlite3_file*, i32, i32*)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i8*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*)*, void (%struct.sqlite3_vfs*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*, i8*)*, void (%struct.sqlite3_vfs*, i8*)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i32)*, i32 (%struct.sqlite3_vfs*, double*)* }
+	%struct.sqlite3_vtab = type { %struct.sqlite3_module*, i32, i8* }
+	%struct.sqlite3_vtab_cursor = type { %struct.sqlite3_vtab* }
+
+define fastcc void @dropCell(%struct.MemPage* nocapture %pPage, i32 %idx, i32 %sz) nounwind ssp {
+entry:
+	%0 = load i8** null, align 8		; <i8*> [#uses=4]
+	%1 = or i32 0, 0		; <i32> [#uses=1]
+	%2 = icmp slt i32 %sz, 4		; <i1> [#uses=1]
+	%size_addr.0.i = select i1 %2, i32 4, i32 %sz		; <i32> [#uses=1]
+	br label %bb3.i
+
+bb3.i:		; preds = %bb3.i, %entry
+	%3 = icmp eq i32 0, 0		; <i1> [#uses=1]
+	%or.cond.i = or i1 %3, false		; <i1> [#uses=1]
+	br i1 %or.cond.i, label %bb5.i, label %bb3.i
+
+bb5.i:		; preds = %bb3.i
+	%4 = getelementptr i8* %0, i64 0		; <i8*> [#uses=1]
+	store i8 0, i8* %4, align 1
+	%5 = getelementptr i8* %0, i64 0		; <i8*> [#uses=1]
+	store i8 0, i8* %5, align 1
+	%6 = add i32 %1, 2		; <i32> [#uses=1]
+	%7 = zext i32 %6 to i64		; <i64> [#uses=2]
+	%8 = getelementptr i8* %0, i64 %7		; <i8*> [#uses=1]
+	%9 = lshr i32 %size_addr.0.i, 8		; <i32> [#uses=1]
+	%10 = trunc i32 %9 to i8		; <i8> [#uses=1]
+	store i8 %10, i8* %8, align 1
+	%.sum31.i = add i64 %7, 1		; <i64> [#uses=1]
+	%11 = getelementptr i8* %0, i64 %.sum31.i		; <i8*> [#uses=1]
+	store i8 0, i8* %11, align 1
+	br label %bb11.outer.i
+
+bb11.outer.i:		; preds = %bb11.outer.i, %bb5.i
+	%12 = icmp eq i32 0, 0		; <i1> [#uses=1]
+	br i1 %12, label %bb12.i, label %bb11.outer.i
+
+bb12.i:		; preds = %bb11.outer.i
+	%i.08 = add i32 %idx, 1		; <i32> [#uses=1]
+	%13 = icmp sgt i32 0, %i.08		; <i1> [#uses=1]
+	br i1 %13, label %bb, label %bb2
+
+bb:		; preds = %bb12.i
+	br label %bb2
+
+bb2:		; preds = %bb, %bb12.i
+	%14 = getelementptr %struct.MemPage* %pPage, i64 0, i32 1		; <i8*> [#uses=1]
+	store i8 1, i8* %14, align 1
+	ret void
+}
diff --git a/test/CodeGen/X86/2009-04-scale.ll b/test/CodeGen/X86/2009-04-scale.ll
new file mode 100644
index 0000000..e4c756c
--- /dev/null
+++ b/test/CodeGen/X86/2009-04-scale.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-unknown-linux-gnu
+; PR3995
+
+        %struct.vtable = type { i32 (...)** }
+	%struct.array = type { %struct.impl, [256 x %struct.pair], [256 x %struct.pair], [256 x %struct.pair], [256 x %struct.pair], [256 x %struct.pair], [256 x %struct.pair] }
+	%struct.impl = type { %struct.vtable, i8, %struct.impl*, i32, i32, i64, i64 }
+	%struct.pair = type { i64, i64 }
+
+define void @test() {
+entry:
+	%0 = load i32* null, align 4		; <i32> [#uses=1]
+	%1 = lshr i32 %0, 8		; <i32> [#uses=1]
+	%2 = and i32 %1, 255		; <i32> [#uses=1]
+	%3 = getelementptr %struct.array* null, i32 0, i32 3		; <[256 x %struct.pair]*> [#uses=1]
+	%4 = getelementptr [256 x %struct.pair]* %3, i32 0, i32 %2		; <%struct.pair*> [#uses=1]
+	%5 = getelementptr %struct.pair* %4, i32 0, i32 1		; <i64*> [#uses=1]
+	%6 = load i64* %5, align 4		; <i64> [#uses=1]
+	%7 = xor i64 0, %6		; <i64> [#uses=1]
+	%8 = xor i64 %7, 0		; <i64> [#uses=1]
+	%9 = xor i64 %8, 0		; <i64> [#uses=1]
+	store i64 %9, i64* null, align 8
+	unreachable
+}
diff --git a/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll b/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll
new file mode 100644
index 0000000..738b5fb
--- /dev/null
+++ b/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -relocation-model=static > %t
+; RUN: grep "1: ._pv_cpu_ops+8" %t
+; RUN: grep "2: ._G" %t
+; PR4152
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.6"
+	%struct.pv_cpu_ops = type { i32, [2 x i32] }
+@pv_cpu_ops = external global %struct.pv_cpu_ops		; <%struct.pv_cpu_ops*> [#uses=1]
+@G = external global i32		; <i32*> [#uses=1]
+
+define void @x() nounwind {
+entry:
+	tail call void asm sideeffect "1: $0", "i,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr (%struct.pv_cpu_ops* @pv_cpu_ops, i32 0, i32 1, i32 1)) nounwind
+	tail call void asm sideeffect "2: $0", "i,~{dirflag},~{fpsr},~{flags}"(i32* @G) nounwind
+	ret void
+}
diff --git a/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll b/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
new file mode 100644
index 0000000..a5e28c0
--- /dev/null
+++ b/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=x86
+; PR4188
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.6"
+@g_9 = external global i32		; <i32*> [#uses=1]
+
+define i32 @int86(i32 %p_87) nounwind {
+entry:
+	%0 = trunc i32 %p_87 to i8		; <i8> [#uses=1]
+	%1 = icmp ne i8 %0, 0		; <i1> [#uses=1]
+	br label %bb
+
+bb:		; preds = %bb.i, %bb, %entry
+	%2 = volatile load i32* @g_9, align 4		; <i32> [#uses=2]
+	%3 = icmp sgt i32 %2, 1		; <i1> [#uses=1]
+	%4 = and i1 %3, %1		; <i1> [#uses=1]
+	br i1 %4, label %bb.i, label %bb
+
+bb.i:		; preds = %bb
+	%5 = icmp sgt i32 0, %2		; <i1> [#uses=0]
+	br label %bb
+}
diff --git a/test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll b/test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll
new file mode 100644
index 0000000..6e062fb
--- /dev/null
+++ b/test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=x86-64
+; PR3886
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
+entry:
+        %a = call <1 x i64> @bar()
+        %tmp5.i = extractelement <1 x i64> %a, i32 0
+        %tmp11 = bitcast i64 %tmp5.i to <1 x i64>
+        %tmp8 = extractelement <1 x i64> %tmp11, i32 0
+        %call6 = call i32 (i64)* @foo(i64 %tmp8)
+        ret i32 undef
+}
+
+declare i32 @foo(i64)
+
+declare <1 x i64> @bar()
diff --git a/test/CodeGen/X86/2009-05-23-available_externally.ll b/test/CodeGen/X86/2009-05-23-available_externally.ll
new file mode 100644
index 0000000..94773d9
--- /dev/null
+++ b/test/CodeGen/X86/2009-05-23-available_externally.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -relocation-model=pic | grep atoi | grep PLT
+; PR4253
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @foo(i8* %x) nounwind readonly {
+entry:
+	%call = tail call fastcc i32 @atoi(i8* %x) nounwind readonly		; <i32> [#uses=1]
+	ret i32 %call
+}
+
+define available_externally fastcc i32 @atoi(i8* %__nptr) nounwind readonly {
+entry:
+	%call = tail call i64 @strtol(i8* nocapture %__nptr, i8** null, i32 10) nounwind readonly		; <i64> [#uses=1]
+	%conv = trunc i64 %call to i32		; <i32> [#uses=1]
+	ret i32 %conv
+}
+
+declare i64 @strtol(i8*, i8** nocapture, i32) nounwind
diff --git a/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll b/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
new file mode 100644
index 0000000..3cd5416
--- /dev/null
+++ b/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s | FileCheck %s
+
+; Check that the shr(shl X, 56), 48) is not mistakenly turned into
+; a shr (X, -8) that gets subsequently "optimized away" as undef
+; PR4254
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i64 @foo(i64 %b) nounwind readnone {
+entry:
+; CHECK: foo:
+; CHECK: shlq $56, %rdi
+; CHECK: sarq $48, %rdi
+; CHECK: leaq 1(%rdi), %rax
+	%shl = shl i64 %b, 56		; <i64> [#uses=1]
+	%shr = ashr i64 %shl, 48		; <i64> [#uses=1]
+	%add5 = or i64 %shr, 1		; <i64> [#uses=1]
+	ret i64 %add5
+}
diff --git a/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll b/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll
new file mode 100644
index 0000000..2fd42f4
--- /dev/null
+++ b/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -march=x86-64
+
+	%struct.tempsym_t = type { i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32 }
+
+define fastcc signext i8 @S_next_symbol(%struct.tempsym_t* %symptr) nounwind ssp {
+entry:
+	br label %bb116
+
+bb:		; preds = %bb116
+	switch i8 undef, label %bb14 [
+		i8 9, label %bb116
+		i8 32, label %bb116
+		i8 10, label %bb116
+		i8 13, label %bb116
+		i8 12, label %bb116
+	]
+
+bb14:		; preds = %bb
+	br i1 undef, label %bb75, label %bb115
+
+bb75:		; preds = %bb14
+	%srcval16 = load i448* null, align 8		; <i448> [#uses=1]
+	%tmp = zext i32 undef to i448		; <i448> [#uses=1]
+	%tmp15 = shl i448 %tmp, 288		; <i448> [#uses=1]
+	%mask = and i448 %srcval16, -2135987035423586845985235064014169866455883682256196619149693890381755748887481053010428711403521		; <i448> [#uses=1]
+	%ins = or i448 %tmp15, %mask		; <i448> [#uses=1]
+	store i448 %ins, i448* null, align 8
+	ret i8 1
+
+bb115:		; preds = %bb14
+	ret i8 1
+
+bb116:		; preds = %bb, %bb, %bb, %bb, %bb, %entry
+	br i1 undef, label %bb, label %bb117
+
+bb117:		; preds = %bb116
+	ret i8 0
+}
diff --git a/test/CodeGen/X86/2009-05-30-ISelBug.ll b/test/CodeGen/X86/2009-05-30-ISelBug.ll
new file mode 100644
index 0000000..af552d4
--- /dev/null
+++ b/test/CodeGen/X86/2009-05-30-ISelBug.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=x86-64 | not grep {movzbl	%\[abcd\]h,}
+
+define void @BZ2_bzDecompress_bb5_2E_outer_bb35_2E_i_bb54_2E_i(i32*, i32 %c_nblock_used.2.i, i32 %.reload51, i32* %.out, i32* %.out1, i32* %.out2, i32* %.out3) nounwind {
+newFuncRoot:
+	br label %bb54.i
+
+bb35.i.backedge.exitStub:		; preds = %bb54.i
+	store i32 %6, i32* %.out
+	store i32 %10, i32* %.out1
+	store i32 %11, i32* %.out2
+	store i32 %12, i32* %.out3
+	ret void
+
+bb54.i:		; preds = %newFuncRoot
+	%1 = zext i32 %.reload51 to i64		; <i64> [#uses=1]
+	%2 = getelementptr i32* %0, i64 %1		; <i32*> [#uses=1]
+	%3 = load i32* %2, align 4		; <i32> [#uses=2]
+	%4 = lshr i32 %3, 8		; <i32> [#uses=1]
+	%5 = and i32 %3, 255		; <i32> [#uses=1]
+	%6 = add i32 %5, 4		; <i32> [#uses=1]
+	%7 = zext i32 %4 to i64		; <i64> [#uses=1]
+	%8 = getelementptr i32* %0, i64 %7		; <i32*> [#uses=1]
+	%9 = load i32* %8, align 4		; <i32> [#uses=2]
+	%10 = and i32 %9, 255		; <i32> [#uses=1]
+	%11 = lshr i32 %9, 8		; <i32> [#uses=1]
+	%12 = add i32 %c_nblock_used.2.i, 5		; <i32> [#uses=1]
+	br label %bb35.i.backedge.exitStub
+}
diff --git a/test/CodeGen/X86/2009-06-02-RewriterBug.ll b/test/CodeGen/X86/2009-06-02-RewriterBug.ll
new file mode 100644
index 0000000..779f985
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-02-RewriterBug.ll
@@ -0,0 +1,362 @@
+; RUN: llc < %s -mtriple=x86_64-undermydesk-freebsd8.0 -relocation-model=pic -disable-fp-elim
+; PR4225
+
+define void @sha256_block1(i32* nocapture %arr, i8* nocapture %in, i64 %num) nounwind {
+entry:
+	br i1 undef, label %while.end, label %bb.nph
+
+bb.nph:		; preds = %entry
+	br label %while.body
+
+while.body:		; preds = %for.end, %bb.nph
+	%indvar2787 = phi i64 [ 0, %bb.nph ], [ %indvar.next2788, %for.end ]		; <i64> [#uses=2]
+	%tmp2791 = mul i64 %indvar2787, 44		; <i64> [#uses=0]
+	%ctg22996 = getelementptr i8* %in, i64 0		; <i8*> [#uses=1]
+	%conv = zext i32 undef to i64		; <i64> [#uses=1]
+	%conv11 = zext i32 undef to i64		; <i64> [#uses=1]
+	%tmp18 = load i32* undef		; <i32> [#uses=1]
+	%conv19 = zext i32 %tmp18 to i64		; <i64> [#uses=1]
+	%tmp30 = load i32* undef		; <i32> [#uses=1]
+	%conv31 = zext i32 %tmp30 to i64		; <i64> [#uses=4]
+	%ptrincdec3065 = load i8* null		; <i8> [#uses=1]
+	%conv442709 = zext i8 %ptrincdec3065 to i64		; <i64> [#uses=1]
+	%shl45 = shl i64 %conv442709, 16		; <i64> [#uses=1]
+	%conv632707 = zext i8 undef to i64		; <i64> [#uses=1]
+	%or = or i64 %shl45, 0		; <i64> [#uses=1]
+	%or55 = or i64 %or, %conv632707		; <i64> [#uses=1]
+	%or64 = or i64 %or55, 0		; <i64> [#uses=1]
+	%shr85 = lshr i64 %conv31, 25		; <i64> [#uses=0]
+	%add = add i64 %conv11, 1508970993		; <i64> [#uses=1]
+	%add95 = add i64 %add, 0		; <i64> [#uses=1]
+	%add98 = add i64 %add95, 0		; <i64> [#uses=1]
+	%add99 = add i64 %add98, %or64		; <i64> [#uses=1]
+	%add134 = add i64 %add99, 0		; <i64> [#uses=4]
+	store i32 undef, i32* undef
+	%shl187 = shl i64 %add134, 21		; <i64> [#uses=0]
+	%and203 = and i64 %add134, %conv31		; <i64> [#uses=1]
+	%xor208 = xor i64 0, %and203		; <i64> [#uses=1]
+	%add212 = add i64 0, %xor208		; <i64> [#uses=1]
+	%add213 = add i64 %add212, 0		; <i64> [#uses=1]
+	%add248 = add i64 %add213, 0		; <i64> [#uses=3]
+	%conv2852690 = zext i8 undef to i64		; <i64> [#uses=1]
+	%or277 = or i64 0, %conv2852690		; <i64> [#uses=1]
+	%or286 = or i64 %or277, 0		; <i64> [#uses=1]
+	%neg319 = xor i64 %add248, 4294967295		; <i64> [#uses=1]
+	%and321 = and i64 %neg319, %conv31		; <i64> [#uses=1]
+	%xor322 = xor i64 %and321, 0		; <i64> [#uses=1]
+	%add314 = add i64 %conv, 2870763221		; <i64> [#uses=1]
+	%add323 = add i64 %add314, %or286		; <i64> [#uses=1]
+	%add326 = add i64 %add323, %xor322		; <i64> [#uses=1]
+	%add327 = add i64 %add326, 0		; <i64> [#uses=2]
+	%add362 = add i64 %add327, %conv19		; <i64> [#uses=4]
+	%add365 = add i64 0, %add327		; <i64> [#uses=3]
+	%shl409 = shl i64 %add362, 26		; <i64> [#uses=0]
+	%and431 = and i64 %add362, %add248		; <i64> [#uses=1]
+	%neg433 = xor i64 %add362, -1		; <i64> [#uses=1]
+	%and435 = and i64 %add134, %neg433		; <i64> [#uses=1]
+	%xor436 = xor i64 %and431, %and435		; <i64> [#uses=1]
+	%add428 = add i64 %conv31, 3624381080		; <i64> [#uses=1]
+	%add437 = add i64 %add428, 0		; <i64> [#uses=1]
+	%add440 = add i64 %add437, %xor436		; <i64> [#uses=1]
+	%add441 = add i64 %add440, 0		; <i64> [#uses=1]
+	%shl443 = shl i64 %add365, 30		; <i64> [#uses=1]
+	%and445 = lshr i64 %add365, 2		; <i64> [#uses=1]
+	%shr446 = and i64 %and445, 1073741823		; <i64> [#uses=1]
+	%or447 = or i64 %shr446, %shl443		; <i64> [#uses=1]
+	%xor461 = xor i64 0, %or447		; <i64> [#uses=1]
+	%add473 = add i64 %xor461, 0		; <i64> [#uses=1]
+	%add479 = add i64 %add473, %add441		; <i64> [#uses=3]
+	%conv4932682 = zext i8 undef to i64		; <i64> [#uses=1]
+	%shl494 = shl i64 %conv4932682, 16		; <i64> [#uses=1]
+	%ptrincdec4903012 = load i8* null		; <i8> [#uses=1]
+	%conv5032681 = zext i8 %ptrincdec4903012 to i64		; <i64> [#uses=1]
+	%shl504 = shl i64 %conv5032681, 8		; <i64> [#uses=1]
+	%ptrincdec5003009 = load i8* null		; <i8> [#uses=1]
+	%conv5132680 = zext i8 %ptrincdec5003009 to i64		; <i64> [#uses=1]
+	%or495 = or i64 %shl494, 0		; <i64> [#uses=1]
+	%or505 = or i64 %or495, %conv5132680		; <i64> [#uses=1]
+	%or514 = or i64 %or505, %shl504		; <i64> [#uses=1]
+	store i32 undef, i32* undef
+	%or540 = or i64 undef, 0		; <i64> [#uses=0]
+	%add542 = add i64 %add134, 310598401		; <i64> [#uses=1]
+	%add551 = add i64 %add542, %or514		; <i64> [#uses=1]
+	%add554 = add i64 %add551, 0		; <i64> [#uses=1]
+	%add555 = add i64 %add554, 0		; <i64> [#uses=1]
+	%or561 = or i64 undef, undef		; <i64> [#uses=1]
+	%or567 = or i64 undef, undef		; <i64> [#uses=1]
+	%and572 = lshr i64 %add479, 22		; <i64> [#uses=1]
+	%shr573 = and i64 %and572, 1023		; <i64> [#uses=1]
+	%or574 = or i64 %shr573, 0		; <i64> [#uses=1]
+	%xor568 = xor i64 %or567, %or574		; <i64> [#uses=1]
+	%xor575 = xor i64 %xor568, %or561		; <i64> [#uses=1]
+	%add587 = add i64 %xor575, 0		; <i64> [#uses=1]
+	%add593 = add i64 %add587, %add555		; <i64> [#uses=1]
+	%ptrincdec6043000 = load i8* null		; <i8> [#uses=1]
+	%conv6172676 = zext i8 %ptrincdec6043000 to i64		; <i64> [#uses=1]
+	%shl618 = shl i64 %conv6172676, 8		; <i64> [#uses=1]
+	%ptrincdec6142997 = load i8* %ctg22996		; <i8> [#uses=1]
+	%conv6272675 = zext i8 %ptrincdec6142997 to i64		; <i64> [#uses=1]
+	%or619 = or i64 0, %conv6272675		; <i64> [#uses=1]
+	%or628 = or i64 %or619, %shl618		; <i64> [#uses=1]
+	%add656 = add i64 %add248, 607225278		; <i64> [#uses=1]
+	%add665 = add i64 %add656, %or628		; <i64> [#uses=1]
+	%add668 = add i64 %add665, 0		; <i64> [#uses=1]
+	%add669 = add i64 %add668, 0		; <i64> [#uses=1]
+	%and699 = and i64 %add479, %add365		; <i64> [#uses=1]
+	%xor700 = xor i64 0, %and699		; <i64> [#uses=1]
+	%add701 = add i64 0, %xor700		; <i64> [#uses=1]
+	%add707 = add i64 %add701, %add669		; <i64> [#uses=4]
+	%ptrincdec6242994 = load i8* null		; <i8> [#uses=1]
+	%conv7122673 = zext i8 %ptrincdec6242994 to i64		; <i64> [#uses=1]
+	%shl713 = shl i64 %conv7122673, 24		; <i64> [#uses=1]
+	%conv7412670 = zext i8 undef to i64		; <i64> [#uses=1]
+	%or723 = or i64 0, %shl713		; <i64> [#uses=1]
+	%or733 = or i64 %or723, %conv7412670		; <i64> [#uses=1]
+	%or742 = or i64 %or733, 0		; <i64> [#uses=2]
+	%conv743 = trunc i64 %or742 to i32		; <i32> [#uses=1]
+	store i32 %conv743, i32* undef
+	%xor762 = xor i64 undef, 0		; <i64> [#uses=0]
+	%add770 = add i64 %add362, 1426881987		; <i64> [#uses=1]
+	%add779 = add i64 %add770, %or742		; <i64> [#uses=1]
+	%add782 = add i64 %add779, 0		; <i64> [#uses=1]
+	%add783 = add i64 %add782, 0		; <i64> [#uses=1]
+	%shl785 = shl i64 %add707, 30		; <i64> [#uses=1]
+	%and787 = lshr i64 %add707, 2		; <i64> [#uses=1]
+	%shr788 = and i64 %and787, 1073741823		; <i64> [#uses=1]
+	%or789 = or i64 %shr788, %shl785		; <i64> [#uses=1]
+	%shl791 = shl i64 %add707, 19		; <i64> [#uses=0]
+	%xor803 = xor i64 0, %or789		; <i64> [#uses=1]
+	%and813 = and i64 %add593, %add479		; <i64> [#uses=1]
+	%xor814 = xor i64 0, %and813		; <i64> [#uses=1]
+	%add815 = add i64 %xor803, %xor814		; <i64> [#uses=1]
+	%add821 = add i64 %add815, %add783		; <i64> [#uses=1]
+	%add1160 = add i64 0, %add707		; <i64> [#uses=0]
+	%add1157 = add i64 undef, undef		; <i64> [#uses=0]
+	%ptrincdec11742940 = load i8* null		; <i8> [#uses=1]
+	%conv11872651 = zext i8 %ptrincdec11742940 to i64		; <i64> [#uses=1]
+	%shl1188 = shl i64 %conv11872651, 8		; <i64> [#uses=1]
+	%or1198 = or i64 0, %shl1188		; <i64> [#uses=1]
+	store i32 undef, i32* undef
+	%add1226 = add i64 %or1198, 3248222580		; <i64> [#uses=1]
+	%add1235 = add i64 %add1226, 0		; <i64> [#uses=1]
+	%add1238 = add i64 %add1235, 0		; <i64> [#uses=1]
+	%add1239 = add i64 %add1238, 0		; <i64> [#uses=1]
+	br label %for.cond
+
+for.cond:		; preds = %for.body, %while.body
+	%add821.pn = phi i64 [ %add821, %while.body ], [ undef, %for.body ]		; <i64> [#uses=0]
+	%add1239.pn = phi i64 [ %add1239, %while.body ], [ 0, %for.body ]		; <i64> [#uses=0]
+	br i1 undef, label %for.end, label %for.body
+
+for.body:		; preds = %for.cond
+	br label %for.cond
+
+for.end:		; preds = %for.cond
+	%indvar.next2788 = add i64 %indvar2787, 1		; <i64> [#uses=1]
+	br i1 undef, label %while.end, label %while.body
+
+while.end:		; preds = %for.end, %entry
+	ret void
+}
+
+define void @sha256_block2(i32* nocapture %arr, i8* nocapture %in, i64 %num) nounwind {
+entry:
+	br i1 undef, label %while.end, label %bb.nph
+
+bb.nph:		; preds = %entry
+	%arrayidx5 = getelementptr i32* %arr, i64 1		; <i32*> [#uses=1]
+	%arrayidx9 = getelementptr i32* %arr, i64 2		; <i32*> [#uses=2]
+	%arrayidx13 = getelementptr i32* %arr, i64 3		; <i32*> [#uses=2]
+	%arrayidx25 = getelementptr i32* %arr, i64 6		; <i32*> [#uses=1]
+	%arrayidx29 = getelementptr i32* %arr, i64 7		; <i32*> [#uses=1]
+	br label %while.body
+
+while.body:		; preds = %for.end, %bb.nph
+	%tmp3 = load i32* %arr		; <i32> [#uses=2]
+	%conv = zext i32 %tmp3 to i64		; <i64> [#uses=1]
+	%tmp10 = load i32* %arrayidx9		; <i32> [#uses=1]
+	%conv11 = zext i32 %tmp10 to i64		; <i64> [#uses=1]
+	%tmp14 = load i32* %arrayidx13		; <i32> [#uses=3]
+	%conv15 = zext i32 %tmp14 to i64		; <i64> [#uses=2]
+	%tmp18 = load i32* undef		; <i32> [#uses=2]
+	%conv19 = zext i32 %tmp18 to i64		; <i64> [#uses=1]
+	%conv23 = zext i32 undef to i64		; <i64> [#uses=1]
+	%tmp26 = load i32* %arrayidx25		; <i32> [#uses=1]
+	%conv27 = zext i32 %tmp26 to i64		; <i64> [#uses=1]
+	%tmp30 = load i32* %arrayidx29		; <i32> [#uses=2]
+	%conv31 = zext i32 %tmp30 to i64		; <i64> [#uses=5]
+	%shl72 = shl i64 %conv31, 26		; <i64> [#uses=1]
+	%shr = lshr i64 %conv31, 6		; <i64> [#uses=1]
+	%or74 = or i64 %shl72, %shr		; <i64> [#uses=1]
+	%shr85 = lshr i64 %conv31, 25		; <i64> [#uses=0]
+	%xor87 = xor i64 0, %or74		; <i64> [#uses=1]
+	%and902706 = and i32 %tmp30, %tmp3		; <i32> [#uses=1]
+	%and90 = zext i32 %and902706 to i64		; <i64> [#uses=1]
+	%xor94 = xor i64 0, %and90		; <i64> [#uses=1]
+	%add = add i64 %conv11, 1508970993		; <i64> [#uses=1]
+	%add95 = add i64 %add, %xor94		; <i64> [#uses=1]
+	%add98 = add i64 %add95, %xor87		; <i64> [#uses=1]
+	%add99 = add i64 %add98, 0		; <i64> [#uses=2]
+	%xor130 = zext i32 undef to i64		; <i64> [#uses=1]
+	%add134 = add i64 %add99, %conv27		; <i64> [#uses=2]
+	%add131 = add i64 %xor130, 0		; <i64> [#uses=1]
+	%add137 = add i64 %add131, %add99		; <i64> [#uses=5]
+	%conv1422700 = zext i8 undef to i64		; <i64> [#uses=1]
+	%shl143 = shl i64 %conv1422700, 24		; <i64> [#uses=1]
+	%ptrincdec1393051 = load i8* undef		; <i8> [#uses=1]
+	%conv1512699 = zext i8 %ptrincdec1393051 to i64		; <i64> [#uses=1]
+	%shl152 = shl i64 %conv1512699, 16		; <i64> [#uses=1]
+	%conv1712697 = zext i8 undef to i64		; <i64> [#uses=1]
+	%or153 = or i64 %shl152, %shl143		; <i64> [#uses=1]
+	%or163 = or i64 %or153, %conv1712697		; <i64> [#uses=1]
+	%or172 = or i64 %or163, 0		; <i64> [#uses=1]
+	%and203 = and i64 %add134, %conv31		; <i64> [#uses=1]
+	%xor208 = xor i64 0, %and203		; <i64> [#uses=1]
+	%add200 = add i64 0, 2453635748		; <i64> [#uses=1]
+	%add209 = add i64 %add200, %or172		; <i64> [#uses=1]
+	%add212 = add i64 %add209, %xor208		; <i64> [#uses=1]
+	%add213 = add i64 %add212, 0		; <i64> [#uses=2]
+	%shl228 = shl i64 %add137, 10		; <i64> [#uses=1]
+	%and230 = lshr i64 %add137, 22		; <i64> [#uses=1]
+	%shr231 = and i64 %and230, 1023		; <i64> [#uses=1]
+	%or232 = or i64 %shr231, %shl228		; <i64> [#uses=1]
+	%xor226 = xor i64 0, %or232		; <i64> [#uses=1]
+	%xor233 = xor i64 %xor226, 0		; <i64> [#uses=1]
+	%and2362695 = zext i32 undef to i64		; <i64> [#uses=1]
+	%xor240 = and i64 %add137, %and2362695		; <i64> [#uses=1]
+	%and2432694 = and i32 %tmp18, %tmp14		; <i32> [#uses=1]
+	%and243 = zext i32 %and2432694 to i64		; <i64> [#uses=1]
+	%xor244 = xor i64 %xor240, %and243		; <i64> [#uses=1]
+	%add248 = add i64 %add213, %conv23		; <i64> [#uses=2]
+	%add245 = add i64 %xor233, %xor244		; <i64> [#uses=1]
+	%add251 = add i64 %add245, %add213		; <i64> [#uses=1]
+	%conv2752691 = zext i8 undef to i64		; <i64> [#uses=1]
+	%shl276 = shl i64 %conv2752691, 8		; <i64> [#uses=0]
+	%and317 = and i64 %add248, %add134		; <i64> [#uses=1]
+	%neg319 = xor i64 %add248, 4294967295		; <i64> [#uses=1]
+	%and321 = and i64 %neg319, %conv31		; <i64> [#uses=1]
+	%xor322 = xor i64 %and321, %and317		; <i64> [#uses=1]
+	%add314 = add i64 %conv, 2870763221		; <i64> [#uses=1]
+	%add323 = add i64 %add314, 0		; <i64> [#uses=1]
+	%add326 = add i64 %add323, %xor322		; <i64> [#uses=1]
+	%add327 = add i64 %add326, 0		; <i64> [#uses=2]
+	%and3502689 = xor i64 %add137, %conv15		; <i64> [#uses=1]
+	%xor354 = and i64 %add251, %and3502689		; <i64> [#uses=1]
+	%and357 = and i64 %add137, %conv15		; <i64> [#uses=1]
+	%xor358 = xor i64 %xor354, %and357		; <i64> [#uses=1]
+	%add362 = add i64 %add327, %conv19		; <i64> [#uses=1]
+	%add359 = add i64 0, %xor358		; <i64> [#uses=1]
+	%add365 = add i64 %add359, %add327		; <i64> [#uses=1]
+	%add770 = add i64 %add362, 1426881987		; <i64> [#uses=1]
+	%add779 = add i64 %add770, 0		; <i64> [#uses=1]
+	%add782 = add i64 %add779, 0		; <i64> [#uses=1]
+	%add783 = add i64 %add782, 0		; <i64> [#uses=2]
+	%add818 = add i64 %add783, %add365		; <i64> [#uses=1]
+	%add821 = add i64 0, %add783		; <i64> [#uses=1]
+	store i32 undef, i32* undef
+	%add1046 = add i64 undef, undef		; <i64> [#uses=1]
+	%add1160 = add i64 undef, undef		; <i64> [#uses=1]
+	store i32 0, i32* undef
+	%add1235 = add i64 0, %add818		; <i64> [#uses=1]
+	%add1238 = add i64 %add1235, 0		; <i64> [#uses=1]
+	%add1239 = add i64 %add1238, 0		; <i64> [#uses=1]
+	br label %for.cond
+
+for.cond:		; preds = %for.body, %while.body
+	%h.0 = phi i64 [ undef, %while.body ], [ %add2035, %for.body ]		; <i64> [#uses=1]
+	%g.0 = phi i64 [ %add1046, %while.body ], [ undef, %for.body ]		; <i64> [#uses=1]
+	%f.0 = phi i64 [ %add1160, %while.body ], [ undef, %for.body ]		; <i64> [#uses=1]
+	%add821.pn = phi i64 [ %add821, %while.body ], [ undef, %for.body ]		; <i64> [#uses=0]
+	%add1239.pn2648 = phi i64 [ %add1239, %while.body ], [ undef, %for.body ]		; <i64> [#uses=0]
+	%d.0 = phi i64 [ undef, %while.body ], [ %add2038, %for.body ]		; <i64> [#uses=2]
+	br i1 undef, label %for.end, label %for.body
+
+for.body:		; preds = %for.cond
+	%conv1390 = zext i32 undef to i64		; <i64> [#uses=1]
+	%add1375 = add i64 0, %h.0		; <i64> [#uses=1]
+	%add1384 = add i64 %add1375, 0		; <i64> [#uses=1]
+	%add1391 = add i64 %add1384, %conv1390		; <i64> [#uses=1]
+	%add1392 = add i64 %add1391, 0		; <i64> [#uses=2]
+	%or1411 = or i64 0, undef		; <i64> [#uses=1]
+	%xor1405 = xor i64 0, %or1411		; <i64> [#uses=1]
+	%xor1412 = xor i64 %xor1405, 0		; <i64> [#uses=1]
+	%add1427 = add i64 %add1392, %d.0		; <i64> [#uses=1]
+	%add1424 = add i64 %xor1412, 0		; <i64> [#uses=1]
+	%add1430 = add i64 %add1424, %add1392		; <i64> [#uses=5]
+	%tmp1438 = load i32* undef		; <i32> [#uses=1]
+	%conv1439 = zext i32 %tmp1438 to i64		; <i64> [#uses=4]
+	%shl1441 = shl i64 %conv1439, 25		; <i64> [#uses=1]
+	%shr1444 = lshr i64 %conv1439, 7		; <i64> [#uses=1]
+	%or1445 = or i64 %shl1441, %shr1444		; <i64> [#uses=1]
+	%shr1450 = lshr i64 %conv1439, 18		; <i64> [#uses=1]
+	%or1451 = or i64 0, %shr1450		; <i64> [#uses=1]
+	%shr1454 = lshr i64 %conv1439, 3		; <i64> [#uses=1]
+	%xor1452 = xor i64 %or1451, %shr1454		; <i64> [#uses=1]
+	%xor1455 = xor i64 %xor1452, %or1445		; <i64> [#uses=1]
+	%conv1464 = zext i32 undef to i64		; <i64> [#uses=4]
+	%shl1466 = shl i64 %conv1464, 15		; <i64> [#uses=1]
+	%shr1469 = lshr i64 %conv1464, 17		; <i64> [#uses=1]
+	%or1470 = or i64 %shl1466, %shr1469		; <i64> [#uses=1]
+	%shr1475 = lshr i64 %conv1464, 19		; <i64> [#uses=1]
+	%or1476 = or i64 0, %shr1475		; <i64> [#uses=1]
+	%shr1479 = lshr i64 %conv1464, 10		; <i64> [#uses=1]
+	%xor1477 = xor i64 %or1476, %shr1479		; <i64> [#uses=1]
+	%xor1480 = xor i64 %xor1477, %or1470		; <i64> [#uses=1]
+	%tmp1499 = load i32* null		; <i32> [#uses=1]
+	%conv1500 = zext i32 %tmp1499 to i64		; <i64> [#uses=1]
+	%add1491 = add i64 %conv1500, 0		; <i64> [#uses=1]
+	%add1501 = add i64 %add1491, %xor1455		; <i64> [#uses=1]
+	%add1502 = add i64 %add1501, %xor1480		; <i64> [#uses=1]
+	%conv1504 = and i64 %add1502, 4294967295		; <i64> [#uses=1]
+	%tmp1541 = load i32* undef		; <i32> [#uses=1]
+	%conv1542 = zext i32 %tmp1541 to i64		; <i64> [#uses=1]
+	%add1527 = add i64 %conv1542, %g.0		; <i64> [#uses=1]
+	%add1536 = add i64 %add1527, 0		; <i64> [#uses=1]
+	%add1543 = add i64 %add1536, %conv1504		; <i64> [#uses=1]
+	%add1544 = add i64 %add1543, 0		; <i64> [#uses=1]
+	%shl1546 = shl i64 %add1430, 30		; <i64> [#uses=1]
+	%and1548 = lshr i64 %add1430, 2		; <i64> [#uses=1]
+	%shr1549 = and i64 %and1548, 1073741823		; <i64> [#uses=1]
+	%or1550 = or i64 %shr1549, %shl1546		; <i64> [#uses=1]
+	%shl1552 = shl i64 %add1430, 19		; <i64> [#uses=1]
+	%or1556 = or i64 0, %shl1552		; <i64> [#uses=1]
+	%shl1559 = shl i64 %add1430, 10		; <i64> [#uses=1]
+	%or1563 = or i64 0, %shl1559		; <i64> [#uses=1]
+	%xor1557 = xor i64 %or1556, %or1563		; <i64> [#uses=1]
+	%xor1564 = xor i64 %xor1557, %or1550		; <i64> [#uses=1]
+	%add1576 = add i64 %xor1564, 0		; <i64> [#uses=1]
+	%add1582 = add i64 %add1576, %add1544		; <i64> [#uses=3]
+	store i32 undef, i32* undef
+	%tmp1693 = load i32* undef		; <i32> [#uses=1]
+	%conv1694 = zext i32 %tmp1693 to i64		; <i64> [#uses=1]
+	%add1679 = add i64 %conv1694, %f.0		; <i64> [#uses=1]
+	%add1688 = add i64 %add1679, 0		; <i64> [#uses=1]
+	%add1695 = add i64 %add1688, 0		; <i64> [#uses=1]
+	%add1696 = add i64 %add1695, 0		; <i64> [#uses=1]
+	%shl1698 = shl i64 %add1582, 30		; <i64> [#uses=0]
+	%shl1704 = shl i64 %add1582, 19		; <i64> [#uses=0]
+	%add1734 = add i64 0, %add1696		; <i64> [#uses=1]
+	%add1983 = add i64 0, %add1427		; <i64> [#uses=1]
+	%add1992 = add i64 %add1983, 0		; <i64> [#uses=1]
+	%add1999 = add i64 %add1992, 0		; <i64> [#uses=1]
+	%add2000 = add i64 %add1999, 0		; <i64> [#uses=2]
+	%and2030 = and i64 %add1734, %add1582		; <i64> [#uses=1]
+	%xor2031 = xor i64 0, %and2030		; <i64> [#uses=1]
+	%add2035 = add i64 %add2000, %add1430		; <i64> [#uses=1]
+	%add2032 = add i64 0, %xor2031		; <i64> [#uses=1]
+	%add2038 = add i64 %add2032, %add2000		; <i64> [#uses=1]
+	store i32 0, i32* undef
+	br label %for.cond
+
+for.end:		; preds = %for.cond
+	store i32 undef, i32* %arrayidx5
+	store i32 undef, i32* %arrayidx9
+	%d.02641 = trunc i64 %d.0 to i32		; <i32> [#uses=1]
+	%conv2524 = add i32 %tmp14, %d.02641		; <i32> [#uses=1]
+	store i32 %conv2524, i32* %arrayidx13
+	%exitcond2789 = icmp eq i64 undef, %num		; <i1> [#uses=1]
+	br i1 %exitcond2789, label %while.end, label %while.body
+
+while.end:		; preds = %for.end, %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
new file mode 100644
index 0000000..e6f3008
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s | grep "subq.*\\\$40, \\\%rsp"
+target triple = "x86_64-mingw64"
+
+define x86_fp80 @a(i64 %x) nounwind readnone {		; returns %x as an 80-bit x87 long double; RUN line checks a 40-byte frame is allocated (no red zone on mingw64)
+entry:
+	%conv = sitofp i64 %x to x86_fp80		; <x86_fp80> [#uses=1]	; signed i64 -> x86_fp80
+	ret x86_fp80 %conv
+}
+
diff --git a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
new file mode 100644
index 0000000..cb64bf2
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -o %t1
+; RUN: grep "subq.*\\\$72, \\\%rsp" %t1
+; RUN: grep "movaps	\\\%xmm8, 32\\\(\\\%rsp\\\)" %t1
+; RUN: grep "movaps	\\\%xmm7, 48\\\(\\\%rsp\\\)" %t1
+target triple = "x86_64-mingw64"
+
+define i32 @a() nounwind {		; RUN lines check xmm7/xmm8 are spilled to the stack (callee-saved XMM regs on this Win64-style target)
+entry:
+	tail call void asm sideeffect "", "~{xmm7},~{xmm8},~{dirflag},~{fpsr},~{flags}"() nounwind		; empty inline asm whose only purpose is to clobber xmm7/xmm8
+	ret i32 undef		; return value is intentionally undef; the test only inspects the prologue/epilogue
+}
+
diff --git a/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll b/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
new file mode 100644
index 0000000..9415732
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -march=x86
+
+	type { %struct.GAP }		; type %0
+	type { i16, i8, i8 }		; type %1
+	type { [2 x i32], [2 x i32] }		; type %2
+	type { %struct.rec* }		; type %3
+	%struct.FILE_POS = type { i8, i8, i16, i32 }
+	%struct.FIRST_UNION = type { %struct.FILE_POS }
+	%struct.FOURTH_UNION = type { %struct.STYLE }
+	%struct.GAP = type { i8, i8, i16 }
+	%struct.LIST = type { %struct.rec*, %struct.rec* }
+	%struct.SECOND_UNION = type { %1 }
+	%struct.STYLE = type { %0, %0, i16, i16, i32 }
+	%struct.THIRD_UNION = type { %2 }
+	%struct.head_type = type { [2 x %struct.LIST], %struct.FIRST_UNION, %struct.SECOND_UNION, %struct.THIRD_UNION, %struct.FOURTH_UNION, %struct.rec*, %3, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, i32 }
+	%struct.rec = type { %struct.head_type }
+
+define fastcc void @MinSize(%struct.rec* %x) nounwind {		; reduced crash test: %tmp13/%tmp14 are live across nested switches (virtual-reg live-in across blocks)
+entry:
+	%tmp13 = load i8* undef, align 4		; <i8> [#uses=3]	; loaded once, consumed in three separate blocks
+	%tmp14 = zext i8 %tmp13 to i32		; <i32> [#uses=2]
+	switch i32 %tmp14, label %bb1109 [
+		i32 42, label %bb246
+	]
+
+bb246:		; preds = %entry, %entry
+	switch i8 %tmp13, label %bb249 [
+		i8 42, label %bb269
+		i8 44, label %bb269
+	]
+
+bb249:		; preds = %bb246
+	%tmp3240 = icmp eq i8 %tmp13, 0		; <i1> [#uses=1]	; third use of %tmp13, two blocks below its definition
+	br i1 %tmp3240, label %bb974, label %bb269
+
+bb269:
+	%tmp3424 = getelementptr %struct.rec* %x, i32 0, i32 0, i32 0, i32 0, i32 1		; <%struct.rec**> [#uses=0]
+	unreachable
+
+bb974:
+	unreachable
+
+bb1109:		; preds = %entry
+	call fastcc void @Image(i32 %tmp14) nounwind		; <i8*> [#uses=0]	; second use of %tmp14
+	unreachable
+}
+
+declare fastcc void @Image(i32) nounwind
diff --git a/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll b/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
new file mode 100644
index 0000000..336f17e
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=x86 -mattr=+mmx | not grep movl
+
+define <8 x i8> @a(i8 zeroext %x) nounwind {		; scalar i8 -> lane 0 of an MMX-sized vector; RUN line requires no movl be emitted
+  %r = insertelement <8 x i8> undef, i8 %x, i32 0
+  ret <8 x i8> %r
+}
+
diff --git a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
new file mode 100644
index 0000000..5c51480
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 > %t1
+; RUN: grep movzwl %t1 | count 2
+; RUN: grep movzbl %t1 | count 2
+; RUN: grep movd %t1 | count 4
+
+define <4 x i16> @a(i32* %x1) nounwind {		; (load >> 1) truncated to i16, inserted into lane 0 of a zero <4 x i16>
+  %x2 = load i32* %x1
+  %x3 = lshr i32 %x2, 1		; shift by 1 so the value is not a plain load (forces a zero-extended GPR value)
+  %x = trunc i32 %x3 to i16
+  %r = insertelement <4 x i16> zeroinitializer, i16 %x, i32 0
+  ret <4 x i16> %r
+}
+
+define <8 x i16> @b(i32* %x1) nounwind {		; same pattern as @a but into a 128-bit <8 x i16>
+  %x2 = load i32* %x1
+  %x3 = lshr i32 %x2, 1
+  %x = trunc i32 %x3 to i16
+  %r = insertelement <8 x i16> zeroinitializer, i16 %x, i32 0
+  ret <8 x i16> %r
+}
+
+define <8 x i8> @c(i32* %x1) nounwind {		; i8 variant: insert into lane 0 of a zero <8 x i8>
+  %x2 = load i32* %x1
+  %x3 = lshr i32 %x2, 1
+  %x = trunc i32 %x3 to i8
+  %r = insertelement <8 x i8> zeroinitializer, i8 %x, i32 0
+  ret <8 x i8> %r
+}
+
+define <16 x i8> @d(i32* %x1) nounwind {		; i8 variant into a 128-bit <16 x i8>
+  %x2 = load i32* %x1
+  %x3 = lshr i32 %x2, 1
+  %x = trunc i32 %x3 to i8
+  %r = insertelement <16 x i8> zeroinitializer, i8 %x, i32 0
+  ret <16 x i8> %r
+}
+
diff --git a/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll b/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll
new file mode 100644
index 0000000..8bb3dc6
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s
+
+define <2 x i64> @_mm_insert_epi16(<2 x i64> %a, i32 %b, i32 %imm) nounwind readnone {		; insertelement with a VARIABLE (runtime) index — the case the test exercises
+entry:
+	%conv = bitcast <2 x i64> %a to <8 x i16>		; <<8 x i16>> [#uses=1]	; reinterpret as 8 x i16 lanes
+	%conv2 = trunc i32 %b to i16		; <i16> [#uses=1]
+	%and = and i32 %imm, 7		; <i32> [#uses=1]	; clamp index to 0..7
+	%vecins = insertelement <8 x i16> %conv, i16 %conv2, i32 %and		; <<8 x i16>> [#uses=1]	; non-constant lane index
+	%conv6 = bitcast <8 x i16> %vecins to <2 x i64>		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %conv6
+}
diff --git a/test/CodeGen/X86/2009-06-05-sitofpCrash.ll b/test/CodeGen/X86/2009-06-05-sitofpCrash.ll
new file mode 100644
index 0000000..e361804
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-05-sitofpCrash.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86 -mattr=+sse
+; PR2598
+
+define <2 x float> @a(<2 x i32> %i) nounwind {		; PR2598: vector sitofp on an illegal (64-bit) vector type must not crash isel
+  %r = sitofp <2 x i32> %i to <2 x float> 
+  ret <2 x float> %r
+}
+
+define <2 x i32> @b(<2 x float> %i) nounwind {		; inverse direction: vector fptosi on the same illegal type
+  %r = fptosi <2 x float> %i to <2 x i32> 
+  ret <2 x i32> %r
+}
+
diff --git a/test/CodeGen/X86/2009-06-06-ConcatVectors.ll b/test/CodeGen/X86/2009-06-06-ConcatVectors.ll
new file mode 100644
index 0000000..92419fc
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-06-ConcatVectors.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s
+
+define <2 x i64> @_mm_movpi64_pi64(<1 x i64> %a, <1 x i64> %b) nounwind readnone {		; concatenate two <1 x i64> into one <2 x i64> via shufflevector
+entry:
+  %0 = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> <i32 0, i32 1>		; result wider than either input — the concat case under test
+	ret <2 x i64> %0
+}
+
diff --git a/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll b/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
new file mode 100644
index 0000000..07ef53e
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep movl | count 2
+
+define i64 @a(i32 %a, i32 %b) nounwind readnone {		; build <2 x i32> from two scalars, bitcast to i64; RUN line expects exactly 2 movl
+entry:
+	%0 = insertelement <2 x i32> undef, i32 %a, i32 0		; <<2 x i32>> [#uses=1]
+	%1 = insertelement <2 x i32> %0, i32 %b, i32 1		; <<2 x i32>> [#uses=1]
+	%conv = bitcast <2 x i32> %1 to i64		; <i64> [#uses=1]	; MMX-width bitcast being expanded
+	ret i64 %conv
+}
+
diff --git a/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll b/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll
new file mode 100644
index 0000000..673e936
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -tailcallopt -march=x86-64 -mattr=+sse2 -mtriple=x86_64-apple-darwin | grep fstpt
+; RUN: llc < %s -tailcallopt -march=x86-64 -mattr=+sse2 -mtriple=x86_64-apple-darwin | grep xmm
+
+; Check that x86-64 tail calls support x86_fp80 and v2f32 types. (Tail call
+; calling convention out of sync with standard c calling convention on x86_64)
+; Bug 4278.
+
+declare fastcc double @tailcallee(x86_fp80, <2 x float>) 
+	
+define fastcc double @tailcall() {		; passes x86_fp80 and <2 x float> args through a fastcc tail call (Bug 4278); RUN lines check fstpt and xmm usage
+entry:
+  %tmp = fpext float 1.000000e+00 to x86_fp80
+	%tmp2 = tail call fastcc double @tailcallee( x86_fp80 %tmp,  <2 x float> <float 1.000000e+00, float 1.000000e+00>)		; the tail call whose arg passing is under test
+	ret double %tmp2
+}
diff --git a/test/CodeGen/X86/2009-06-15-not-a-tail-call.ll b/test/CodeGen/X86/2009-06-15-not-a-tail-call.ll
new file mode 100644
index 0000000..feb5780
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-15-not-a-tail-call.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 -tailcallopt | not grep TAILCALL 
+
+; Bug 4396. This tail call can NOT be optimized.
+
+declare fastcc i8* @_D3gcx2GC12mallocNoSyncMFmkZPv() nounwind
+
+define fastcc i8* @_D3gcx2GC12callocNoSyncMFmkZPv() nounwind {		; Bug 4396: the first call must NOT become a tail call
+entry:
+	%tmp6 = tail call fastcc i8* @_D3gcx2GC12mallocNoSyncMFmkZPv()		; <i8*> [#uses=2]	; %tmp6 is still used below, so this cannot be optimized into a jump
+	%tmp9 = tail call i8* @memset(i8* %tmp6, i32 0, i64 2)		; <i8*> [#uses=0]
+	ret i8* %tmp6
+}
+
+declare i8* @memset(i8*, i32, i64)
diff --git a/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll b/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll
new file mode 100644
index 0000000..228cd48
--- /dev/null
+++ b/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86 -mattr=+sse,-sse2
+; PR2484
+
+define <4 x float> @f4523(<4 x float> %a,<4 x float> %b) nounwind {		; PR2484: movlps-style shuffle (low half from %b, high half from %a) with only SSE1
+entry:		; NOTE: the shuffle mask below is wrapped across two physical lines; LLVM's lexer treats the newline as whitespace
+%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4,i32
+5,i32 2,i32 3>
+ret <4 x float> %shuffle
+}
diff --git a/test/CodeGen/X86/2009-07-06-TwoAddrAssert.ll b/test/CodeGen/X86/2009-07-06-TwoAddrAssert.ll
new file mode 100644
index 0000000..fcc71ae
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-06-TwoAddrAssert.ll
@@ -0,0 +1,137 @@
+; RUN: llc < %s -march=x86 -mtriple=x86_64-unknown-freebsd7.2
+; PR4478
+
+	%struct.sockaddr = type <{ i8, i8, [14 x i8] }>
+
+define i32 @main(i32 %argc, i8** %argv) nounwind {		; PR4478: reduced CFG skeleton (mostly undef branches) leading to a two-address inline-asm use that asserted
+entry:
+	br label %while.cond
+
+while.cond:		; preds = %sw.bb6, %entry
+	switch i32 undef, label %sw.default [
+		i32 -1, label %while.end
+		i32 119, label %sw.bb6
+	]
+
+sw.bb6:		; preds = %while.cond
+	br i1 undef, label %if.then, label %while.cond
+
+if.then:		; preds = %sw.bb6
+	ret i32 1
+
+sw.default:		; preds = %while.cond
+	ret i32 1
+
+while.end:		; preds = %while.cond
+	br i1 undef, label %if.then15, label %if.end16
+
+if.then15:		; preds = %while.end
+	ret i32 1
+
+if.end16:		; preds = %while.end
+	br i1 undef, label %lor.lhs.false, label %if.then21
+
+lor.lhs.false:		; preds = %if.end16
+	br i1 undef, label %if.end22, label %if.then21
+
+if.then21:		; preds = %lor.lhs.false, %if.end16
+	ret i32 1
+
+if.end22:		; preds = %lor.lhs.false
+	br i1 undef, label %lor.lhs.false27, label %if.then51
+
+lor.lhs.false27:		; preds = %if.end22
+	br i1 undef, label %lor.lhs.false39, label %if.then51
+
+lor.lhs.false39:		; preds = %lor.lhs.false27
+	br i1 undef, label %if.end52, label %if.then51
+
+if.then51:		; preds = %lor.lhs.false39, %lor.lhs.false27, %if.end22
+	ret i32 1
+
+if.end52:		; preds = %lor.lhs.false39
+	br i1 undef, label %if.then57, label %if.end58
+
+if.then57:		; preds = %if.end52
+	ret i32 1
+
+if.end58:		; preds = %if.end52
+	br i1 undef, label %if.then64, label %if.end65
+
+if.then64:		; preds = %if.end58
+	ret i32 1
+
+if.end65:		; preds = %if.end58
+	br i1 undef, label %if.then71, label %if.end72
+
+if.then71:		; preds = %if.end65
+	ret i32 1
+
+if.end72:		; preds = %if.end65
+	br i1 undef, label %if.then83, label %if.end84
+
+if.then83:		; preds = %if.end72
+	ret i32 1
+
+if.end84:		; preds = %if.end72
+	br i1 undef, label %if.then101, label %if.end102
+
+if.then101:		; preds = %if.end84
+	ret i32 1
+
+if.end102:		; preds = %if.end84
+	br i1 undef, label %if.then113, label %if.end114
+
+if.then113:		; preds = %if.end102
+	ret i32 1
+
+if.end114:		; preds = %if.end102
+	br i1 undef, label %if.then209, label %if.end210
+
+if.then209:		; preds = %if.end114
+	ret i32 1
+
+if.end210:		; preds = %if.end114
+	br i1 undef, label %if.then219, label %if.end220
+
+if.then219:		; preds = %if.end210
+	ret i32 1
+
+if.end220:		; preds = %if.end210
+	br i1 undef, label %if.end243, label %lor.lhs.false230
+
+lor.lhs.false230:		; preds = %if.end220
+	unreachable
+
+if.end243:		; preds = %if.end220
+	br i1 undef, label %if.then249, label %if.end250
+
+if.then249:		; preds = %if.end243
+	ret i32 1
+
+if.end250:		; preds = %if.end243
+	br i1 undef, label %if.end261, label %if.then260
+
+if.then260:		; preds = %if.end250
+	ret i32 1
+
+if.end261:		; preds = %if.end250
+	br i1 undef, label %if.then270, label %if.end271
+
+if.then270:		; preds = %if.end261
+	ret i32 1
+
+if.end271:		; preds = %if.end261	; the only block with real computation — the inline asm below is what triggered the assert
+	%call.i = call i32 @arc4random() nounwind		; <i32> [#uses=1]
+	%rem.i = urem i32 %call.i, 16383		; <i32> [#uses=1]	; random value in [0, 16382]
+	%rem1.i = trunc i32 %rem.i to i16		; <i16> [#uses=1]
+	%conv2.i = or i16 %rem1.i, -16384		; <i16> [#uses=1]	; set the top two bits of the i16
+	%0 = call i16 asm "xchgb ${0:h}, ${0:b}", "=Q,0,~{dirflag},~{fpsr},~{flags}"(i16 %conv2.i) nounwind		; <i16> [#uses=1]	; swaps the low/high bytes of the i16; tied "=Q,0" operand is the two-address constraint under test
+	store i16 %0, i16* undef
+	%call281 = call i32 @bind(i32 undef, %struct.sockaddr* undef, i32 16) nounwind		; <i32> [#uses=0]
+	unreachable
+}
+
+declare i32 @bind(i32, %struct.sockaddr*, i32)
+
+declare i32 @arc4random()
diff --git a/test/CodeGen/X86/2009-07-07-SplitICmp.ll b/test/CodeGen/X86/2009-07-07-SplitICmp.ll
new file mode 100644
index 0000000..eb9378b
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-07-SplitICmp.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 -disable-mmx
+
+define void @test2(<2 x i32> %A, <2 x i32> %B, <2 x i32>* %C) nounwind {		; vector icmp on an illegal <2 x i32> type must be split/legalized without crashing (-disable-mmx)
+       %D = icmp sgt <2 x i32> %A, %B
+       %E = zext <2 x i1> %D to <2 x i32>		; widen the i1 compare results back to i32 lanes
+       store <2 x i32> %E, <2 x i32>* %C
+       ret void
+}
diff --git a/test/CodeGen/X86/2009-07-09-ExtractBoolFromVector.ll b/test/CodeGen/X86/2009-07-09-ExtractBoolFromVector.ll
new file mode 100644
index 0000000..0fdfdcb
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-09-ExtractBoolFromVector.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86
+; PR3037
+
+define void @entry(<4 x i8>* %dest) {		; PR3037: extractelement of an i1 from a <4 x i1> must survive legalization
+	%1 = xor <4 x i1> zeroinitializer, < i1 true, i1 true, i1 true, i1 true >		; all-true vector built via xor with zero
+	%2 = extractelement <4 x i1> %1, i32 3		; the bool-from-vector extraction under test
+	%3 = zext i1 %2 to i8
+	%4 = insertelement <4 x i8> zeroinitializer, i8 %3, i32 3
+	store <4 x i8> %4, <4 x i8>* %dest, align 1
+	ret void
+}
diff --git a/test/CodeGen/X86/2009-07-15-CoalescerBug.ll b/test/CodeGen/X86/2009-07-15-CoalescerBug.ll
new file mode 100644
index 0000000..eabaf77
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-15-CoalescerBug.ll
@@ -0,0 +1,958 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10
+
+	%struct.ANY = type { i8* }
+	%struct.AV = type { %struct.XPVAV*, i32, i32 }
+	%struct.CLONE_PARAMS = type { %struct.AV*, i64, %struct.PerlInterpreter* }
+	%struct.CV = type { %struct.XPVCV*, i32, i32 }
+	%struct.DIR = type { i32, i64, i64, i8*, i32, i64, i64, i32, %struct.__darwin_pthread_mutex_t, %struct._telldir* }
+	%struct.GP = type { %struct.SV*, i32, %struct.io*, %struct.CV*, %struct.AV*, %struct.HV*, %struct.GV*, %struct.CV*, i32, i32, i32, i8* }
+	%struct.GV = type { %struct.XPVGV*, i32, i32 }
+	%struct.HE = type { %struct.HE*, %struct.HEK*, %struct.SV* }
+	%struct.HEK = type { i32, i32, [1 x i8] }
+	%struct.HV = type { %struct.XPVHV*, i32, i32 }
+	%struct.MAGIC = type { %struct.MAGIC*, %struct.MGVTBL*, i16, i8, i8, %struct.SV*, i8*, i32 }
+	%struct.MGVTBL = type { i32 (%struct.SV*, %struct.MAGIC*)*, i32 (%struct.SV*, %struct.MAGIC*)*, i32 (%struct.SV*, %struct.MAGIC*)*, i32 (%struct.SV*, %struct.MAGIC*)*, i32 (%struct.SV*, %struct.MAGIC*)*, i32 (%struct.SV*, %struct.MAGIC*, %struct.SV*, i8*, i32)*, i32 (%struct.MAGIC*, %struct.CLONE_PARAMS*)* }
+	%struct.OP = type { %struct.OP*, %struct.OP*, %struct.OP* ()*, i64, i16, i16, i8, i8 }
+	%struct.PMOP = type { %struct.OP*, %struct.OP*, %struct.OP* ()*, i64, i16, i16, i8, i8, %struct.OP*, %struct.OP*, %struct.OP*, %struct.OP*, %struct.PMOP*, %struct.REGEXP*, i32, i32, i8, %struct.HV* }
+	%struct.PerlIO_funcs = type { i64, i8*, i64, i32, i64 (%struct.PerlIOl**, i8*, %struct.SV*, %struct.PerlIO_funcs*)*, i64 (%struct.PerlIOl**)*, %struct.PerlIOl** (%struct.PerlIO_funcs*, %struct.PerlIO_list_t*, i64, i8*, i32, i32, i32, %struct.PerlIOl**, i32, %struct.SV**)*, i64 (%struct.PerlIOl**)*, %struct.SV* (%struct.PerlIOl**, %struct.CLONE_PARAMS*, i32)*, i64 (%struct.PerlIOl**)*, %struct.PerlIOl** (%struct.PerlIOl**, %struct.PerlIOl**, %struct.CLONE_PARAMS*, i32)*, i64 (%struct.PerlIOl**, i8*, i64)*, i64 (%struct.PerlIOl**, i8*, i64)*, i64 (%struct.PerlIOl**, i8*, i64)*, i64 (%struct.PerlIOl**, i64, i32)*, i64 (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, void (%struct.PerlIOl**)*, void (%struct.PerlIOl**)*, i8* (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, i8* (%struct.PerlIOl**)*, i64 (%struct.PerlIOl**)*, void (%struct.PerlIOl**, i8*, i64)* }
+	%struct.PerlIO_list_t = type { i64, i64, i64, %struct.PerlIO_pair_t* }
+	%struct.PerlIO_pair_t = type { %struct.PerlIO_funcs*, %struct.SV* }
+	%struct.PerlIOl = type { %struct.PerlIOl*, %struct.PerlIO_funcs*, i32 }
+	%struct.PerlInterpreter = type { i8 }
+	%struct.REGEXP = type { i32*, i32*, %struct.regnode*, %struct.reg_substr_data*, i8*, %struct.reg_data*, i8*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, [1 x %struct.regnode] }
+	%struct.SV = type { i8*, i32, i32 }
+	%struct.XPVAV = type { i8*, i64, i64, i64, double, %struct.MAGIC*, %struct.HV*, %struct.SV**, %struct.SV*, i8 }
+	%struct.XPVCV = type { i8*, i64, i64, i64, double, %struct.MAGIC*, %struct.HV*, %struct.HV*, %struct.OP*, %struct.OP*, void (%struct.CV*)*, %struct.ANY, %struct.GV*, i8*, i64, %struct.AV*, %struct.CV*, i16, i32 }
+	%struct.XPVGV = type { i8*, i64, i64, i64, double, %struct.MAGIC*, %struct.HV*, %struct.GP*, i8*, i64, %struct.HV*, i8 }
+	%struct.XPVHV = type { i8*, i64, i64, i64, double, %struct.MAGIC*, %struct.HV*, i32, %struct.HE*, %struct.PMOP*, i8* }
+	%struct.XPVIO = type { i8*, i64, i64, i64, double, %struct.MAGIC*, %struct.HV*, %struct.PerlIOl**, %struct.PerlIOl**, %struct.anon, i64, i64, i64, i64, i8*, %struct.GV*, i8*, %struct.GV*, i8*, %struct.GV*, i16, i8, i8 }
+	%struct.__darwin_pthread_mutex_t = type { i64, [56 x i8] }
+	%struct._telldir = type opaque
+	%struct.anon = type { %struct.DIR* }
+	%struct.io = type { %struct.XPVIO*, i32, i32 }
+	%struct.reg_data = type { i32, i8*, [1 x i8*] }
+	%struct.reg_substr_data = type { [3 x %struct.reg_substr_datum] }
+	%struct.reg_substr_datum = type { i32, i32, %struct.SV*, %struct.SV* }
+	%struct.regnode = type { i8, i8, i16 }
+
+define i32 @Perl_yylex() nounwind ssp {
+entry:
+	br i1 undef, label %bb21, label %bb
+
+bb:		; preds = %entry
+	unreachable
+
+bb21:		; preds = %entry
+	switch i32 undef, label %bb103 [
+		i32 1, label %bb101
+		i32 4, label %bb75
+		i32 6, label %bb68
+		i32 7, label %bb67
+		i32 8, label %bb25
+	]
+
+bb25:		; preds = %bb21
+	ret i32 41
+
+bb67:		; preds = %bb21
+	ret i32 40
+
+bb68:		; preds = %bb21
+	br i1 undef, label %bb69, label %bb70
+
+bb69:		; preds = %bb68
+	ret i32 undef
+
+bb70:		; preds = %bb68
+	unreachable
+
+bb75:		; preds = %bb21
+	unreachable
+
+bb101:		; preds = %bb21
+	unreachable
+
+bb103:		; preds = %bb21
+	switch i32 undef, label %bb104 [
+		i32 0, label %bb126
+		i32 4, label %fake_eof
+		i32 26, label %fake_eof
+		i32 34, label %bb1423
+		i32 36, label %bb1050
+		i32 37, label %bb534
+		i32 39, label %bb1412
+		i32 41, label %bb643
+		i32 44, label %bb544
+		i32 48, label %bb1406
+		i32 49, label %bb1406
+		i32 50, label %bb1406
+		i32 51, label %bb1406
+		i32 52, label %bb1406
+		i32 53, label %bb1406
+		i32 54, label %bb1406
+		i32 55, label %bb1406
+		i32 56, label %bb1406
+		i32 57, label %bb1406
+		i32 59, label %bb639
+		i32 65, label %keylookup
+		i32 66, label %keylookup
+		i32 67, label %keylookup
+		i32 68, label %keylookup
+		i32 69, label %keylookup
+		i32 70, label %keylookup
+		i32 71, label %keylookup
+		i32 72, label %keylookup
+		i32 73, label %keylookup
+		i32 74, label %keylookup
+		i32 75, label %keylookup
+		i32 76, label %keylookup
+		i32 77, label %keylookup
+		i32 78, label %keylookup
+		i32 79, label %keylookup
+		i32 80, label %keylookup
+		i32 81, label %keylookup
+		i32 82, label %keylookup
+		i32 83, label %keylookup
+		i32 84, label %keylookup
+		i32 85, label %keylookup
+		i32 86, label %keylookup
+		i32 87, label %keylookup
+		i32 88, label %keylookup
+		i32 89, label %keylookup
+		i32 90, label %keylookup
+		i32 92, label %bb1455
+		i32 95, label %keylookup
+		i32 96, label %bb1447
+		i32 97, label %keylookup
+		i32 98, label %keylookup
+		i32 99, label %keylookup
+		i32 100, label %keylookup
+		i32 101, label %keylookup
+		i32 102, label %keylookup
+		i32 103, label %keylookup
+		i32 104, label %keylookup
+		i32 105, label %keylookup
+		i32 106, label %keylookup
+		i32 107, label %keylookup
+		i32 108, label %keylookup
+		i32 109, label %keylookup
+		i32 110, label %keylookup
+		i32 111, label %keylookup
+		i32 112, label %keylookup
+		i32 113, label %keylookup
+		i32 114, label %keylookup
+		i32 115, label %keylookup
+		i32 116, label %keylookup
+		i32 117, label %keylookup
+		i32 118, label %keylookup
+		i32 119, label %keylookup
+		i32 120, label %keylookup
+		i32 121, label %keylookup
+		i32 122, label %keylookup
+		i32 126, label %bb544
+	]
+
+bb104:		; preds = %bb103
+	unreachable
+
+bb126:		; preds = %bb103
+	ret i32 0
+
+fake_eof:		; preds = %bb1841, %bb103, %bb103
+	unreachable
+
+bb534:		; preds = %bb103
+	unreachable
+
+bb544:		; preds = %bb103, %bb103
+	ret i32 undef
+
+bb639:		; preds = %bb103
+	unreachable
+
+bb643:		; preds = %bb103
+	unreachable
+
+bb1050:		; preds = %bb103
+	unreachable
+
+bb1406:		; preds = %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103
+	unreachable
+
+bb1412:		; preds = %bb103
+	unreachable
+
+bb1423:		; preds = %bb103
+	unreachable
+
+bb1447:		; preds = %bb103
+	unreachable
+
+bb1455:		; preds = %bb103
+	unreachable
+
+keylookup:		; preds = %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103, %bb103
+	br i1 undef, label %bb1498, label %bb1496
+
+bb1496:		; preds = %keylookup
+	br i1 undef, label %bb1498, label %bb1510.preheader
+
+bb1498:		; preds = %bb1496, %keylookup
+	unreachable
+
+bb1510.preheader:		; preds = %bb1496
+	br i1 undef, label %bb1511, label %bb1518
+
+bb1511:		; preds = %bb1510.preheader
+	br label %bb1518
+
+bb1518:		; preds = %bb1511, %bb1510.preheader
+	switch i32 undef, label %bb741.i4285 [
+		i32 95, label %bb744.i4287
+		i32 115, label %bb852.i4394
+	]
+
+bb741.i4285:		; preds = %bb1518
+	br label %Perl_keyword.exit4735
+
+bb744.i4287:		; preds = %bb1518
+	br label %Perl_keyword.exit4735
+
+bb852.i4394:		; preds = %bb1518
+	br i1 undef, label %bb861.i4404, label %bb856.i4399
+
+bb856.i4399:		; preds = %bb852.i4394
+	br label %Perl_keyword.exit4735
+
+bb861.i4404:		; preds = %bb852.i4394
+	br label %Perl_keyword.exit4735
+
+Perl_keyword.exit4735:		; preds = %bb861.i4404, %bb856.i4399, %bb744.i4287, %bb741.i4285
+	br i1 undef, label %bb1544, label %reserved_word
+
+bb1544:		; preds = %Perl_keyword.exit4735
+	br i1 undef, label %bb1565, label %bb1545
+
+bb1545:		; preds = %bb1544
+	br i1 undef, label %bb1563, label %bb1558
+
+bb1558:		; preds = %bb1545
+	%0 = load %struct.SV** undef		; <%struct.SV*> [#uses=1]
+	%1 = bitcast %struct.SV* %0 to %struct.GV*		; <%struct.GV*> [#uses=5]
+	br i1 undef, label %bb1563, label %bb1559
+
+bb1559:		; preds = %bb1558
+	br i1 undef, label %bb1560, label %bb1563
+
+bb1560:		; preds = %bb1559
+	br i1 undef, label %bb1563, label %bb1561
+
+bb1561:		; preds = %bb1560
+	br i1 undef, label %bb1562, label %bb1563
+
+bb1562:		; preds = %bb1561
+	br label %bb1563
+
+bb1563:		; preds = %bb1562, %bb1561, %bb1560, %bb1559, %bb1558, %bb1545
+	%gv19.3 = phi %struct.GV* [ %1, %bb1562 ], [ undef, %bb1545 ], [ %1, %bb1558 ], [ %1, %bb1559 ], [ %1, %bb1560 ], [ %1, %bb1561 ]		; <%struct.GV*> [#uses=0]
+	br i1 undef, label %bb1565, label %reserved_word
+
+bb1565:		; preds = %bb1563, %bb1544
+	br i1 undef, label %bb1573, label %bb1580
+
+bb1573:		; preds = %bb1565
+	br label %bb1580
+
+bb1580:		; preds = %bb1573, %bb1565
+	br i1 undef, label %bb1595, label %reserved_word
+
+bb1595:		; preds = %bb1580
+	br i1 undef, label %reserved_word, label %bb1597
+
+bb1597:		; preds = %bb1595
+	br i1 undef, label %reserved_word, label %bb1602
+
+bb1602:		; preds = %bb1597
+	br label %reserved_word
+
+reserved_word:		; preds = %bb1602, %bb1597, %bb1595, %bb1580, %bb1563, %Perl_keyword.exit4735
+	switch i32 undef, label %bb2012 [
+		i32 1, label %bb1819
+		i32 2, label %bb1830
+		i32 4, label %bb1841
+		i32 5, label %bb1841
+		i32 8, label %bb1880
+		i32 14, label %bb1894
+		i32 16, label %bb1895
+		i32 17, label %bb1896
+		i32 18, label %bb1897
+		i32 19, label %bb1898
+		i32 20, label %bb1899
+		i32 22, label %bb1906
+		i32 23, label %bb1928
+		i32 24, label %bb2555
+		i32 26, label %bb1929
+		i32 31, label %bb1921
+		i32 32, label %bb1930
+		i32 33, label %bb1905
+		i32 34, label %bb1936
+		i32 35, label %bb1927
+		i32 37, label %bb1962
+		i32 40, label %bb1951
+		i32 41, label %bb1946
+		i32 42, label %bb1968
+		i32 44, label %bb1969
+		i32 45, label %bb1970
+		i32 46, label %bb2011
+		i32 47, label %bb2006
+		i32 48, label %bb2007
+		i32 49, label %bb2009
+		i32 50, label %bb2010
+		i32 51, label %bb2008
+		i32 53, label %bb1971
+		i32 54, label %bb1982
+		i32 55, label %bb2005
+		i32 59, label %bb2081
+		i32 61, label %bb2087
+		i32 64, label %bb2080
+		i32 65, label %really_sub
+		i32 66, label %bb2079
+		i32 67, label %bb2089
+		i32 69, label %bb2155
+		i32 72, label %bb2137
+		i32 74, label %bb2138
+		i32 75, label %bb2166
+		i32 76, label %bb2144
+		i32 78, label %bb2145
+		i32 81, label %bb2102
+		i32 82, label %bb2108
+		i32 84, label %bb2114
+		i32 85, label %bb2115
+		i32 86, label %bb2116
+		i32 89, label %bb2146
+		i32 90, label %bb2147
+		i32 91, label %bb2148
+		i32 93, label %bb2154
+		i32 94, label %bb2167
+		i32 96, label %bb2091
+		i32 97, label %bb2090
+		i32 98, label %bb2088
+		i32 100, label %bb2173
+		i32 101, label %bb2174
+		i32 102, label %bb2175
+		i32 103, label %bb2180
+		i32 104, label %bb2181
+		i32 106, label %bb2187
+		i32 107, label %bb2188
+		i32 110, label %bb2206
+		i32 112, label %bb2217
+		i32 113, label %bb2218
+		i32 114, label %bb2199
+		i32 119, label %bb2205
+		i32 120, label %bb2229
+		i32 121, label %bb2233
+		i32 122, label %bb2234
+		i32 123, label %bb2235
+		i32 124, label %bb2236
+		i32 125, label %bb2237
+		i32 126, label %bb2238
+		i32 127, label %bb2239
+		i32 128, label %bb2268
+		i32 129, label %bb2267
+		i32 133, label %bb2276
+		i32 134, label %bb2348
+		i32 135, label %bb2337
+		i32 137, label %bb2239
+		i32 138, label %bb2367
+		i32 139, label %bb2368
+		i32 140, label %bb2369
+		i32 141, label %bb2357
+		i32 143, label %bb2349
+		i32 144, label %bb2350
+		i32 146, label %bb2356
+		i32 147, label %bb2370
+		i32 148, label %bb2445
+		i32 149, label %bb2453
+		i32 151, label %bb2381
+		i32 152, label %bb2457
+		i32 154, label %bb2516
+		i32 156, label %bb2522
+		i32 158, label %bb2527
+		i32 159, label %bb2537
+		i32 160, label %bb2503
+		i32 162, label %bb2504
+		i32 163, label %bb2464
+		i32 165, label %bb2463
+		i32 166, label %bb2538
+		i32 168, label %bb2515
+		i32 170, label %bb2549
+		i32 172, label %bb2566
+		i32 173, label %bb2595
+		i32 174, label %bb2565
+		i32 175, label %bb2567
+		i32 176, label %bb2568
+		i32 177, label %bb2569
+		i32 178, label %bb2570
+		i32 179, label %bb2594
+		i32 182, label %bb2571
+		i32 183, label %bb2572
+		i32 185, label %bb2593
+		i32 186, label %bb2583
+		i32 187, label %bb2596
+		i32 189, label %bb2602
+		i32 190, label %bb2603
+		i32 191, label %bb2604
+		i32 192, label %bb2605
+		i32 193, label %bb2606
+		i32 196, label %bb2617
+		i32 197, label %bb2618
+		i32 198, label %bb2619
+		i32 199, label %bb2627
+		i32 200, label %bb2625
+		i32 201, label %bb2626
+		i32 206, label %really_sub
+		i32 207, label %bb2648
+		i32 208, label %bb2738
+		i32 209, label %bb2739
+		i32 210, label %bb2740
+		i32 211, label %bb2742
+		i32 212, label %bb2741
+		i32 213, label %bb2737
+		i32 214, label %bb2743
+		i32 217, label %bb2758
+		i32 219, label %bb2764
+		i32 220, label %bb2765
+		i32 221, label %bb2744
+		i32 222, label %bb2766
+		i32 226, label %bb2785
+		i32 227, label %bb2783
+		i32 228, label %bb2784
+		i32 229, label %bb2790
+		i32 230, label %bb2797
+		i32 232, label %bb2782
+		i32 234, label %bb2791
+		i32 236, label %bb2815
+		i32 237, label %bb2818
+		i32 238, label %bb2819
+		i32 239, label %bb2820
+		i32 240, label %bb2817
+		i32 241, label %bb2816
+		i32 242, label %bb2821
+		i32 243, label %bb2826
+		i32 244, label %bb2829
+		i32 245, label %bb2830
+	]
+
+bb1819:		; preds = %reserved_word
+	unreachable
+
+bb1830:		; preds = %reserved_word
+	unreachable
+
+bb1841:		; preds = %reserved_word, %reserved_word
+	br i1 undef, label %fake_eof, label %bb1842
+
+bb1842:		; preds = %bb1841
+	unreachable
+
+bb1880:		; preds = %reserved_word
+	unreachable
+
+bb1894:		; preds = %reserved_word
+	ret i32 undef
+
+bb1895:		; preds = %reserved_word
+	ret i32 301
+
+bb1896:		; preds = %reserved_word
+	ret i32 undef
+
+bb1897:		; preds = %reserved_word
+	ret i32 undef
+
+bb1898:		; preds = %reserved_word
+	ret i32 undef
+
+bb1899:		; preds = %reserved_word
+	ret i32 undef
+
+bb1905:		; preds = %reserved_word
+	ret i32 278
+
+bb1906:		; preds = %reserved_word
+	unreachable
+
+bb1921:		; preds = %reserved_word
+	ret i32 288
+
+bb1927:		; preds = %reserved_word
+	ret i32 undef
+
+bb1928:		; preds = %reserved_word
+	ret i32 undef
+
+bb1929:		; preds = %reserved_word
+	ret i32 undef
+
+bb1930:		; preds = %reserved_word
+	ret i32 undef
+
+bb1936:		; preds = %reserved_word
+	br i1 undef, label %bb2834, label %bb1937
+
+bb1937:		; preds = %bb1936
+	ret i32 undef
+
+bb1946:		; preds = %reserved_word
+	unreachable
+
+bb1951:		; preds = %reserved_word
+	ret i32 undef
+
+bb1962:		; preds = %reserved_word
+	ret i32 undef
+
+bb1968:		; preds = %reserved_word
+	ret i32 280
+
+bb1969:		; preds = %reserved_word
+	ret i32 276
+
+bb1970:		; preds = %reserved_word
+	ret i32 277
+
+bb1971:		; preds = %reserved_word
+	ret i32 288
+
+bb1982:		; preds = %reserved_word
+	br i1 undef, label %bb2834, label %bb1986
+
+bb1986:		; preds = %bb1982
+	ret i32 undef
+
+bb2005:		; preds = %reserved_word
+	ret i32 undef
+
+bb2006:		; preds = %reserved_word
+	ret i32 282
+
+bb2007:		; preds = %reserved_word
+	ret i32 282
+
+bb2008:		; preds = %reserved_word
+	ret i32 282
+
+bb2009:		; preds = %reserved_word
+	ret i32 282
+
+bb2010:		; preds = %reserved_word
+	ret i32 282
+
+bb2011:		; preds = %reserved_word
+	ret i32 282
+
+bb2012:		; preds = %reserved_word
+	unreachable
+
+bb2079:		; preds = %reserved_word
+	ret i32 undef
+
+bb2080:		; preds = %reserved_word
+	ret i32 282
+
+bb2081:		; preds = %reserved_word
+	ret i32 undef
+
+bb2087:		; preds = %reserved_word
+	ret i32 undef
+
+bb2088:		; preds = %reserved_word
+	ret i32 287
+
+bb2089:		; preds = %reserved_word
+	ret i32 287
+
+bb2090:		; preds = %reserved_word
+	ret i32 undef
+
+bb2091:		; preds = %reserved_word
+	ret i32 280
+
+bb2102:		; preds = %reserved_word
+	ret i32 282
+
+bb2108:		; preds = %reserved_word
+	ret i32 undef
+
+bb2114:		; preds = %reserved_word
+	ret i32 undef
+
+bb2115:		; preds = %reserved_word
+	ret i32 282
+
+bb2116:		; preds = %reserved_word
+	ret i32 282
+
+bb2137:		; preds = %reserved_word
+	ret i32 undef
+
+bb2138:		; preds = %reserved_word
+	ret i32 282
+
+bb2144:		; preds = %reserved_word
+	ret i32 undef
+
+bb2145:		; preds = %reserved_word
+	ret i32 282
+
+bb2146:		; preds = %reserved_word
+	ret i32 undef
+
+bb2147:		; preds = %reserved_word
+	ret i32 undef
+
+bb2148:		; preds = %reserved_word
+	ret i32 282
+
+bb2154:		; preds = %reserved_word
+	ret i32 undef
+
+bb2155:		; preds = %reserved_word
+	ret i32 282
+
+bb2166:		; preds = %reserved_word
+	ret i32 282
+
+bb2167:		; preds = %reserved_word
+	ret i32 undef
+
+bb2173:		; preds = %reserved_word
+	ret i32 274
+
+bb2174:		; preds = %reserved_word
+	ret i32 undef
+
+bb2175:		; preds = %reserved_word
+	br i1 undef, label %bb2834, label %bb2176
+
+bb2176:		; preds = %bb2175
+	ret i32 undef
+
+bb2180:		; preds = %reserved_word
+	ret i32 undef
+
+bb2181:		; preds = %reserved_word
+	ret i32 undef
+
+bb2187:		; preds = %reserved_word
+	ret i32 undef
+
+bb2188:		; preds = %reserved_word
+	ret i32 280
+
+bb2199:		; preds = %reserved_word
+	ret i32 295
+
+bb2205:		; preds = %reserved_word
+	ret i32 287
+
+bb2206:		; preds = %reserved_word
+	ret i32 287
+
+bb2217:		; preds = %reserved_word
+	ret i32 undef
+
+bb2218:		; preds = %reserved_word
+	ret i32 undef
+
+bb2229:		; preds = %reserved_word
+	unreachable
+
+bb2233:		; preds = %reserved_word
+	ret i32 undef
+
+bb2234:		; preds = %reserved_word
+	ret i32 undef
+
+bb2235:		; preds = %reserved_word
+	ret i32 undef
+
+bb2236:		; preds = %reserved_word
+	ret i32 undef
+
+bb2237:		; preds = %reserved_word
+	ret i32 undef
+
+bb2238:		; preds = %reserved_word
+	ret i32 undef
+
+bb2239:		; preds = %reserved_word, %reserved_word
+	unreachable
+
+bb2267:		; preds = %reserved_word
+	ret i32 280
+
+bb2268:		; preds = %reserved_word
+	ret i32 288
+
+bb2276:		; preds = %reserved_word
+	unreachable
+
+bb2337:		; preds = %reserved_word
+	ret i32 300
+
+bb2348:		; preds = %reserved_word
+	ret i32 undef
+
+bb2349:		; preds = %reserved_word
+	ret i32 undef
+
+bb2350:		; preds = %reserved_word
+	ret i32 undef
+
+bb2356:		; preds = %reserved_word
+	ret i32 undef
+
+bb2357:		; preds = %reserved_word
+	br i1 undef, label %bb2834, label %bb2358
+
+bb2358:		; preds = %bb2357
+	ret i32 undef
+
+bb2367:		; preds = %reserved_word
+	ret i32 undef
+
+bb2368:		; preds = %reserved_word
+	ret i32 270
+
+bb2369:		; preds = %reserved_word
+	ret i32 undef
+
+bb2370:		; preds = %reserved_word
+	unreachable
+
+bb2381:		; preds = %reserved_word
+	unreachable
+
+bb2445:		; preds = %reserved_word
+	unreachable
+
+bb2453:		; preds = %reserved_word
+	unreachable
+
+bb2457:		; preds = %reserved_word
+	unreachable
+
+bb2463:		; preds = %reserved_word
+	ret i32 286
+
+bb2464:		; preds = %reserved_word
+	unreachable
+
+bb2503:		; preds = %reserved_word
+	ret i32 280
+
+bb2504:		; preds = %reserved_word
+	ret i32 undef
+
+bb2515:		; preds = %reserved_word
+	ret i32 undef
+
+bb2516:		; preds = %reserved_word
+	ret i32 undef
+
+bb2522:		; preds = %reserved_word
+	unreachable
+
+bb2527:		; preds = %reserved_word
+	unreachable
+
+bb2537:		; preds = %reserved_word
+	ret i32 undef
+
+bb2538:		; preds = %reserved_word
+	ret i32 undef
+
+bb2549:		; preds = %reserved_word
+	unreachable
+
+bb2555:		; preds = %reserved_word
+	br i1 undef, label %bb2834, label %bb2556
+
+bb2556:		; preds = %bb2555
+	ret i32 undef
+
+bb2565:		; preds = %reserved_word
+	ret i32 undef
+
+bb2566:		; preds = %reserved_word
+	ret i32 undef
+
+bb2567:		; preds = %reserved_word
+	ret i32 undef
+
+bb2568:		; preds = %reserved_word
+	ret i32 undef
+
+bb2569:		; preds = %reserved_word
+	ret i32 undef
+
+bb2570:		; preds = %reserved_word
+	ret i32 undef
+
+bb2571:		; preds = %reserved_word
+	ret i32 undef
+
+bb2572:		; preds = %reserved_word
+	ret i32 undef
+
+bb2583:		; preds = %reserved_word
+	br i1 undef, label %bb2834, label %bb2584
+
+bb2584:		; preds = %bb2583
+	ret i32 undef
+
+bb2593:		; preds = %reserved_word
+	ret i32 282
+
+bb2594:		; preds = %reserved_word
+	ret i32 282
+
+bb2595:		; preds = %reserved_word
+	ret i32 undef
+
+bb2596:		; preds = %reserved_word
+	ret i32 undef
+
+bb2602:		; preds = %reserved_word
+	ret i32 undef
+
+bb2603:		; preds = %reserved_word
+	ret i32 undef
+
+bb2604:		; preds = %reserved_word
+	ret i32 undef
+
+bb2605:		; preds = %reserved_word
+	ret i32 undef
+
+bb2606:		; preds = %reserved_word
+	ret i32 undef
+
+bb2617:		; preds = %reserved_word
+	ret i32 undef
+
+bb2618:		; preds = %reserved_word
+	ret i32 undef
+
+bb2619:		; preds = %reserved_word
+	unreachable
+
+bb2625:		; preds = %reserved_word
+	ret i32 undef
+
+bb2626:		; preds = %reserved_word
+	ret i32 undef
+
+bb2627:		; preds = %reserved_word
+	ret i32 undef
+
+bb2648:		; preds = %reserved_word
+	ret i32 undef
+
+really_sub:		; preds = %reserved_word, %reserved_word
+	unreachable
+
+bb2737:		; preds = %reserved_word
+	ret i32 undef
+
+bb2738:		; preds = %reserved_word
+	ret i32 undef
+
+bb2739:		; preds = %reserved_word
+	ret i32 undef
+
+bb2740:		; preds = %reserved_word
+	ret i32 undef
+
+bb2741:		; preds = %reserved_word
+	ret i32 undef
+
+bb2742:		; preds = %reserved_word
+	ret i32 undef
+
+bb2743:		; preds = %reserved_word
+	ret i32 undef
+
+bb2744:		; preds = %reserved_word
+	unreachable
+
+bb2758:		; preds = %reserved_word
+	ret i32 undef
+
+bb2764:		; preds = %reserved_word
+	ret i32 282
+
+bb2765:		; preds = %reserved_word
+	ret i32 282
+
+bb2766:		; preds = %reserved_word
+	ret i32 undef
+
+bb2782:		; preds = %reserved_word
+	ret i32 273
+
+bb2783:		; preds = %reserved_word
+	ret i32 275
+
+bb2784:		; preds = %reserved_word
+	ret i32 undef
+
+bb2785:		; preds = %reserved_word
+	br i1 undef, label %bb2834, label %bb2786
+
+bb2786:		; preds = %bb2785
+	ret i32 undef
+
+bb2790:		; preds = %reserved_word
+	ret i32 undef
+
+bb2791:		; preds = %reserved_word
+	ret i32 undef
+
+bb2797:		; preds = %reserved_word
+	ret i32 undef
+
+bb2815:		; preds = %reserved_word
+	ret i32 undef
+
+bb2816:		; preds = %reserved_word
+	ret i32 272
+
+bb2817:		; preds = %reserved_word
+	ret i32 undef
+
+bb2818:		; preds = %reserved_word
+	ret i32 282
+
+bb2819:		; preds = %reserved_word
+	ret i32 undef
+
+bb2820:		; preds = %reserved_word
+	ret i32 282
+
+bb2821:		; preds = %reserved_word
+	unreachable
+
+bb2826:		; preds = %reserved_word
+	unreachable
+
+bb2829:		; preds = %reserved_word
+	ret i32 300
+
+bb2830:		; preds = %reserved_word
+	unreachable
+
+bb2834:		; preds = %bb2785, %bb2583, %bb2555, %bb2357, %bb2175, %bb1982, %bb1936
+	ret i32 283
+}
diff --git a/test/CodeGen/X86/2009-07-16-CoalescerBug.ll b/test/CodeGen/X86/2009-07-16-CoalescerBug.ll
new file mode 100644
index 0000000..48af440
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-16-CoalescerBug.ll
@@ -0,0 +1,210 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10
+; rdar://7059496
+
+	%struct.brinfo = type <{ %struct.brinfo*, %struct.brinfo*, i8*, i32, i32, i32, i8, i8, i8, i8 }>
+	%struct.cadata = type <{ i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i32, i32, %struct.cmatcher*, i8*, i8*, i8*, i8*, i8*, i8*, i32, i8, i8, i8, i8 }>
+	%struct.cline = type <{ %struct.cline*, i32, i8, i8, i8, i8, i8*, i32, i8, i8, i8, i8, i8*, i32, i8, i8, i8, i8, i8*, i32, i32, %struct.cline*, %struct.cline*, i32, i32 }>
+	%struct.cmatch = type <{ i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i32, i8, i8, i8, i8, i32*, i32*, i8*, i8*, i32, i32, i32, i32, i16, i8, i8, i16, i8, i8 }>
+	%struct.cmatcher = type <{ i32, i8, i8, i8, i8, %struct.cmatcher*, i32, i8, i8, i8, i8, %struct.cpattern*, i32, i8, i8, i8, i8, %struct.cpattern*, i32, i8, i8, i8, i8, %struct.cpattern*, i32, i8, i8, i8, i8, %struct.cpattern*, i32, i8, i8, i8, i8 }>
+	%struct.cpattern = type <{ %struct.cpattern*, i32, i8, i8, i8, i8, %union.anon }>
+	%struct.patprog = type <{ i64, i64, i64, i64, i32, i32, i32, i32, i8, i8, i8, i8, i8, i8, i8, i8 }>
+	%union.anon = type <{ [8 x i8] }>
+
+define i32 @addmatches(%struct.cadata* %dat, i8** nocapture %argv) nounwind ssp {
+entry:
+	br i1 undef, label %if.else, label %if.then91
+
+if.then91:		; preds = %entry
+	br label %if.end96
+
+if.else:		; preds = %entry
+	br label %if.end96
+
+if.end96:		; preds = %if.else, %if.then91
+	br i1 undef, label %lor.lhs.false, label %if.then105
+
+lor.lhs.false:		; preds = %if.end96
+	br i1 undef, label %if.else139, label %if.then105
+
+if.then105:		; preds = %lor.lhs.false, %if.end96
+	unreachable
+
+if.else139:		; preds = %lor.lhs.false
+	br i1 undef, label %land.end, label %land.rhs
+
+land.rhs:		; preds = %if.else139
+	unreachable
+
+land.end:		; preds = %if.else139
+	br i1 undef, label %land.lhs.true285, label %if.then315
+
+land.lhs.true285:		; preds = %land.end
+	br i1 undef, label %if.end324, label %if.then322
+
+if.then315:		; preds = %land.end
+	unreachable
+
+if.then322:		; preds = %land.lhs.true285
+	unreachable
+
+if.end324:		; preds = %land.lhs.true285
+	br i1 undef, label %if.end384, label %if.then358
+
+if.then358:		; preds = %if.end324
+	unreachable
+
+if.end384:		; preds = %if.end324
+	br i1 undef, label %if.end394, label %land.lhs.true387
+
+land.lhs.true387:		; preds = %if.end384
+	unreachable
+
+if.end394:		; preds = %if.end384
+	br i1 undef, label %if.end498, label %land.lhs.true399
+
+land.lhs.true399:		; preds = %if.end394
+	br i1 undef, label %if.end498, label %if.then406
+
+if.then406:		; preds = %land.lhs.true399
+	unreachable
+
+if.end498:		; preds = %land.lhs.true399, %if.end394
+	br i1 undef, label %if.end514, label %if.then503
+
+if.then503:		; preds = %if.end498
+	unreachable
+
+if.end514:		; preds = %if.end498
+	br i1 undef, label %if.end585, label %if.then520
+
+if.then520:		; preds = %if.end514
+	br i1 undef, label %lor.lhs.false547, label %if.then560
+
+lor.lhs.false547:		; preds = %if.then520
+	unreachable
+
+if.then560:		; preds = %if.then520
+	br i1 undef, label %if.end585, label %land.lhs.true566
+
+land.lhs.true566:		; preds = %if.then560
+	br i1 undef, label %if.end585, label %if.then573
+
+if.then573:		; preds = %land.lhs.true566
+	unreachable
+
+if.end585:		; preds = %land.lhs.true566, %if.then560, %if.end514
+	br i1 undef, label %cond.true593, label %cond.false599
+
+cond.true593:		; preds = %if.end585
+	unreachable
+
+cond.false599:		; preds = %if.end585
+	br i1 undef, label %if.end647, label %if.then621
+
+if.then621:		; preds = %cond.false599
+	br i1 undef, label %cond.true624, label %cond.false630
+
+cond.true624:		; preds = %if.then621
+	br label %if.end647
+
+cond.false630:		; preds = %if.then621
+	unreachable
+
+if.end647:		; preds = %cond.true624, %cond.false599
+	br i1 undef, label %if.end723, label %if.then701
+
+if.then701:		; preds = %if.end647
+	br label %if.end723
+
+if.end723:		; preds = %if.then701, %if.end647
+	br i1 undef, label %if.else1090, label %if.then729
+
+if.then729:		; preds = %if.end723
+	br i1 undef, label %if.end887, label %if.then812
+
+if.then812:		; preds = %if.then729
+	unreachable
+
+if.end887:		; preds = %if.then729
+	br i1 undef, label %if.end972, label %if.then893
+
+if.then893:		; preds = %if.end887
+	br i1 undef, label %if.end919, label %if.then903
+
+if.then903:		; preds = %if.then893
+	unreachable
+
+if.end919:		; preds = %if.then893
+	br label %if.end972
+
+if.end972:		; preds = %if.end919, %if.end887
+	%sline.0 = phi %struct.cline* [ undef, %if.end919 ], [ null, %if.end887 ]		; <%struct.cline*> [#uses=5]
+	%bcs.0 = phi i32 [ undef, %if.end919 ], [ 0, %if.end887 ]		; <i32> [#uses=5]
+	br i1 undef, label %if.end1146, label %land.lhs.true975
+
+land.lhs.true975:		; preds = %if.end972
+	br i1 undef, label %if.end1146, label %if.then980
+
+if.then980:		; preds = %land.lhs.true975
+	br i1 undef, label %cond.false1025, label %cond.false1004
+
+cond.false1004:		; preds = %if.then980
+	unreachable
+
+cond.false1025:		; preds = %if.then980
+	br i1 undef, label %if.end1146, label %if.then1071
+
+if.then1071:		; preds = %cond.false1025
+	br i1 undef, label %if.then1074, label %if.end1081
+
+if.then1074:		; preds = %if.then1071
+	br label %if.end1081
+
+if.end1081:		; preds = %if.then1074, %if.then1071
+	%call1083 = call %struct.patprog* @patcompile(i8* undef, i32 0, i8** null) nounwind ssp		; <%struct.patprog*> [#uses=2]
+	br i1 undef, label %if.end1146, label %if.then1086
+
+if.then1086:		; preds = %if.end1081
+	br label %if.end1146
+
+if.else1090:		; preds = %if.end723
+	br i1 undef, label %if.end1146, label %land.lhs.true1093
+
+land.lhs.true1093:		; preds = %if.else1090
+	br i1 undef, label %if.end1146, label %if.then1098
+
+if.then1098:		; preds = %land.lhs.true1093
+	unreachable
+
+if.end1146:		; preds = %land.lhs.true1093, %if.else1090, %if.then1086, %if.end1081, %cond.false1025, %land.lhs.true975, %if.end972
+	%cp.0 = phi %struct.patprog* [ %call1083, %if.then1086 ], [ null, %if.end972 ], [ null, %land.lhs.true975 ], [ null, %cond.false1025 ], [ %call1083, %if.end1081 ], [ null, %if.else1090 ], [ null, %land.lhs.true1093 ]		; <%struct.patprog*> [#uses=1]
+	%sline.1 = phi %struct.cline* [ %sline.0, %if.then1086 ], [ %sline.0, %if.end972 ], [ %sline.0, %land.lhs.true975 ], [ %sline.0, %cond.false1025 ], [ %sline.0, %if.end1081 ], [ null, %if.else1090 ], [ null, %land.lhs.true1093 ]		; <%struct.cline*> [#uses=1]
+	%bcs.1 = phi i32 [ %bcs.0, %if.then1086 ], [ %bcs.0, %if.end972 ], [ %bcs.0, %land.lhs.true975 ], [ %bcs.0, %cond.false1025 ], [ %bcs.0, %if.end1081 ], [ 0, %if.else1090 ], [ 0, %land.lhs.true1093 ]		; <i32> [#uses=1]
+	br i1 undef, label %if.end1307, label %do.body1270
+
+do.body1270:		; preds = %if.end1146
+	unreachable
+
+if.end1307:		; preds = %if.end1146
+	br i1 undef, label %if.end1318, label %if.then1312
+
+if.then1312:		; preds = %if.end1307
+	unreachable
+
+if.end1318:		; preds = %if.end1307
+	br i1 undef, label %for.cond1330.preheader, label %if.then1323
+
+if.then1323:		; preds = %if.end1318
+	unreachable
+
+for.cond1330.preheader:		; preds = %if.end1318
+	%call1587 = call i8* @comp_match(i8* undef, i8* undef, i8* undef, %struct.patprog* %cp.0, %struct.cline** undef, i32 0, %struct.brinfo** undef, i32 0, %struct.brinfo** undef, i32 %bcs.1, i32* undef) nounwind ssp		; <i8*> [#uses=0]
+	%call1667 = call %struct.cmatch* @add_match_data(i32 0, i8* undef, i8* undef, %struct.cline* undef, i8* undef, i8* null, i8* undef, i8* undef, i8* undef, i8* undef, %struct.cline* null, i8* undef, %struct.cline* %sline.1, i8* undef, i32 undef, i32 undef) ssp		; <%struct.cmatch*> [#uses=0]
+	unreachable
+}
+
+declare %struct.patprog* @patcompile(i8*, i32, i8**) ssp
+
+declare i8* @comp_match(i8*, i8*, i8*, %struct.patprog*, %struct.cline**, i32, %struct.brinfo**, i32, %struct.brinfo**, i32, i32*) ssp
+
+declare %struct.cmatch* @add_match_data(i32, i8*, i8*, %struct.cline*, i8*, i8*, i8*, i8*, i8*, i8*, %struct.cline*, i8*, %struct.cline*, i8*, i32, i32) nounwind ssp
diff --git a/test/CodeGen/X86/2009-07-16-LoadFoldingBug.ll b/test/CodeGen/X86/2009-07-16-LoadFoldingBug.ll
new file mode 100644
index 0000000..e21c892
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-16-LoadFoldingBug.ll
@@ -0,0 +1,102 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
+
+; CHECK: _foo:
+; CHECK: pavgw LCPI1_4(%rip)
+
+; rdar://7057804
+
+define void @foo(i16* %out8x8, i16* %in8x8, i32 %lastrow) optsize ssp {
+entry:
+	%0 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518>, <8 x i16> undef) nounwind readnone		; <<8 x i16>> [#uses=2]
+	%1 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %0, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%2 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%3 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %2, <8 x i16> zeroinitializer) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%tmp.i.i10 = add <8 x i16> %0, %3		; <<8 x i16>> [#uses=1]
+	%4 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> zeroinitializer, <8 x i16> %1) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%5 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i10, <8 x i16> %4) nounwind readnone		; <<8 x i16>> [#uses=3]
+	%6 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %5, <8 x i16> undef) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%7 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518, i16 6518>, <8 x i16> undef) nounwind readnone		; <<8 x i16>> [#uses=2]
+	%8 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %7, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%9 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%10 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %9, <8 x i16> zeroinitializer) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%tmp.i.i8 = add <8 x i16> %7, %10		; <<8 x i16>> [#uses=1]
+	%11 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %8) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%12 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i8, <8 x i16> %11) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%13 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> undef, <8 x i16> undef) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%14 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %5, <8 x i16> undef) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%15 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %5, <8 x i16> undef) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%16 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %6, <8 x i16> undef) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%17 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %12, <8 x i16> undef) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%18 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %13, <8 x i16> %15) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%19 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %14) nounwind readnone		; <<8 x i16>> [#uses=2]
+	%20 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> undef) nounwind readnone		; <<8 x i16>> [#uses=4]
+	%21 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %17) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%22 = bitcast <8 x i16> %21 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%23 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170>, <8 x i16> undef) nounwind readnone		; <<8 x i16>> [#uses=2]
+	%24 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %23, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%25 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%26 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %25, <8 x i16> zeroinitializer) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%tmp.i.i6 = add <8 x i16> %23, %26		; <<8 x i16>> [#uses=1]
+	%27 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %24) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%28 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i6, <8 x i16> %27) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%29 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170>, <8 x i16> undef) nounwind readnone		; <<8 x i16>> [#uses=2]
+	%30 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %29, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%31 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> zeroinitializer, i32 14) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%32 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %31, <8 x i16> zeroinitializer) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%tmp.i.i4 = add <8 x i16> %29, %32		; <<8 x i16>> [#uses=1]
+	%33 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> undef, <8 x i16> %30) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%34 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i4, <8 x i16> %33) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%35 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170>, <8 x i16> %20) nounwind readnone		; <<8 x i16>> [#uses=2]
+	%tmp.i2.i1 = mul <8 x i16> %20, <i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170, i16 23170>		; <<8 x i16>> [#uses=1]
+	%36 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %35, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%37 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %tmp.i2.i1, i32 14) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%38 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %37, <8 x i16> zeroinitializer) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%tmp.i.i2 = add <8 x i16> %35, %38		; <<8 x i16>> [#uses=1]
+	%39 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %19, <8 x i16> %36) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%40 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i2, <8 x i16> %39) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%41 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170>, <8 x i16> %20) nounwind readnone		; <<8 x i16>> [#uses=2]
+	%tmp.i2.i = mul <8 x i16> %20, <i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170>		; <<8 x i16>> [#uses=1]
+	%42 = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %41, <8 x i16> <i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384, i16 16384>) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%43 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %tmp.i2.i, i32 14) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%44 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %43, <8 x i16> zeroinitializer) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%tmp.i.i = add <8 x i16> %41, %44		; <<8 x i16>> [#uses=1]
+	%45 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %19, <8 x i16> %42) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%46 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %tmp.i.i, <8 x i16> %45) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%47 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %18, <8 x i16> %16) nounwind readnone		; <<8 x i16>> [#uses=1]
+	%48 = bitcast <8 x i16> %47 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%49 = bitcast <8 x i16> %28 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%50 = getelementptr i16* %out8x8, i64 8		; <i16*> [#uses=1]
+	%51 = bitcast i16* %50 to <2 x i64>*		; <<2 x i64>*> [#uses=1]
+	store <2 x i64> %49, <2 x i64>* %51, align 16
+	%52 = bitcast <8 x i16> %40 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%53 = getelementptr i16* %out8x8, i64 16		; <i16*> [#uses=1]
+	%54 = bitcast i16* %53 to <2 x i64>*		; <<2 x i64>*> [#uses=1]
+	store <2 x i64> %52, <2 x i64>* %54, align 16
+	%55 = getelementptr i16* %out8x8, i64 24		; <i16*> [#uses=1]
+	%56 = bitcast i16* %55 to <2 x i64>*		; <<2 x i64>*> [#uses=1]
+	store <2 x i64> %48, <2 x i64>* %56, align 16
+	%57 = bitcast <8 x i16> %46 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%58 = getelementptr i16* %out8x8, i64 40		; <i16*> [#uses=1]
+	%59 = bitcast i16* %58 to <2 x i64>*		; <<2 x i64>*> [#uses=1]
+	store <2 x i64> %57, <2 x i64>* %59, align 16
+	%60 = bitcast <8 x i16> %34 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%61 = getelementptr i16* %out8x8, i64 48		; <i16*> [#uses=1]
+	%62 = bitcast i16* %61 to <2 x i64>*		; <<2 x i64>*> [#uses=1]
+	store <2 x i64> %60, <2 x i64>* %62, align 16
+	%63 = getelementptr i16* %out8x8, i64 56		; <i16*> [#uses=1]
+	%64 = bitcast i16* %63 to <2 x i64>*		; <<2 x i64>*> [#uses=1]
+	store <2 x i64> %22, <2 x i64>* %64, align 16
+	ret void
+}
+
+declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+declare <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
diff --git a/test/CodeGen/X86/2009-07-17-StackColoringBug.ll b/test/CodeGen/X86/2009-07-17-StackColoringBug.ll
new file mode 100644
index 0000000..3e5bd34
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-17-StackColoringBug.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu -disable-fp-elim -color-ss-with-regs | not grep dil
+; PR4552
+
+target triple = "i386-pc-linux-gnu"
+@g_8 = internal global i32 0		; <i32*> [#uses=1]
+@g_72 = internal global i32 0		; <i32*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (i32, i8, i8)* @uint84 to i8*)], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define i32 @uint84(i32 %p_15, i8 signext %p_17, i8 signext %p_19) nounwind {
+entry:
+	%g_72.promoted = load i32* @g_72		; <i32> [#uses=1]
+	%g_8.promoted = load i32* @g_8		; <i32> [#uses=1]
+	br label %bb
+
+bb:		; preds = %func_40.exit, %entry
+	%g_8.tmp.1 = phi i32 [ %g_8.promoted, %entry ], [ %g_8.tmp.0, %func_40.exit ]		; <i32> [#uses=3]
+	%g_72.tmp.1 = phi i32 [ %g_72.promoted, %entry ], [ %g_72.tmp.0, %func_40.exit ]		; <i32> [#uses=3]
+	%retval12.i4.i.i = trunc i32 %g_8.tmp.1 to i8		; <i8> [#uses=2]
+	%0 = trunc i32 %g_72.tmp.1 to i8		; <i8> [#uses=2]
+	%1 = mul i8 %retval12.i4.i.i, %0		; <i8> [#uses=1]
+	%2 = icmp eq i8 %1, 0		; <i1> [#uses=1]
+	br i1 %2, label %bb2.i.i, label %bb.i.i
+
+bb.i.i:		; preds = %bb
+	%3 = sext i8 %0 to i32		; <i32> [#uses=1]
+	%4 = and i32 %3, 50295		; <i32> [#uses=1]
+	%5 = icmp eq i32 %4, 0		; <i1> [#uses=1]
+	br i1 %5, label %bb2.i.i, label %func_55.exit.i
+
+bb2.i.i:		; preds = %bb.i.i, %bb
+	br label %func_55.exit.i
+
+func_55.exit.i:		; preds = %bb2.i.i, %bb.i.i
+	%g_72.tmp.2 = phi i32 [ 1, %bb2.i.i ], [ %g_72.tmp.1, %bb.i.i ]		; <i32> [#uses=1]
+	%6 = phi i32 [ 1, %bb2.i.i ], [ %g_72.tmp.1, %bb.i.i ]		; <i32> [#uses=1]
+	%7 = trunc i32 %6 to i8		; <i8> [#uses=2]
+	%8 = mul i8 %7, %retval12.i4.i.i		; <i8> [#uses=1]
+	%9 = icmp eq i8 %8, 0		; <i1> [#uses=1]
+	br i1 %9, label %bb2.i4.i, label %bb.i3.i
+
+bb.i3.i:		; preds = %func_55.exit.i
+	%10 = sext i8 %7 to i32		; <i32> [#uses=1]
+	%11 = and i32 %10, 50295		; <i32> [#uses=1]
+	%12 = icmp eq i32 %11, 0		; <i1> [#uses=1]
+	br i1 %12, label %bb2.i4.i, label %func_40.exit
+
+bb2.i4.i:		; preds = %bb.i3.i, %func_55.exit.i
+	br label %func_40.exit
+
+func_40.exit:		; preds = %bb2.i4.i, %bb.i3.i
+	%g_72.tmp.0 = phi i32 [ 1, %bb2.i4.i ], [ %g_72.tmp.2, %bb.i3.i ]		; <i32> [#uses=1]
+	%phitmp = icmp sgt i32 %g_8.tmp.1, 0		; <i1> [#uses=1]
+	%g_8.tmp.0 = select i1 %phitmp, i32 %g_8.tmp.1, i32 1		; <i32> [#uses=1]
+	br label %bb
+}
diff --git a/test/CodeGen/X86/2009-07-19-AsmExtraOperands.ll b/test/CodeGen/X86/2009-07-19-AsmExtraOperands.ll
new file mode 100644
index 0000000..a0095ab
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-19-AsmExtraOperands.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86-64
+; PR4583
+
+define i32 @atomic_cmpset_long(i64* %dst, i64 %exp, i64 %src) nounwind ssp noredzone noimplicitfloat {
+entry:
+	%0 = call i8 asm sideeffect "\09lock ; \09\09\09cmpxchgq $2,$1 ;\09       sete\09$0 ;\09\091:\09\09\09\09# atomic_cmpset_long", "={ax},=*m,r,{ax},*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i64* undef, i64 undef, i64 undef, i64* undef) nounwind		; <i8> [#uses=0]
+	br label %1
+
+; <label>:1		; preds = %entry
+	ret i32 undef
+}
diff --git a/test/CodeGen/X86/2009-07-20-CoalescerBug.ll b/test/CodeGen/X86/2009-07-20-CoalescerBug.ll
new file mode 100644
index 0000000..e99edd6
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-20-CoalescerBug.ll
@@ -0,0 +1,165 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10
+; PR4587
+; rdar://7072590
+
+	%struct.re_pattern_buffer = type <{ i8*, i64, i64, i64, i8*, i64, i64, i8, i8, i8, i8, i8, i8, i8, i8 }>
+
+define fastcc i32 @regex_compile(i8* %pattern, i64 %size, i64 %syntax, %struct.re_pattern_buffer* nocapture %bufp) nounwind ssp {
+entry:
+	br i1 undef, label %return, label %if.end
+
+if.end:		; preds = %entry
+	%tmp35 = getelementptr %struct.re_pattern_buffer* %bufp, i64 0, i32 3		; <i64*> [#uses=1]
+	store i64 %syntax, i64* %tmp35
+	store i32 undef, i32* undef
+	br i1 undef, label %if.then66, label %if.end102
+
+if.then66:		; preds = %if.end
+	br i1 false, label %if.else, label %if.then70
+
+if.then70:		; preds = %if.then66
+	%call74 = call i8* @xrealloc(i8* undef, i64 32) nounwind ssp		; <i8*> [#uses=0]
+	unreachable
+
+if.else:		; preds = %if.then66
+	br i1 false, label %do.body86, label %if.end99
+
+do.body86:		; preds = %if.else
+	br i1 false, label %do.end, label %if.then90
+
+if.then90:		; preds = %do.body86
+	unreachable
+
+do.end:		; preds = %do.body86
+	ret i32 12
+
+if.end99:		; preds = %if.else
+	br label %if.end102
+
+if.end102:		; preds = %if.end99, %if.end
+	br label %while.body
+
+while.body:		; preds = %if.end1126, %sw.bb532, %while.body, %if.end102
+	%laststart.2 = phi i8* [ null, %if.end102 ], [ %laststart.7.ph, %if.end1126 ], [ %laststart.2, %sw.bb532 ], [ %laststart.2, %while.body ]		; <i8*> [#uses=6]
+	%b.1 = phi i8* [ undef, %if.end102 ], [ %ctg29688, %if.end1126 ], [ %b.1, %sw.bb532 ], [ %b.1, %while.body ]		; <i8*> [#uses=5]
+	br i1 undef, label %while.body, label %if.end127
+
+if.end127:		; preds = %while.body
+	switch i32 undef, label %sw.bb532 [
+		i32 123, label %handle_interval
+		i32 92, label %do.body3527
+	]
+
+sw.bb532:		; preds = %if.end127
+	br i1 undef, label %while.body, label %if.end808
+
+if.end808:		; preds = %sw.bb532
+	br i1 undef, label %while.cond1267.preheader, label %if.then811
+
+while.cond1267.preheader:		; preds = %if.end808
+	br i1 false, label %return, label %if.end1294
+
+if.then811:		; preds = %if.end808
+	%call817 = call fastcc i8* @skip_one_char(i8* %laststart.2) ssp		; <i8*> [#uses=0]
+	br i1 undef, label %cond.end834, label %lor.lhs.false827
+
+lor.lhs.false827:		; preds = %if.then811
+	br label %cond.end834
+
+cond.end834:		; preds = %lor.lhs.false827, %if.then811
+	br i1 undef, label %land.lhs.true838, label %while.cond979.preheader
+
+land.lhs.true838:		; preds = %cond.end834
+	br i1 undef, label %if.then842, label %while.cond979.preheader
+
+if.then842:		; preds = %land.lhs.true838
+	%conv851 = trunc i64 undef to i32		; <i32> [#uses=1]
+	br label %while.cond979.preheader
+
+while.cond979.preheader:		; preds = %if.then842, %land.lhs.true838, %cond.end834
+	%startoffset.0.ph = phi i32 [ 0, %cond.end834 ], [ 0, %land.lhs.true838 ], [ %conv851, %if.then842 ]		; <i32> [#uses=2]
+	%laststart.7.ph = phi i8* [ %laststart.2, %cond.end834 ], [ %laststart.2, %land.lhs.true838 ], [ %laststart.2, %if.then842 ]		; <i8*> [#uses=3]
+	%b.4.ph = phi i8* [ %b.1, %cond.end834 ], [ %b.1, %land.lhs.true838 ], [ %b.1, %if.then842 ]		; <i8*> [#uses=3]
+	%ctg29688 = getelementptr i8* %b.4.ph, i64 6		; <i8*> [#uses=1]
+	br label %while.cond979
+
+while.cond979:		; preds = %if.end1006, %while.cond979.preheader
+	%cmp991 = icmp ugt i64 undef, 0		; <i1> [#uses=1]
+	br i1 %cmp991, label %do.body994, label %while.end1088
+
+do.body994:		; preds = %while.cond979
+	br i1 undef, label %return, label %if.end1006
+
+if.end1006:		; preds = %do.body994
+	%cmp1014 = icmp ugt i64 undef, 32768		; <i1> [#uses=1]
+	%storemerge10953 = select i1 %cmp1014, i64 32768, i64 undef		; <i64> [#uses=1]
+	store i64 %storemerge10953, i64* undef
+	br i1 false, label %return, label %while.cond979
+
+while.end1088:		; preds = %while.cond979
+	br i1 undef, label %if.then1091, label %if.else1101
+
+if.then1091:		; preds = %while.end1088
+	store i8 undef, i8* undef
+	%idx.ext1132.pre = zext i32 %startoffset.0.ph to i64		; <i64> [#uses=1]
+	%add.ptr1133.pre = getelementptr i8* %laststart.7.ph, i64 %idx.ext1132.pre		; <i8*> [#uses=1]
+	%sub.ptr.lhs.cast1135.pre = ptrtoint i8* %add.ptr1133.pre to i64		; <i64> [#uses=1]
+	br label %if.end1126
+
+if.else1101:		; preds = %while.end1088
+	%cond1109 = select i1 undef, i32 18, i32 14		; <i32> [#uses=1]
+	%idx.ext1112 = zext i32 %startoffset.0.ph to i64		; <i64> [#uses=1]
+	%add.ptr1113 = getelementptr i8* %laststart.7.ph, i64 %idx.ext1112		; <i8*> [#uses=2]
+	%sub.ptr.rhs.cast1121 = ptrtoint i8* %add.ptr1113 to i64		; <i64> [#uses=1]
+	call fastcc void @insert_op1(i32 %cond1109, i8* %add.ptr1113, i32 undef, i8* %b.4.ph) ssp
+	br label %if.end1126
+
+if.end1126:		; preds = %if.else1101, %if.then1091
+	%sub.ptr.lhs.cast1135.pre-phi = phi i64 [ %sub.ptr.rhs.cast1121, %if.else1101 ], [ %sub.ptr.lhs.cast1135.pre, %if.then1091 ]		; <i64> [#uses=1]
+	%add.ptr1128 = getelementptr i8* %b.4.ph, i64 3		; <i8*> [#uses=1]
+	%sub.ptr.rhs.cast1136 = ptrtoint i8* %add.ptr1128 to i64		; <i64> [#uses=1]
+	%sub.ptr.sub1137 = sub i64 %sub.ptr.lhs.cast1135.pre-phi, %sub.ptr.rhs.cast1136		; <i64> [#uses=1]
+	%sub.ptr.sub11378527 = trunc i64 %sub.ptr.sub1137 to i32		; <i32> [#uses=1]
+	%conv1139 = add i32 %sub.ptr.sub11378527, -3		; <i32> [#uses=1]
+	store i8 undef, i8* undef
+	%shr10.i8599 = lshr i32 %conv1139, 8		; <i32> [#uses=1]
+	%conv6.i8600 = trunc i32 %shr10.i8599 to i8		; <i8> [#uses=1]
+	store i8 %conv6.i8600, i8* undef
+	br label %while.body
+
+if.end1294:		; preds = %while.cond1267.preheader
+	ret i32 12
+
+do.body3527:		; preds = %if.end127
+	br i1 undef, label %do.end3536, label %if.then3531
+
+if.then3531:		; preds = %do.body3527
+	unreachable
+
+do.end3536:		; preds = %do.body3527
+	ret i32 5
+
+handle_interval:		; preds = %if.end127
+	br i1 undef, label %do.body4547, label %cond.false4583
+
+do.body4547:		; preds = %handle_interval
+	br i1 undef, label %do.end4556, label %if.then4551
+
+if.then4551:		; preds = %do.body4547
+	unreachable
+
+do.end4556:		; preds = %do.body4547
+	ret i32 9
+
+cond.false4583:		; preds = %handle_interval
+	unreachable
+
+return:		; preds = %if.end1006, %do.body994, %while.cond1267.preheader, %entry
+	ret i32 undef
+}
+
+declare i8* @xrealloc(i8*, i64) ssp
+
+declare fastcc i8* @skip_one_char(i8*) nounwind readonly ssp
+
+declare fastcc void @insert_op1(i32, i8*, i32, i8*) nounwind ssp
diff --git a/test/CodeGen/X86/2009-07-20-DAGCombineBug.ll b/test/CodeGen/X86/2009-07-20-DAGCombineBug.ll
new file mode 100644
index 0000000..e83b3a7
--- /dev/null
+++ b/test/CodeGen/X86/2009-07-20-DAGCombineBug.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=x86
+
+@bsBuff = internal global i32 0		; <i32*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 ()* @bsGetUInt32 to i8*)], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define fastcc i32 @bsGetUInt32() nounwind ssp {
+entry:
+	%bsBuff.promoted44 = load i32* @bsBuff		; <i32> [#uses=1]
+	%0 = add i32 0, -8		; <i32> [#uses=1]
+	%1 = lshr i32 %bsBuff.promoted44, %0		; <i32> [#uses=1]
+	%2 = shl i32 %1, 8		; <i32> [#uses=1]
+	br label %bb3.i17
+
+bb3.i9:		; preds = %bb3.i17
+	br i1 false, label %bb2.i16, label %bb1.i15
+
+bb1.i15:		; preds = %bb3.i9
+	unreachable
+
+bb2.i16:		; preds = %bb3.i9
+	br label %bb3.i17
+
+bb3.i17:		; preds = %bb2.i16, %entry
+	br i1 false, label %bb3.i9, label %bsR.exit18
+
+bsR.exit18:		; preds = %bb3.i17
+	%3 = or i32 0, %2		; <i32> [#uses=0]
+	ret i32 0
+}
diff --git a/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll b/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll
new file mode 100644
index 0000000..b9b09a3
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86-64
+; PR4669
+declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32)
+
+define <1 x i64> @test(i64 %t) {
+entry:
+	%t1 = insertelement <1 x i64> undef, i64 %t, i32 0
+	%t2 = tail call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> %t1, i32 48)
+	ret <1 x i64> %t2
+}
diff --git a/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll b/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll
new file mode 100644
index 0000000..b329c91
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll
@@ -0,0 +1,142 @@
+; RUN: llc < %s -O3
+; PR4626
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+@g_3 = common global i8 0, align 1		; <i8*> [#uses=2]
+
+define signext i8 @safe_mul_func_int16_t_s_s(i32 %_si1, i8 signext %_si2) nounwind readnone {
+entry:
+	%tobool = icmp eq i32 %_si1, 0		; <i1> [#uses=1]
+	%cmp = icmp sgt i8 %_si2, 0		; <i1> [#uses=2]
+	%or.cond = or i1 %cmp, %tobool		; <i1> [#uses=1]
+	br i1 %or.cond, label %lor.rhs, label %land.lhs.true3
+
+land.lhs.true3:		; preds = %entry
+	%conv5 = sext i8 %_si2 to i32		; <i32> [#uses=1]
+	%cmp7 = icmp slt i32 %conv5, %_si1		; <i1> [#uses=1]
+	br i1 %cmp7, label %cond.end, label %lor.rhs
+
+lor.rhs:		; preds = %land.lhs.true3, %entry
+	%cmp10.not = icmp slt i32 %_si1, 1		; <i1> [#uses=1]
+	%or.cond23 = and i1 %cmp, %cmp10.not		; <i1> [#uses=1]
+	br i1 %or.cond23, label %lor.end, label %cond.false
+
+lor.end:		; preds = %lor.rhs
+	%tobool19 = icmp ne i8 %_si2, 0		; <i1> [#uses=2]
+	%lor.ext = zext i1 %tobool19 to i32		; <i32> [#uses=1]
+	br i1 %tobool19, label %cond.end, label %cond.false
+
+cond.false:		; preds = %lor.end, %lor.rhs
+	%conv21 = sext i8 %_si2 to i32		; <i32> [#uses=1]
+	br label %cond.end
+
+cond.end:		; preds = %cond.false, %lor.end, %land.lhs.true3
+	%cond = phi i32 [ %conv21, %cond.false ], [ 1, %land.lhs.true3 ], [ %lor.ext, %lor.end ]		; <i32> [#uses=1]
+	%conv22 = trunc i32 %cond to i8		; <i8> [#uses=1]
+	ret i8 %conv22
+}
+
+define i32 @func_34(i8 signext %p_35) nounwind readonly {
+entry:
+	%tobool = icmp eq i8 %p_35, 0		; <i1> [#uses=1]
+	br i1 %tobool, label %lor.lhs.false, label %if.then
+
+lor.lhs.false:		; preds = %entry
+	%tmp1 = load i8* @g_3		; <i8> [#uses=1]
+	%tobool3 = icmp eq i8 %tmp1, 0		; <i1> [#uses=1]
+	br i1 %tobool3, label %return, label %if.then
+
+if.then:		; preds = %lor.lhs.false, %entry
+	%tmp4 = load i8* @g_3		; <i8> [#uses=1]
+	%conv5 = sext i8 %tmp4 to i32		; <i32> [#uses=1]
+	ret i32 %conv5
+
+return:		; preds = %lor.lhs.false
+	ret i32 0
+}
+
+define void @foo(i32 %p_5) noreturn nounwind {
+entry:
+	%cmp = icmp sgt i32 %p_5, 0		; <i1> [#uses=2]
+	%call = tail call i32 @safe() nounwind		; <i32> [#uses=1]
+	%conv1 = trunc i32 %call to i8		; <i8> [#uses=3]
+	%tobool.i = xor i1 %cmp, true		; <i1> [#uses=3]
+	%cmp.i = icmp sgt i8 %conv1, 0		; <i1> [#uses=3]
+	%or.cond.i = or i1 %cmp.i, %tobool.i		; <i1> [#uses=1]
+	br i1 %or.cond.i, label %lor.rhs.i, label %land.lhs.true3.i
+
+land.lhs.true3.i:		; preds = %entry
+	%xor = zext i1 %cmp to i32		; <i32> [#uses=1]
+	%conv5.i = sext i8 %conv1 to i32		; <i32> [#uses=1]
+	%cmp7.i = icmp slt i32 %conv5.i, %xor		; <i1> [#uses=1]
+	%cmp7.i.not = xor i1 %cmp7.i, true		; <i1> [#uses=1]
+	%or.cond23.i = and i1 %cmp.i, %tobool.i		; <i1> [#uses=1]
+	%or.cond = and i1 %cmp7.i.not, %or.cond23.i		; <i1> [#uses=1]
+	br i1 %or.cond, label %lor.end.i, label %for.inc
+
+lor.rhs.i:		; preds = %entry
+	%or.cond23.i.old = and i1 %cmp.i, %tobool.i		; <i1> [#uses=1]
+	br i1 %or.cond23.i.old, label %lor.end.i, label %for.inc
+
+lor.end.i:		; preds = %lor.rhs.i, %land.lhs.true3.i
+	%tobool19.i = icmp eq i8 %conv1, 0		; <i1> [#uses=0]
+	br label %for.inc
+
+for.inc:		; preds = %for.inc, %lor.end.i, %lor.rhs.i, %land.lhs.true3.i
+	br label %for.inc
+}
+
+declare i32 @safe()
+
+define i32 @func_35(i8 signext %p_35) nounwind readonly {
+entry:
+  %tobool = icmp eq i8 %p_35, 0                   ; <i1> [#uses=1]
+  br i1 %tobool, label %lor.lhs.false, label %if.then
+
+lor.lhs.false:                                    ; preds = %entry
+  %tmp1 = load i8* @g_3                           ; <i8> [#uses=1]
+  %tobool3 = icmp eq i8 %tmp1, 0                  ; <i1> [#uses=1]
+  br i1 %tobool3, label %return, label %if.then
+
+if.then:                                          ; preds = %lor.lhs.false, %entry
+  %tmp4 = load i8* @g_3                           ; <i8> [#uses=1]
+  %conv5 = sext i8 %tmp4 to i32                   ; <i32> [#uses=1]
+  ret i32 %conv5
+
+return:                                           ; preds = %lor.lhs.false
+  ret i32 0
+}
+
+define void @bar(i32 %p_5) noreturn nounwind {
+entry:
+  %cmp = icmp sgt i32 %p_5, 0                     ; <i1> [#uses=2]
+  %call = tail call i32 @safe() nounwind          ; <i32> [#uses=1]
+  %conv1 = trunc i32 %call to i8                  ; <i8> [#uses=3]
+  %tobool.i = xor i1 %cmp, true                   ; <i1> [#uses=3]
+  %cmp.i = icmp sgt i8 %conv1, 0                  ; <i1> [#uses=3]
+  %or.cond.i = or i1 %cmp.i, %tobool.i            ; <i1> [#uses=1]
+  br i1 %or.cond.i, label %lor.rhs.i, label %land.lhs.true3.i
+
+land.lhs.true3.i:                                 ; preds = %entry
+  %xor = zext i1 %cmp to i32                      ; <i32> [#uses=1]
+  %conv5.i = sext i8 %conv1 to i32                ; <i32> [#uses=1]
+  %cmp7.i = icmp slt i32 %conv5.i, %xor           ; <i1> [#uses=1]
+  %cmp7.i.not = xor i1 %cmp7.i, true              ; <i1> [#uses=1]
+  %or.cond23.i = and i1 %cmp.i, %tobool.i         ; <i1> [#uses=1]
+  %or.cond = and i1 %cmp7.i.not, %or.cond23.i     ; <i1> [#uses=1]
+  br i1 %or.cond, label %lor.end.i, label %for.inc
+
+lor.rhs.i:                                        ; preds = %entry
+  %or.cond23.i.old = and i1 %cmp.i, %tobool.i     ; <i1> [#uses=1]
+  br i1 %or.cond23.i.old, label %lor.end.i, label %for.inc
+
+lor.end.i:                                        ; preds = %lor.rhs.i, %land.lhs.true3.i
+  %tobool19.i = icmp eq i8 %conv1, 0              ; <i1> [#uses=0]
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.inc, %lor.end.i, %lor.rhs.i, %land.lhs.true3.i
+  br label %for.inc
+}
+
+declare i32 @safe()
diff --git a/test/CodeGen/X86/2009-08-06-inlineasm.ll b/test/CodeGen/X86/2009-08-06-inlineasm.ll
new file mode 100644
index 0000000..cc2f3d8
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-06-inlineasm.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s
+; PR4668
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+
+define i32 @x(i32 %qscale) nounwind {
+entry:
+	%temp_block = alloca [64 x i16], align 16		; <[64 x i16]*> [#uses=0]
+	%tmp = call i32 asm sideeffect "xor %edx, %edx", "={dx},~{dirflag},~{fpsr},~{flags}"() nounwind		; <i32> [#uses=1]
+	br i1 undef, label %if.end78, label %if.then28
+
+if.then28:		; preds = %entry
+	br label %if.end78
+
+if.end78:		; preds = %if.then28, %entry
+	%level.1 = phi i32 [ %tmp, %if.then28 ], [ 0, %entry ]		; <i32> [#uses=1]
+	%add.ptr1 = getelementptr [64 x i16]* null, i32 0, i32 %qscale		; <i16*> [#uses=1]
+	%add.ptr2 = getelementptr [64 x i16]* null, i32 1, i32 %qscale		; <i16*> [#uses=1]
+	%add.ptr3 = getelementptr [64 x i16]* null, i32 2, i32 %qscale		; <i16*> [#uses=1]
+	%add.ptr4 = getelementptr [64 x i16]* null, i32 3, i32 %qscale		; <i16*> [#uses=1]
+	%add.ptr5 = getelementptr [64 x i16]* null, i32 4, i32 %qscale		; <i16*> [#uses=1]
+	%add.ptr6 = getelementptr [64 x i16]* null, i32 5, i32 %qscale		; <i16*> [#uses=1]
+	%tmp1 = call i32 asm sideeffect "nop", "={ax},r,r,r,r,r,0,~{dirflag},~{fpsr},~{flags}"(i16* %add.ptr6, i16* %add.ptr5, i16* %add.ptr4, i16* %add.ptr3, i16* %add.ptr2, i16* %add.ptr1) nounwind		; <i32> [#uses=0]
+	ret i32 %level.1
+}
diff --git a/test/CodeGen/X86/2009-08-08-CastError.ll b/test/CodeGen/X86/2009-08-08-CastError.ll
new file mode 100644
index 0000000..9456d91
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-08-CastError.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=x86_64-mingw64 | grep movabsq
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define <4 x float> @RecursiveTestFunc1(i8*) {
+EntryBlock:
+	%1 = call <4 x float> inttoptr (i64 5367207198 to <4 x float> (i8*, float, float, float, float)*)(i8* %0, float 8.000000e+00, float 5.000000e+00, float 3.000000e+00, float 4.000000e+00)		; <<4 x float>> [#uses=1]
+	ret <4 x float> %1
+}
diff --git a/test/CodeGen/X86/2009-08-12-badswitch.ll b/test/CodeGen/X86/2009-08-12-badswitch.ll
new file mode 100644
index 0000000..a94fce0
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-12-badswitch.ll
@@ -0,0 +1,176 @@
+; RUN: llc < %s | grep LJT
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin10"
+
+declare void @f1() nounwind readnone
+declare void @f2() nounwind readnone
+declare void @f3() nounwind readnone
+declare void @f4() nounwind readnone
+declare void @f5() nounwind readnone
+declare void @f6() nounwind readnone
+declare void @f7() nounwind readnone
+declare void @f8() nounwind readnone
+declare void @f9() nounwind readnone
+declare void @f10() nounwind readnone
+declare void @f11() nounwind readnone
+declare void @f12() nounwind readnone
+declare void @f13() nounwind readnone
+declare void @f14() nounwind readnone
+declare void @f15() nounwind readnone
+declare void @f16() nounwind readnone
+declare void @f17() nounwind readnone
+declare void @f18() nounwind readnone
+declare void @f19() nounwind readnone
+declare void @f20() nounwind readnone
+declare void @f21() nounwind readnone
+declare void @f22() nounwind readnone
+declare void @f23() nounwind readnone
+declare void @f24() nounwind readnone
+declare void @f25() nounwind readnone
+declare void @f26() nounwind readnone
+
+define internal fastcc i32 @foo(i64 %bar) nounwind ssp {
+entry:
+        br label %bb49
+
+bb49:
+	switch i64 %bar, label %RETURN [
+		i64 2, label %RRETURN_2
+		i64 3, label %RRETURN_6
+		i64 4, label %RRETURN_7
+		i64 5, label %RRETURN_14
+		i64 6, label %RRETURN_15
+		i64 7, label %RRETURN_16
+		i64 8, label %RRETURN_17
+		i64 9, label %RRETURN_18
+		i64 10, label %RRETURN_19
+		i64 11, label %RRETURN_20
+		i64 12, label %RRETURN_21
+		i64 13, label %RRETURN_22
+		i64 14, label %RRETURN_24
+		i64 15, label %RRETURN_26
+		i64 16, label %RRETURN_27
+		i64 17, label %RRETURN_28
+		i64 18, label %RRETURN_29
+		i64 19, label %RRETURN_30
+		i64 20, label %RRETURN_31
+		i64 21, label %RRETURN_38
+		i64 22, label %RRETURN_40
+		i64 23, label %RRETURN_42
+		i64 24, label %RRETURN_44
+		i64 25, label %RRETURN_48
+		i64 26, label %RRETURN_52
+		i64 27, label %RRETURN_1
+	]
+
+RETURN:
+        call void @f1()
+        br label %EXIT
+
+RRETURN_2:		; preds = %bb49
+        call void @f2()
+        br label %EXIT
+
+RRETURN_6:		; preds = %bb49
+        call void @f2()
+        br label %EXIT
+
+RRETURN_7:		; preds = %bb49
+        call void @f3()
+        br label %EXIT
+
+RRETURN_14:		; preds = %bb49
+        call void @f4()
+        br label %EXIT
+
+RRETURN_15:		; preds = %bb49
+        call void @f5()
+        br label %EXIT
+
+RRETURN_16:		; preds = %bb49
+        call void @f6()
+        br label %EXIT
+
+RRETURN_17:		; preds = %bb49
+        call void @f7()
+        br label %EXIT
+
+RRETURN_18:		; preds = %bb49
+        call void @f8()
+        br label %EXIT
+
+RRETURN_19:		; preds = %bb49
+        call void @f9()
+        br label %EXIT
+
+RRETURN_20:		; preds = %bb49
+        call void @f10()
+        br label %EXIT
+
+RRETURN_21:		; preds = %bb49
+        call void @f11()
+        br label %EXIT
+
+RRETURN_22:		; preds = %bb49
+        call void @f12()
+        br label %EXIT
+
+RRETURN_24:		; preds = %bb49
+        call void @f13()
+        br label %EXIT
+
+RRETURN_26:		; preds = %bb49
+        call void @f14()
+        br label %EXIT
+
+RRETURN_27:		; preds = %bb49
+        call void @f15()
+        br label %EXIT
+
+RRETURN_28:		; preds = %bb49
+        call void @f16()
+        br label %EXIT
+
+RRETURN_29:		; preds = %bb49
+        call void @f17()
+        br label %EXIT
+
+RRETURN_30:		; preds = %bb49
+        call void @f18()
+        br label %EXIT
+
+RRETURN_31:		; preds = %bb49
+        call void @f19()
+        br label %EXIT
+
+RRETURN_38:		; preds = %bb49
+        call void @f20()
+        br label %EXIT
+
+RRETURN_40:		; preds = %bb49
+        call void @f21()
+        br label %EXIT
+
+RRETURN_42:		; preds = %bb49
+        call void @f22()
+        br label %EXIT
+
+RRETURN_44:		; preds = %bb49
+        call void @f23()
+        br label %EXIT
+
+RRETURN_48:		; preds = %bb49
+        call void @f24()
+        br label %EXIT
+
+RRETURN_52:		; preds = %bb49
+        call void @f25()
+        br label %EXIT
+
+RRETURN_1:		; preds = %bb49
+        call void @f26()
+        br label %EXIT
+
+EXIT:
+        ret i32 0
+}
diff --git a/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll b/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll
new file mode 100644
index 0000000..6b0d6d9
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s
+target triple = "x86_64-mingw"
+
+; ModuleID = 'mm.bc'
+	type opaque		; type %0
+	type opaque		; type %1
+
+define internal fastcc float @computeMipmappingRho(%0* %shaderExecutionStatePtr, i32 %index, <4 x float> %texCoord, <4 x float> %texCoordDX, <4 x float> %texCoordDY) readonly {
+indexCheckBlock:
+	%indexCmp = icmp ugt i32 %index, 16		; <i1> [#uses=1]
+	br i1 %indexCmp, label %zeroReturnBlock, label %primitiveTextureFetchBlock
+
+primitiveTextureFetchBlock:		; preds = %indexCheckBlock
+	%pointerArithmeticTmp = bitcast %0* %shaderExecutionStatePtr to i8*		; <i8*> [#uses=1]
+	%pointerArithmeticTmp1 = getelementptr i8* %pointerArithmeticTmp, i64 1808		; <i8*> [#uses=1]
+	%pointerArithmeticTmp2 = bitcast i8* %pointerArithmeticTmp1 to %1**		; <%1**> [#uses=1]
+	%primitivePtr = load %1** %pointerArithmeticTmp2		; <%1*> [#uses=1]
+	%pointerArithmeticTmp3 = bitcast %1* %primitivePtr to i8*		; <i8*> [#uses=1]
+	%pointerArithmeticTmp4 = getelementptr i8* %pointerArithmeticTmp3, i64 19408		; <i8*> [#uses=1]
+	%pointerArithmeticTmp5 = bitcast i8* %pointerArithmeticTmp4 to %1**		; <%1**> [#uses=1]
+	%primitiveTexturePtr = getelementptr %1** %pointerArithmeticTmp5, i32 %index		; <%1**> [#uses=1]
+	%primitiveTexturePtr6 = load %1** %primitiveTexturePtr		; <%1*> [#uses=2]
+	br label %textureCheckBlock
+
+textureCheckBlock:		; preds = %primitiveTextureFetchBlock
+	%texturePtrInt = ptrtoint %1* %primitiveTexturePtr6 to i64		; <i64> [#uses=1]
+	%testTextureNULL = icmp eq i64 %texturePtrInt, 0		; <i1> [#uses=1]
+	br i1 %testTextureNULL, label %zeroReturnBlock, label %rhoCalculateBlock
+
+rhoCalculateBlock:		; preds = %textureCheckBlock
+	%pointerArithmeticTmp7 = bitcast %1* %primitiveTexturePtr6 to i8*		; <i8*> [#uses=1]
+	%pointerArithmeticTmp8 = getelementptr i8* %pointerArithmeticTmp7, i64 640		; <i8*> [#uses=1]
+	%pointerArithmeticTmp9 = bitcast i8* %pointerArithmeticTmp8 to <4 x float>*		; <<4 x float>*> [#uses=1]
+	%dimensionsPtr = load <4 x float>* %pointerArithmeticTmp9, align 1		; <<4 x float>> [#uses=2]
+	%texDiffDX = fsub <4 x float> %texCoordDX, %texCoord		; <<4 x float>> [#uses=1]
+	%texDiffDY = fsub <4 x float> %texCoordDY, %texCoord		; <<4 x float>> [#uses=1]
+	%ddx = fmul <4 x float> %texDiffDX, %dimensionsPtr		; <<4 x float>> [#uses=2]
+	%ddx10 = fmul <4 x float> %texDiffDY, %dimensionsPtr		; <<4 x float>> [#uses=2]
+	%ddxSquared = fmul <4 x float> %ddx, %ddx		; <<4 x float>> [#uses=3]
+	%0 = shufflevector <4 x float> %ddxSquared, <4 x float> %ddxSquared, <4 x i32> <i32 1, i32 0, i32 0, i32 0>		; <<4 x float>> [#uses=1]
+	%dxSquared = fadd <4 x float> %ddxSquared, %0		; <<4 x float>> [#uses=1]
+	%1 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %dxSquared)		; <<4 x float>> [#uses=1]
+	%ddySquared = fmul <4 x float> %ddx10, %ddx10		; <<4 x float>> [#uses=3]
+	%2 = shufflevector <4 x float> %ddySquared, <4 x float> %ddySquared, <4 x i32> <i32 1, i32 0, i32 0, i32 0>		; <<4 x float>> [#uses=1]
+	%dySquared = fadd <4 x float> %ddySquared, %2		; <<4 x float>> [#uses=1]
+	%3 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %dySquared)		; <<4 x float>> [#uses=1]
+	%4 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %1, <4 x float> %3)		; <<4 x float>> [#uses=1]
+	%rho = extractelement <4 x float> %4, i32 0		; <float> [#uses=1]
+	ret float %rho
+
+zeroReturnBlock:		; preds = %textureCheckBlock, %indexCheckBlock
+	ret float 0.000000e+00
+}
+
+declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
+
+declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll b/test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll
new file mode 100644
index 0000000..5f6cf3b
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-pc-linux | FileCheck %s
+
+@a = external global i96, align 4
+@b = external global i64, align 8
+
+define void @c() nounwind {
+; CHECK: movl a+8, %eax
+  %srcval1 = load i96* @a, align 4
+  %sroa.store.elt2 = lshr i96 %srcval1, 64
+  %tmp = trunc i96 %sroa.store.elt2 to i64
+; CHECK: movl %eax, b
+; CHECK: movl $0, b+4
+  store i64 %tmp, i64* @b, align 8
+  ret void
+}
diff --git a/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll b/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
new file mode 100644
index 0000000..790fd88
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
@@ -0,0 +1,69 @@
+; RUN: llc < %s -march=x86
+; PR4753
+
+; This function has a sub-register reuse undone.
+
+@uint8 = external global i32                      ; <i32*> [#uses=3]
+
+declare signext i8 @foo(i32, i8 signext) nounwind readnone
+
+declare signext i8 @bar(i32, i8 signext) nounwind readnone
+
+define i32 @uint80(i8 signext %p_52) nounwind {
+entry:
+  %0 = sext i8 %p_52 to i16                       ; <i16> [#uses=1]
+  %1 = tail call i32 @func_24(i16 zeroext %0, i8 signext ptrtoint (i8 (i32, i8)* @foo to i8)) nounwind; <i32> [#uses=1]
+  %2 = trunc i32 %1 to i8                         ; <i8> [#uses=1]
+  %3 = or i8 %2, 1                                ; <i8> [#uses=1]
+  %4 = tail call i32 @safe(i32 1) nounwind        ; <i32> [#uses=0]
+  %5 = tail call i32 @func_24(i16 zeroext 0, i8 signext undef) nounwind; <i32> [#uses=1]
+  %6 = trunc i32 %5 to i8                         ; <i8> [#uses=1]
+  %7 = xor i8 %3, %p_52                           ; <i8> [#uses=1]
+  %8 = xor i8 %7, %6                              ; <i8> [#uses=1]
+  %9 = icmp ne i8 %p_52, 0                        ; <i1> [#uses=1]
+  %10 = zext i1 %9 to i8                          ; <i8> [#uses=1]
+  %11 = tail call i32 @func_24(i16 zeroext ptrtoint (i8 (i32, i8)* @bar to i16), i8 signext %10) nounwind; <i32> [#uses=1]
+  %12 = tail call i32 @func_24(i16 zeroext 0, i8 signext 1) nounwind; <i32> [#uses=0]
+  br i1 undef, label %bb2, label %bb
+
+bb:                                               ; preds = %entry
+  br i1 undef, label %bb2, label %bb3
+
+bb2:                                              ; preds = %bb, %entry
+  br label %bb3
+
+bb3:                                              ; preds = %bb2, %bb
+  %iftmp.2.0 = phi i32 [ 0, %bb2 ], [ 1, %bb ]    ; <i32> [#uses=1]
+  %13 = icmp ne i32 %11, %iftmp.2.0               ; <i1> [#uses=1]
+  %14 = tail call i32 @safe(i32 -2) nounwind      ; <i32> [#uses=0]
+  %15 = zext i1 %13 to i8                         ; <i8> [#uses=1]
+  %16 = tail call signext i8 @func_53(i8 signext undef, i8 signext 1, i8 signext %15, i8 signext %8) nounwind; <i8> [#uses=0]
+  br i1 undef, label %bb5, label %bb4
+
+bb4:                                              ; preds = %bb3
+  %17 = volatile load i32* @uint8, align 4        ; <i32> [#uses=0]
+  br label %bb5
+
+bb5:                                              ; preds = %bb4, %bb3
+  %18 = volatile load i32* @uint8, align 4        ; <i32> [#uses=0]
+  %19 = sext i8 undef to i16                      ; <i16> [#uses=1]
+  %20 = tail call i32 @func_24(i16 zeroext %19, i8 signext 1) nounwind; <i32> [#uses=0]
+  br i1 undef, label %return, label %bb6.preheader
+
+bb6.preheader:                                    ; preds = %bb5
+  %21 = sext i8 %p_52 to i32                      ; <i32> [#uses=1]
+  %22 = volatile load i32* @uint8, align 4        ; <i32> [#uses=0]
+  %23 = tail call i32 (...)* @safefuncts(i32 %21, i32 1) nounwind; <i32> [#uses=0]
+  unreachable
+
+return:                                           ; preds = %bb5
+  ret i32 undef
+}
+
+declare i32 @func_24(i16 zeroext, i8 signext)
+
+declare i32 @safe(i32)
+
+declare signext i8 @func_53(i8 signext, i8 signext, i8 signext, i8 signext)
+
+declare i32 @safefuncts(...)
diff --git a/test/CodeGen/X86/2009-08-23-linkerprivate.ll b/test/CodeGen/X86/2009-08-23-linkerprivate.ll
new file mode 100644
index 0000000..3da8f00
--- /dev/null
+++ b/test/CodeGen/X86/2009-08-23-linkerprivate.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin | FileCheck %s
+
+; ModuleID = '/Volumes/MacOS9/tests/WebKit/JavaScriptCore/profiler/ProfilerServer.mm'
+
+@"\01l_objc_msgSend_fixup_alloc" = linker_private hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16		; <i32*> [#uses=0]
+
+; CHECK: .globl l_objc_msgSend_fixup_alloc
+; CHECK: .weak_definition l_objc_msgSend_fixup_alloc
diff --git a/test/CodeGen/X86/2009-09-07-CoalescerBug.ll b/test/CodeGen/X86/2009-09-07-CoalescerBug.ll
new file mode 100644
index 0000000..a5b4a79
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-07-CoalescerBug.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-freebsd7.2 -code-model=kernel | FileCheck %s
+; PR4689
+
+%struct.__s = type { [8 x i8] }
+%struct.pcb = type { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i16, i8* }
+%struct.pcpu = type { i32*, i32*, i32*, i32*, %struct.pcb*, i64, i32, i32, i32, i32 }
+
+define i64 @hammer_time(i64 %modulep, i64 %physfree) nounwind ssp noredzone noimplicitfloat { ; reduced test for PR4689 (x86-64 kernel code model)
+; CHECK: hammer_time:
+; CHECK: movq $Xrsvd, %rax
+; CHECK: movq $Xrsvd, %rsi
+; CHECK: movq $Xrsvd, %rdi
+entry:
+  br i1 undef, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc, %if.end
+  switch i32 undef, label %if.then76 [
+    i32 9, label %for.inc
+    i32 10, label %for.inc
+    i32 11, label %for.inc
+    i32 12, label %for.inc
+  ]
+
+if.then76:                                        ; preds = %for.body
+  unreachable
+
+for.inc:                                          ; preds = %for.body, %for.body, %for.body, %for.body
+  br i1 undef, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc
+  call void asm sideeffect "mov $1,%gs:$0", "=*m,r,~{dirflag},~{fpsr},~{flags}"(%struct.__s* bitcast (%struct.pcb** getelementptr (%struct.pcpu* null, i32 0, i32 4) to %struct.__s*), i64 undef) nounwind
+  br label %for.body170
+
+for.body170:                                      ; preds = %for.body170, %for.end
+  store i64 or (i64 and (i64 or (i64 ptrtoint (void (i32, i32, i32, i32)* @Xrsvd to i64), i64 2097152), i64 2162687), i64 or (i64 or (i64 and (i64 shl (i64 ptrtoint (void (i32, i32, i32, i32)* @Xrsvd to i64), i64 32), i64 -281474976710656), i64 140737488355328), i64 15393162788864)), i64* undef  ; stores a constant expression built from @Xrsvd's address
+  br i1 undef, label %for.end175, label %for.body170
+
+for.end175:                                       ; preds = %for.body170
+  unreachable
+}
+
+declare void @Xrsvd(i32, i32, i32, i32) ssp noredzone noimplicitfloat
diff --git a/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll b/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll
new file mode 100644
index 0000000..7b5e871
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim | FileCheck %s
+
+; It's not legal to fold a load from 32-bit stack slot into a 64-bit
+; instruction. If done, the instruction does a 64-bit load and that's not
+; safe. This can happen when a subreg_to_reg 0 has been coalesced. One
+; exception is when the instruction that folds the load is a move, then we
+; can simply turn it into a 32-bit load from the stack slot.
+; rdar://7170444
+
+%struct.ComplexType = type { i32 }
+
+define i32 @t(i32 %clientPort, i32 %pluginID, i32 %requestID, i32 %objectID, i64 %serverIdentifier, i64 %argumentsData, i32 %argumentsLength) ssp { ; rdar://7170444 reduction
+entry:
+; CHECK: _t:
+; CHECK: movl 16(%rbp),
+  %0 = zext i32 %argumentsLength to i64           ; <i64> [#uses=1]
+  %1 = zext i32 %clientPort to i64                ; <i64> [#uses=1]
+  %2 = inttoptr i64 %1 to %struct.ComplexType*    ; <%struct.ComplexType*> [#uses=1]
+  %3 = invoke i8* @pluginInstance(i8* undef, i32 %pluginID)
+          to label %invcont unwind label %lpad    ; <i8*> [#uses=1]
+
+invcont:                                          ; preds = %entry
+  %4 = add i32 %requestID, %pluginID              ; <i32> [#uses=0]
+  %5 = invoke zeroext i8 @invoke(i8* %3, i32 %objectID, i8* undef, i64 %argumentsData, i32 %argumentsLength, i64* undef, i32* undef)
+          to label %invcont1 unwind label %lpad   ; <i8> [#uses=0]
+
+invcont1:                                         ; preds = %invcont
+  %6 = getelementptr inbounds %struct.ComplexType* %2, i64 0, i32 0 ; <i32*> [#uses=1]
+  %7 = load i32* %6, align 4                      ; <i32> [#uses=1] ; must stay a 32-bit load if folded (see file comment)
+  invoke void @booleanAndDataReply(i32 %7, i32 undef, i32 %requestID, i32 undef, i64 undef, i32 undef)
+          to label %invcont2 unwind label %lpad
+
+invcont2:                                         ; preds = %invcont1
+  ret i32 0
+
+lpad:                                             ; preds = %invcont1, %invcont, %entry
+  %8 = call i32 @vm_deallocate(i32 undef, i64 0, i64 %0) ; <i32> [#uses=0]
+  unreachable
+}
+
+declare i32 @vm_deallocate(i32, i64, i64)
+
+declare i8* @pluginInstance(i8*, i32)
+
+declare zeroext i8 @invoke(i8*, i32, i8*, i64, i32, i64*, i32*)
+
+declare void @booleanAndDataReply(i32, i32, i32, i32, i64, i32)
diff --git a/test/CodeGen/X86/2009-09-10-SpillComments.ll b/test/CodeGen/X86/2009-09-10-SpillComments.ll
new file mode 100644
index 0000000..f9ca861
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-10-SpillComments.ll
@@ -0,0 +1,108 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux | FileCheck %s
+
+; This test shouldn't require spills.
+
+; CHECK: subq  $8, %rsp
+; CHECK-NOT: $rsp
+; CHECK: addq  $8, %rsp
+
+	%struct..0anon = type { i32 }
+	%struct.rtvec_def = type { i32, [1 x %struct..0anon] }
+	%struct.rtx_def = type { i16, i8, i8, [1 x %struct..0anon] }
+@rtx_format = external global [116 x i8*]		; <[116 x i8*]*> [#uses=1]
+@rtx_length = external global [117 x i32]		; <[117 x i32]*> [#uses=1]
+
+declare %struct.rtx_def* @fixup_memory_subreg(%struct.rtx_def*, %struct.rtx_def*, i32)
+
+define %struct.rtx_def* @walk_fixup_memory_subreg(%struct.rtx_def* %x, %struct.rtx_def* %insn) { ; recursively walks %x's operands (per @rtx_format) and rewrites them via @fixup_memory_subreg
+entry:
+	%tmp2 = icmp eq %struct.rtx_def* %x, null		; <i1> [#uses=1]
+	br i1 %tmp2, label %UnifiedReturnBlock, label %cond_next
+
+cond_next:		; preds = %entry
+	%tmp6 = getelementptr %struct.rtx_def* %x, i32 0, i32 0		; <i16*> [#uses=1]
+	%tmp7 = load i16* %tmp6		; <i16> [#uses=2]
+	%tmp78 = zext i16 %tmp7 to i32		; <i32> [#uses=2]
+	%tmp10 = icmp eq i16 %tmp7, 54		; <i1> [#uses=1]
+	br i1 %tmp10, label %cond_true13, label %cond_next32
+
+cond_true13:		; preds = %cond_next
+	%tmp15 = getelementptr %struct.rtx_def* %x, i32 0, i32 3		; <[1 x %struct..0anon]*> [#uses=1]
+	%tmp1718 = bitcast [1 x %struct..0anon]* %tmp15 to %struct.rtx_def**		; <%struct.rtx_def**> [#uses=1]
+	%tmp19 = load %struct.rtx_def** %tmp1718		; <%struct.rtx_def*> [#uses=1]
+	%tmp20 = getelementptr %struct.rtx_def* %tmp19, i32 0, i32 0		; <i16*> [#uses=1]
+	%tmp21 = load i16* %tmp20		; <i16> [#uses=1]
+	%tmp22 = icmp eq i16 %tmp21, 57		; <i1> [#uses=1]
+	br i1 %tmp22, label %cond_true25, label %cond_next32
+
+cond_true25:		; preds = %cond_true13
+	%tmp29 = tail call %struct.rtx_def* @fixup_memory_subreg( %struct.rtx_def* %x, %struct.rtx_def* %insn, i32 1 ) nounwind		; <%struct.rtx_def*> [#uses=1]
+	ret %struct.rtx_def* %tmp29
+
+cond_next32:		; preds = %cond_true13, %cond_next
+	%tmp34 = getelementptr [116 x i8*]* @rtx_format, i32 0, i32 %tmp78		; <i8**> [#uses=1]
+	%tmp35 = load i8** %tmp34, align 4		; <i8*> [#uses=1]
+	%tmp37 = getelementptr [117 x i32]* @rtx_length, i32 0, i32 %tmp78		; <i32*> [#uses=1]
+	%tmp38 = load i32* %tmp37, align 4		; <i32> [#uses=1]
+	%i.011 = add i32 %tmp38, -1		; <i32> [#uses=2]
+	%tmp12513 = icmp sgt i32 %i.011, -1		; <i1> [#uses=1]
+	br i1 %tmp12513, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %bb123, %cond_next32
+	%indvar = phi i32 [ %indvar.next26, %bb123 ], [ 0, %cond_next32 ]		; <i32> [#uses=2]
+	%i.01.0 = sub i32 %i.011, %indvar		; <i32> [#uses=5]
+	%tmp42 = getelementptr i8* %tmp35, i32 %i.01.0		; <i8*> [#uses=2]
+	%tmp43 = load i8* %tmp42		; <i8> [#uses=1]
+	switch i8 %tmp43, label %bb123 [		; 101 = 'e' (single rtx operand), 69 = 'E' (rtx vector)
+		 i8 101, label %cond_true47
+		 i8 69, label %bb105.preheader
+	]
+
+cond_true47:		; preds = %bb
+	%tmp52 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0		; <%struct..0anon*> [#uses=1]
+	%tmp5354 = bitcast %struct..0anon* %tmp52 to %struct.rtx_def**		; <%struct.rtx_def**> [#uses=1]
+	%tmp55 = load %struct.rtx_def** %tmp5354		; <%struct.rtx_def*> [#uses=1]
+	%tmp58 = tail call  %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp55, %struct.rtx_def* %insn ) nounwind		; <%struct.rtx_def*> [#uses=1]
+	%tmp62 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0, i32 0		; <i32*> [#uses=1]
+	%tmp58.c = ptrtoint %struct.rtx_def* %tmp58 to i32		; <i32> [#uses=1]
+	store i32 %tmp58.c, i32* %tmp62
+	%tmp6816 = load i8* %tmp42		; <i8> [#uses=1]
+	%tmp6917 = icmp eq i8 %tmp6816, 69		; <i1> [#uses=1]
+	br i1 %tmp6917, label %bb105.preheader, label %bb123
+
+bb105.preheader:		; preds = %cond_true47, %bb
+	%tmp11020 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0		; <%struct..0anon*> [#uses=1]
+	%tmp11111221 = bitcast %struct..0anon* %tmp11020 to %struct.rtvec_def**		; <%struct.rtvec_def**> [#uses=3]
+	%tmp11322 = load %struct.rtvec_def** %tmp11111221		; <%struct.rtvec_def*> [#uses=1]
+	%tmp11423 = getelementptr %struct.rtvec_def* %tmp11322, i32 0, i32 0		; <i32*> [#uses=1]
+	%tmp11524 = load i32* %tmp11423		; <i32> [#uses=1]
+	%tmp11625 = icmp eq i32 %tmp11524, 0		; <i1> [#uses=1]
+	br i1 %tmp11625, label %bb123, label %bb73
+
+bb73:		; preds = %bb73, %bb105.preheader
+	%j.019 = phi i32 [ %tmp104, %bb73 ], [ 0, %bb105.preheader ]		; <i32> [#uses=3]
+	%tmp81 = load %struct.rtvec_def** %tmp11111221		; <%struct.rtvec_def*> [#uses=2]
+	%tmp92 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019		; <%struct..0anon*> [#uses=1]
+	%tmp9394 = bitcast %struct..0anon* %tmp92 to %struct.rtx_def**		; <%struct.rtx_def**> [#uses=1]
+	%tmp95 = load %struct.rtx_def** %tmp9394		; <%struct.rtx_def*> [#uses=1]
+	%tmp98 = tail call  %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp95, %struct.rtx_def* %insn ) nounwind		; <%struct.rtx_def*> [#uses=1]
+	%tmp101 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019, i32 0		; <i32*> [#uses=1]
+	%tmp98.c = ptrtoint %struct.rtx_def* %tmp98 to i32		; <i32> [#uses=1]
+	store i32 %tmp98.c, i32* %tmp101
+	%tmp104 = add i32 %j.019, 1		; <i32> [#uses=2]
+	%tmp113 = load %struct.rtvec_def** %tmp11111221		; <%struct.rtvec_def*> [#uses=1]
+	%tmp114 = getelementptr %struct.rtvec_def* %tmp113, i32 0, i32 0		; <i32*> [#uses=1]
+	%tmp115 = load i32* %tmp114		; <i32> [#uses=1]
+	%tmp116 = icmp ult i32 %tmp104, %tmp115		; <i1> [#uses=1]
+	br i1 %tmp116, label %bb73, label %bb123
+
+bb123:		; preds = %bb73, %bb105.preheader, %cond_true47, %bb
+	%i.0 = add i32 %i.01.0, -1		; <i32> [#uses=1]
+	%tmp125 = icmp sgt i32 %i.0, -1		; <i1> [#uses=1]
+	%indvar.next26 = add i32 %indvar, 1		; <i32> [#uses=1]
+	br i1 %tmp125, label %bb, label %UnifiedReturnBlock
+
+UnifiedReturnBlock:		; preds = %bb123, %cond_next32, %entry
+	%UnifiedRetVal = phi %struct.rtx_def* [ null, %entry ], [ %x, %cond_next32 ], [ %x, %bb123 ]		; <%struct.rtx_def*> [#uses=1]
+	ret %struct.rtx_def* %UnifiedRetVal
+}
diff --git a/test/CodeGen/X86/2009-09-16-CoalescerBug.ll b/test/CodeGen/X86/2009-09-16-CoalescerBug.ll
new file mode 100644
index 0000000..18b5a17
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-16-CoalescerBug.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10
+; PR4910
+
+%0 = type { i32, i32, i32, i32 }
+
+@boot_cpu_id = external global i32                ; <i32*> [#uses=1]
+@cpu_logical = common global i32 0, align 4       ; <i32*> [#uses=1]
+
<AUTONOMY_BOUNDARY_MARKER>Ω7</AUTONOMY_BOUNDARY_MARKER>+define void @topo_probe_0xb() nounwind ssp { ; PR4910 reduction: probes topology via cpuid leaf 0xb
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc38, %entry
+  %0 = phi i32 [ 0, %entry ], [ %inc40, %for.inc38 ] ; <i32> [#uses=3]
+  %cmp = icmp slt i32 %0, 3                       ; <i1> [#uses=1]
+  br i1 %cmp, label %for.body, label %for.end41
+
+for.body:                                         ; preds = %for.cond
+  %1 = tail call %0 asm sideeffect "cpuid", "={ax},={bx},={cx},={dx},0,{cx},~{dirflag},~{fpsr},~{flags}"(i32 11, i32 %0) nounwind ; <%0> [#uses=3] ; leaf 0xb, subleaf %0
+  %asmresult.i = extractvalue %0 %1, 0            ; <i32> [#uses=1]
+  %asmresult10.i = extractvalue %0 %1, 2          ; <i32> [#uses=1]
+  %and = and i32 %asmresult.i, 31                 ; <i32> [#uses=2]
+  %shr42 = lshr i32 %asmresult10.i, 8             ; <i32> [#uses=1]
+  %and12 = and i32 %shr42, 255                    ; <i32> [#uses=2]
+  %cmp14 = icmp eq i32 %and12, 0                  ; <i1> [#uses=1]
+  br i1 %cmp14, label %for.end41, label %lor.lhs.false
+
+lor.lhs.false:                                    ; preds = %for.body
+  %asmresult9.i = extractvalue %0 %1, 1           ; <i32> [#uses=1]
+  %and7 = and i32 %asmresult9.i, 65535            ; <i32> [#uses=1]
+  %cmp16 = icmp eq i32 %and7, 0                   ; <i1> [#uses=1]
+  br i1 %cmp16, label %for.end41, label %for.cond17.preheader
+
+for.cond17.preheader:                             ; preds = %lor.lhs.false
+  %tmp24 = load i32* @boot_cpu_id                 ; <i32> [#uses=1]
+  %shr26 = ashr i32 %tmp24, %and                  ; <i32> [#uses=1]
+  br label %for.body20
+
+for.body20:                                       ; preds = %for.body20, %for.cond17.preheader
+  %2 = phi i32 [ 0, %for.cond17.preheader ], [ %inc32, %for.body20 ] ; <i32> [#uses=2]
+  %cnt.143 = phi i32 [ 0, %for.cond17.preheader ], [ %inc.cnt.1, %for.body20 ] ; <i32> [#uses=1]
+  %shr23 = ashr i32 %2, %and                      ; <i32> [#uses=1]
+  %cmp27 = icmp eq i32 %shr23, %shr26             ; <i1> [#uses=1]
+  %inc = zext i1 %cmp27 to i32                    ; <i32> [#uses=1] ; count ids matching the boot CPU's
+  %inc.cnt.1 = add i32 %inc, %cnt.143             ; <i32> [#uses=2]
+  %inc32 = add nsw i32 %2, 1                      ; <i32> [#uses=2]
+  %exitcond = icmp eq i32 %inc32, 255             ; <i1> [#uses=1]
+  br i1 %exitcond, label %for.end, label %for.body20
+
+for.end:                                          ; preds = %for.body20
+  %cmp34 = icmp eq i32 %and12, 1                  ; <i1> [#uses=1]
+  br i1 %cmp34, label %if.then35, label %for.inc38
+
+if.then35:                                        ; preds = %for.end
+  store i32 %inc.cnt.1, i32* @cpu_logical
+  br label %for.inc38
+
+for.inc38:                                        ; preds = %for.end, %if.then35
+  %inc40 = add nsw i32 %0, 1                      ; <i32> [#uses=1]
+  br label %for.cond
+
+for.end41:                                        ; preds = %lor.lhs.false, %for.body, %for.cond
+  ret void
+}
diff --git a/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll b/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll
new file mode 100644
index 0000000..8cb538b
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -post-RA-scheduler=true | FileCheck %s
+
+; PR4958
+
+define i32 @main() nounwind ssp { ; PR4958: addl/adcl i64-increment pair must stay adjacent after post-RA scheduling
+entry:
+; CHECK: main:
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  br label %bb
+
+bb:                                               ; preds = %bb1, %entry
+; CHECK:      addl $1
+; CHECK-NEXT: movl %e
+; CHECK-NEXT: adcl $0
+  %i.0 = phi i64 [ 0, %entry ], [ %0, %bb1 ]      ; <i64> [#uses=1]
+  %0 = add nsw i64 %i.0, 1                        ; <i64> [#uses=2] ; i64 add lowers to addl/adcl on i386
+  %1 = icmp sgt i32 0, 0                          ; <i1> [#uses=1]
+  br i1 %1, label %bb2, label %bb1
+
+bb1:                                              ; preds = %bb
+  %2 = icmp sle i64 %0, 1                         ; <i1> [#uses=1]
+  br i1 %2, label %bb, label %bb2
+
+bb2:                                              ; preds = %bb1, %bb
+  br label %return
+
+return:                                           ; preds = %bb2
+  ret i32 0
+}
diff --git a/test/CodeGen/X86/2009-09-19-earlyclobber.ll b/test/CodeGen/X86/2009-09-19-earlyclobber.ll
new file mode 100644
index 0000000..4f44cae
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-19-earlyclobber.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s | FileCheck %s
+; ModuleID = '4964.c'
+; PR 4964
+; Registers other than RAX, RCX are OK, but they must be different.
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin10.0"
+	type { i64, i64 }		; type %0
+
+define i64 @flsst(i64 %find) nounwind ssp { ; PR4964 reduction
+entry:
+; CHECK: FOO %rax %rcx
+	%asmtmp = tail call %0 asm sideeffect "FOO $0 $1 $2", "=r,=&r,rm,~{dirflag},~{fpsr},~{flags},~{cc}"(i64 %find) nounwind		; <%0> [#uses=1] ; "=&r" earlyclobber: $1 must get a register distinct from $0
+	%asmresult = extractvalue %0 %asmtmp, 0		; <i64> [#uses=1]
+	ret i64 %asmresult
+}
diff --git a/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll b/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll
new file mode 100644
index 0000000..80b8835
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10.0 -relocation-model=pic | FileCheck %s
+
+define void @dot(i16* nocapture %A, i32 %As, i16* nocapture %B, i32 %Bs, i16* nocapture %C, i32 %N) nounwind ssp { ; strided i16 dot product: *C = trunc(sum A[i*As]*B[i*Bs])
+; CHECK: dot:
+; CHECK: decl %
+; CHECK-NEXT: jne
+entry:
+	%0 = icmp sgt i32 %N, 0		; <i1> [#uses=1]
+	br i1 %0, label %bb, label %bb2
+
+bb:		; preds = %bb, %entry
+	%i.03 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]		; <i32> [#uses=3]
+	%sum.04 = phi i32 [ 0, %entry ], [ %10, %bb ]		; <i32> [#uses=1]
+	%1 = mul i32 %i.03, %As		; <i32> [#uses=1]
+	%2 = getelementptr i16* %A, i32 %1		; <i16*> [#uses=1]
+	%3 = load i16* %2, align 2		; <i16> [#uses=1]
+	%4 = sext i16 %3 to i32		; <i32> [#uses=1]
+	%5 = mul i32 %i.03, %Bs		; <i32> [#uses=1]
+	%6 = getelementptr i16* %B, i32 %5		; <i16*> [#uses=1]
+	%7 = load i16* %6, align 2		; <i16> [#uses=1]
+	%8 = sext i16 %7 to i32		; <i32> [#uses=1]
+	%9 = mul i32 %8, %4		; <i32> [#uses=1]
+	%10 = add i32 %9, %sum.04		; <i32> [#uses=2]
+	%indvar.next = add i32 %i.03, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next, %N		; <i1> [#uses=1]
+	br i1 %exitcond, label %bb1.bb2_crit_edge, label %bb
+
+bb1.bb2_crit_edge:		; preds = %bb
+	%phitmp = trunc i32 %10 to i16		; <i16> [#uses=1]
+	br label %bb2
+
+bb2:		; preds = %entry, %bb1.bb2_crit_edge
+	%sum.0.lcssa = phi i16 [ %phitmp, %bb1.bb2_crit_edge ], [ 0, %entry ]		; <i16> [#uses=1]
+	store i16 %sum.0.lcssa, i16* %C, align 2
+	ret void
+}
diff --git a/test/CodeGen/X86/2009-09-22-CoalescerBug.ll b/test/CodeGen/X86/2009-09-22-CoalescerBug.ll
new file mode 100644
index 0000000..33f35f8
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-22-CoalescerBug.ll
@@ -0,0 +1,124 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp { ; coalescer crash reduction: mostly-undef CFG skeleton, no FileCheck patterns
+entry:
+  br i1 undef, label %bb, label %bb1
+
+bb:                                               ; preds = %entry
+  ret i32 3
+
+bb1:                                              ; preds = %entry
+  br i1 undef, label %bb3, label %bb2
+
+bb2:                                              ; preds = %bb1
+  ret i32 3
+
+bb3:                                              ; preds = %bb1
+  br i1 undef, label %bb.i18, label %quantum_getwidth.exit
+
+bb.i18:                                           ; preds = %bb.i18, %bb3
+  br i1 undef, label %bb.i18, label %quantum_getwidth.exit
+
+quantum_getwidth.exit:                            ; preds = %bb.i18, %bb3
+  br i1 undef, label %bb4, label %bb6.preheader
+
+bb4:                                              ; preds = %quantum_getwidth.exit
+  unreachable
+
+bb6.preheader:                                    ; preds = %quantum_getwidth.exit
+  br i1 undef, label %bb.i1, label %bb1.i2
+
+bb.i1:                                            ; preds = %bb6.preheader
+  unreachable
+
+bb1.i2:                                           ; preds = %bb6.preheader
+  br i1 undef, label %bb2.i, label %bb3.i4
+
+bb2.i:                                            ; preds = %bb1.i2
+  unreachable
+
+bb3.i4:                                           ; preds = %bb1.i2
+  br i1 undef, label %quantum_new_qureg.exit, label %bb4.i
+
+bb4.i:                                            ; preds = %bb3.i4
+  unreachable
+
+quantum_new_qureg.exit:                           ; preds = %bb3.i4
+  br i1 undef, label %bb9, label %bb11.thread
+
+bb11.thread:                                      ; preds = %quantum_new_qureg.exit
+  %.cast.i = zext i32 undef to i64                ; <i64> [#uses=1]
+  br label %bb.i37
+
+bb9:                                              ; preds = %quantum_new_qureg.exit
+  unreachable
+
+bb.i37:                                           ; preds = %bb.i37, %bb11.thread
+  %0 = load i64* undef, align 8                   ; <i64> [#uses=1]
+  %1 = shl i64 %0, %.cast.i                       ; <i64> [#uses=1] ; the only real data flow in the test
+  store i64 %1, i64* undef, align 8
+  br i1 undef, label %bb.i37, label %quantum_addscratch.exit
+
+quantum_addscratch.exit:                          ; preds = %bb.i37
+  br i1 undef, label %bb12.preheader, label %bb14
+
+bb12.preheader:                                   ; preds = %quantum_addscratch.exit
+  unreachable
+
+bb14:                                             ; preds = %quantum_addscratch.exit
+  br i1 undef, label %bb17, label %bb.nph
+
+bb.nph:                                           ; preds = %bb14
+  unreachable
+
+bb17:                                             ; preds = %bb14
+  br i1 undef, label %bb1.i7, label %quantum_measure.exit
+
+bb1.i7:                                           ; preds = %bb17
+  br label %quantum_measure.exit
+
+quantum_measure.exit:                             ; preds = %bb1.i7, %bb17
+  switch i32 undef, label %bb21 [
+    i32 -1, label %bb18
+    i32 0, label %bb20
+  ]
+
+bb18:                                             ; preds = %quantum_measure.exit
+  unreachable
+
+bb20:                                             ; preds = %quantum_measure.exit
+  unreachable
+
+bb21:                                             ; preds = %quantum_measure.exit
+  br i1 undef, label %quantum_frac_approx.exit, label %bb1.i
+
+bb1.i:                                            ; preds = %bb21
+  unreachable
+
+quantum_frac_approx.exit:                         ; preds = %bb21
+  br i1 undef, label %bb25, label %bb26
+
+bb25:                                             ; preds = %quantum_frac_approx.exit
+  unreachable
+
+bb26:                                             ; preds = %quantum_frac_approx.exit
+  br i1 undef, label %quantum_gcd.exit, label %bb.i
+
+bb.i:                                             ; preds = %bb.i, %bb26
+  br i1 undef, label %quantum_gcd.exit, label %bb.i
+
+quantum_gcd.exit:                                 ; preds = %bb.i, %bb26
+  br i1 undef, label %bb32, label %bb33
+
+bb32:                                             ; preds = %quantum_gcd.exit
+  br i1 undef, label %bb.i.i, label %quantum_delete_qureg.exit
+
+bb.i.i:                                           ; preds = %bb32
+  ret i32 0
+
+quantum_delete_qureg.exit:                        ; preds = %bb32
+  ret i32 0
+
+bb33:                                             ; preds = %quantum_gcd.exit
+  unreachable
+}
diff --git a/test/CodeGen/X86/2009-09-23-LiveVariablesBug.ll b/test/CodeGen/X86/2009-09-23-LiveVariablesBug.ll
new file mode 100644
index 0000000..d37d4b8
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-23-LiveVariablesBug.ll
@@ -0,0 +1,91 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10
+
+; rdar://7247745
+
+%struct._lck_mtx_ = type { %union.anon }
+%struct._lck_rw_t_internal_ = type <{ i16, i8, i8, i32, i32, i32 }>
+%struct.anon = type { i64, i64, [2 x i8], i8, i8, i32 }
+%struct.memory_object = type { i32, i32, %struct.memory_object_pager_ops* }
+%struct.memory_object_control = type { i32, i32, %struct.vm_object* }
+%struct.memory_object_pager_ops = type { void (%struct.memory_object*)*, void (%struct.memory_object*)*, i32 (%struct.memory_object*, %struct.memory_object_control*, i32)*, i32 (%struct.memory_object*)*, i32 (%struct.memory_object*, i64, i32, i32, i32*)*, i32 (%struct.memory_object*, i64, i32, i64*, i32*, i32, i32, i32)*, i32 (%struct.memory_object*, i64, i32)*, i32 (%struct.memory_object*, i64, i64, i32)*, i32 (%struct.memory_object*, i64, i64, i32)*, i32 (%struct.memory_object*, i32)*, i32 (%struct.memory_object*)*, i8* }
+%struct.queue_entry = type { %struct.queue_entry*, %struct.queue_entry* }
+%struct.upl = type { %struct._lck_mtx_, i32, i32, %struct.vm_object*, i64, i32, i64, %struct.vm_object*, i32, i8* }
+%struct.upl_page_info = type <{ i32, i8, [3 x i8] }>
+%struct.vm_object = type { %struct.queue_entry, %struct._lck_rw_t_internal_, i64, %struct.vm_page*, i32, i32, i32, i32, %struct.vm_object*, %struct.vm_object*, i64, %struct.memory_object*, i64, %struct.memory_object_control*, i32, i16, i16, [2 x i8], i8, i8, %struct.queue_entry, %struct.queue_entry, i64, i32, i32, i32, i8*, i64, i8, i8, [2 x i8], %struct.queue_entry }
+%struct.vm_page = type { %struct.queue_entry, %struct.queue_entry, %struct.vm_page*, %struct.vm_object*, i64, [2 x i8], i8, i8, i32, i8, i8, i8, i8, i32 }
+%union.anon = type { %struct.anon }
+
+declare i64 @OSAddAtomic64(i64, i64*) noredzone noimplicitfloat
+
+define i32 @upl_commit_range(%struct.upl* %upl, i32 %offset, i32 %size, i32 %flags, %struct.upl_page_info* %page_list, i32 %count, i32* nocapture %empty) nounwind noredzone noimplicitfloat { ; rdar://7247745 reduction: live-variables with DTrace-probe inline asm
+entry:
+  br i1 undef, label %if.then, label %if.end
+
+if.end:                                           ; preds = %entry
+  br i1 undef, label %if.end143, label %if.then136
+
+if.then136:                                       ; preds = %if.end
+  unreachable
+
+if.end143:                                        ; preds = %if.end
+  br i1 undef, label %if.else155, label %if.then153
+
+if.then153:                                       ; preds = %if.end143
+  br label %while.cond
+
+if.else155:                                       ; preds = %if.end143
+  unreachable
+
+while.cond:                                       ; preds = %if.end1039, %if.then153
+  br i1 undef, label %if.then1138, label %while.body
+
+while.body:                                       ; preds = %while.cond
+  br i1 undef, label %if.end260, label %if.then217
+
+if.then217:                                       ; preds = %while.body
+  br i1 undef, label %if.end260, label %if.then230
+
+if.then230:                                       ; preds = %if.then217
+  br i1 undef, label %if.then246, label %if.end260
+
+if.then246:                                       ; preds = %if.then230
+  br label %if.end260
+
+if.end260:                                        ; preds = %if.then246, %if.then230, %if.then217, %while.body
+  br i1 undef, label %if.end296, label %if.then266
+
+if.then266:                                       ; preds = %if.end260
+  unreachable
+
+if.end296:                                        ; preds = %if.end260
+  br i1 undef, label %if.end1039, label %if.end306
+
+if.end306:                                        ; preds = %if.end296
+  br i1 undef, label %if.end796, label %if.then616
+
+if.then616:                                       ; preds = %if.end306
+  br i1 undef, label %commit_next_page, label %do.body716
+
+do.body716:                                       ; preds = %if.then616
+  %call721 = call i64 @OSAddAtomic64(i64 1, i64* undef) nounwind noredzone noimplicitfloat ; <i64> [#uses=0]
+  call void asm sideeffect "movq\090x0($0),%rdi\0A\09movq\090x8($0),%rsi\0A\09.section __DATA, __data\0A\09.globl __dtrace_probeDOLLAR${:uid}4794___vminfo____pgrec\0A\09__dtrace_probeDOLLAR${:uid}4794___vminfo____pgrec:.quad 1f\0A\09.text\0A\091:nop\0A\09nop\0A\09nop\0A\09", "r,~{memory},~{di},~{si},~{dirflag},~{fpsr},~{flags}"(i64* undef) nounwind
+  br label %commit_next_page
+
+if.end796:                                        ; preds = %if.end306
+  unreachable
+
+commit_next_page:                                 ; preds = %do.body716, %if.then616
+  br i1 undef, label %if.end1039, label %if.then1034
+
+if.then1034:                                      ; preds = %commit_next_page
+  br label %if.end1039
+
+if.end1039:                                       ; preds = %if.then1034, %commit_next_page, %if.end296
+  br label %while.cond
+
+if.then1138:                                      ; preds = %while.cond
+  unreachable
+
+if.then:                                          ; preds = %entry
+  ret i32 4
+}
diff --git a/test/CodeGen/X86/2009-10-08-MachineLICMBug.ll b/test/CodeGen/X86/2009-10-08-MachineLICMBug.ll
new file mode 100644
index 0000000..91c5440
--- /dev/null
+++ b/test/CodeGen/X86/2009-10-08-MachineLICMBug.ll
@@ -0,0 +1,264 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic -stats |& grep {machine-licm} | grep 2
+; rdar://7274692
+
+%0 = type { [125 x i32] }
+%1 = type { i32 }
+%struct..5sPragmaType = type { i8*, i32 }
+%struct.AggInfo = type { i8, i8, i32, %struct.ExprList*, i32, %struct.AggInfo_col*, i32, i32, i32, %struct.AggInfo_func*, i32, i32 }
+%struct.AggInfo_col = type { %struct.Table*, i32, i32, i32, i32, %struct.Expr* }
+%struct.AggInfo_func = type { %struct.Expr*, %struct.FuncDef*, i32, i32 }
+%struct.AuxData = type { i8*, void (i8*)* }
+%struct.Bitvec = type { i32, i32, i32, %0 }
+%struct.BtCursor = type { %struct.Btree*, %struct.BtShared*, %struct.BtCursor*, %struct.BtCursor*, i32 (i8*, i32, i8*, i32, i8*)*, i8*, i32, %struct.MemPage*, i32, %struct.CellInfo, i8, i8, i8*, i64, i32, i8, i32* }
+%struct.BtLock = type { %struct.Btree*, i32, i8, %struct.BtLock* }
+%struct.BtShared = type { %struct.Pager*, %struct.sqlite3*, %struct.BtCursor*, %struct.MemPage*, i8, i8, i8, i8, i8, i8, i8, i8, i32, i16, i16, i32, i32, i32, i32, i8, i32, i8*, void (i8*)*, %struct.sqlite3_mutex*, %struct.BusyHandler, i32, %struct.BtShared*, %struct.BtLock*, %struct.Btree* }
+%struct.Btree = type { %struct.sqlite3*, %struct.BtShared*, i8, i8, i8, i32, %struct.Btree*, %struct.Btree* }
+%struct.BtreeMutexArray = type { i32, [11 x %struct.Btree*] }
+%struct.BusyHandler = type { i32 (i8*, i32)*, i8*, i32 }
+%struct.CellInfo = type { i8*, i64, i32, i32, i16, i16, i16, i16 }
+%struct.CollSeq = type { i8*, i8, i8, i8*, i32 (i8*, i32, i8*, i32, i8*)*, void (i8*)* }
+%struct.Column = type { i8*, %struct.Expr*, i8*, i8*, i8, i8, i8, i8 }
+%struct.Context = type { i64, i32, %struct.Fifo }
+%struct.CountCtx = type { i64 }
+%struct.Cursor = type { %struct.BtCursor*, i32, i64, i64, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i64, %struct.Btree*, i32, i8*, i64, i8*, %struct.KeyInfo*, i32, i64, %struct.sqlite3_vtab_cursor*, %struct.sqlite3_module*, i32, i32, i32*, i32*, i8* }
+%struct.Db = type { i8*, %struct.Btree*, i8, i8, i8*, void (i8*)*, %struct.Schema* }
+%struct.DbPage = type { %struct.Pager*, i32, %struct.DbPage*, %struct.DbPage*, %struct.PagerLruLink, %struct.DbPage*, i8, i8, i8, i8, i8, i16, %struct.DbPage*, %struct.DbPage*, i8* }
+%struct.Expr = type { i8, i8, i16, %struct.CollSeq*, %struct.Expr*, %struct.Expr*, %struct.ExprList*, %struct..5sPragmaType, %struct..5sPragmaType, i32, i32, %struct.AggInfo*, i32, i32, %struct.Select*, %struct.Table*, i32 }
+%struct.ExprList = type { i32, i32, i32, %struct.ExprList_item* }
+%struct.ExprList_item = type { %struct.Expr*, i8*, i8, i8, i8 }
+%struct.FILE = type { i8*, i32, i32, i16, i16, %struct..5sPragmaType, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct..5sPragmaType, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct..5sPragmaType, i32, i64 }
+%struct.FKey = type { %struct.Table*, %struct.FKey*, i8*, %struct.FKey*, i32, %struct.sColMap*, i8, i8, i8, i8 }
+%struct.Fifo = type { i32, %struct.FifoPage*, %struct.FifoPage* }
+%struct.FifoPage = type { i32, i32, i32, %struct.FifoPage*, [1 x i64] }
+%struct.FuncDef = type { i16, i8, i8, i8, i8*, %struct.FuncDef*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*)*, [1 x i8] }
+%struct.Hash = type { i8, i8, i32, i32, %struct.HashElem*, %struct._ht* }
+%struct.HashElem = type { %struct.HashElem*, %struct.HashElem*, i8*, i8*, i32 }
+%struct.IdList = type { %struct..5sPragmaType*, i32, i32 }
+%struct.Index = type { i8*, i32, i32*, i32*, %struct.Table*, i32, i8, i8, i8*, %struct.Index*, %struct.Schema*, i8*, i8** }
+%struct.KeyInfo = type { %struct.sqlite3*, i8, i8, i8, i32, i8*, [1 x %struct.CollSeq*] }
+%struct.Mem = type { %struct.CountCtx, double, %struct.sqlite3*, i8*, i32, i16, i8, i8, void (i8*)* }
+%struct.MemPage = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i16, i16, i16, i16, i16, i16, [5 x %struct._OvflCell], %struct.BtShared*, i8*, %struct.DbPage*, i32, %struct.MemPage* }
+%struct.Module = type { %struct.sqlite3_module*, i8*, i8*, void (i8*)* }
+%struct.Op = type { i8, i8, i8, i8, i32, i32, i32, %1 }
+%struct.Pager = type { %struct.sqlite3_vfs*, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.Bitvec*, %struct.Bitvec*, i8*, i8*, i8*, i8*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.BusyHandler*, %struct.PagerLruList, %struct.DbPage*, %struct.DbPage*, %struct.DbPage*, i64, i64, i64, i64, i64, i32, void (%struct.DbPage*, i32)*, void (%struct.DbPage*, i32)*, i32, %struct.DbPage**, i8*, [16 x i8] }
+%struct.PagerLruLink = type { %struct.DbPage*, %struct.DbPage* }
+%struct.PagerLruList = type { %struct.DbPage*, %struct.DbPage*, %struct.DbPage* }
+%struct.Schema = type { i32, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Table*, i8, i8, i16, i32, %struct.sqlite3* }
+%struct.Select = type { %struct.ExprList*, i8, i8, i8, i8, i8, i8, i8, %struct.SrcList*, %struct.Expr*, %struct.ExprList*, %struct.Expr*, %struct.ExprList*, %struct.Select*, %struct.Select*, %struct.Select*, %struct.Expr*, %struct.Expr*, i32, i32, [3 x i32] }
+%struct.SrcList = type { i16, i16, [1 x %struct.SrcList_item] }
+%struct.SrcList_item = type { i8*, i8*, i8*, %struct.Table*, %struct.Select*, i8, i8, i32, %struct.Expr*, %struct.IdList*, i64 }
+%struct.Table = type { i8*, i32, %struct.Column*, i32, %struct.Index*, i32, %struct.Select*, i32, %struct.Trigger*, %struct.FKey*, i8*, %struct.Expr*, i32, i8, i8, i8, i8, i8, i8, i8, %struct.Module*, %struct.sqlite3_vtab*, i32, i8**, %struct.Schema* }
+%struct.Trigger = type { i8*, i8*, i8, i8, %struct.Expr*, %struct.IdList*, %struct..5sPragmaType, %struct.Schema*, %struct.Schema*, %struct.TriggerStep*, %struct.Trigger* }
+%struct.TriggerStep = type { i32, i32, %struct.Trigger*, %struct.Select*, %struct..5sPragmaType, %struct.Expr*, %struct.ExprList*, %struct.IdList*, %struct.TriggerStep*, %struct.TriggerStep* }
+%struct.Vdbe = type { %struct.sqlite3*, %struct.Vdbe*, %struct.Vdbe*, i32, i32, %struct.Op*, i32, i32, i32*, %struct.Mem**, %struct.Mem*, i32, %struct.Cursor**, i32, %struct.Mem*, i8**, i32, i32, i32, %struct.Mem*, i32, i32, %struct.Fifo, i32, i32, %struct.Context*, i32, i32, i32, i32, i32, [25 x i32], i32, i32, i8**, i8*, %struct.Mem*, i8, i8, i8, i8, i8, i8, i32, i64, i32, %struct.BtreeMutexArray, i32, i8*, i32 }
+%struct.VdbeFunc = type { %struct.FuncDef*, i32, [1 x %struct.AuxData] }
+%struct._OvflCell = type { i8*, i16 }
+%struct._RuneCharClass = type { [14 x i8], i32 }
+%struct._RuneEntry = type { i32, i32, i32, i32* }
+%struct._RuneLocale = type { [8 x i8], [32 x i8], i32 (i8*, i32, i8**)*, i32 (i32, i8*, i32, i8**)*, i32, [256 x i32], [256 x i32], [256 x i32], %struct._RuneRange, %struct._RuneRange, %struct._RuneRange, i8*, i32, i32, %struct._RuneCharClass* }
+%struct._RuneRange = type { i32, %struct._RuneEntry* }
+%struct.__sFILEX = type opaque
+%struct._ht = type { i32, %struct.HashElem* }
+%struct.callback_data = type { %struct.sqlite3*, i32, i32, %struct.FILE*, i32, i32, i32, i8*, [20 x i8], [100 x i32], [100 x i32], [20 x i8], %struct.previous_mode_data, [1024 x i8], i8* }
+%struct.previous_mode_data = type { i32, i32, i32, [100 x i32] }
+%struct.sColMap = type { i32, i8* }
+%struct.sqlite3 = type { %struct.sqlite3_vfs*, i32, %struct.Db*, i32, i32, i32, i32, i8, i8, i8, i8, i32, %struct.CollSeq*, i64, i64, i32, i32, i32, %struct.sqlite3_mutex*, %struct.sqlite3InitInfo, i32, i8**, %struct.Vdbe*, i32, void (i8*, i8*)*, i8*, void (i8*, i8*, i64)*, i8*, i8*, i32 (i8*)*, i8*, void (i8*)*, i8*, void (i8*, i32, i8*, i8*, i64)*, void (i8*, %struct.sqlite3*, i32, i8*)*, void (i8*, %struct.sqlite3*, i32, i8*)*, i8*, %struct.Mem*, i8*, i8*, %union.anon, i32 (i8*, i32, i8*, i8*, i8*, i8*)*, i8*, i32 (i8*)*, i8*, i32, %struct.Hash, %struct.Table*, %struct.sqlite3_vtab**, i32, %struct.Hash, %struct.Hash, %struct.BusyHandler, i32, [2 x %struct.Db], i8 }
+%struct.sqlite3InitInfo = type { i32, i32, i8 }
+%struct.sqlite3_context = type { %struct.FuncDef*, %struct.VdbeFunc*, %struct.Mem, %struct.Mem*, i32, %struct.CollSeq* }
+%struct.sqlite3_file = type { %struct.sqlite3_io_methods* }
+%struct.sqlite3_index_constraint = type { i32, i8, i8, i32 }
+%struct.sqlite3_index_constraint_usage = type { i32, i8 }
+%struct.sqlite3_index_info = type { i32, %struct.sqlite3_index_constraint*, i32, %struct.sqlite3_index_constraint_usage*, %struct.sqlite3_index_constraint_usage*, i32, i8*, i32, i32, double }
+%struct.sqlite3_io_methods = type { i32, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i64)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i64*)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i32, i8*)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*)* }
+%struct.sqlite3_module = type { i32, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_index_info*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_vtab_cursor**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, i32, i8*, i32, %struct.Mem**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, %struct.sqlite3_context*, i32)*, i32 (%struct.sqlite3_vtab_cursor*, i64*)*, i32 (%struct.sqlite3_vtab*, i32, %struct.Mem**, i64*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, i32, i8*, void (%struct.sqlite3_context*, i32, %struct.Mem**)**, i8**)*, i32 (%struct.sqlite3_vtab*, i8*)* }
+%struct.sqlite3_mutex = type opaque
+%struct.sqlite3_vfs = type { i32, i32, i32, %struct.sqlite3_vfs*, i8*, i8*, i32 (%struct.sqlite3_vfs*, i8*, %struct.sqlite3_file*, i32, i32*)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i8*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*)*, void (%struct.sqlite3_vfs*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*, i8*)*, void (%struct.sqlite3_vfs*, i8*)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i32)*, i32 (%struct.sqlite3_vfs*, double*)* }
+%struct.sqlite3_vtab = type { %struct.sqlite3_module*, i32, i8* }
+%struct.sqlite3_vtab_cursor = type { %struct.sqlite3_vtab* }
+%union.anon = type { double }
+
+@_DefaultRuneLocale = external global %struct._RuneLocale ; <%struct._RuneLocale*> [#uses=2]
+@__stderrp = external global %struct.FILE*        ; <%struct.FILE**> [#uses=1]
+@.str10 = internal constant [16 x i8] c"Out of memory!\0A\00", align 1 ; <[16 x i8]*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%struct.callback_data*, i8*)* @set_table_name to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define fastcc void @set_table_name(%struct.callback_data* nocapture %p, i8* %zName) nounwind ssp {
+entry:
+  %0 = getelementptr inbounds %struct.callback_data* %p, i32 0, i32 7 ; <i8**> [#uses=3]
+  %1 = load i8** %0, align 4                      ; <i8*> [#uses=2]
+  %2 = icmp eq i8* %1, null                       ; <i1> [#uses=1]
+  br i1 %2, label %bb1, label %bb
+
+bb:                                               ; preds = %entry
+  free i8* %1
+  store i8* null, i8** %0, align 4
+  br label %bb1
+
+bb1:                                              ; preds = %bb, %entry
+  %3 = icmp eq i8* %zName, null                   ; <i1> [#uses=1]
+  br i1 %3, label %return, label %bb2
+
+bb2:                                              ; preds = %bb1
+  %4 = load i8* %zName, align 1                   ; <i8> [#uses=2]
+  %5 = zext i8 %4 to i32                          ; <i32> [#uses=2]
+  %6 = icmp sgt i8 %4, -1                         ; <i1> [#uses=1]
+  br i1 %6, label %bb.i.i, label %bb1.i.i
+
+bb.i.i:                                           ; preds = %bb2
+  %7 = getelementptr inbounds %struct._RuneLocale* @_DefaultRuneLocale, i32 0, i32 5, i32 %5 ; <i32*> [#uses=1]
+  %8 = load i32* %7, align 4                      ; <i32> [#uses=1]
+  %9 = and i32 %8, 256                            ; <i32> [#uses=1]
+  br label %isalpha.exit
+
+bb1.i.i:                                          ; preds = %bb2
+  %10 = tail call i32 @__maskrune(i32 %5, i32 256) nounwind ; <i32> [#uses=1]
+  br label %isalpha.exit
+
+isalpha.exit:                                     ; preds = %bb1.i.i, %bb.i.i
+  %storemerge.in.in.i.i = phi i32 [ %9, %bb.i.i ], [ %10, %bb1.i.i ] ; <i32> [#uses=1]
+  %storemerge.in.i.i = icmp eq i32 %storemerge.in.in.i.i, 0 ; <i1> [#uses=1]
+  br i1 %storemerge.in.i.i, label %bb3, label %bb5
+
+bb3:                                              ; preds = %isalpha.exit
+  %11 = load i8* %zName, align 1                  ; <i8> [#uses=2]
+  %12 = icmp eq i8 %11, 95                        ; <i1> [#uses=1]
+  br i1 %12, label %bb5, label %bb12.preheader
+
+bb5:                                              ; preds = %bb3, %isalpha.exit
+  %.pre = load i8* %zName, align 1                ; <i8> [#uses=1]
+  br label %bb12.preheader
+
+bb12.preheader:                                   ; preds = %bb5, %bb3
+  %13 = phi i8 [ %.pre, %bb5 ], [ %11, %bb3 ]     ; <i8> [#uses=1]
+  %needQuote.1.ph = phi i32 [ 0, %bb5 ], [ 1, %bb3 ] ; <i32> [#uses=2]
+  %14 = icmp eq i8 %13, 0                         ; <i1> [#uses=1]
+  br i1 %14, label %bb13, label %bb7
+
+bb7:                                              ; preds = %bb11, %bb12.preheader
+  %i.011 = phi i32 [ %tmp17, %bb11 ], [ 0, %bb12.preheader ] ; <i32> [#uses=2]
+  %n.110 = phi i32 [ %26, %bb11 ], [ 0, %bb12.preheader ] ; <i32> [#uses=3]
+  %needQuote.19 = phi i32 [ %needQuote.0, %bb11 ], [ %needQuote.1.ph, %bb12.preheader ] ; <i32> [#uses=2]
+  %scevgep16 = getelementptr i8* %zName, i32 %i.011 ; <i8*> [#uses=2]
+  %tmp17 = add i32 %i.011, 1                      ; <i32> [#uses=2]
+  %scevgep18 = getelementptr i8* %zName, i32 %tmp17 ; <i8*> [#uses=1]
+  %15 = load i8* %scevgep16, align 1              ; <i8> [#uses=2]
+  %16 = zext i8 %15 to i32                        ; <i32> [#uses=2]
+  %17 = icmp sgt i8 %15, -1                       ; <i1> [#uses=1]
+  br i1 %17, label %bb.i.i2, label %bb1.i.i3
+
+bb.i.i2:                                          ; preds = %bb7
+  %18 = getelementptr inbounds %struct._RuneLocale* @_DefaultRuneLocale, i32 0, i32 5, i32 %16 ; <i32*> [#uses=1]
+  %19 = load i32* %18, align 4                    ; <i32> [#uses=1]
+  %20 = and i32 %19, 1280                         ; <i32> [#uses=1]
+  br label %isalnum.exit
+
+bb1.i.i3:                                         ; preds = %bb7
+  %21 = tail call i32 @__maskrune(i32 %16, i32 1280) nounwind ; <i32> [#uses=1]
+  br label %isalnum.exit
+
+isalnum.exit:                                     ; preds = %bb1.i.i3, %bb.i.i2
+  %storemerge.in.in.i.i4 = phi i32 [ %20, %bb.i.i2 ], [ %21, %bb1.i.i3 ] ; <i32> [#uses=1]
+  %storemerge.in.i.i5 = icmp eq i32 %storemerge.in.in.i.i4, 0 ; <i1> [#uses=1]
+  br i1 %storemerge.in.i.i5, label %bb8, label %bb11
+
+bb8:                                              ; preds = %isalnum.exit
+  %22 = load i8* %scevgep16, align 1              ; <i8> [#uses=2]
+  %23 = icmp eq i8 %22, 95                        ; <i1> [#uses=1]
+  br i1 %23, label %bb11, label %bb9
+
+bb9:                                              ; preds = %bb8
+  %24 = icmp eq i8 %22, 39                        ; <i1> [#uses=1]
+  %25 = zext i1 %24 to i32                        ; <i32> [#uses=1]
+  %.n.1 = add i32 %n.110, %25                     ; <i32> [#uses=1]
+  br label %bb11
+
+bb11:                                             ; preds = %bb9, %bb8, %isalnum.exit
+  %needQuote.0 = phi i32 [ 1, %bb9 ], [ %needQuote.19, %isalnum.exit ], [ %needQuote.19, %bb8 ] ; <i32> [#uses=2]
+  %n.0 = phi i32 [ %.n.1, %bb9 ], [ %n.110, %isalnum.exit ], [ %n.110, %bb8 ] ; <i32> [#uses=1]
+  %26 = add nsw i32 %n.0, 1                       ; <i32> [#uses=2]
+  %27 = load i8* %scevgep18, align 1              ; <i8> [#uses=1]
+  %28 = icmp eq i8 %27, 0                         ; <i1> [#uses=1]
+  br i1 %28, label %bb13, label %bb7
+
+bb13:                                             ; preds = %bb11, %bb12.preheader
+  %n.1.lcssa = phi i32 [ 0, %bb12.preheader ], [ %26, %bb11 ] ; <i32> [#uses=2]
+  %needQuote.1.lcssa = phi i32 [ %needQuote.1.ph, %bb12.preheader ], [ %needQuote.0, %bb11 ] ; <i32> [#uses=1]
+  %29 = add nsw i32 %n.1.lcssa, 2                 ; <i32> [#uses=1]
+  %30 = icmp eq i32 %needQuote.1.lcssa, 0         ; <i1> [#uses=3]
+  %n.1. = select i1 %30, i32 %n.1.lcssa, i32 %29  ; <i32> [#uses=1]
+  %31 = add nsw i32 %n.1., 1                      ; <i32> [#uses=1]
+  %32 = malloc i8, i32 %31                        ; <i8*> [#uses=7]
+  store i8* %32, i8** %0, align 4
+  %33 = icmp eq i8* %32, null                     ; <i1> [#uses=1]
+  br i1 %33, label %bb16, label %bb17
+
+bb16:                                             ; preds = %bb13
+  %34 = load %struct.FILE** @__stderrp, align 4   ; <%struct.FILE*> [#uses=1]
+  %35 = bitcast %struct.FILE* %34 to i8*          ; <i8*> [#uses=1]
+  %36 = tail call i32 @"\01_fwrite$UNIX2003"(i8* getelementptr inbounds ([16 x i8]* @.str10, i32 0, i32 0), i32 1, i32 15, i8* %35) nounwind ; <i32> [#uses=0]
+  tail call void @exit(i32 1) noreturn nounwind
+  unreachable
+
+bb17:                                             ; preds = %bb13
+  br i1 %30, label %bb23.preheader, label %bb18
+
+bb18:                                             ; preds = %bb17
+  store i8 39, i8* %32, align 4
+  br label %bb23.preheader
+
+bb23.preheader:                                   ; preds = %bb18, %bb17
+  %n.3.ph = phi i32 [ 1, %bb18 ], [ 0, %bb17 ]    ; <i32> [#uses=2]
+  %37 = load i8* %zName, align 1                  ; <i8> [#uses=1]
+  %38 = icmp eq i8 %37, 0                         ; <i1> [#uses=1]
+  br i1 %38, label %bb24, label %bb20
+
+bb20:                                             ; preds = %bb22, %bb23.preheader
+  %storemerge18 = phi i32 [ %tmp, %bb22 ], [ 0, %bb23.preheader ] ; <i32> [#uses=2]
+  %n.37 = phi i32 [ %n.4, %bb22 ], [ %n.3.ph, %bb23.preheader ] ; <i32> [#uses=3]
+  %scevgep = getelementptr i8* %zName, i32 %storemerge18 ; <i8*> [#uses=1]
+  %tmp = add i32 %storemerge18, 1                 ; <i32> [#uses=2]
+  %scevgep15 = getelementptr i8* %zName, i32 %tmp ; <i8*> [#uses=1]
+  %39 = load i8* %scevgep, align 1                ; <i8> [#uses=2]
+  %40 = getelementptr inbounds i8* %32, i32 %n.37 ; <i8*> [#uses=1]
+  store i8 %39, i8* %40, align 1
+  %41 = add nsw i32 %n.37, 1                      ; <i32> [#uses=2]
+  %42 = icmp eq i8 %39, 39                        ; <i1> [#uses=1]
+  br i1 %42, label %bb21, label %bb22
+
+bb21:                                             ; preds = %bb20
+  %43 = getelementptr inbounds i8* %32, i32 %41   ; <i8*> [#uses=1]
+  store i8 39, i8* %43, align 1
+  %44 = add nsw i32 %n.37, 2                      ; <i32> [#uses=1]
+  br label %bb22
+
+bb22:                                             ; preds = %bb21, %bb20
+  %n.4 = phi i32 [ %44, %bb21 ], [ %41, %bb20 ]   ; <i32> [#uses=2]
+  %45 = load i8* %scevgep15, align 1              ; <i8> [#uses=1]
+  %46 = icmp eq i8 %45, 0                         ; <i1> [#uses=1]
+  br i1 %46, label %bb24, label %bb20
+
+bb24:                                             ; preds = %bb22, %bb23.preheader
+  %n.3.lcssa = phi i32 [ %n.3.ph, %bb23.preheader ], [ %n.4, %bb22 ] ; <i32> [#uses=3]
+  br i1 %30, label %bb26, label %bb25
+
+bb25:                                             ; preds = %bb24
+  %47 = getelementptr inbounds i8* %32, i32 %n.3.lcssa ; <i8*> [#uses=1]
+  store i8 39, i8* %47, align 1
+  %48 = add nsw i32 %n.3.lcssa, 1                 ; <i32> [#uses=1]
+  br label %bb26
+
+bb26:                                             ; preds = %bb25, %bb24
+  %n.5 = phi i32 [ %48, %bb25 ], [ %n.3.lcssa, %bb24 ] ; <i32> [#uses=1]
+  %49 = getelementptr inbounds i8* %32, i32 %n.5  ; <i8*> [#uses=1]
+  store i8 0, i8* %49, align 1
+  ret void
+
+return:                                           ; preds = %bb1
+  ret void
+}
+
+declare i32 @"\01_fwrite$UNIX2003"(i8*, i32, i32, i8*)
+
+declare void @exit(i32) noreturn nounwind
+
+declare i32 @__maskrune(i32, i32)
diff --git a/test/CodeGen/X86/2009-10-14-LiveVariablesBug.ll b/test/CodeGen/X86/2009-10-14-LiveVariablesBug.ll
new file mode 100644
index 0000000..c1aa17c
--- /dev/null
+++ b/test/CodeGen/X86/2009-10-14-LiveVariablesBug.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin
+; rdar://7299435
+
+@i = internal global i32 0                        ; <i32*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (void (i16)* @foo to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define void @foo(i16 signext %source) nounwind ssp {
+entry:
+  %source_addr = alloca i16, align 2              ; <i16*> [#uses=2]
+  store i16 %source, i16* %source_addr
+  store i32 4, i32* @i, align 4
+  call void asm sideeffect "# top of block", "~{dirflag},~{fpsr},~{flags},~{edi},~{esi},~{edx},~{ecx},~{eax}"() nounwind
+  %asmtmp = call i16 asm sideeffect "movw $1, $0", "=={ax},*m,~{dirflag},~{fpsr},~{flags},~{memory}"(i16* %source_addr) nounwind ; <i16> [#uses=0]
+  ret void
+}
diff --git a/test/CodeGen/X86/2009-10-19-EmergencySpill.ll b/test/CodeGen/X86/2009-10-19-EmergencySpill.ll
new file mode 100644
index 0000000..ba44a2e
--- /dev/null
+++ b/test/CodeGen/X86/2009-10-19-EmergencySpill.ll
@@ -0,0 +1,54 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -disable-fp-elim
+; rdar://7291624
+
+%union.RtreeCoord = type { float }
+%struct.RtreeCell = type { i64, [10 x %union.RtreeCoord] }
+%struct.Rtree = type { i32, i32*, i32, i32, i32, i32, i8*, i8* }
+%struct.RtreeNode = type { i32*, i64, i32, i32, i8*, i32* }
+
+define fastcc void @nodeOverwriteCell(%struct.Rtree* nocapture %pRtree, %struct.RtreeNode* nocapture %pNode, %struct.RtreeCell* nocapture %pCell, i32 %iCell) nounwind ssp {
+entry:
+  %0 = load i8** undef, align 8                   ; <i8*> [#uses=2]
+  %1 = load i32* undef, align 8                   ; <i32> [#uses=1]
+  %2 = mul i32 %1, %iCell                         ; <i32> [#uses=1]
+  %3 = add nsw i32 %2, 4                          ; <i32> [#uses=1]
+  %4 = sext i32 %3 to i64                         ; <i64> [#uses=2]
+  %5 = load i64* null, align 8                    ; <i64> [#uses=2]
+  %6 = lshr i64 %5, 48                            ; <i64> [#uses=1]
+  %7 = trunc i64 %6 to i8                         ; <i8> [#uses=1]
+  store i8 %7, i8* undef, align 1
+  %8 = lshr i64 %5, 8                             ; <i64> [#uses=1]
+  %9 = trunc i64 %8 to i8                         ; <i8> [#uses=1]
+  %.sum4 = add i64 %4, 6                          ; <i64> [#uses=1]
+  %10 = getelementptr inbounds i8* %0, i64 %.sum4 ; <i8*> [#uses=1]
+  store i8 %9, i8* %10, align 1
+  %11 = getelementptr inbounds %struct.Rtree* %pRtree, i64 0, i32 3 ; <i32*> [#uses=1]
+  br i1 undef, label %bb.nph, label %bb2
+
+bb.nph:                                           ; preds = %entry
+  %tmp25 = add i64 %4, 11                         ; <i64> [#uses=1]
+  br label %bb
+
+bb:                                               ; preds = %bb, %bb.nph
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i64> [#uses=3]
+  %scevgep = getelementptr %struct.RtreeCell* %pCell, i64 0, i32 1, i64 %indvar ; <%union.RtreeCoord*> [#uses=1]
+  %scevgep12 = bitcast %union.RtreeCoord* %scevgep to i32* ; <i32*> [#uses=1]
+  %tmp = shl i64 %indvar, 2                       ; <i64> [#uses=1]
+  %tmp26 = add i64 %tmp, %tmp25                   ; <i64> [#uses=1]
+  %scevgep27 = getelementptr i8* %0, i64 %tmp26   ; <i8*> [#uses=1]
+  %12 = load i32* %scevgep12, align 4             ; <i32> [#uses=1]
+  %13 = lshr i32 %12, 24                          ; <i32> [#uses=1]
+  %14 = trunc i32 %13 to i8                       ; <i8> [#uses=1]
+  store i8 %14, i8* undef, align 1
+  store i8 undef, i8* %scevgep27, align 1
+  %15 = load i32* %11, align 4                    ; <i32> [#uses=1]
+  %16 = shl i32 %15, 1                            ; <i32> [#uses=1]
+  %17 = icmp sgt i32 %16, undef                   ; <i1> [#uses=1]
+  %indvar.next = add i64 %indvar, 1               ; <i64> [#uses=1]
+  br i1 %17, label %bb, label %bb2
+
+bb2:                                              ; preds = %bb, %entry
+  %18 = getelementptr inbounds %struct.RtreeNode* %pNode, i64 0, i32 3 ; <i32*> [#uses=1]
+  store i32 1, i32* %18, align 4
+  ret void
+}
diff --git a/test/CodeGen/X86/2009-10-19-atomic-cmp-eflags.ll b/test/CodeGen/X86/2009-10-19-atomic-cmp-eflags.ll
new file mode 100644
index 0000000..d7f0c1a
--- /dev/null
+++ b/test/CodeGen/X86/2009-10-19-atomic-cmp-eflags.ll
@@ -0,0 +1,69 @@
+; RUN: llvm-as <%s | llc | FileCheck %s
+; PR 5247
+; check that cmp is not scheduled before the add
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@.str76843 = external constant [45 x i8]          ; <[45 x i8]*> [#uses=1]
+@__profiling_callsite_timestamps_live = external global [1216 x i64] ; <[1216 x i64]*> [#uses=2]
+
+define i32 @cl_init(i32 %initoptions) nounwind {
+entry:
+  %retval.i = alloca i32                          ; <i32*> [#uses=3]
+  %retval = alloca i32                            ; <i32*> [#uses=2]
+  %initoptions.addr = alloca i32                  ; <i32*> [#uses=2]
+  tail call void asm sideeffect "cpuid", "~{ax},~{bx},~{cx},~{dx},~{memory},~{dirflag},~{fpsr},~{flags}"() nounwind
+  %0 = tail call i64 @llvm.readcyclecounter() nounwind ; <i64> [#uses=1]
+  store i32 %initoptions, i32* %initoptions.addr
+  %1 = bitcast i32* %initoptions.addr to { }*     ; <{ }*> [#uses=0]
+  call void asm sideeffect "cpuid", "~{ax},~{bx},~{cx},~{dx},~{memory},~{dirflag},~{fpsr},~{flags}"() nounwind
+  %2 = call i64 @llvm.readcyclecounter() nounwind ; <i64> [#uses=1]
+  %call.i = call i32 @lt_dlinit() nounwind        ; <i32> [#uses=1]
+  %tobool.i = icmp ne i32 %call.i, 0              ; <i1> [#uses=1]
+  br i1 %tobool.i, label %if.then.i, label %if.end.i
+
+if.then.i:                                        ; preds = %entry
+  %call1.i = call i32 @warn_dlerror(i8* getelementptr inbounds ([45 x i8]* @.str76843, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
+  store i32 -1, i32* %retval.i
+  br label %lt_init.exit
+
+if.end.i:                                         ; preds = %entry
+  store i32 0, i32* %retval.i
+  br label %lt_init.exit
+
+lt_init.exit:                                     ; preds = %if.end.i, %if.then.i
+  %3 = load i32* %retval.i                        ; <i32> [#uses=1]
+  call void asm sideeffect "cpuid", "~{ax},~{bx},~{cx},~{dx},~{memory},~{dirflag},~{fpsr},~{flags}"() nounwind
+  %4 = call i64 @llvm.readcyclecounter() nounwind ; <i64> [#uses=1]
+  %5 = sub i64 %4, %2                             ; <i64> [#uses=1]
+  %6 = call i64 @llvm.atomic.load.add.i64.p0i64(i64* getelementptr inbounds ([1216 x i64]* @__profiling_callsite_timestamps_live, i32 0, i32 51), i64 %5) nounwind ; <i64> [#uses=0]
+;CHECK: lock
+;CHECK-NEXT: {{xadd|addq}} %rdx, __profiling_callsite_timestamps_live
+;CHECK-NEXT: cmpl $0,
+;CHECK-NEXT: jne
+  %cmp = icmp eq i32 %3, 0                        ; <i1> [#uses=1]
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %lt_init.exit
+  call void @cli_rarload()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %lt_init.exit
+  store i32 0, i32* %retval
+  %7 = load i32* %retval                          ; <i32> [#uses=1]
+  tail call void asm sideeffect "cpuid", "~{ax},~{bx},~{cx},~{dx},~{memory},~{dirflag},~{fpsr},~{flags}"() nounwind
+  %8 = tail call i64 @llvm.readcyclecounter() nounwind ; <i64> [#uses=1]
+  %9 = sub i64 %8, %0                             ; <i64> [#uses=1]
+  %10 = call i64 @llvm.atomic.load.add.i64.p0i64(i64* getelementptr inbounds ([1216 x i64]* @__profiling_callsite_timestamps_live, i32 0, i32 50), i64 %9) ; <i64> [#uses=0]
+  ret i32 %7
+}
+
+declare void @cli_rarload() nounwind
+
+declare i32 @lt_dlinit()
+
+declare i32 @warn_dlerror(i8*) nounwind
+
+declare i64 @llvm.atomic.load.add.i64.p0i64(i64* nocapture, i64) nounwind
+
+declare i64 @llvm.readcyclecounter() nounwind
diff --git a/test/CodeGen/X86/2009-10-25-RewriterBug.ll b/test/CodeGen/X86/2009-10-25-RewriterBug.ll
new file mode 100644
index 0000000..5b4e818
--- /dev/null
+++ b/test/CodeGen/X86/2009-10-25-RewriterBug.ll
@@ -0,0 +1,171 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=pic -disable-fp-elim
+
+%struct.DecRefPicMarking_t = type { i32, i32, i32, i32, i32, %struct.DecRefPicMarking_t* }
+%struct.FrameStore = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.StorablePicture*, %struct.StorablePicture*, %struct.StorablePicture* }
+%struct.StorablePicture = type { i32, i32, i32, i32, i32, [50 x [6 x [33 x i64]]], [50 x [6 x [33 x i64]]], [50 x [6 x [33 x i64]]], [50 x [6 x [33 x i64]]], i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16**, i16***, i8*, i16**, i8***, i64***, i64***, i16****, i8**, i8**, %struct.StorablePicture*, %struct.StorablePicture*, %struct.StorablePicture*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [2 x i32], i32, %struct.DecRefPicMarking_t*, i32 }
+
+define fastcc void @insert_picture_in_dpb(%struct.FrameStore* nocapture %fs, %struct.StorablePicture* %p) nounwind ssp { ; reduced testcase: nearly all values/branches are undef; only the CFG shape and the phi webs matter
+entry:
+  %0 = getelementptr inbounds %struct.FrameStore* %fs, i64 0, i32 12 ; <%struct.StorablePicture**> [#uses=1] ; address of the first StorablePicture* field of %fs (field 12; fields 0-11 are i32)
+  %1 = icmp eq i32 undef, 0                       ; <i1> [#uses=1] ; condition is undef-derived — deliberate in this reduction
+  br i1 %1, label %bb.i, label %bb36.i
+
+bb.i:                                             ; preds = %entry
+  br i1 undef, label %bb3.i, label %bb14.preheader.i
+
+bb3.i:                                            ; preds = %bb.i
+  unreachable
+
+bb14.preheader.i:                                 ; preds = %bb.i
+  br i1 undef, label %bb9.i, label %bb20.preheader.i
+
+bb9.i:                                            ; preds = %bb9.i, %bb14.preheader.i
+  br i1 undef, label %bb9.i, label %bb20.preheader.i
+
+bb20.preheader.i:                                 ; preds = %bb9.i, %bb14.preheader.i
+  br i1 undef, label %bb18.i, label %bb29.preheader.i
+
+bb18.i:                                           ; preds = %bb20.preheader.i
+  unreachable
+
+bb29.preheader.i:                                 ; preds = %bb20.preheader.i
+  br i1 undef, label %bb24.i, label %bb30.i
+
+bb24.i:                                           ; preds = %bb29.preheader.i
+  unreachable
+
+bb30.i:                                           ; preds = %bb29.preheader.i
+  store i32 undef, i32* undef, align 8            ; store through undef pointer — kept only so the block is non-trivial
+  br label %bb67.preheader.i
+
+bb36.i:                                           ; preds = %entry
+  br label %bb67.preheader.i
+
+bb67.preheader.i:                                 ; preds = %bb36.i, %bb30.i
+  %2 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=2] ; start of an 11-wide phi web (%2..%12) feeding the loop below
+  %3 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=2]
+  %4 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=2]
+  %5 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=1]
+  %6 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=1]
+  %7 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=1]
+  %8 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=1]
+  %9 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=1]
+  %10 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=1]
+  %11 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=1]
+  %12 = phi %struct.StorablePicture* [ null, %bb36.i ], [ undef, %bb30.i ] ; <%struct.StorablePicture*> [#uses=1]
+  br i1 undef, label %bb38.i, label %bb68.i
+
+bb38.i:                                           ; preds = %bb66.i, %bb67.preheader.i
+  %13 = phi %struct.StorablePicture* [ %37, %bb66.i ], [ %2, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1] ; outer-loop header: phis rotate the picture pointers each iteration
+  %14 = phi %struct.StorablePicture* [ %38, %bb66.i ], [ %3, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1]
+  %15 = phi %struct.StorablePicture* [ %39, %bb66.i ], [ %4, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1]
+  %16 = phi %struct.StorablePicture* [ %40, %bb66.i ], [ %5, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1]
+  %17 = phi %struct.StorablePicture* [ %40, %bb66.i ], [ %6, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1]
+  %18 = phi %struct.StorablePicture* [ %40, %bb66.i ], [ %7, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1]
+  %19 = phi %struct.StorablePicture* [ %40, %bb66.i ], [ %8, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1]
+  %20 = phi %struct.StorablePicture* [ %40, %bb66.i ], [ %9, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1]
+  %21 = phi %struct.StorablePicture* [ %40, %bb66.i ], [ %10, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1]
+  %22 = phi %struct.StorablePicture* [ %40, %bb66.i ], [ %11, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1]
+  %23 = phi %struct.StorablePicture* [ %40, %bb66.i ], [ %12, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1]
+  %indvar248.i = phi i64 [ %indvar.next249.i, %bb66.i ], [ 0, %bb67.preheader.i ] ; <i64> [#uses=3] ; outer induction variable, starts at 0
+  %storemerge52.i = trunc i64 %indvar248.i to i32 ; <i32> [#uses=1]
+  %24 = getelementptr inbounds %struct.StorablePicture* %23, i64 0, i32 19 ; <i32*> [#uses=0] ; dead GEP, kept to hold %23 live
+  br i1 undef, label %bb.nph51.i, label %bb66.i
+
+bb.nph51.i:                                       ; preds = %bb38.i
+  %25 = sdiv i32 %storemerge52.i, 8               ; <i32> [#uses=0]
+  br label %bb39.i
+
+bb39.i:                                           ; preds = %bb64.i, %bb.nph51.i
+  %26 = phi %struct.StorablePicture* [ %17, %bb.nph51.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=1] ; inner-loop header; %27..%31 below are dead phis
+  %27 = phi %struct.StorablePicture* [ %18, %bb.nph51.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=0]
+  %28 = phi %struct.StorablePicture* [ %19, %bb.nph51.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=0]
+  %29 = phi %struct.StorablePicture* [ %20, %bb.nph51.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=0]
+  %30 = phi %struct.StorablePicture* [ %21, %bb.nph51.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=0]
+  %31 = phi %struct.StorablePicture* [ %22, %bb.nph51.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=0]
+  br i1 undef, label %bb57.i, label %bb40.i
+
+bb40.i:                                           ; preds = %bb39.i
+  br i1 undef, label %bb57.i, label %bb41.i
+
+bb41.i:                                           ; preds = %bb40.i
+  %storemerge10.i = select i1 undef, i32 2, i32 4 ; <i32> [#uses=1]
+  %32 = zext i32 %storemerge10.i to i64           ; <i64> [#uses=1]
+  br i1 undef, label %bb45.i, label %bb47.i
+
+bb45.i:                                           ; preds = %bb41.i
+  %33 = getelementptr inbounds %struct.StorablePicture* %26, i64 0, i32 5, i64 undef, i64 %32, i64 undef ; <i64*> [#uses=1] ; index into the [50 x [6 x [33 x i64]]] array at field 5
+  %34 = load i64* %33, align 8                    ; <i64> [#uses=1]
+  br label %bb47.i
+
+bb47.i:                                           ; preds = %bb45.i, %bb41.i
+  %storemerge11.i = phi i64 [ %34, %bb45.i ], [ 0, %bb41.i ] ; <i64> [#uses=0]
+  %scevgep246.i = getelementptr i64* undef, i64 undef ; <i64*> [#uses=0]
+  br label %bb64.i
+
+bb57.i:                                           ; preds = %bb40.i, %bb39.i
+  br i1 undef, label %bb58.i, label %bb60.i
+
+bb58.i:                                           ; preds = %bb57.i
+  br label %bb60.i
+
+bb60.i:                                           ; preds = %bb58.i, %bb57.i
+  %35 = load i64*** undef, align 8                ; <i64**> [#uses=1]
+  %scevgep256.i = getelementptr i64** %35, i64 %indvar248.i ; <i64**> [#uses=1] ; indexed by the outer induction variable
+  %36 = load i64** %scevgep256.i, align 8         ; <i64*> [#uses=1]
+  %scevgep243.i = getelementptr i64* %36, i64 undef ; <i64*> [#uses=1]
+  store i64 -1, i64* %scevgep243.i, align 8       ; the loop's only visible store
+  br label %bb64.i
+
+bb64.i:                                           ; preds = %bb60.i, %bb47.i
+  br i1 undef, label %bb39.i, label %bb66.i
+
+bb66.i:                                           ; preds = %bb64.i, %bb38.i
+  %37 = phi %struct.StorablePicture* [ %13, %bb38.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=2]
+  %38 = phi %struct.StorablePicture* [ %14, %bb38.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=2]
+  %39 = phi %struct.StorablePicture* [ %15, %bb38.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=2]
+  %40 = phi %struct.StorablePicture* [ %16, %bb38.i ], [ null, %bb64.i ] ; <%struct.StorablePicture*> [#uses=8] ; fans out into eight phis of bb38.i (%16..%23)
+  %indvar.next249.i = add i64 %indvar248.i, 1     ; <i64> [#uses=1]
+  br i1 undef, label %bb38.i, label %bb68.i
+
+bb68.i:                                           ; preds = %bb66.i, %bb67.preheader.i
+  %41 = phi %struct.StorablePicture* [ %2, %bb67.preheader.i ], [ %37, %bb66.i ] ; <%struct.StorablePicture*> [#uses=0]
+  %42 = phi %struct.StorablePicture* [ %3, %bb67.preheader.i ], [ %38, %bb66.i ] ; <%struct.StorablePicture*> [#uses=1]
+  %43 = phi %struct.StorablePicture* [ %4, %bb67.preheader.i ], [ %39, %bb66.i ] ; <%struct.StorablePicture*> [#uses=1]
+  br i1 undef, label %bb.nph48.i, label %bb108.i
+
+bb.nph48.i:                                       ; preds = %bb68.i
+  br label %bb80.i
+
+bb80.i:                                           ; preds = %bb104.i, %bb.nph48.i
+  %44 = phi %struct.StorablePicture* [ %42, %bb.nph48.i ], [ null, %bb104.i ] ; <%struct.StorablePicture*> [#uses=1]
+  %45 = phi %struct.StorablePicture* [ %43, %bb.nph48.i ], [ null, %bb104.i ] ; <%struct.StorablePicture*> [#uses=1]
+  br i1 undef, label %bb.nph39.i, label %bb104.i
+
+bb.nph39.i:                                       ; preds = %bb80.i
+  br label %bb81.i
+
+bb81.i:                                           ; preds = %bb102.i, %bb.nph39.i
+  %46 = phi %struct.StorablePicture* [ %44, %bb.nph39.i ], [ %48, %bb102.i ] ; <%struct.StorablePicture*> [#uses=0]
+  %47 = phi %struct.StorablePicture* [ %45, %bb.nph39.i ], [ %48, %bb102.i ] ; <%struct.StorablePicture*> [#uses=0]
+  br i1 undef, label %bb83.i, label %bb82.i
+
+bb82.i:                                           ; preds = %bb81.i
+  br i1 undef, label %bb83.i, label %bb101.i
+
+bb83.i:                                           ; preds = %bb82.i, %bb81.i
+  br label %bb102.i
+
+bb101.i:                                          ; preds = %bb82.i
+  br label %bb102.i
+
+bb102.i:                                          ; preds = %bb101.i, %bb83.i
+  %48 = load %struct.StorablePicture** %0, align 8 ; <%struct.StorablePicture*> [#uses=2] ; reload fs->field12 (address computed once in entry) on every inner iteration
+  br i1 undef, label %bb81.i, label %bb104.i
+
+bb104.i:                                          ; preds = %bb102.i, %bb80.i
+  br label %bb80.i                                ; note: this loop nest has no exit edge other than via undef branches above
+
+bb108.i:                                          ; preds = %bb68.i
+  unreachable
+}
diff --git a/test/CodeGen/X86/2009-11-04-SubregCoalescingBug.ll b/test/CodeGen/X86/2009-11-04-SubregCoalescingBug.ll
new file mode 100644
index 0000000..b5be65f
--- /dev/null
+++ b/test/CodeGen/X86/2009-11-04-SubregCoalescingBug.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin11 | FileCheck %s
+; rdar://7362871
+
+define void @bar(i32 %b, i32 %a) nounwind optsize ssp { ; computes zext(a + 15) and tail-calls foo with it
+entry:
+; CHECK:     leal 15(%rsi), %edi
+; CHECK-NOT: movl
+; CHECK:     _foo
+  %0 = add i32 %a, 15                             ; <i32> [#uses=1] ; a + 15 — expected to become the single leal checked above
+  %1 = zext i32 %0 to i64                         ; <i64> [#uses=1] ; widen to i64 for foo's parameter; the CHECK-NOT forbids an extra movl for this
+  tail call void @foo(i64 %1) nounwind
+  ret void
+}
+
+declare void @foo(i64)
diff --git a/test/CodeGen/X86/2009-11-13-VirtRegRewriterBug.ll b/test/CodeGen/X86/2009-11-13-VirtRegRewriterBug.ll
new file mode 100644
index 0000000..5398eef
--- /dev/null
+++ b/test/CodeGen/X86/2009-11-13-VirtRegRewriterBug.ll
@@ -0,0 +1,133 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic -disable-fp-elim
+; rdar://7394770
+
+%struct.JVTLib_100487 = type <{ i8 }>
+
+define i32 @_Z13JVTLib_10335613JVTLib_10266513JVTLib_100579S_S_S_jPhj(i16* nocapture %ResidualX_Array.0, %struct.JVTLib_100487* nocapture byval align 4 %xqp, i16* nocapture %ResidualL_Array.0, i16* %ResidualDCZ_Array.0, i16* nocapture %ResidualACZ_FOArray.0, i32 %useFRextDequant, i8* nocapture %JVTLib_103357, i32 %use_field_scan) ssp { ; reduced testcase: wide-integer (i256/i640) mask arithmetic feeding an add/sub network with i16 saturation
+bb.nph:
+  %0 = shl i32 undef, 1                           ; <i32> [#uses=2]
+  %mask133.masked.masked.masked.masked.masked.masked = or i640 undef, undef ; <i640> [#uses=1] ; i640 scratch value assembled from undef pieces
+  br label %bb
+
+bb:                                               ; preds = %_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit, %bb.nph
+  br i1 undef, label %bb2, label %bb1
+
+bb1:                                              ; preds = %bb
+  br i1 undef, label %bb.i, label %bb1.i
+
+bb2:                                              ; preds = %bb
+  unreachable
+
+bb.i:                                             ; preds = %bb1
+  br label %_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit
+
+bb1.i:                                            ; preds = %bb1
+  br label %_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit
+
+_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit: ; preds = %bb1.i, %bb.i
+  br i1 undef, label %bb5, label %bb
+
+bb5:                                              ; preds = %_ZL13JVTLib_105204PKsPK13JVTLib_105184PsPhjS5_j.exit
+  %mask271.masked.masked.masked.masked.masked.masked.masked = or i256 0, undef ; <i256> [#uses=2] ; four i256 mask chains; pieces of them are extracted by lshr/trunc in bb10
+  %mask266.masked.masked.masked.masked.masked.masked = or i256 %mask271.masked.masked.masked.masked.masked.masked.masked, undef ; <i256> [#uses=1]
+  %mask241.masked = or i256 undef, undef          ; <i256> [#uses=1]
+  %ins237 = or i256 undef, 0                      ; <i256> [#uses=1]
+  br i1 undef, label %bb9, label %bb10
+
+bb9:                                              ; preds = %bb5
+  br i1 undef, label %bb12.i, label %_ZL13JVTLib_105255PKsPK13JVTLib_105184Psj.exit
+
+bb12.i:                                           ; preds = %bb9
+  br label %_ZL13JVTLib_105255PKsPK13JVTLib_105184Psj.exit
+
+_ZL13JVTLib_105255PKsPK13JVTLib_105184Psj.exit:   ; preds = %bb12.i, %bb9
+  ret i32 undef
+
+bb10:                                             ; preds = %bb5
+  %1 = sext i16 undef to i32                      ; <i32> [#uses=1] ; %1..%12: sixteen-bit lanes widened to i32 for the add/sub network below
+  %2 = sext i16 undef to i32                      ; <i32> [#uses=1]
+  %3 = sext i16 undef to i32                      ; <i32> [#uses=1]
+  %4 = sext i16 undef to i32                      ; <i32> [#uses=1]
+  %5 = sext i16 undef to i32                      ; <i32> [#uses=1]
+  %6 = sext i16 undef to i32                      ; <i32> [#uses=1]
+  %tmp211 = lshr i256 %mask271.masked.masked.masked.masked.masked.masked.masked, 112 ; <i256> [#uses=0]
+  %7 = sext i16 undef to i32                      ; <i32> [#uses=1]
+  %tmp208 = lshr i256 %mask266.masked.masked.masked.masked.masked.masked, 128 ; <i256> [#uses=1] ; extract bits [128,144) of the mask chain as an i16 lane
+  %tmp209 = trunc i256 %tmp208 to i16             ; <i16> [#uses=1]
+  %8 = sext i16 %tmp209 to i32                    ; <i32> [#uses=1]
+  %9 = sext i16 undef to i32                      ; <i32> [#uses=1]
+  %10 = sext i16 undef to i32                     ; <i32> [#uses=1]
+  %tmp193 = lshr i256 %mask241.masked, 208        ; <i256> [#uses=1]
+  %tmp194 = trunc i256 %tmp193 to i16             ; <i16> [#uses=1]
+  %11 = sext i16 %tmp194 to i32                   ; <i32> [#uses=1]
+  %tmp187 = lshr i256 %ins237, 240                ; <i256> [#uses=1]
+  %tmp188 = trunc i256 %tmp187 to i16             ; <i16> [#uses=1]
+  %12 = sext i16 %tmp188 to i32                   ; <i32> [#uses=1]
+  %13 = add nsw i32 %4, %1                        ; <i32> [#uses=1] ; %13..%31: pairwise add/sub reduction over the widened lanes
+  %14 = add nsw i32 %5, 0                         ; <i32> [#uses=1]
+  %15 = add nsw i32 %6, %2                        ; <i32> [#uses=1]
+  %16 = add nsw i32 %7, %3                        ; <i32> [#uses=1]
+  %17 = add nsw i32 0, %8                         ; <i32> [#uses=1]
+  %18 = add nsw i32 %11, %9                       ; <i32> [#uses=1]
+  %19 = add nsw i32 0, %10                        ; <i32> [#uses=1]
+  %20 = add nsw i32 %12, 0                        ; <i32> [#uses=1]
+  %21 = add nsw i32 %17, %13                      ; <i32> [#uses=2]
+  %22 = add nsw i32 %18, %14                      ; <i32> [#uses=2]
+  %23 = add nsw i32 %19, %15                      ; <i32> [#uses=2]
+  %24 = add nsw i32 %20, %16                      ; <i32> [#uses=2]
+  %25 = add nsw i32 %22, %21                      ; <i32> [#uses=2]
+  %26 = add nsw i32 %24, %23                      ; <i32> [#uses=2]
+  %27 = sub i32 %21, %22                          ; <i32> [#uses=1]
+  %28 = sub i32 %23, %24                          ; <i32> [#uses=1]
+  %29 = add nsw i32 %26, %25                      ; <i32> [#uses=1]
+  %30 = sub i32 %25, %26                          ; <i32> [#uses=1]
+  %31 = sub i32 %27, %28                          ; <i32> [#uses=1]
+  %32 = ashr i32 %29, 1                           ; <i32> [#uses=2] ; arithmetic halve, then saturate each of %32..%34 to the i16 range below
+  %33 = ashr i32 %30, 1                           ; <i32> [#uses=2]
+  %34 = ashr i32 %31, 1                           ; <i32> [#uses=2]
+  %35 = icmp sgt i32 %32, 32767                   ; <i1> [#uses=1] ; clamp %32 to [-32768, 32767] via two compare+select pairs
+  %o0_0.0.i = select i1 %35, i32 32767, i32 %32   ; <i32> [#uses=2]
+  %36 = icmp slt i32 %o0_0.0.i, -32768            ; <i1> [#uses=1]
+  %37 = icmp sgt i32 %33, 32767                   ; <i1> [#uses=1]
+  %o1_0.0.i = select i1 %37, i32 32767, i32 %33   ; <i32> [#uses=2]
+  %38 = icmp slt i32 %o1_0.0.i, -32768            ; <i1> [#uses=1]
+  %39 = icmp sgt i32 %34, 32767                   ; <i1> [#uses=1]
+  %o2_0.0.i = select i1 %39, i32 32767, i32 %34   ; <i32> [#uses=2]
+  %40 = icmp slt i32 %o2_0.0.i, -32768            ; <i1> [#uses=1]
+  %tmp101 = lshr i640 %mask133.masked.masked.masked.masked.masked.masked, 256 ; <i640> [#uses=1]
+  %41 = trunc i32 %o0_0.0.i to i16                ; <i16> [#uses=1]
+  %tmp358 = select i1 %36, i16 -32768, i16 %41    ; <i16> [#uses=2] ; %32 fully saturated to i16
+  %42 = trunc i32 %o1_0.0.i to i16                ; <i16> [#uses=1]
+  %tmp347 = select i1 %38, i16 -32768, i16 %42    ; <i16> [#uses=1]
+  %43 = trunc i32 %o2_0.0.i to i16                ; <i16> [#uses=1]
+  %tmp335 = select i1 %40, i16 -32768, i16 %43    ; <i16> [#uses=1]
+  %44 = icmp sgt i16 %tmp358, -1                  ; <i1> [#uses=2] ; sign tests on the saturated lanes steer the final selects
+  %..i24 = select i1 %44, i16 %tmp358, i16 undef  ; <i16> [#uses=1]
+  %45 = icmp sgt i16 %tmp347, -1                  ; <i1> [#uses=1]
+  %46 = icmp sgt i16 %tmp335, -1                  ; <i1> [#uses=1]
+  %47 = zext i16 %..i24 to i32                    ; <i32> [#uses=1]
+  %tmp = trunc i640 %tmp101 to i32                ; <i32> [#uses=1]
+  %48 = and i32 %tmp, 65535                       ; <i32> [#uses=2] ; low 16 bits of the i640 extract, used as a common multiplier
+  %49 = mul i32 %47, %48                          ; <i32> [#uses=1]
+  %50 = zext i16 undef to i32                     ; <i32> [#uses=1]
+  %51 = mul i32 %50, %48                          ; <i32> [#uses=1]
+  %52 = add i32 %49, %0                           ; <i32> [#uses=1]
+  %53 = add i32 %51, %0                           ; <i32> [#uses=1]
+  %54 = lshr i32 %52, undef                       ; <i32> [#uses=1]
+  %55 = lshr i32 %53, undef                       ; <i32> [#uses=1]
+  %56 = trunc i32 %54 to i16                      ; <i16> [#uses=1]
+  %57 = trunc i32 %55 to i16                      ; <i16> [#uses=1]
+  %vs16Out0_0.0.i = select i1 %44, i16 %56, i16 undef ; <i16> [#uses=1]
+  %vs16Out0_4.0.i = select i1 %45, i16 0, i16 undef ; <i16> [#uses=1]
+  %vs16Out1_0.0.i = select i1 %46, i16 %57, i16 undef ; <i16> [#uses=1]
+  br i1 undef, label %bb129.i, label %_ZL13JVTLib_105207PKsPK13JVTLib_105184Psj.exit
+
+bb129.i:                                          ; preds = %bb10
+  br label %_ZL13JVTLib_105207PKsPK13JVTLib_105184Psj.exit
+
+_ZL13JVTLib_105207PKsPK13JVTLib_105184Psj.exit:   ; preds = %bb129.i, %bb10
+  %58 = phi i16 [ %vs16Out0_4.0.i, %bb129.i ], [ undef, %bb10 ] ; <i16> [#uses=0]
+  %59 = phi i16 [ undef, %bb129.i ], [ %vs16Out1_0.0.i, %bb10 ] ; <i16> [#uses=0]
+  store i16 %vs16Out0_0.0.i, i16* %ResidualDCZ_Array.0, align 2 ; the function's only store
+  unreachable
+}
diff --git a/test/CodeGen/X86/2009-11-16-MachineLICM.ll b/test/CodeGen/X86/2009-11-16-MachineLICM.ll
new file mode 100644
index 0000000..8f274df
--- /dev/null
+++ b/test/CodeGen/X86/2009-11-16-MachineLICM.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; rdar://7395200
+
+@g = common global [4 x float] zeroinitializer, align 16 ; <[4 x float]*> [#uses=4]
+
+define void @foo(i32 %n, float* nocapture %x) nounwind ssp { ; copies the four floats of @g into x[4i..4i+3] for i in [0, n); the @g address load should be hoisted (see GOTPCREL CHECK below)
+entry:
+; CHECK: foo:
+  %0 = icmp sgt i32 %n, 0                         ; <i1> [#uses=1] ; skip the loop entirely when n <= 0
+  br i1 %0, label %bb.nph, label %return
+
+bb.nph:                                           ; preds = %entry
+; CHECK: movq _g@GOTPCREL(%rip), [[REG:%[a-z]+]]
+  %tmp = zext i32 %n to i64                       ; <i64> [#uses=1] ; trip count as i64
+  br label %bb
+
+bb:                                               ; preds = %bb, %bb.nph
+; CHECK: LBB1_2:
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i64> [#uses=2]
+  %tmp9 = shl i64 %indvar, 2                      ; <i64> [#uses=4] ; 4*i
+  %tmp1016 = or i64 %tmp9, 1                      ; <i64> [#uses=1] ; 4*i + 1 (or == add: low two bits are known zero)
+  %scevgep = getelementptr float* %x, i64 %tmp1016 ; <float*> [#uses=1]
+  %tmp1117 = or i64 %tmp9, 2                      ; <i64> [#uses=1] ; 4*i + 2
+  %scevgep12 = getelementptr float* %x, i64 %tmp1117 ; <float*> [#uses=1]
+  %tmp1318 = or i64 %tmp9, 3                      ; <i64> [#uses=1] ; 4*i + 3
+  %scevgep14 = getelementptr float* %x, i64 %tmp1318 ; <float*> [#uses=1]
+  %x_addr.03 = getelementptr float* %x, i64 %tmp9 ; <float*> [#uses=1]
+  %1 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 0), align 16 ; <float> [#uses=1] ; g[0] -> x[4i]
+  store float %1, float* %x_addr.03, align 4
+  %2 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 1), align 4 ; <float> [#uses=1] ; g[1] -> x[4i+1]
+  store float %2, float* %scevgep, align 4
+  %3 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 2), align 8 ; <float> [#uses=1] ; g[2] -> x[4i+2]
+  store float %3, float* %scevgep12, align 4
+  %4 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 3), align 4 ; <float> [#uses=1] ; g[3] -> x[4i+3]
+  store float %4, float* %scevgep14, align 4
+  %indvar.next = add i64 %indvar, 1               ; <i64> [#uses=2]
+  %exitcond = icmp eq i64 %indvar.next, %tmp      ; <i1> [#uses=1]
+  br i1 %exitcond, label %return, label %bb
+
+return:                                           ; preds = %bb, %entry
+  ret void
+}
diff --git a/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
new file mode 100644
index 0000000..3ce9edb
--- /dev/null
+++ b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; rdar://7396984
+
+@str = private constant [28 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxx\00", align 1
+
+define void @t(i32 %count) ssp nounwind { ; memcpys the 28-byte @str into a stack buffer %count times; ssp means stack-protector code is expected (see CHECK)
+entry:
+; CHECK: t:
+; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip)
+; CHECK: movups L_str(%rip), %xmm0
+  %tmp0 = alloca [60 x i8], align 1               ; 60-byte destination buffer on the stack
+  %tmp1 = getelementptr inbounds [60 x i8]* %tmp0, i64 0, i64 0 ; i8* to the buffer's first byte
+  br label %bb1
+
+bb1:
+; CHECK: LBB1_1:
+; CHECK: movaps %xmm0, (%rsp)
+  %tmp2 = phi i32 [ %tmp3, %bb1 ], [ 0, %entry ]  ; loop counter, 0 .. count-1
+  call void @llvm.memcpy.i64(i8* %tmp1, i8* getelementptr inbounds ([28 x i8]* @str, i64 0, i64 0), i64 28, i32 1) ; 28-byte copy, align 1
+  %tmp3 = add i32 %tmp2, 1
+  %tmp4 = icmp eq i32 %tmp3, %count
+  br i1 %tmp4, label %bb2, label %bb1
+
+bb2:
+  ret void
+}
+
+declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
diff --git a/test/CodeGen/X86/2009-11-17-UpdateTerminator.ll b/test/CodeGen/X86/2009-11-17-UpdateTerminator.ll
new file mode 100644
index 0000000..5c1a2bc
--- /dev/null
+++ b/test/CodeGen/X86/2009-11-17-UpdateTerminator.ll
@@ -0,0 +1,52 @@
+; RUN: llc -O3 < %s
+; This test fails with:
+; Assertion failed: (!B && "UpdateTerminators requires analyzable predecessors!"), function updateTerminator, MachineBasicBlock.cpp, line 255.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.2"
+
+%"struct.llvm::InlineAsm::ConstraintInfo" = type { i32, i8, i8, i8, i8, %"struct.std::vector<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" }
+%"struct.std::_Vector_base<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >" = type { %"struct.std::_Vector_base<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >::_Vector_impl" }
+%"struct.std::_Vector_base<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >::_Vector_impl" = type { %"struct.llvm::InlineAsm::ConstraintInfo"*, %"struct.llvm::InlineAsm::ConstraintInfo"*, %"struct.llvm::InlineAsm::ConstraintInfo"* }
+%"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" = type { %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >::_Vector_impl" }
+%"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >::_Vector_impl" = type { %"struct.std::string"*, %"struct.std::string"*, %"struct.std::string"* }
+%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" = type { i8* }
+%"struct.std::string" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" }
+%"struct.std::vector<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >" = type { %"struct.std::_Vector_base<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >" }
+%"struct.std::vector<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" = type { %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" }
+
+define zeroext i8 @_ZN4llvm9InlineAsm14ConstraintInfo5ParseENS_9StringRefERSt6vectorIS1_SaIS1_EE(%"struct.llvm::InlineAsm::ConstraintInfo"* nocapture %this, i64 %Str.0, i64 %Str.1, %"struct.std::vector<llvm::InlineAsm::ConstraintInfo,std::allocator<llvm::InlineAsm::ConstraintInfo> >"* nocapture %ConstraintsSoFar) nounwind ssp align 2 { ; reduced CFG testcase: all interesting conditions are undef, only the loop-nest shape matters
+entry:
+  br i1 undef, label %bb56, label %bb27.outer
+
+bb8:                                              ; preds = %bb27.outer108, %bb13
+  switch i8 undef, label %bb27.outer [            ; dispatch on an (undef) constraint character
+    i8 35, label %bb56
+    i8 37, label %bb14
+    i8 38, label %bb10
+    i8 42, label %bb56
+  ]
+
+bb27.outer:                                       ; preds = %bb8, %entry
+  %I.2.ph = phi i8* [ undef, %entry ], [ %I.2.ph109, %bb8 ] ; <i8*> [#uses=2] ; cursor into the string; advanced in bb27.outer108
+  br label %bb27.outer108
+
+bb10:                                             ; preds = %bb8
+  %toBool = icmp eq i8 0, 0                       ; <i1> [#uses=1] ; constant-true compare, kept by the reduction
+  %or.cond = and i1 undef, %toBool                ; <i1> [#uses=1]
+  br i1 %or.cond, label %bb13, label %bb56
+
+bb13:                                             ; preds = %bb10
+  br i1 undef, label %bb27.outer108, label %bb8   ; back-edge into the inner loop header region
+
+bb14:                                             ; preds = %bb8
+  ret i8 1
+
+bb27.outer108:                                    ; preds = %bb13, %bb27.outer
+  %I.2.ph109 = getelementptr i8* %I.2.ph, i64 undef ; <i8*> [#uses=1] ; advance the cursor by an undef amount
+  %scevgep = getelementptr i8* %I.2.ph, i64 undef ; <i8*> [#uses=0]
+  br label %bb8
+
+bb56:                                             ; preds = %bb10, %bb8, %bb8, %entry
+  ret i8 1
+}
diff --git a/test/CodeGen/X86/2009-11-18-TwoAddrKill.ll b/test/CodeGen/X86/2009-11-18-TwoAddrKill.ll
new file mode 100644
index 0000000..0edaa70
--- /dev/null
+++ b/test/CodeGen/X86/2009-11-18-TwoAddrKill.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s
+; PR 5300
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-pc-linux-gnu"
+
+@g_296 = external global i8, align 1              ; <i8*> [#uses=1]
+
+define noalias i8** @func_31(i32** nocapture %int8p_33, i8** nocapture %p_34, i8* nocapture %p_35) nounwind { ; reduced testcase: returns undef; the interesting part is the i8 negate/and pair in cond.false.i29
+entry:
+  %cmp.i = icmp sgt i16 undef, 234                ; <i1> [#uses=1] ; undef operand — deliberate in this reduction
+  %tmp17 = select i1 %cmp.i, i16 undef, i16 0     ; <i16> [#uses=2]
+  %conv8 = trunc i16 %tmp17 to i8                 ; <i8> [#uses=3]
+  br i1 undef, label %cond.false.i29, label %land.lhs.true.i
+
+land.lhs.true.i:                                  ; preds = %entry
+  %tobool5.i = icmp eq i32 undef, undef           ; <i1> [#uses=1]
+  br i1 %tobool5.i, label %cond.false.i29, label %bar.exit
+
+cond.false.i29:                                   ; preds = %land.lhs.true.i, %entry
+  %tmp = sub i8 0, %conv8                         ; <i8> [#uses=1] ; -conv8
+  %mul.i = and i8 %conv8, %tmp                    ; <i8> [#uses=1] ; conv8 & -conv8 — two-address add/and pattern on the same value; name suggests it was reduced from a multiply
+  br label %bar.exit
+
+bar.exit:                                         ; preds = %cond.false.i29, %land.lhs.true.i
+  %call1231 = phi i8 [ %mul.i, %cond.false.i29 ], [ %conv8, %land.lhs.true.i ] ; <i8> [#uses=0] ; dead phi, kept to hold both values live
+  %conv21 = trunc i16 %tmp17 to i8                ; <i8> [#uses=1]
+  store i8 %conv21, i8* @g_296                    ; the function's only store: low byte of %tmp17
+  ret i8** undef
+}
diff --git a/test/CodeGen/X86/2009-11-25-ImpDefBug.ll b/test/CodeGen/X86/2009-11-25-ImpDefBug.ll
new file mode 100644
index 0000000..7606c0e
--- /dev/null
+++ b/test/CodeGen/X86/2009-11-25-ImpDefBug.ll
@@ -0,0 +1,116 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu
+; PR5600
+
+%struct..0__pthread_mutex_s = type { i32, i32, i32, i32, i32, i32, %struct.__pthread_list_t }
+%struct.ASN1ObjHeader = type { i8, %"struct.__gmp_expr<__mpz_struct [1],__mpz_struct [1]>", i64, i32, i32, i32 }
+%struct.ASN1Object = type { i32 (...)**, i32, i32, i64 }
+%struct.ASN1Unit = type { [4 x i32 (%struct.ASN1ObjHeader*, %struct.ASN1Object**)*], %"struct.std::ASN1ObjList" }
+%"struct.__gmp_expr<__mpz_struct [1],__mpz_struct [1]>" = type { [1 x %struct.__mpz_struct] }
+%struct.__mpz_struct = type { i32, i32, i64* }
+%struct.__pthread_list_t = type { %struct.__pthread_list_t*, %struct.__pthread_list_t* }
+%struct.pthread_attr_t = type { i64, [48 x i8] }
+%struct.pthread_mutex_t = type { %struct..0__pthread_mutex_s }
+%struct.pthread_mutexattr_t = type { i32 }
+%"struct.std::ASN1ObjList" = type { %"struct.std::_Vector_base<ASN1Object*,std::allocator<ASN1Object*> >" }
+%"struct.std::_Vector_base<ASN1Object*,std::allocator<ASN1Object*> >" = type { %"struct.std::_Vector_base<ASN1Object*,std::allocator<ASN1Object*> >::_Vector_impl" }
+%"struct.std::_Vector_base<ASN1Object*,std::allocator<ASN1Object*> >::_Vector_impl" = type { %struct.ASN1Object**, %struct.ASN1Object**, %struct.ASN1Object** }
+%struct.xmstream = type { i8*, i64, i64, i64, i8 }
+
+declare void @_ZNSt6vectorIP10ASN1ObjectSaIS1_EE13_M_insert_auxEN9__gnu_cxx17__normal_iteratorIPS1_S3_EERKS1_(%"struct.std::ASN1ObjList"* nocapture, i64, %struct.ASN1Object** nocapture)
+
+declare i32 @_Z17LoadObjectFromBERR8xmstreamPP10ASN1ObjectPPF10ASN1StatusP13ASN1ObjHeaderS3_E(%struct.xmstream*, %struct.ASN1Object**, i32 (%struct.ASN1ObjHeader*, %struct.ASN1Object**)**)
+
+define i32 @_ZN8ASN1Unit4loadER8xmstreamjm18ASN1LengthEncoding(%struct.ASN1Unit* %this, %struct.xmstream* nocapture %stream, i32 %numObjects, i64 %size, i32 %lEncoding) {
+entry:
+  br label %meshBB85
+
+bb5:                                              ; preds = %bb13.fragment.cl135, %bb13.fragment.cl, %bb.i.i.bbcl.disp, %bb13.fragment
+  %0 = invoke i32 @_Z17LoadObjectFromBERR8xmstreamPP10ASN1ObjectPPF10ASN1StatusP13ASN1ObjHeaderS3_E(%struct.xmstream* undef, %struct.ASN1Object** undef, i32 (%struct.ASN1ObjHeader*, %struct.ASN1Object**)** undef)
+          to label %meshBB81.bbcl.disp unwind label %lpad ; <i32> [#uses=0]
+
+bb10.fragment:                                    ; preds = %bb13.fragment.bbcl.disp
+  br i1 undef, label %bb1.i.fragment.bbcl.disp, label %bb.i.i.bbcl.disp
+
+bb1.i.fragment:                                   ; preds = %bb1.i.fragment.bbcl.disp
+  invoke void @_ZNSt6vectorIP10ASN1ObjectSaIS1_EE13_M_insert_auxEN9__gnu_cxx17__normal_iteratorIPS1_S3_EERKS1_(%"struct.std::ASN1ObjList"* undef, i64 undef, %struct.ASN1Object** undef)
+          to label %meshBB81.bbcl.disp unwind label %lpad
+
+bb13.fragment:                                    ; preds = %bb13.fragment.bbcl.disp
+  br i1 undef, label %meshBB81.bbcl.disp, label %bb5
+
+bb.i4:                                            ; preds = %bb.i4.bbcl.disp, %bb1.i.fragment.bbcl.disp
+  ret i32 undef
+
+bb1.i5:                                           ; preds = %bb.i1
+  ret i32 undef
+
+lpad:                                             ; preds = %bb1.i.fragment.cl, %bb1.i.fragment, %bb5
+  %.SV10.phi807 = phi i8* [ undef, %bb1.i.fragment.cl ], [ undef, %bb1.i.fragment ], [ undef, %bb5 ] ; <i8*> [#uses=1]
+  %1 = load i8* %.SV10.phi807, align 8            ; <i8> [#uses=0]
+  br i1 undef, label %meshBB81.bbcl.disp, label %bb13.fragment.bbcl.disp
+
+bb.i1:                                            ; preds = %bb.i.i.bbcl.disp
+  br i1 undef, label %meshBB81.bbcl.disp, label %bb1.i5
+
+meshBB81:                                         ; preds = %meshBB81.bbcl.disp, %bb.i.i.bbcl.disp
+  br i1 undef, label %meshBB81.bbcl.disp, label %bb.i4.bbcl.disp
+
+meshBB85:                                         ; preds = %meshBB81.bbcl.disp, %bb.i4.bbcl.disp, %bb1.i.fragment.bbcl.disp, %bb.i.i.bbcl.disp, %entry
+  br i1 undef, label %meshBB81.bbcl.disp, label %bb13.fragment.bbcl.disp
+
+bb.i.i.bbcl.disp:                                 ; preds = %bb10.fragment
+  switch i8 undef, label %meshBB85 [
+    i8 123, label %bb.i1
+    i8 97, label %bb5
+    i8 44, label %meshBB81
+    i8 1, label %meshBB81.cl
+    i8 51, label %meshBB81.cl141
+  ]
+
+bb1.i.fragment.cl:                                ; preds = %bb1.i.fragment.bbcl.disp
+  invoke void @_ZNSt6vectorIP10ASN1ObjectSaIS1_EE13_M_insert_auxEN9__gnu_cxx17__normal_iteratorIPS1_S3_EERKS1_(%"struct.std::ASN1ObjList"* undef, i64 undef, %struct.ASN1Object** undef)
+          to label %meshBB81.bbcl.disp unwind label %lpad
+
+bb1.i.fragment.bbcl.disp:                         ; preds = %bb10.fragment
+  switch i8 undef, label %bb.i4 [
+    i8 97, label %bb1.i.fragment
+    i8 7, label %bb1.i.fragment.cl
+    i8 35, label %bb.i4.cl
+    i8 77, label %meshBB85
+  ]
+
+bb13.fragment.cl:                                 ; preds = %bb13.fragment.bbcl.disp
+  br i1 undef, label %meshBB81.bbcl.disp, label %bb5
+
+bb13.fragment.cl135:                              ; preds = %bb13.fragment.bbcl.disp
+  br i1 undef, label %meshBB81.bbcl.disp, label %bb5
+
+bb13.fragment.bbcl.disp:                          ; preds = %meshBB85, %lpad
+  switch i8 undef, label %bb10.fragment [
+    i8 67, label %bb13.fragment.cl
+    i8 108, label %bb13.fragment
+    i8 58, label %bb13.fragment.cl135
+  ]
+
+bb.i4.cl:                                         ; preds = %bb.i4.bbcl.disp, %bb1.i.fragment.bbcl.disp
+  ret i32 undef
+
+bb.i4.bbcl.disp:                                  ; preds = %meshBB81.cl141, %meshBB81.cl, %meshBB81
+  switch i8 undef, label %bb.i4 [
+    i8 35, label %bb.i4.cl
+    i8 77, label %meshBB85
+  ]
+
+meshBB81.cl:                                      ; preds = %meshBB81.bbcl.disp, %bb.i.i.bbcl.disp
+  br i1 undef, label %meshBB81.bbcl.disp, label %bb.i4.bbcl.disp
+
+meshBB81.cl141:                                   ; preds = %meshBB81.bbcl.disp, %bb.i.i.bbcl.disp
+  br i1 undef, label %meshBB81.bbcl.disp, label %bb.i4.bbcl.disp
+
+meshBB81.bbcl.disp:                               ; preds = %meshBB81.cl141, %meshBB81.cl, %bb13.fragment.cl135, %bb13.fragment.cl, %bb1.i.fragment.cl, %meshBB85, %meshBB81, %bb.i1, %lpad, %bb13.fragment, %bb1.i.fragment, %bb5
+  switch i8 undef, label %meshBB85 [
+    i8 44, label %meshBB81
+    i8 1, label %meshBB81.cl
+    i8 51, label %meshBB81.cl141
+  ]
+}
diff --git a/test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll b/test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll
new file mode 100644
index 0000000..1e7a418
--- /dev/null
+++ b/test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; PR5391
+
+define void @t() nounwind ssp {
+entry:
+; CHECK: t:
+; CHECK: movl %ecx, %eax
+; CHECK: %eax = foo (%eax, %ecx)
+  %b = alloca i32                                 ; <i32*> [#uses=2]
+  %a = alloca i32                                 ; <i32*> [#uses=1]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  %0 = load i32* %b, align 4                      ; <i32> [#uses=1]
+  %1 = load i32* %b, align 4                      ; <i32> [#uses=1]
+  %asmtmp = call i32 asm "$0 = foo ($1, $2)", "=&{ax},%0,r,~{dirflag},~{fpsr},~{flags}"(i32 %0, i32 %1) nounwind ; <i32> [#uses=1]
+  store i32 %asmtmp, i32* %a
+  br label %return
+
+return:                                           ; preds = %entry
+  ret void
+}
+
+define void @t2() nounwind ssp {
+entry:
+; CHECK: t2:
+; CHECK: movl %eax, %ecx
+; CHECK: %ecx = foo (%ecx, %eax)
+  %b = alloca i32                                 ; <i32*> [#uses=2]
+  %a = alloca i32                                 ; <i32*> [#uses=1]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  %0 = load i32* %b, align 4                      ; <i32> [#uses=1]
+  %1 = load i32* %b, align 4                      ; <i32> [#uses=1]
+  %asmtmp = call i32 asm "$0 = foo ($1, $2)", "=&r,%0,r,~{dirflag},~{fpsr},~{flags}"(i32 %0, i32 %1) nounwind ; <i32> [#uses=1]
+  store i32 %asmtmp, i32* %a
+  br label %return
+
+return:                                           ; preds = %entry
+  ret void
+}
diff --git a/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll b/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll
new file mode 100644
index 0000000..f7ba661
--- /dev/null
+++ b/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll
@@ -0,0 +1,63 @@
+; RUN: llc -relocation-model=pic < %s | FileCheck %s
+; PR5723
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+%0 = type { [1 x i64] }
+%link = type { %0* }
+%test = type { i32, %link }
+
+@data = global [2 x i64] zeroinitializer, align 64 ; <[2 x i64]*> [#uses=1]
+@ptr = linkonce thread_local global [1 x i64] [i64 ptrtoint ([2 x i64]* @data to i64)], align 64 ; <[1 x i64]*> [#uses=1]
+@link_ptr = linkonce thread_local global [1 x i64] zeroinitializer, align 64 ; <[1 x i64]*> [#uses=1]
+@_dm_my_pe = external global [1 x i64], align 64  ; <[1 x i64]*> [#uses=0]
+@_dm_pes_in_prog = external global [1 x i64], align 64 ; <[1 x i64]*> [#uses=0]
+@_dm_npes_div_mult = external global [1 x i64], align 64 ; <[1 x i64]*> [#uses=0]
+@_dm_npes_div_shift = external global [1 x i64], align 64 ; <[1 x i64]*> [#uses=0]
+@_dm_pe_addr_loc = external global [1 x i64], align 64 ; <[1 x i64]*> [#uses=0]
+@_dm_offset_addr_mask = external global [1 x i64], align 64 ; <[1 x i64]*> [#uses=0]
+
+define void @leaf() nounwind {
+; CHECK: leaf:
+; CHECK-NOT: -8(%rsp)
+; CHECK: leaq link_ptr@TLSGD
+; CHECK: call __tls_get_addr@PLT
+"file foo2.c, line 14, bb1":
+  %p = alloca %test*, align 8                     ; <%test**> [#uses=4]
+  br label %"file foo2.c, line 14, bb2"
+
+"file foo2.c, line 14, bb2":                      ; preds = %"file foo2.c, line 14, bb1"
+  br label %"@CFE_debug_label_0"
+
+"@CFE_debug_label_0":                             ; preds = %"file foo2.c, line 14, bb2"
+  %r = load %test** bitcast ([1 x i64]* @ptr to %test**), align 8 ; <%test*> [#uses=1]
+  store %test* %r, %test** %p, align 8
+  br label %"@CFE_debug_label_2"
+
+"@CFE_debug_label_2":                             ; preds = %"@CFE_debug_label_0"
+  %r1 = load %link** bitcast ([1 x i64]* @link_ptr to %link**), align 8 ; <%link*> [#uses=1]
+  %r2 = load %test** %p, align 8                  ; <%test*> [#uses=1]
+  %r3 = ptrtoint %test* %r2 to i64                ; <i64> [#uses=1]
+  %r4 = inttoptr i64 %r3 to %link**               ; <%link**> [#uses=1]
+  %r5 = getelementptr %link** %r4, i64 1          ; <%link**> [#uses=1]
+  store %link* %r1, %link** %r5, align 8
+  br label %"@CFE_debug_label_3"
+
+"@CFE_debug_label_3":                             ; preds = %"@CFE_debug_label_2"
+  %r6 = load %test** %p, align 8                  ; <%test*> [#uses=1]
+  %r7 = ptrtoint %test* %r6 to i64                ; <i64> [#uses=1]
+  %r8 = inttoptr i64 %r7 to %link*                ; <%link*> [#uses=1]
+  %r9 = getelementptr %link* %r8, i64 1           ; <%link*> [#uses=1]
+  store %link* %r9, %link** bitcast ([1 x i64]* @link_ptr to %link**), align 8
+  br label %"@CFE_debug_label_4"
+
+"@CFE_debug_label_4":                             ; preds = %"@CFE_debug_label_3"
+  %r10 = load %test** %p, align 8                 ; <%test*> [#uses=1]
+  %r11 = ptrtoint %test* %r10 to i64              ; <i64> [#uses=1]
+  %r12 = inttoptr i64 %r11 to i32*                ; <i32*> [#uses=1]
+  store i32 1, i32* %r12, align 4
+  br label %"@CFE_debug_label_5"
+
+"@CFE_debug_label_5":                             ; preds = %"@CFE_debug_label_4"
+  ret void
+}
diff --git a/test/CodeGen/X86/2009-12-12-CoalescerBug.ll b/test/CodeGen/X86/2009-12-12-CoalescerBug.ll
new file mode 100644
index 0000000..4e8f5fd
--- /dev/null
+++ b/test/CodeGen/X86/2009-12-12-CoalescerBug.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+
+define i32 @do_loop(i32* nocapture %sdp, i32* nocapture %ddp, i8* %mdp, i8* nocapture %cdp, i32 %w) nounwind readonly optsize ssp {
+entry:
+  br label %bb
+
+bb:                                               ; preds = %bb5, %entry
+  %mask.1.in = load i8* undef, align 1            ; <i8> [#uses=3]
+  %0 = icmp eq i8 %mask.1.in, 0                   ; <i1> [#uses=1]
+  br i1 %0, label %bb5, label %bb1
+
+bb1:                                              ; preds = %bb
+  br i1 undef, label %bb2, label %bb3
+
+bb2:                                              ; preds = %bb1
+; CHECK: %bb2
+; CHECK: movb %ch, %al
+  %1 = zext i8 %mask.1.in to i32                  ; <i32> [#uses=1]
+  %2 = zext i8 undef to i32                       ; <i32> [#uses=1]
+  %3 = mul i32 %2, %1                             ; <i32> [#uses=1]
+  %4 = add i32 %3, 1                              ; <i32> [#uses=1]
+  %5 = add i32 %4, 0                              ; <i32> [#uses=1]
+  %6 = lshr i32 %5, 8                             ; <i32> [#uses=1]
+  %retval12.i = trunc i32 %6 to i8                ; <i8> [#uses=1]
+  br label %bb3
+
+bb3:                                              ; preds = %bb2, %bb1
+  %mask.0.in = phi i8 [ %retval12.i, %bb2 ], [ %mask.1.in, %bb1 ] ; <i8> [#uses=1]
+  %7 = icmp eq i8 %mask.0.in, 0                   ; <i1> [#uses=1]
+  br i1 %7, label %bb5, label %bb4
+
+bb4:                                              ; preds = %bb3
+  br label %bb5
+
+bb5:                                              ; preds = %bb4, %bb3, %bb
+  br i1 undef, label %bb6, label %bb
+
+bb6:                                              ; preds = %bb5
+  ret i32 undef
+}
diff --git a/test/CodeGen/X86/20090313-signext.ll b/test/CodeGen/X86/20090313-signext.ll
new file mode 100644
index 0000000..de930d5
--- /dev/null
+++ b/test/CodeGen/X86/20090313-signext.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86-64 -relocation-model=pic > %t
+; RUN: grep {movswl	%ax, %edi} %t
+; RUN: grep {movw	(%rax), %ax} %t
+; XFAIL: *
+
+@x = common global i16 0
+
+define signext i16 @f() nounwind {
+entry:
+	%0 = tail call signext i16 @h() nounwind
+	%1 = sext i16 %0 to i32
+	tail call void @g(i32 %1) nounwind
+	%2 = load i16* @x, align 2
+	ret i16 %2
+}
+
+declare signext i16 @h()
+
+declare void @g(i32)
diff --git a/test/CodeGen/X86/2010-01-05-ZExt-Shl.ll b/test/CodeGen/X86/2010-01-05-ZExt-Shl.ll
new file mode 100644
index 0000000..e7004e2
--- /dev/null
+++ b/test/CodeGen/X86/2010-01-05-ZExt-Shl.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86-64
+; <rdar://problem/7499313>
+target triple = "i686-apple-darwin8"
+
+declare void @func2(i16 zeroext)
+
+define void @func1() nounwind {
+entry:
+  %t1 = icmp ne i8 undef, 0
+  %t2 = icmp eq i8 undef, 14
+  %t3 = and i1 %t1, %t2
+  %t4 = select i1 %t3, i16 0, i16 128
+  call void @func2(i16 zeroext %t4) nounwind
+  ret void
+}
diff --git a/test/CodeGen/X86/2010-01-07-ISelBug.ll b/test/CodeGen/X86/2010-01-07-ISelBug.ll
new file mode 100644
index 0000000..081fab7
--- /dev/null
+++ b/test/CodeGen/X86/2010-01-07-ISelBug.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10
+; rdar://r7519827
+
+define i32 @t() nounwind ssp {
+entry:
+  br label %if.end.i11
+
+if.end.i11:                                       ; preds = %lor.lhs.false.i10, %lor.lhs.false.i10, %lor.lhs.false.i10
+  br i1 undef, label %for.body161, label %for.end197
+
+for.body161:                                      ; preds = %if.end.i11
+  br label %for.end197
+
+for.end197:                                       ; preds = %for.body161, %if.end.i11
+  %mlucEntry.4 = phi i96 [ undef, %for.body161 ], [ undef, %if.end.i11 ] ; <i96> [#uses=2]
+  store i96 %mlucEntry.4, i96* undef, align 8
+  %tmp172 = lshr i96 %mlucEntry.4, 64             ; <i96> [#uses=1]
+  %tmp173 = trunc i96 %tmp172 to i32              ; <i32> [#uses=1]
+  %tmp1.i1.i = call i32 @llvm.bswap.i32(i32 %tmp173) nounwind ; <i32> [#uses=1]
+  store i32 %tmp1.i1.i, i32* undef, align 8
+  unreachable
+
+if.then283:                                       ; preds = %lor.lhs.false.i10, %do.end105, %for.end
+  ret i32 undef
+}
+
+declare i32 @llvm.bswap.i32(i32) nounwind readnone
diff --git a/test/CodeGen/X86/2010-01-07-UAMemFeature.ll b/test/CodeGen/X86/2010-01-07-UAMemFeature.ll
new file mode 100644
index 0000000..3728f15
--- /dev/null
+++ b/test/CodeGen/X86/2010-01-07-UAMemFeature.ll
@@ -0,0 +1,11 @@
+; RUN: llc -mcpu=yonah -mattr=vector-unaligned-mem -march=x86 < %s | FileCheck %s
+; CHECK: addps (
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define <4 x float> @foo(<4 x float>* %P, <4 x float> %In) nounwind {
+	%A = load <4 x float>* %P, align 4
+	%B = add <4 x float> %A, %In
+	ret <4 x float> %B
+}
diff --git a/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll b/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
new file mode 100644
index 0000000..172e1c7
--- /dev/null
+++ b/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+; rdar://r7512579
+
+; PHI defs in the atomic loop should be used by the add / adc
+; instructions. They should not be dead.
+
+define void @t(i64* nocapture %p) nounwind ssp {
+entry:
+; CHECK: t:
+; CHECK: movl $1
+; CHECK: movl (%ebp), %eax
+; CHECK: movl 4(%ebp), %edx
+; CHECK: LBB1_1:
+; CHECK-NOT: movl $1
+; CHECK-NOT: movl $0
+; CHECK: addl
+; CHECK: adcl
+; CHECK: lock
+; CHECK: cmpxchg8b
+; CHECK: jne
+  tail call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true)
+  %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1) ; <i64> [#uses=0]
+  tail call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true)
+  ret void
+}
+
+declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind
+
+declare i64 @llvm.atomic.load.add.i64.p0i64(i64* nocapture, i64) nounwind
diff --git a/test/CodeGen/X86/2010-01-11-ExtraPHIArg.ll b/test/CodeGen/X86/2010-01-11-ExtraPHIArg.ll
new file mode 100644
index 0000000..db98eef
--- /dev/null
+++ b/test/CodeGen/X86/2010-01-11-ExtraPHIArg.ll
@@ -0,0 +1,97 @@
+; RUN: llc -verify-machineinstrs < %s
+;
+; The lowering of a switch combined with constant folding would leave spurious extra arguments on a PHI instruction.
+;
+target triple = "x86_64-apple-darwin10"
+
+define void @foo() {
+  br label %cond_true813.i
+
+cond_true813.i:                                   ; preds = %0
+  br i1 false, label %cond_true818.i, label %cond_next1146.i
+
+cond_true818.i:                                   ; preds = %cond_true813.i
+  br i1 false, label %recog_memoized.exit52, label %cond_next1146.i
+
+recog_memoized.exit52:                            ; preds = %cond_true818.i
+  switch i32 0, label %bb886.i.preheader [
+    i32 0, label %bb907.i
+    i32 44, label %bb866.i
+    i32 103, label %bb874.i
+    i32 114, label %bb874.i
+  ]
+
+bb857.i:                                          ; preds = %bb886.i, %bb866.i
+  %tmp862.i494.24 = phi i8* [ null, %bb866.i ], [ %tmp862.i494.26, %bb886.i ] ; <i8*> [#uses=1]
+  switch i32 0, label %bb886.i.preheader [
+    i32 0, label %bb907.i
+    i32 44, label %bb866.i
+    i32 103, label %bb874.i
+    i32 114, label %bb874.i
+  ]
+
+bb866.i.loopexit:                                 ; preds = %bb874.i
+  br label %bb866.i
+
+bb866.i.loopexit31:                               ; preds = %cond_true903.i
+  br label %bb866.i
+
+bb866.i:                                          ; preds = %bb866.i.loopexit31, %bb866.i.loopexit, %bb857.i, %recog_memoized.exit52
+  br i1 false, label %bb907.i, label %bb857.i
+
+bb874.i.preheader.loopexit:                       ; preds = %cond_true903.i, %cond_true903.i
+  ret void
+
+bb874.i:                                          ; preds = %bb857.i, %bb857.i, %recog_memoized.exit52, %recog_memoized.exit52
+  switch i32 0, label %bb886.i.preheader.loopexit [
+    i32 0, label %bb907.i
+    i32 44, label %bb866.i.loopexit
+    i32 103, label %bb874.i.backedge
+    i32 114, label %bb874.i.backedge
+  ]
+
+bb874.i.backedge:                                 ; preds = %bb874.i, %bb874.i
+  ret void
+
+bb886.i.preheader.loopexit:                       ; preds = %bb874.i
+  ret void
+
+bb886.i.preheader:                                ; preds = %bb857.i, %recog_memoized.exit52
+  %tmp862.i494.26 = phi i8* [ undef, %recog_memoized.exit52 ], [ %tmp862.i494.24, %bb857.i ] ; <i8*> [#uses=1]
+  br label %bb886.i
+
+bb886.i:                                          ; preds = %cond_true903.i, %bb886.i.preheader
+  br i1 false, label %bb857.i, label %cond_true903.i
+
+cond_true903.i:                                   ; preds = %bb886.i
+  switch i32 0, label %bb886.i [
+    i32 0, label %bb907.i
+    i32 44, label %bb866.i.loopexit31
+    i32 103, label %bb874.i.preheader.loopexit
+    i32 114, label %bb874.i.preheader.loopexit
+  ]
+
+bb907.i:                                          ; preds = %cond_true903.i, %bb874.i, %bb866.i, %bb857.i, %recog_memoized.exit52
+  br i1 false, label %cond_next1146.i, label %cond_true910.i
+
+cond_true910.i:                                   ; preds = %bb907.i
+  ret void
+
+cond_next1146.i:                                  ; preds = %bb907.i, %cond_true818.i, %cond_true813.i
+  ret void
+
+bb2060.i:                                         ; No predecessors!
+  br i1 false, label %cond_true2064.i, label %bb2067.i
+
+cond_true2064.i:                                  ; preds = %bb2060.i
+  unreachable
+
+bb2067.i:                                         ; preds = %bb2060.i
+  ret void
+
+cond_next3473:                                    ; No predecessors!
+  ret void
+
+cond_next3521:                                    ; No predecessors!
+  ret void
+}
diff --git a/test/CodeGen/X86/2010-01-13-OptExtBug.ll b/test/CodeGen/X86/2010-01-13-OptExtBug.ll
new file mode 100644
index 0000000..d49e2a8
--- /dev/null
+++ b/test/CodeGen/X86/2010-01-13-OptExtBug.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu
+; PR6027
+
+%class.OlsonTimeZone = type { i16, i32*, i8*, i16 }
+
+define void @XX(%class.OlsonTimeZone* %this) align 2 {
+entry:
+  %call = tail call i8* @_Z15uprv_malloc_4_2v()
+  %0 = bitcast i8* %call to double*
+  %tmp = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 3
+  %tmp2 = load i16* %tmp
+  %tmp525 = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 0
+  %tmp626 = load i16* %tmp525
+  %cmp27 = icmp slt i16 %tmp2, %tmp626
+  br i1 %cmp27, label %bb.nph, label %for.end
+
+for.cond:
+  %tmp6 = load i16* %tmp5
+  %cmp = icmp slt i16 %inc, %tmp6
+  %indvar.next = add i32 %indvar, 1
+  br i1 %cmp, label %for.body, label %for.end
+
+bb.nph:
+  %tmp10 = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 2
+  %tmp17 = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 1
+  %tmp5 = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 0
+  %tmp29 = sext i16 %tmp2 to i32
+  %tmp31 = add i16 %tmp2, 1
+  %tmp32 = zext i16 %tmp31 to i32
+  br label %for.body
+
+for.body:
+  %indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, %for.cond ]
+  %tmp30 = add i32 %indvar, %tmp29
+  %tmp33 = add i32 %indvar, %tmp32
+  %inc = trunc i32 %tmp33 to i16
+  %tmp11 = load i8** %tmp10
+  %arrayidx = getelementptr i8* %tmp11, i32 %tmp30
+  %tmp12 = load i8* %arrayidx
+  br label %for.cond
+
+for.end:
+  ret void
+}
+
+declare i8* @_Z15uprv_malloc_4_2v()
diff --git a/test/CodeGen/X86/2010-01-15-SelectionDAGCycle.ll b/test/CodeGen/X86/2010-01-15-SelectionDAGCycle.ll
new file mode 100644
index 0000000..5d96e4a
--- /dev/null
+++ b/test/CodeGen/X86/2010-01-15-SelectionDAGCycle.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=x86-64
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @numvec_(i32* noalias %ncelet, i32* noalias %ncel, i32* noalias %nfac, i32* noalias %nfabor, i32* noalias %lregis, i32* noalias %irveci, i32* noalias %irvecb, [0 x [2 x i32]]* noalias %ifacel, [0 x i32]* noalias %ifabor, [0 x i32]* noalias %inumfi, [0 x i32]* noalias %inumfb, [1 x i32]* noalias %iworkf, [0 x i32]* noalias %ismbs) {
+"file bug754399.f90, line 1, bb1":
+	%r1037 = bitcast <2 x double> zeroinitializer to <4 x i32>		; <<4 x i32>> [#uses=1]
+	br label %"file bug754399.f90, line 184, in inner vector loop at depth 0, bb164"
+
+"file bug754399.f90, line 184, in inner vector loop at depth 0, bb164":		; preds = %"file bug754399.f90, line 184, in inner vector loop at depth 0, bb164", %"file bug754399.f90, line 1, bb1"
+	%tmp641 = add i64 0, 48		; <i64> [#uses=1]
+	%tmp641642 = inttoptr i64 %tmp641 to <4 x i32>*		; <<4 x i32>*> [#uses=1]
+	%r1258 = load <4 x i32>* %tmp641642, align 4		; <<4 x i32>> [#uses=2]
+	%r1295 = extractelement <4 x i32> %r1258, i32 3		; <i32> [#uses=1]
+	%r1296 = sext i32 %r1295 to i64		; <i64> [#uses=1]
+	%r1297 = add i64 %r1296, -1		; <i64> [#uses=1]
+	%r1298183 = getelementptr [0 x i32]* %ismbs, i64 0, i64 %r1297		; <i32*> [#uses=1]
+	%r1298184 = load i32* %r1298183, align 4		; <i32> [#uses=1]
+	%r1301 = extractelement <4 x i32> %r1037, i32 3		; <i32> [#uses=1]
+	%r1302 = mul i32 %r1298184, %r1301		; <i32> [#uses=1]
+	%r1306 = insertelement <4 x i32> zeroinitializer, i32 %r1302, i32 3		; <<4 x i32>> [#uses=1]
+	%r1321 = add <4 x i32> %r1306, %r1258		; <<4 x i32>> [#uses=1]
+	%tmp643 = add i64 0, 48		; <i64> [#uses=1]
+	%tmp643644 = inttoptr i64 %tmp643 to <4 x i32>*		; <<4 x i32>*> [#uses=1]
+	store <4 x i32> %r1321, <4 x i32>* %tmp643644, align 4
+	br label %"file bug754399.f90, line 184, in inner vector loop at depth 0, bb164"
+}
diff --git a/test/CodeGen/X86/2010-01-19-OptExtBug.ll b/test/CodeGen/X86/2010-01-19-OptExtBug.ll
new file mode 100644
index 0000000..cd8960b
--- /dev/null
+++ b/test/CodeGen/X86/2010-01-19-OptExtBug.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin11 -relocation-model=pic -disable-fp-elim -stats |& not grep ext-opt
+
+define fastcc i8* @S_scan_str(i8* %start, i32 %keep_quoted, i32 %keep_delims) nounwind ssp {
+entry:
+  switch i8 undef, label %bb6 [
+    i8 9, label %bb5
+    i8 32, label %bb5
+    i8 10, label %bb5
+    i8 13, label %bb5
+    i8 12, label %bb5
+  ]
+
+bb5:                                              ; preds = %entry, %entry, %entry, %entry, %entry
+  br label %bb6
+
+bb6:                                              ; preds = %bb5, %entry
+  br i1 undef, label %bb7, label %bb9
+
+bb7:                                              ; preds = %bb6
+  unreachable
+
+bb9:                                              ; preds = %bb6
+  %0 = load i8* undef, align 1                    ; <i8> [#uses=3]
+  br i1 undef, label %bb12, label %bb10
+
+bb10:                                             ; preds = %bb9
+  br i1 undef, label %bb12, label %bb11
+
+bb11:                                             ; preds = %bb10
+  unreachable
+
+bb12:                                             ; preds = %bb10, %bb9
+  br i1 undef, label %bb13, label %bb14
+
+bb13:                                             ; preds = %bb12
+  store i8 %0, i8* undef, align 1
+  %1 = zext i8 %0 to i32                          ; <i32> [#uses=1]
+  br label %bb18
+
+bb14:                                             ; preds = %bb12
+  br label %bb18
+
+bb18:                                             ; preds = %bb14, %bb13
+  %termcode.0 = phi i32 [ %1, %bb13 ], [ undef, %bb14 ] ; <i32> [#uses=2]
+  %2 = icmp eq i8 %0, 0                           ; <i1> [#uses=1]
+  br i1 %2, label %bb21, label %bb19
+
+bb19:                                             ; preds = %bb18
+  br i1 undef, label %bb21, label %bb20
+
+bb20:                                             ; preds = %bb19
+  br label %bb21
+
+bb21:                                             ; preds = %bb20, %bb19, %bb18
+  %termcode.1 = phi i32 [ %termcode.0, %bb18 ], [ %termcode.0, %bb19 ], [ undef, %bb20 ] ; <i32> [#uses=0]
+  unreachable
+}
diff --git a/test/CodeGen/X86/2010-02-01-TaillCallCrash.ll b/test/CodeGen/X86/2010-02-01-TaillCallCrash.ll
new file mode 100644
index 0000000..2751174
--- /dev/null
+++ b/test/CodeGen/X86/2010-02-01-TaillCallCrash.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu
+; PR6196
+
+%"char[]" = type [1 x i8]
+
+@.str = external constant %"char[]", align 1      ; <%"char[]"*> [#uses=1]
+
+define i32 @regex_subst() nounwind {
+entry:
+  %0 = tail call i32 bitcast (%"char[]"* @.str to i32 (i32)*)(i32 0) nounwind ; <i32> [#uses=1]
+  ret i32 %0
+}
diff --git a/test/CodeGen/X86/2010-02-03-DualUndef.ll b/test/CodeGen/X86/2010-02-03-DualUndef.ll
new file mode 100644
index 0000000..d116ecc
--- /dev/null
+++ b/test/CodeGen/X86/2010-02-03-DualUndef.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=x86-64
+; PR6086
+define fastcc void @prepOutput() nounwind {
+bb:                                               ; preds = %output.exit
+  br label %bb.i1
+
+bb.i1:                                            ; preds = %bb7.i, %bb
+  br i1 undef, label %bb7.i, label %bb.nph.i
+
+bb.nph.i:                                         ; preds = %bb.i1
+  br label %bb3.i
+
+bb3.i:                                            ; preds = %bb5.i6, %bb.nph.i
+  %tmp10.i = trunc i64 undef to i32               ; <i32> [#uses=1]
+  br i1 undef, label %bb4.i, label %bb5.i6
+
+bb4.i:                                            ; preds = %bb3.i
+  br label %bb5.i6
+
+bb5.i6:                                           ; preds = %bb4.i, %bb3.i
+  %0 = phi i32 [ undef, %bb4.i ], [ undef, %bb3.i ] ; <i32> [#uses=1]
+  %1 = icmp slt i32 %0, %tmp10.i                  ; <i1> [#uses=1]
+  br i1 %1, label %bb7.i, label %bb3.i
+
+bb7.i:                                            ; preds = %bb5.i6, %bb.i1
+  br label %bb.i1
+}
diff --git a/test/CodeGen/X86/2010-02-04-SchedulerBug.ll b/test/CodeGen/X86/2010-02-04-SchedulerBug.ll
new file mode 100644
index 0000000..c966e21d
--- /dev/null
+++ b/test/CodeGen/X86/2010-02-04-SchedulerBug.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin11
+; rdar://7604000
+
+%struct.a_t = type { i8*, i64*, i8*, i32, i32, i64*, i64*, i64* }
+%struct.b_t = type { i32, i32, i32, i32, i64, i64, i64, i64 }
+
+define void @t(i32 %cNum, i64 %max) nounwind optsize ssp noimplicitfloat {
+entry:
+  %0 = load %struct.b_t** null, align 4 ; <%struct.b_t*> [#uses=1]
+  %1 = getelementptr inbounds %struct.b_t* %0, i32 %cNum, i32 5 ; <i64*> [#uses=1]
+  %2 = load i64* %1, align 4                      ; <i64> [#uses=1]
+  %3 = icmp ult i64 %2, %max            ; <i1> [#uses=1]
+  %4 = getelementptr inbounds %struct.a_t* null, i32 0, i32 7 ; <i64**> [#uses=1]
+  %5 = load i64** %4, align 4                     ; <i64*> [#uses=0]
+  %6 = load i64* null, align 4                    ; <i64> [#uses=1]
+  br i1 %3, label %bb2, label %bb
+
+bb:                                               ; preds = %entry
+  br label %bb3
+
+bb2:                                              ; preds = %entry
+  %7 = or i64 %6, undef                           ; <i64> [#uses=1]
+  br label %bb3
+
+bb3:                                              ; preds = %bb2, %bb
+  %misc_enables.0 = phi i64 [ undef, %bb ], [ %7, %bb2 ] ; <i64> [#uses=0]
+  ret void
+}
diff --git a/test/CodeGen/X86/3addr-16bit.ll b/test/CodeGen/X86/3addr-16bit.ll
new file mode 100644
index 0000000..c51247a
--- /dev/null
+++ b/test/CodeGen/X86/3addr-16bit.ll
@@ -0,0 +1,95 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -asm-verbose=false | FileCheck %s -check-prefix=64BIT
+; rdar://7329206
+
+; In 32-bit mode, the partial register stall would degrade performance.
+
+define zeroext i16 @t1(i16 zeroext %c, i16 zeroext %k) nounwind ssp {
+entry:
+; 32BIT:     t1:
+; 32BIT:     movw 20(%esp), %ax
+; 32BIT-NOT: movw %ax, %cx
+; 32BIT:     leal 1(%eax), %ecx
+
+; 64BIT:     t1:
+; 64BIT-NOT: movw %si, %ax
+; 64BIT:     leal 1(%rsi), %eax
+  %0 = icmp eq i16 %k, %c                         ; <i1> [#uses=1]
+  %1 = add i16 %k, 1                              ; <i16> [#uses=3]
+  br i1 %0, label %bb, label %bb1
+
+bb:                                               ; preds = %entry
+  tail call void @foo(i16 zeroext %1) nounwind
+  ret i16 %1
+
+bb1:                                              ; preds = %entry
+  ret i16 %1
+}
+
+define zeroext i16 @t2(i16 zeroext %c, i16 zeroext %k) nounwind ssp {
+entry:
+; 32BIT:     t2:
+; 32BIT:     movw 20(%esp), %ax
+; 32BIT-NOT: movw %ax, %cx
+; 32BIT:     leal -1(%eax), %ecx
+
+; 64BIT:     t2:
+; 64BIT-NOT: movw %si, %ax
+; 64BIT:     leal -1(%rsi), %eax
+  %0 = icmp eq i16 %k, %c                         ; <i1> [#uses=1]
+  %1 = add i16 %k, -1                             ; <i16> [#uses=3]
+  br i1 %0, label %bb, label %bb1
+
+bb:                                               ; preds = %entry
+  tail call void @foo(i16 zeroext %1) nounwind
+  ret i16 %1
+
+bb1:                                              ; preds = %entry
+  ret i16 %1
+}
+
+declare void @foo(i16 zeroext)
+
+define zeroext i16 @t3(i16 zeroext %c, i16 zeroext %k) nounwind ssp {
+entry:
+; 32BIT:     t3:
+; 32BIT:     movw 20(%esp), %ax
+; 32BIT-NOT: movw %ax, %cx
+; 32BIT:     leal 2(%eax), %ecx
+
+; 64BIT:     t3:
+; 64BIT-NOT: movw %si, %ax
+; 64BIT:     leal 2(%rsi), %eax
+  %0 = add i16 %k, 2                              ; <i16> [#uses=3]
+  %1 = icmp eq i16 %k, %c                         ; <i1> [#uses=1]
+  br i1 %1, label %bb, label %bb1
+
+bb:                                               ; preds = %entry
+  tail call void @foo(i16 zeroext %0) nounwind
+  ret i16 %0
+
+bb1:                                              ; preds = %entry
+  ret i16 %0
+}
+
+define zeroext i16 @t4(i16 zeroext %c, i16 zeroext %k) nounwind ssp {
+entry:
+; 32BIT:     t4:
+; 32BIT:     movw 16(%esp), %ax
+; 32BIT:     movw 20(%esp), %cx
+; 32BIT-NOT: movw %cx, %dx
+; 32BIT:     leal (%ecx,%eax), %edx
+
+; 64BIT:     t4:
+; 64BIT-NOT: movw %si, %ax
+; 64BIT:     leal (%rsi,%rdi), %eax
+  %0 = add i16 %k, %c                             ; <i16> [#uses=3]
+  %1 = icmp eq i16 %k, %c                         ; <i1> [#uses=1]
+  br i1 %1, label %bb, label %bb1
+
+bb:                                               ; preds = %entry
+  tail call void @foo(i16 zeroext %0) nounwind
+  ret i16 %0
+
+bb1:                                              ; preds = %entry
+  ret i16 %0
+}
diff --git a/test/CodeGen/X86/3addr-or.ll b/test/CodeGen/X86/3addr-or.ll
new file mode 100644
index 0000000..30a1f36
--- /dev/null
+++ b/test/CodeGen/X86/3addr-or.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; rdar://7527734
+
+define i32 @test(i32 %x) nounwind readnone ssp {
+entry:
+; CHECK: test:
+; CHECK: leal 3(%rdi), %eax
+  %0 = shl i32 %x, 5                              ; <i32> [#uses=1]
+  %1 = or i32 %0, 3                               ; <i32> [#uses=1]
+  ret i32 %1
+}
+
+define i64 @test2(i8 %A, i8 %B) nounwind {
+; CHECK: test2:
+; CHECK: shrq $4
+; CHECK-NOT: movq
+; CHECK-NOT: orq
+; CHECK: leaq
+; CHECK: ret
+  %C = zext i8 %A to i64                          ; <i64> [#uses=1]
+  %D = shl i64 %C, 4                              ; <i64> [#uses=1]
+  %E = and i64 %D, 48                             ; <i64> [#uses=1]
+  %F = zext i8 %B to i64                          ; <i64> [#uses=1]
+  %G = lshr i64 %F, 4                             ; <i64> [#uses=1]
+  %H = or i64 %G, %E                              ; <i64> [#uses=1]
+  ret i64 %H
+}
diff --git a/test/CodeGen/X86/Atomics-32.ll b/test/CodeGen/X86/Atomics-32.ll
new file mode 100644
index 0000000..0e9b73e
--- /dev/null
+++ b/test/CodeGen/X86/Atomics-32.ll
@@ -0,0 +1,818 @@
+; RUN: llc < %s -march=x86 > %t
+;; Note the 64-bit variants are not supported yet (in 32-bit mode).
+; ModuleID = 'Atomics.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+@sc = common global i8 0		; <i8*> [#uses=52]
+@uc = common global i8 0		; <i8*> [#uses=100]
+@ss = common global i16 0		; <i16*> [#uses=15]
+@us = common global i16 0		; <i16*> [#uses=15]
+@si = common global i32 0		; <i32*> [#uses=15]
+@ui = common global i32 0		; <i32*> [#uses=23]
+@sl = common global i32 0		; <i32*> [#uses=15]
+@ul = common global i32 0		; <i32*> [#uses=15]
+
+define void @test_op_ignore() nounwind {
+entry:
+	call i8 @llvm.atomic.load.add.i8.p0i8( i8* @sc, i8 1 )		; <i8>:0 [#uses=0]
+	call i8 @llvm.atomic.load.add.i8.p0i8( i8* @uc, i8 1 )		; <i8>:1 [#uses=0]
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:2 [#uses=1]
+	call i16 @llvm.atomic.load.add.i16.p0i16( i16* %2, i16 1 )		; <i16>:3 [#uses=0]
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:4 [#uses=1]
+	call i16 @llvm.atomic.load.add.i16.p0i16( i16* %4, i16 1 )		; <i16>:5 [#uses=0]
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:6 [#uses=1]
+	call i32 @llvm.atomic.load.add.i32.p0i32( i32* %6, i32 1 )		; <i32>:7 [#uses=0]
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:8 [#uses=1]
+	call i32 @llvm.atomic.load.add.i32.p0i32( i32* %8, i32 1 )		; <i32>:9 [#uses=0]
+	bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*>:10 [#uses=1]
+	call i32 @llvm.atomic.load.add.i32.p0i32( i32* %10, i32 1 )		; <i32>:11 [#uses=0]
+	bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*>:12 [#uses=1]
+	call i32 @llvm.atomic.load.add.i32.p0i32( i32* %12, i32 1 )		; <i32>:13 [#uses=0]
+	call i8 @llvm.atomic.load.sub.i8.p0i8( i8* @sc, i8 1 )		; <i8>:14 [#uses=0]
+	call i8 @llvm.atomic.load.sub.i8.p0i8( i8* @uc, i8 1 )		; <i8>:15 [#uses=0]
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:16 [#uses=1]
+	call i16 @llvm.atomic.load.sub.i16.p0i16( i16* %16, i16 1 )		; <i16>:17 [#uses=0]
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:18 [#uses=1]
+	call i16 @llvm.atomic.load.sub.i16.p0i16( i16* %18, i16 1 )		; <i16>:19 [#uses=0]
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:20 [#uses=1]
+	call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %20, i32 1 )		; <i32>:21 [#uses=0]
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:22 [#uses=1]
+	call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %22, i32 1 )		; <i32>:23 [#uses=0]
+	bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*>:24 [#uses=1]
+	call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %24, i32 1 )		; <i32>:25 [#uses=0]
+	bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*>:26 [#uses=1]
+	call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %26, i32 1 )		; <i32>:27 [#uses=0]
+	call i8 @llvm.atomic.load.or.i8.p0i8( i8* @sc, i8 1 )		; <i8>:28 [#uses=0]
+	call i8 @llvm.atomic.load.or.i8.p0i8( i8* @uc, i8 1 )		; <i8>:29 [#uses=0]
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:30 [#uses=1]
+	call i16 @llvm.atomic.load.or.i16.p0i16( i16* %30, i16 1 )		; <i16>:31 [#uses=0]
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:32 [#uses=1]
+	call i16 @llvm.atomic.load.or.i16.p0i16( i16* %32, i16 1 )		; <i16>:33 [#uses=0]
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:34 [#uses=1]
+	call i32 @llvm.atomic.load.or.i32.p0i32( i32* %34, i32 1 )		; <i32>:35 [#uses=0]
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:36 [#uses=1]
+	call i32 @llvm.atomic.load.or.i32.p0i32( i32* %36, i32 1 )		; <i32>:37 [#uses=0]
+	bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*>:38 [#uses=1]
+	call i32 @llvm.atomic.load.or.i32.p0i32( i32* %38, i32 1 )		; <i32>:39 [#uses=0]
+	bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*>:40 [#uses=1]
+	call i32 @llvm.atomic.load.or.i32.p0i32( i32* %40, i32 1 )		; <i32>:41 [#uses=0]
+	call i8 @llvm.atomic.load.xor.i8.p0i8( i8* @sc, i8 1 )		; <i8>:42 [#uses=0]
+	call i8 @llvm.atomic.load.xor.i8.p0i8( i8* @uc, i8 1 )		; <i8>:43 [#uses=0]
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:44 [#uses=1]
+	call i16 @llvm.atomic.load.xor.i16.p0i16( i16* %44, i16 1 )		; <i16>:45 [#uses=0]
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:46 [#uses=1]
+	call i16 @llvm.atomic.load.xor.i16.p0i16( i16* %46, i16 1 )		; <i16>:47 [#uses=0]
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:48 [#uses=1]
+	call i32 @llvm.atomic.load.xor.i32.p0i32( i32* %48, i32 1 )		; <i32>:49 [#uses=0]
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:50 [#uses=1]
+	call i32 @llvm.atomic.load.xor.i32.p0i32( i32* %50, i32 1 )		; <i32>:51 [#uses=0]
+	bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*>:52 [#uses=1]
+	call i32 @llvm.atomic.load.xor.i32.p0i32( i32* %52, i32 1 )		; <i32>:53 [#uses=0]
+	bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*>:54 [#uses=1]
+	call i32 @llvm.atomic.load.xor.i32.p0i32( i32* %54, i32 1 )		; <i32>:55 [#uses=0]
+	call i8 @llvm.atomic.load.and.i8.p0i8( i8* @sc, i8 1 )		; <i8>:56 [#uses=0]
+	call i8 @llvm.atomic.load.and.i8.p0i8( i8* @uc, i8 1 )		; <i8>:57 [#uses=0]
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:58 [#uses=1]
+	call i16 @llvm.atomic.load.and.i16.p0i16( i16* %58, i16 1 )		; <i16>:59 [#uses=0]
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:60 [#uses=1]
+	call i16 @llvm.atomic.load.and.i16.p0i16( i16* %60, i16 1 )		; <i16>:61 [#uses=0]
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:62 [#uses=1]
+	call i32 @llvm.atomic.load.and.i32.p0i32( i32* %62, i32 1 )		; <i32>:63 [#uses=0]
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:64 [#uses=1]
+	call i32 @llvm.atomic.load.and.i32.p0i32( i32* %64, i32 1 )		; <i32>:65 [#uses=0]
+	bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*>:66 [#uses=1]
+	call i32 @llvm.atomic.load.and.i32.p0i32( i32* %66, i32 1 )		; <i32>:67 [#uses=0]
+	bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*>:68 [#uses=1]
+	call i32 @llvm.atomic.load.and.i32.p0i32( i32* %68, i32 1 )		; <i32>:69 [#uses=0]
+	call i8 @llvm.atomic.load.nand.i8.p0i8( i8* @sc, i8 1 )		; <i8>:70 [#uses=0]
+	call i8 @llvm.atomic.load.nand.i8.p0i8( i8* @uc, i8 1 )		; <i8>:71 [#uses=0]
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:72 [#uses=1]
+	call i16 @llvm.atomic.load.nand.i16.p0i16( i16* %72, i16 1 )		; <i16>:73 [#uses=0]
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:74 [#uses=1]
+	call i16 @llvm.atomic.load.nand.i16.p0i16( i16* %74, i16 1 )		; <i16>:75 [#uses=0]
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:76 [#uses=1]
+	call i32 @llvm.atomic.load.nand.i32.p0i32( i32* %76, i32 1 )		; <i32>:77 [#uses=0]
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:78 [#uses=1]
+	call i32 @llvm.atomic.load.nand.i32.p0i32( i32* %78, i32 1 )		; <i32>:79 [#uses=0]
+	bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*>:80 [#uses=1]
+	call i32 @llvm.atomic.load.nand.i32.p0i32( i32* %80, i32 1 )		; <i32>:81 [#uses=0]
+	bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*>:82 [#uses=1]
+	call i32 @llvm.atomic.load.nand.i32.p0i32( i32* %82, i32 1 )		; <i32>:83 [#uses=0]
+	br label %return
+
+return:		; preds = %entry
+	ret void
+}
+
+declare i8 @llvm.atomic.load.add.i8.p0i8(i8*, i8) nounwind
+
+declare i16 @llvm.atomic.load.add.i16.p0i16(i16*, i16) nounwind
+
+declare i32 @llvm.atomic.load.add.i32.p0i32(i32*, i32) nounwind
+
+declare i8 @llvm.atomic.load.sub.i8.p0i8(i8*, i8) nounwind
+
+declare i16 @llvm.atomic.load.sub.i16.p0i16(i16*, i16) nounwind
+
+declare i32 @llvm.atomic.load.sub.i32.p0i32(i32*, i32) nounwind
+
+declare i8 @llvm.atomic.load.or.i8.p0i8(i8*, i8) nounwind
+
+declare i16 @llvm.atomic.load.or.i16.p0i16(i16*, i16) nounwind
+
+declare i32 @llvm.atomic.load.or.i32.p0i32(i32*, i32) nounwind
+
+declare i8 @llvm.atomic.load.xor.i8.p0i8(i8*, i8) nounwind
+
+declare i16 @llvm.atomic.load.xor.i16.p0i16(i16*, i16) nounwind
+
+declare i32 @llvm.atomic.load.xor.i32.p0i32(i32*, i32) nounwind
+
+declare i8 @llvm.atomic.load.and.i8.p0i8(i8*, i8) nounwind
+
+declare i16 @llvm.atomic.load.and.i16.p0i16(i16*, i16) nounwind
+
+declare i32 @llvm.atomic.load.and.i32.p0i32(i32*, i32) nounwind
+
+declare i8 @llvm.atomic.load.nand.i8.p0i8(i8*, i8) nounwind
+
+declare i16 @llvm.atomic.load.nand.i16.p0i16(i16*, i16) nounwind
+
+declare i32 @llvm.atomic.load.nand.i32.p0i32(i32*, i32) nounwind
+
+define void @test_fetch_and_op() nounwind {
+entry:
+	call i8 @llvm.atomic.load.add.i8.p0i8( i8* @sc, i8 11 )		; <i8>:0 [#uses=1]
+	store i8 %0, i8* @sc, align 1
+	call i8 @llvm.atomic.load.add.i8.p0i8( i8* @uc, i8 11 )		; <i8>:1 [#uses=1]
+	store i8 %1, i8* @uc, align 1
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:2 [#uses=1]
+	call i16 @llvm.atomic.load.add.i16.p0i16( i16* %2, i16 11 )		; <i16>:3 [#uses=1]
+	store i16 %3, i16* @ss, align 2
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:4 [#uses=1]
+	call i16 @llvm.atomic.load.add.i16.p0i16( i16* %4, i16 11 )		; <i16>:5 [#uses=1]
+	store i16 %5, i16* @us, align 2
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:6 [#uses=1]
+	call i32 @llvm.atomic.load.add.i32.p0i32( i32* %6, i32 11 )		; <i32>:7 [#uses=1]
+	store i32 %7, i32* @si, align 4
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:8 [#uses=1]
+	call i32 @llvm.atomic.load.add.i32.p0i32( i32* %8, i32 11 )		; <i32>:9 [#uses=1]
+	store i32 %9, i32* @ui, align 4
+	bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*>:10 [#uses=1]
+	call i32 @llvm.atomic.load.add.i32.p0i32( i32* %10, i32 11 )		; <i32>:11 [#uses=1]
+	store i32 %11, i32* @sl, align 4
+	bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*>:12 [#uses=1]
+	call i32 @llvm.atomic.load.add.i32.p0i32( i32* %12, i32 11 )		; <i32>:13 [#uses=1]
+	store i32 %13, i32* @ul, align 4
+	call i8 @llvm.atomic.load.sub.i8.p0i8( i8* @sc, i8 11 )		; <i8>:14 [#uses=1]
+	store i8 %14, i8* @sc, align 1
+	call i8 @llvm.atomic.load.sub.i8.p0i8( i8* @uc, i8 11 )		; <i8>:15 [#uses=1]
+	store i8 %15, i8* @uc, align 1
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:16 [#uses=1]
+	call i16 @llvm.atomic.load.sub.i16.p0i16( i16* %16, i16 11 )		; <i16>:17 [#uses=1]
+	store i16 %17, i16* @ss, align 2
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:18 [#uses=1]
+	call i16 @llvm.atomic.load.sub.i16.p0i16( i16* %18, i16 11 )		; <i16>:19 [#uses=1]
+	store i16 %19, i16* @us, align 2
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:20 [#uses=1]
+	call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %20, i32 11 )		; <i32>:21 [#uses=1]
+	store i32 %21, i32* @si, align 4
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:22 [#uses=1]
+	call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %22, i32 11 )		; <i32>:23 [#uses=1]
+	store i32 %23, i32* @ui, align 4
+	bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*>:24 [#uses=1]
+	call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %24, i32 11 )		; <i32>:25 [#uses=1]
+	store i32 %25, i32* @sl, align 4
+	bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*>:26 [#uses=1]
+	call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %26, i32 11 )		; <i32>:27 [#uses=1]
+	store i32 %27, i32* @ul, align 4
+	call i8 @llvm.atomic.load.or.i8.p0i8( i8* @sc, i8 11 )		; <i8>:28 [#uses=1]
+	store i8 %28, i8* @sc, align 1
+	call i8 @llvm.atomic.load.or.i8.p0i8( i8* @uc, i8 11 )		; <i8>:29 [#uses=1]
+	store i8 %29, i8* @uc, align 1
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:30 [#uses=1]
+	call i16 @llvm.atomic.load.or.i16.p0i16( i16* %30, i16 11 )		; <i16>:31 [#uses=1]
+	store i16 %31, i16* @ss, align 2
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:32 [#uses=1]
+	call i16 @llvm.atomic.load.or.i16.p0i16( i16* %32, i16 11 )		; <i16>:33 [#uses=1]
+	store i16 %33, i16* @us, align 2
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:34 [#uses=1]
+	call i32 @llvm.atomic.load.or.i32.p0i32( i32* %34, i32 11 )		; <i32>:35 [#uses=1]
+	store i32 %35, i32* @si, align 4
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:36 [#uses=1]
+	call i32 @llvm.atomic.load.or.i32.p0i32( i32* %36, i32 11 )		; <i32>:37 [#uses=1]
+	store i32 %37, i32* @ui, align 4
+	bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*>:38 [#uses=1]
+	call i32 @llvm.atomic.load.or.i32.p0i32( i32* %38, i32 11 )		; <i32>:39 [#uses=1]
+	store i32 %39, i32* @sl, align 4
+	bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*>:40 [#uses=1]
+	call i32 @llvm.atomic.load.or.i32.p0i32( i32* %40, i32 11 )		; <i32>:41 [#uses=1]
+	store i32 %41, i32* @ul, align 4
+	call i8 @llvm.atomic.load.xor.i8.p0i8( i8* @sc, i8 11 )		; <i8>:42 [#uses=1]
+	store i8 %42, i8* @sc, align 1
+	call i8 @llvm.atomic.load.xor.i8.p0i8( i8* @uc, i8 11 )		; <i8>:43 [#uses=1]
+	store i8 %43, i8* @uc, align 1
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:44 [#uses=1]
+	call i16 @llvm.atomic.load.xor.i16.p0i16( i16* %44, i16 11 )		; <i16>:45 [#uses=1]
+	store i16 %45, i16* @ss, align 2
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:46 [#uses=1]
+	call i16 @llvm.atomic.load.xor.i16.p0i16( i16* %46, i16 11 )		; <i16>:47 [#uses=1]
+	store i16 %47, i16* @us, align 2
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:48 [#uses=1]
+	call i32 @llvm.atomic.load.xor.i32.p0i32( i32* %48, i32 11 )		; <i32>:49 [#uses=1]
+	store i32 %49, i32* @si, align 4
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:50 [#uses=1]
+	call i32 @llvm.atomic.load.xor.i32.p0i32( i32* %50, i32 11 )		; <i32>:51 [#uses=1]
+	store i32 %51, i32* @ui, align 4
+	bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*>:52 [#uses=1]
+	call i32 @llvm.atomic.load.xor.i32.p0i32( i32* %52, i32 11 )		; <i32>:53 [#uses=1]
+	store i32 %53, i32* @sl, align 4
+	bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*>:54 [#uses=1]
+	call i32 @llvm.atomic.load.xor.i32.p0i32( i32* %54, i32 11 )		; <i32>:55 [#uses=1]
+	store i32 %55, i32* @ul, align 4
+	call i8 @llvm.atomic.load.and.i8.p0i8( i8* @sc, i8 11 )		; <i8>:56 [#uses=1]
+	store i8 %56, i8* @sc, align 1
+	call i8 @llvm.atomic.load.and.i8.p0i8( i8* @uc, i8 11 )		; <i8>:57 [#uses=1]
+	store i8 %57, i8* @uc, align 1
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:58 [#uses=1]
+	call i16 @llvm.atomic.load.and.i16.p0i16( i16* %58, i16 11 )		; <i16>:59 [#uses=1]
+	store i16 %59, i16* @ss, align 2
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:60 [#uses=1]
+	call i16 @llvm.atomic.load.and.i16.p0i16( i16* %60, i16 11 )		; <i16>:61 [#uses=1]
+	store i16 %61, i16* @us, align 2
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:62 [#uses=1]
+	call i32 @llvm.atomic.load.and.i32.p0i32( i32* %62, i32 11 )		; <i32>:63 [#uses=1]
+	store i32 %63, i32* @si, align 4
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:64 [#uses=1]
+	call i32 @llvm.atomic.load.and.i32.p0i32( i32* %64, i32 11 )		; <i32>:65 [#uses=1]
+	store i32 %65, i32* @ui, align 4
+	bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*>:66 [#uses=1]
+	call i32 @llvm.atomic.load.and.i32.p0i32( i32* %66, i32 11 )		; <i32>:67 [#uses=1]
+	store i32 %67, i32* @sl, align 4
+	bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*>:68 [#uses=1]
+	call i32 @llvm.atomic.load.and.i32.p0i32( i32* %68, i32 11 )		; <i32>:69 [#uses=1]
+	store i32 %69, i32* @ul, align 4
+	call i8 @llvm.atomic.load.nand.i8.p0i8( i8* @sc, i8 11 )		; <i8>:70 [#uses=1]
+	store i8 %70, i8* @sc, align 1
+	call i8 @llvm.atomic.load.nand.i8.p0i8( i8* @uc, i8 11 )		; <i8>:71 [#uses=1]
+	store i8 %71, i8* @uc, align 1
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:72 [#uses=1]
+	call i16 @llvm.atomic.load.nand.i16.p0i16( i16* %72, i16 11 )		; <i16>:73 [#uses=1]
+	store i16 %73, i16* @ss, align 2
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:74 [#uses=1]
+	call i16 @llvm.atomic.load.nand.i16.p0i16( i16* %74, i16 11 )		; <i16>:75 [#uses=1]
+	store i16 %75, i16* @us, align 2
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:76 [#uses=1]
+	call i32 @llvm.atomic.load.nand.i32.p0i32( i32* %76, i32 11 )		; <i32>:77 [#uses=1]
+	store i32 %77, i32* @si, align 4
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:78 [#uses=1]
+	call i32 @llvm.atomic.load.nand.i32.p0i32( i32* %78, i32 11 )		; <i32>:79 [#uses=1]
+	store i32 %79, i32* @ui, align 4
+	bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*>:80 [#uses=1]
+	call i32 @llvm.atomic.load.nand.i32.p0i32( i32* %80, i32 11 )		; <i32>:81 [#uses=1]
+	store i32 %81, i32* @sl, align 4
+	bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*>:82 [#uses=1]
+	call i32 @llvm.atomic.load.nand.i32.p0i32( i32* %82, i32 11 )		; <i32>:83 [#uses=1]
+	store i32 %83, i32* @ul, align 4
+	br label %return
+
+return:		; preds = %entry
+	ret void
+}
+
+define void @test_op_and_fetch() nounwind {
+entry:
+	load i8* @uc, align 1		; <i8>:0 [#uses=1]
+	zext i8 %0 to i32		; <i32>:1 [#uses=1]
+	trunc i32 %1 to i8		; <i8>:2 [#uses=2]
+	call i8 @llvm.atomic.load.add.i8.p0i8( i8* @sc, i8 %2 )		; <i8>:3 [#uses=1]
+	add i8 %3, %2		; <i8>:4 [#uses=1]
+	store i8 %4, i8* @sc, align 1
+	load i8* @uc, align 1		; <i8>:5 [#uses=1]
+	zext i8 %5 to i32		; <i32>:6 [#uses=1]
+	trunc i32 %6 to i8		; <i8>:7 [#uses=2]
+	call i8 @llvm.atomic.load.add.i8.p0i8( i8* @uc, i8 %7 )		; <i8>:8 [#uses=1]
+	add i8 %8, %7		; <i8>:9 [#uses=1]
+	store i8 %9, i8* @uc, align 1
+	load i8* @uc, align 1		; <i8>:10 [#uses=1]
+	zext i8 %10 to i32		; <i32>:11 [#uses=1]
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:12 [#uses=1]
+	trunc i32 %11 to i16		; <i16>:13 [#uses=2]
+	call i16 @llvm.atomic.load.add.i16.p0i16( i16* %12, i16 %13 )		; <i16>:14 [#uses=1]
+	add i16 %14, %13		; <i16>:15 [#uses=1]
+	store i16 %15, i16* @ss, align 2
+	load i8* @uc, align 1		; <i8>:16 [#uses=1]
+	zext i8 %16 to i32		; <i32>:17 [#uses=1]
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:18 [#uses=1]
+	trunc i32 %17 to i16		; <i16>:19 [#uses=2]
+	call i16 @llvm.atomic.load.add.i16.p0i16( i16* %18, i16 %19 )		; <i16>:20 [#uses=1]
+	add i16 %20, %19		; <i16>:21 [#uses=1]
+	store i16 %21, i16* @us, align 2
+	load i8* @uc, align 1		; <i8>:22 [#uses=1]
+	zext i8 %22 to i32		; <i32>:23 [#uses=2]
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:24 [#uses=1]
+	call i32 @llvm.atomic.load.add.i32.p0i32( i32* %24, i32 %23 )		; <i32>:25 [#uses=1]
+	add i32 %25, %23		; <i32>:26 [#uses=1]
+	store i32 %26, i32* @si, align 4
+	load i8* @uc, align 1		; <i8>:27 [#uses=1]
+	zext i8 %27 to i32		; <i32>:28 [#uses=2]
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:29 [#uses=1]
+	call i32 @llvm.atomic.load.add.i32.p0i32( i32* %29, i32 %28 )		; <i32>:30 [#uses=1]
+	add i32 %30, %28		; <i32>:31 [#uses=1]
+	store i32 %31, i32* @ui, align 4
+	load i8* @uc, align 1		; <i8>:32 [#uses=1]
+	zext i8 %32 to i32		; <i32>:33 [#uses=2]
+	bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*>:34 [#uses=1]
+	call i32 @llvm.atomic.load.add.i32.p0i32( i32* %34, i32 %33 )		; <i32>:35 [#uses=1]
+	add i32 %35, %33		; <i32>:36 [#uses=1]
+	store i32 %36, i32* @sl, align 4
+	load i8* @uc, align 1		; <i8>:37 [#uses=1]
+	zext i8 %37 to i32		; <i32>:38 [#uses=2]
+	bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*>:39 [#uses=1]
+	call i32 @llvm.atomic.load.add.i32.p0i32( i32* %39, i32 %38 )		; <i32>:40 [#uses=1]
+	add i32 %40, %38		; <i32>:41 [#uses=1]
+	store i32 %41, i32* @ul, align 4
+	load i8* @uc, align 1		; <i8>:42 [#uses=1]
+	zext i8 %42 to i32		; <i32>:43 [#uses=1]
+	trunc i32 %43 to i8		; <i8>:44 [#uses=2]
+	call i8 @llvm.atomic.load.sub.i8.p0i8( i8* @sc, i8 %44 )		; <i8>:45 [#uses=1]
+	sub i8 %45, %44		; <i8>:46 [#uses=1]
+	store i8 %46, i8* @sc, align 1
+	load i8* @uc, align 1		; <i8>:47 [#uses=1]
+	zext i8 %47 to i32		; <i32>:48 [#uses=1]
+	trunc i32 %48 to i8		; <i8>:49 [#uses=2]
+	call i8 @llvm.atomic.load.sub.i8.p0i8( i8* @uc, i8 %49 )		; <i8>:50 [#uses=1]
+	sub i8 %50, %49		; <i8>:51 [#uses=1]
+	store i8 %51, i8* @uc, align 1
+	load i8* @uc, align 1		; <i8>:52 [#uses=1]
+	zext i8 %52 to i32		; <i32>:53 [#uses=1]
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:54 [#uses=1]
+	trunc i32 %53 to i16		; <i16>:55 [#uses=2]
+	call i16 @llvm.atomic.load.sub.i16.p0i16( i16* %54, i16 %55 )		; <i16>:56 [#uses=1]
+	sub i16 %56, %55		; <i16>:57 [#uses=1]
+	store i16 %57, i16* @ss, align 2
+	load i8* @uc, align 1		; <i8>:58 [#uses=1]
+	zext i8 %58 to i32		; <i32>:59 [#uses=1]
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:60 [#uses=1]
+	trunc i32 %59 to i16		; <i16>:61 [#uses=2]
+	call i16 @llvm.atomic.load.sub.i16.p0i16( i16* %60, i16 %61 )		; <i16>:62 [#uses=1]
+	sub i16 %62, %61		; <i16>:63 [#uses=1]
+	store i16 %63, i16* @us, align 2
+	load i8* @uc, align 1		; <i8>:64 [#uses=1]
+	zext i8 %64 to i32		; <i32>:65 [#uses=2]
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:66 [#uses=1]
+	call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %66, i32 %65 )		; <i32>:67 [#uses=1]
+	sub i32 %67, %65		; <i32>:68 [#uses=1]
+	store i32 %68, i32* @si, align 4
+	load i8* @uc, align 1		; <i8>:69 [#uses=1]
+	zext i8 %69 to i32		; <i32>:70 [#uses=2]
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:71 [#uses=1]
+	call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %71, i32 %70 )		; <i32>:72 [#uses=1]
+	sub i32 %72, %70		; <i32>:73 [#uses=1]
+	store i32 %73, i32* @ui, align 4
+	load i8* @uc, align 1		; <i8>:74 [#uses=1]
+	zext i8 %74 to i32		; <i32>:75 [#uses=2]
+	bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*>:76 [#uses=1]
+	call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %76, i32 %75 )		; <i32>:77 [#uses=1]
+	sub i32 %77, %75		; <i32>:78 [#uses=1]
+	store i32 %78, i32* @sl, align 4
+	load i8* @uc, align 1		; <i8>:79 [#uses=1]
+	zext i8 %79 to i32		; <i32>:80 [#uses=2]
+	bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*>:81 [#uses=1]
+	call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %81, i32 %80 )		; <i32>:82 [#uses=1]
+	sub i32 %82, %80		; <i32>:83 [#uses=1]
+	store i32 %83, i32* @ul, align 4
+	load i8* @uc, align 1		; <i8>:84 [#uses=1]
+	zext i8 %84 to i32		; <i32>:85 [#uses=1]
+	trunc i32 %85 to i8		; <i8>:86 [#uses=2]
+	call i8 @llvm.atomic.load.or.i8.p0i8( i8* @sc, i8 %86 )		; <i8>:87 [#uses=1]
+	or i8 %87, %86		; <i8>:88 [#uses=1]
+	store i8 %88, i8* @sc, align 1
+	load i8* @uc, align 1		; <i8>:89 [#uses=1]
+	zext i8 %89 to i32		; <i32>:90 [#uses=1]
+	trunc i32 %90 to i8		; <i8>:91 [#uses=2]
+	call i8 @llvm.atomic.load.or.i8.p0i8( i8* @uc, i8 %91 )		; <i8>:92 [#uses=1]
+	or i8 %92, %91		; <i8>:93 [#uses=1]
+	store i8 %93, i8* @uc, align 1
+	load i8* @uc, align 1		; <i8>:94 [#uses=1]
+	zext i8 %94 to i32		; <i32>:95 [#uses=1]
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:96 [#uses=1]
+	trunc i32 %95 to i16		; <i16>:97 [#uses=2]
+	call i16 @llvm.atomic.load.or.i16.p0i16( i16* %96, i16 %97 )		; <i16>:98 [#uses=1]
+	or i16 %98, %97		; <i16>:99 [#uses=1]
+	store i16 %99, i16* @ss, align 2
+	load i8* @uc, align 1		; <i8>:100 [#uses=1]
+	zext i8 %100 to i32		; <i32>:101 [#uses=1]
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:102 [#uses=1]
+	trunc i32 %101 to i16		; <i16>:103 [#uses=2]
+	call i16 @llvm.atomic.load.or.i16.p0i16( i16* %102, i16 %103 )		; <i16>:104 [#uses=1]
+	or i16 %104, %103		; <i16>:105 [#uses=1]
+	store i16 %105, i16* @us, align 2
+	load i8* @uc, align 1		; <i8>:106 [#uses=1]
+	zext i8 %106 to i32		; <i32>:107 [#uses=2]
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:108 [#uses=1]
+	call i32 @llvm.atomic.load.or.i32.p0i32( i32* %108, i32 %107 )		; <i32>:109 [#uses=1]
+	or i32 %109, %107		; <i32>:110 [#uses=1]
+	store i32 %110, i32* @si, align 4
+	load i8* @uc, align 1		; <i8>:111 [#uses=1]
+	zext i8 %111 to i32		; <i32>:112 [#uses=2]
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:113 [#uses=1]
+	call i32 @llvm.atomic.load.or.i32.p0i32( i32* %113, i32 %112 )		; <i32>:114 [#uses=1]
+	or i32 %114, %112		; <i32>:115 [#uses=1]
+	store i32 %115, i32* @ui, align 4
+	load i8* @uc, align 1		; <i8>:116 [#uses=1]
+	zext i8 %116 to i32		; <i32>:117 [#uses=2]
+	bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*>:118 [#uses=1]
+	call i32 @llvm.atomic.load.or.i32.p0i32( i32* %118, i32 %117 )		; <i32>:119 [#uses=1]
+	or i32 %119, %117		; <i32>:120 [#uses=1]
+	store i32 %120, i32* @sl, align 4
+	load i8* @uc, align 1		; <i8>:121 [#uses=1]
+	zext i8 %121 to i32		; <i32>:122 [#uses=2]
+	bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*>:123 [#uses=1]
+	call i32 @llvm.atomic.load.or.i32.p0i32( i32* %123, i32 %122 )		; <i32>:124 [#uses=1]
+	or i32 %124, %122		; <i32>:125 [#uses=1]
+	store i32 %125, i32* @ul, align 4
+	load i8* @uc, align 1		; <i8>:126 [#uses=1]
+	zext i8 %126 to i32		; <i32>:127 [#uses=1]
+	trunc i32 %127 to i8		; <i8>:128 [#uses=2]
+	call i8 @llvm.atomic.load.xor.i8.p0i8( i8* @sc, i8 %128 )		; <i8>:129 [#uses=1]
+	xor i8 %129, %128		; <i8>:130 [#uses=1]
+	store i8 %130, i8* @sc, align 1
+	load i8* @uc, align 1		; <i8>:131 [#uses=1]
+	zext i8 %131 to i32		; <i32>:132 [#uses=1]
+	trunc i32 %132 to i8		; <i8>:133 [#uses=2]
+	call i8 @llvm.atomic.load.xor.i8.p0i8( i8* @uc, i8 %133 )		; <i8>:134 [#uses=1]
+	xor i8 %134, %133		; <i8>:135 [#uses=1]
+	store i8 %135, i8* @uc, align 1
+	load i8* @uc, align 1		; <i8>:136 [#uses=1]
+	zext i8 %136 to i32		; <i32>:137 [#uses=1]
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:138 [#uses=1]
+	trunc i32 %137 to i16		; <i16>:139 [#uses=2]
+	call i16 @llvm.atomic.load.xor.i16.p0i16( i16* %138, i16 %139 )		; <i16>:140 [#uses=1]
+	xor i16 %140, %139		; <i16>:141 [#uses=1]
+	store i16 %141, i16* @ss, align 2
+	load i8* @uc, align 1		; <i8>:142 [#uses=1]
+	zext i8 %142 to i32		; <i32>:143 [#uses=1]
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:144 [#uses=1]
+	trunc i32 %143 to i16		; <i16>:145 [#uses=2]
+	call i16 @llvm.atomic.load.xor.i16.p0i16( i16* %144, i16 %145 )		; <i16>:146 [#uses=1]
+	xor i16 %146, %145		; <i16>:147 [#uses=1]
+	store i16 %147, i16* @us, align 2
+	load i8* @uc, align 1		; <i8>:148 [#uses=1]
+	zext i8 %148 to i32		; <i32>:149 [#uses=2]
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:150 [#uses=1]
+	call i32 @llvm.atomic.load.xor.i32.p0i32( i32* %150, i32 %149 )		; <i32>:151 [#uses=1]
+	xor i32 %151, %149		; <i32>:152 [#uses=1]
+	store i32 %152, i32* @si, align 4
+	load i8* @uc, align 1		; <i8>:153 [#uses=1]
+	zext i8 %153 to i32		; <i32>:154 [#uses=2]
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:155 [#uses=1]
+	call i32 @llvm.atomic.load.xor.i32.p0i32( i32* %155, i32 %154 )		; <i32>:156 [#uses=1]
+	xor i32 %156, %154		; <i32>:157 [#uses=1]
+	store i32 %157, i32* @ui, align 4
+	load i8* @uc, align 1		; <i8>:158 [#uses=1]
+	zext i8 %158 to i32		; <i32>:159 [#uses=2]
+	bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*>:160 [#uses=1]
+	call i32 @llvm.atomic.load.xor.i32.p0i32( i32* %160, i32 %159 )		; <i32>:161 [#uses=1]
+	xor i32 %161, %159		; <i32>:162 [#uses=1]
+	store i32 %162, i32* @sl, align 4
+	load i8* @uc, align 1		; <i8>:163 [#uses=1]
+	zext i8 %163 to i32		; <i32>:164 [#uses=2]
+	bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*>:165 [#uses=1]
+	call i32 @llvm.atomic.load.xor.i32.p0i32( i32* %165, i32 %164 )		; <i32>:166 [#uses=1]
+	xor i32 %166, %164		; <i32>:167 [#uses=1]
+	store i32 %167, i32* @ul, align 4
+	load i8* @uc, align 1		; <i8>:168 [#uses=1]
+	zext i8 %168 to i32		; <i32>:169 [#uses=1]
+	trunc i32 %169 to i8		; <i8>:170 [#uses=2]
+	call i8 @llvm.atomic.load.and.i8.p0i8( i8* @sc, i8 %170 )		; <i8>:171 [#uses=1]
+	and i8 %171, %170		; <i8>:172 [#uses=1]
+	store i8 %172, i8* @sc, align 1
+	load i8* @uc, align 1		; <i8>:173 [#uses=1]
+	zext i8 %173 to i32		; <i32>:174 [#uses=1]
+	trunc i32 %174 to i8		; <i8>:175 [#uses=2]
+	call i8 @llvm.atomic.load.and.i8.p0i8( i8* @uc, i8 %175 )		; <i8>:176 [#uses=1]
+	and i8 %176, %175		; <i8>:177 [#uses=1]
+	store i8 %177, i8* @uc, align 1
+	load i8* @uc, align 1		; <i8>:178 [#uses=1]
+	zext i8 %178 to i32		; <i32>:179 [#uses=1]
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:180 [#uses=1]
+	trunc i32 %179 to i16		; <i16>:181 [#uses=2]
+	call i16 @llvm.atomic.load.and.i16.p0i16( i16* %180, i16 %181 )		; <i16>:182 [#uses=1]
+	and i16 %182, %181		; <i16>:183 [#uses=1]
+	store i16 %183, i16* @ss, align 2
+	load i8* @uc, align 1		; <i8>:184 [#uses=1]
+	zext i8 %184 to i32		; <i32>:185 [#uses=1]
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:186 [#uses=1]
+	trunc i32 %185 to i16		; <i16>:187 [#uses=2]
+	call i16 @llvm.atomic.load.and.i16.p0i16( i16* %186, i16 %187 )		; <i16>:188 [#uses=1]
+	and i16 %188, %187		; <i16>:189 [#uses=1]
+	store i16 %189, i16* @us, align 2
+	load i8* @uc, align 1		; <i8>:190 [#uses=1]
+	zext i8 %190 to i32		; <i32>:191 [#uses=2]
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:192 [#uses=1]
+	call i32 @llvm.atomic.load.and.i32.p0i32( i32* %192, i32 %191 )		; <i32>:193 [#uses=1]
+	and i32 %193, %191		; <i32>:194 [#uses=1]
+	store i32 %194, i32* @si, align 4
+	load i8* @uc, align 1		; <i8>:195 [#uses=1]
+	zext i8 %195 to i32		; <i32>:196 [#uses=2]
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:197 [#uses=1]
+	call i32 @llvm.atomic.load.and.i32.p0i32( i32* %197, i32 %196 )		; <i32>:198 [#uses=1]
+	and i32 %198, %196		; <i32>:199 [#uses=1]
+	store i32 %199, i32* @ui, align 4
+	load i8* @uc, align 1		; <i8>:200 [#uses=1]
+	zext i8 %200 to i32		; <i32>:201 [#uses=2]
+	bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*>:202 [#uses=1]
+	call i32 @llvm.atomic.load.and.i32.p0i32( i32* %202, i32 %201 )		; <i32>:203 [#uses=1]
+	and i32 %203, %201		; <i32>:204 [#uses=1]
+	store i32 %204, i32* @sl, align 4
+	load i8* @uc, align 1		; <i8>:205 [#uses=1]
+	zext i8 %205 to i32		; <i32>:206 [#uses=2]
+	bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*>:207 [#uses=1]
+	call i32 @llvm.atomic.load.and.i32.p0i32( i32* %207, i32 %206 )		; <i32>:208 [#uses=1]
+	and i32 %208, %206		; <i32>:209 [#uses=1]
+	store i32 %209, i32* @ul, align 4
+	load i8* @uc, align 1		; <i8>:210 [#uses=1]
+	zext i8 %210 to i32		; <i32>:211 [#uses=1]
+	trunc i32 %211 to i8		; <i8>:212 [#uses=2]
+	call i8 @llvm.atomic.load.nand.i8.p0i8( i8* @sc, i8 %212 )		; <i8>:213 [#uses=1]
+	xor i8 %213, -1		; <i8>:214 [#uses=1]
+	and i8 %214, %212		; <i8>:215 [#uses=1]
+	store i8 %215, i8* @sc, align 1
+	load i8* @uc, align 1		; <i8>:216 [#uses=1]
+	zext i8 %216 to i32		; <i32>:217 [#uses=1]
+	trunc i32 %217 to i8		; <i8>:218 [#uses=2]
+	call i8 @llvm.atomic.load.nand.i8.p0i8( i8* @uc, i8 %218 )		; <i8>:219 [#uses=1]
+	xor i8 %219, -1		; <i8>:220 [#uses=1]
+	and i8 %220, %218		; <i8>:221 [#uses=1]
+	store i8 %221, i8* @uc, align 1
+	load i8* @uc, align 1		; <i8>:222 [#uses=1]
+	zext i8 %222 to i32		; <i32>:223 [#uses=1]
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:224 [#uses=1]
+	trunc i32 %223 to i16		; <i16>:225 [#uses=2]
+	call i16 @llvm.atomic.load.nand.i16.p0i16( i16* %224, i16 %225 )		; <i16>:226 [#uses=1]
+	xor i16 %226, -1		; <i16>:227 [#uses=1]
+	and i16 %227, %225		; <i16>:228 [#uses=1]
+	store i16 %228, i16* @ss, align 2
+	load i8* @uc, align 1		; <i8>:229 [#uses=1]
+	zext i8 %229 to i32		; <i32>:230 [#uses=1]
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:231 [#uses=1]
+	trunc i32 %230 to i16		; <i16>:232 [#uses=2]
+	call i16 @llvm.atomic.load.nand.i16.p0i16( i16* %231, i16 %232 )		; <i16>:233 [#uses=1]
+	xor i16 %233, -1		; <i16>:234 [#uses=1]
+	and i16 %234, %232		; <i16>:235 [#uses=1]
+	store i16 %235, i16* @us, align 2
+	load i8* @uc, align 1		; <i8>:236 [#uses=1]
+	zext i8 %236 to i32		; <i32>:237 [#uses=2]
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:238 [#uses=1]
+	call i32 @llvm.atomic.load.nand.i32.p0i32( i32* %238, i32 %237 )		; <i32>:239 [#uses=1]
+	xor i32 %239, -1		; <i32>:240 [#uses=1]
+	and i32 %240, %237		; <i32>:241 [#uses=1]
+	store i32 %241, i32* @si, align 4
+	load i8* @uc, align 1		; <i8>:242 [#uses=1]
+	zext i8 %242 to i32		; <i32>:243 [#uses=2]
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:244 [#uses=1]
+	call i32 @llvm.atomic.load.nand.i32.p0i32( i32* %244, i32 %243 )		; <i32>:245 [#uses=1]
+	xor i32 %245, -1		; <i32>:246 [#uses=1]
+	and i32 %246, %243		; <i32>:247 [#uses=1]
+	store i32 %247, i32* @ui, align 4
+	load i8* @uc, align 1		; <i8>:248 [#uses=1]
+	zext i8 %248 to i32		; <i32>:249 [#uses=2]
+	bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*>:250 [#uses=1]
+	call i32 @llvm.atomic.load.nand.i32.p0i32( i32* %250, i32 %249 )		; <i32>:251 [#uses=1]
+	xor i32 %251, -1		; <i32>:252 [#uses=1]
+	and i32 %252, %249		; <i32>:253 [#uses=1]
+	store i32 %253, i32* @sl, align 4
+	load i8* @uc, align 1		; <i8>:254 [#uses=1]
+	zext i8 %254 to i32		; <i32>:255 [#uses=2]
+	bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*>:256 [#uses=1]
+	call i32 @llvm.atomic.load.nand.i32.p0i32( i32* %256, i32 %255 )		; <i32>:257 [#uses=1]
+	xor i32 %257, -1		; <i32>:258 [#uses=1]
+	and i32 %258, %255		; <i32>:259 [#uses=1]
+	store i32 %259, i32* @ul, align 4
+	br label %return
+
+return:		; preds = %entry
+	ret void
+}
+
+; Exercises the pre-2.9 llvm.atomic.cmp.swap.* intrinsics at i8/i16/i32 widths
+; (presumably generated from GCC __sync_*_compare_and_swap builtins in
+; Atomics.c -- TODO confirm against the C source).  The first half stores the
+; returned old value back into the target global ("val" flavor); the second
+; half compares the returned value against the expected operand and stores the
+; zext'd boolean result to @ui ("bool" flavor).
+define void @test_compare_and_swap() nounwind {
+entry:
+	load i8* @sc, align 1		; <i8>:0 [#uses=1]
+	zext i8 %0 to i32		; <i32>:1 [#uses=1]
+	load i8* @uc, align 1		; <i8>:2 [#uses=1]
+	zext i8 %2 to i32		; <i32>:3 [#uses=1]
+	trunc i32 %3 to i8		; <i8>:4 [#uses=1]
+	trunc i32 %1 to i8		; <i8>:5 [#uses=1]
+	call i8 @llvm.atomic.cmp.swap.i8.p0i8( i8* @sc, i8 %4, i8 %5 )		; <i8>:6 [#uses=1]
+	store i8 %6, i8* @sc, align 1
+	load i8* @sc, align 1		; <i8>:7 [#uses=1]
+	zext i8 %7 to i32		; <i32>:8 [#uses=1]
+	load i8* @uc, align 1		; <i8>:9 [#uses=1]
+	zext i8 %9 to i32		; <i32>:10 [#uses=1]
+	trunc i32 %10 to i8		; <i8>:11 [#uses=1]
+	trunc i32 %8 to i8		; <i8>:12 [#uses=1]
+	call i8 @llvm.atomic.cmp.swap.i8.p0i8( i8* @uc, i8 %11, i8 %12 )		; <i8>:13 [#uses=1]
+	store i8 %13, i8* @uc, align 1
+	load i8* @sc, align 1		; <i8>:14 [#uses=1]
+	sext i8 %14 to i16		; <i16>:15 [#uses=1]
+	zext i16 %15 to i32		; <i32>:16 [#uses=1]
+	load i8* @uc, align 1		; <i8>:17 [#uses=1]
+	zext i8 %17 to i32		; <i32>:18 [#uses=1]
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:19 [#uses=1]
+	trunc i32 %18 to i16		; <i16>:20 [#uses=1]
+	trunc i32 %16 to i16		; <i16>:21 [#uses=1]
+	call i16 @llvm.atomic.cmp.swap.i16.p0i16( i16* %19, i16 %20, i16 %21 )		; <i16>:22 [#uses=1]
+	store i16 %22, i16* @ss, align 2
+	load i8* @sc, align 1		; <i8>:23 [#uses=1]
+	sext i8 %23 to i16		; <i16>:24 [#uses=1]
+	zext i16 %24 to i32		; <i32>:25 [#uses=1]
+	load i8* @uc, align 1		; <i8>:26 [#uses=1]
+	zext i8 %26 to i32		; <i32>:27 [#uses=1]
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:28 [#uses=1]
+	trunc i32 %27 to i16		; <i16>:29 [#uses=1]
+	trunc i32 %25 to i16		; <i16>:30 [#uses=1]
+	call i16 @llvm.atomic.cmp.swap.i16.p0i16( i16* %28, i16 %29, i16 %30 )		; <i16>:31 [#uses=1]
+	store i16 %31, i16* @us, align 2
+	load i8* @sc, align 1		; <i8>:32 [#uses=1]
+	sext i8 %32 to i32		; <i32>:33 [#uses=1]
+	load i8* @uc, align 1		; <i8>:34 [#uses=1]
+	zext i8 %34 to i32		; <i32>:35 [#uses=1]
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:36 [#uses=1]
+	call i32 @llvm.atomic.cmp.swap.i32.p0i32( i32* %36, i32 %35, i32 %33 )		; <i32>:37 [#uses=1]
+	store i32 %37, i32* @si, align 4
+	load i8* @sc, align 1		; <i8>:38 [#uses=1]
+	sext i8 %38 to i32		; <i32>:39 [#uses=1]
+	load i8* @uc, align 1		; <i8>:40 [#uses=1]
+	zext i8 %40 to i32		; <i32>:41 [#uses=1]
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:42 [#uses=1]
+	call i32 @llvm.atomic.cmp.swap.i32.p0i32( i32* %42, i32 %41, i32 %39 )		; <i32>:43 [#uses=1]
+	store i32 %43, i32* @ui, align 4
+	load i8* @sc, align 1		; <i8>:44 [#uses=1]
+	sext i8 %44 to i32		; <i32>:45 [#uses=1]
+	load i8* @uc, align 1		; <i8>:46 [#uses=1]
+	zext i8 %46 to i32		; <i32>:47 [#uses=1]
+	bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*>:48 [#uses=1]
+	call i32 @llvm.atomic.cmp.swap.i32.p0i32( i32* %48, i32 %47, i32 %45 )		; <i32>:49 [#uses=1]
+	store i32 %49, i32* @sl, align 4
+	load i8* @sc, align 1		; <i8>:50 [#uses=1]
+	sext i8 %50 to i32		; <i32>:51 [#uses=1]
+	load i8* @uc, align 1		; <i8>:52 [#uses=1]
+	zext i8 %52 to i32		; <i32>:53 [#uses=1]
+	bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*>:54 [#uses=1]
+	call i32 @llvm.atomic.cmp.swap.i32.p0i32( i32* %54, i32 %53, i32 %51 )		; <i32>:55 [#uses=1]
+	store i32 %55, i32* @ul, align 4
+	; Second half: boolean-result flavor.  Each cmp.swap's return value is
+	; compared against the expected operand; the i1 result is zext'd and
+	; stored to @ui.  Note the wider globals are accessed through i8 casts
+	; here, so only the low byte is swapped.
+	load i8* @sc, align 1		; <i8>:56 [#uses=1]
+	zext i8 %56 to i32		; <i32>:57 [#uses=1]
+	load i8* @uc, align 1		; <i8>:58 [#uses=1]
+	zext i8 %58 to i32		; <i32>:59 [#uses=1]
+	trunc i32 %59 to i8		; <i8>:60 [#uses=2]
+	trunc i32 %57 to i8		; <i8>:61 [#uses=1]
+	call i8 @llvm.atomic.cmp.swap.i8.p0i8( i8* @sc, i8 %60, i8 %61 )		; <i8>:62 [#uses=1]
+	icmp eq i8 %62, %60		; <i1>:63 [#uses=1]
+	zext i1 %63 to i8		; <i8>:64 [#uses=1]
+	zext i8 %64 to i32		; <i32>:65 [#uses=1]
+	store i32 %65, i32* @ui, align 4
+	load i8* @sc, align 1		; <i8>:66 [#uses=1]
+	zext i8 %66 to i32		; <i32>:67 [#uses=1]
+	load i8* @uc, align 1		; <i8>:68 [#uses=1]
+	zext i8 %68 to i32		; <i32>:69 [#uses=1]
+	trunc i32 %69 to i8		; <i8>:70 [#uses=2]
+	trunc i32 %67 to i8		; <i8>:71 [#uses=1]
+	call i8 @llvm.atomic.cmp.swap.i8.p0i8( i8* @uc, i8 %70, i8 %71 )		; <i8>:72 [#uses=1]
+	icmp eq i8 %72, %70		; <i1>:73 [#uses=1]
+	zext i1 %73 to i8		; <i8>:74 [#uses=1]
+	zext i8 %74 to i32		; <i32>:75 [#uses=1]
+	store i32 %75, i32* @ui, align 4
+	load i8* @sc, align 1		; <i8>:76 [#uses=1]
+	sext i8 %76 to i16		; <i16>:77 [#uses=1]
+	zext i16 %77 to i32		; <i32>:78 [#uses=1]
+	load i8* @uc, align 1		; <i8>:79 [#uses=1]
+	zext i8 %79 to i32		; <i32>:80 [#uses=1]
+	trunc i32 %80 to i8		; <i8>:81 [#uses=2]
+	trunc i32 %78 to i8		; <i8>:82 [#uses=1]
+	call i8 @llvm.atomic.cmp.swap.i8.p0i8( i8* bitcast (i16* @ss to i8*), i8 %81, i8 %82 )		; <i8>:83 [#uses=1]
+	icmp eq i8 %83, %81		; <i1>:84 [#uses=1]
+	zext i1 %84 to i8		; <i8>:85 [#uses=1]
+	zext i8 %85 to i32		; <i32>:86 [#uses=1]
+	store i32 %86, i32* @ui, align 4
+	load i8* @sc, align 1		; <i8>:87 [#uses=1]
+	sext i8 %87 to i16		; <i16>:88 [#uses=1]
+	zext i16 %88 to i32		; <i32>:89 [#uses=1]
+	load i8* @uc, align 1		; <i8>:90 [#uses=1]
+	zext i8 %90 to i32		; <i32>:91 [#uses=1]
+	trunc i32 %91 to i8		; <i8>:92 [#uses=2]
+	trunc i32 %89 to i8		; <i8>:93 [#uses=1]
+	call i8 @llvm.atomic.cmp.swap.i8.p0i8( i8* bitcast (i16* @us to i8*), i8 %92, i8 %93 )		; <i8>:94 [#uses=1]
+	icmp eq i8 %94, %92		; <i1>:95 [#uses=1]
+	zext i1 %95 to i8		; <i8>:96 [#uses=1]
+	zext i8 %96 to i32		; <i32>:97 [#uses=1]
+	store i32 %97, i32* @ui, align 4
+	load i8* @sc, align 1		; <i8>:98 [#uses=1]
+	sext i8 %98 to i32		; <i32>:99 [#uses=1]
+	load i8* @uc, align 1		; <i8>:100 [#uses=1]
+	zext i8 %100 to i32		; <i32>:101 [#uses=1]
+	trunc i32 %101 to i8		; <i8>:102 [#uses=2]
+	trunc i32 %99 to i8		; <i8>:103 [#uses=1]
+	call i8 @llvm.atomic.cmp.swap.i8.p0i8( i8* bitcast (i32* @si to i8*), i8 %102, i8 %103 )		; <i8>:104 [#uses=1]
+	icmp eq i8 %104, %102		; <i1>:105 [#uses=1]
+	zext i1 %105 to i8		; <i8>:106 [#uses=1]
+	zext i8 %106 to i32		; <i32>:107 [#uses=1]
+	store i32 %107, i32* @ui, align 4
+	load i8* @sc, align 1		; <i8>:108 [#uses=1]
+	sext i8 %108 to i32		; <i32>:109 [#uses=1]
+	load i8* @uc, align 1		; <i8>:110 [#uses=1]
+	zext i8 %110 to i32		; <i32>:111 [#uses=1]
+	trunc i32 %111 to i8		; <i8>:112 [#uses=2]
+	trunc i32 %109 to i8		; <i8>:113 [#uses=1]
+	call i8 @llvm.atomic.cmp.swap.i8.p0i8( i8* bitcast (i32* @ui to i8*), i8 %112, i8 %113 )		; <i8>:114 [#uses=1]
+	icmp eq i8 %114, %112		; <i1>:115 [#uses=1]
+	zext i1 %115 to i8		; <i8>:116 [#uses=1]
+	zext i8 %116 to i32		; <i32>:117 [#uses=1]
+	store i32 %117, i32* @ui, align 4
+	load i8* @sc, align 1		; <i8>:118 [#uses=1]
+	sext i8 %118 to i32		; <i32>:119 [#uses=1]
+	load i8* @uc, align 1		; <i8>:120 [#uses=1]
+	zext i8 %120 to i32		; <i32>:121 [#uses=1]
+	trunc i32 %121 to i8		; <i8>:122 [#uses=2]
+	trunc i32 %119 to i8		; <i8>:123 [#uses=1]
+	call i8 @llvm.atomic.cmp.swap.i8.p0i8( i8* bitcast (i32* @sl to i8*), i8 %122, i8 %123 )		; <i8>:124 [#uses=1]
+	icmp eq i8 %124, %122		; <i1>:125 [#uses=1]
+	zext i1 %125 to i8		; <i8>:126 [#uses=1]
+	zext i8 %126 to i32		; <i32>:127 [#uses=1]
+	store i32 %127, i32* @ui, align 4
+	load i8* @sc, align 1		; <i8>:128 [#uses=1]
+	sext i8 %128 to i32		; <i32>:129 [#uses=1]
+	load i8* @uc, align 1		; <i8>:130 [#uses=1]
+	zext i8 %130 to i32		; <i32>:131 [#uses=1]
+	trunc i32 %131 to i8		; <i8>:132 [#uses=2]
+	trunc i32 %129 to i8		; <i8>:133 [#uses=1]
+	call i8 @llvm.atomic.cmp.swap.i8.p0i8( i8* bitcast (i32* @ul to i8*), i8 %132, i8 %133 )		; <i8>:134 [#uses=1]
+	icmp eq i8 %134, %132		; <i1>:135 [#uses=1]
+	zext i1 %135 to i8		; <i8>:136 [#uses=1]
+	zext i8 %136 to i32		; <i32>:137 [#uses=1]
+	store i32 %137, i32* @ui, align 4
+	br label %return
+
+return:		; preds = %entry
+	ret void
+}
+
+; Pre-2.9 width-overloaded atomic compare-and-swap intrinsics:
+; (ptr, compare, new) -> old value at ptr.
+declare i8 @llvm.atomic.cmp.swap.i8.p0i8(i8*, i8, i8) nounwind
+
+declare i16 @llvm.atomic.cmp.swap.i16.p0i16(i16*, i16, i16) nounwind
+
+declare i32 @llvm.atomic.cmp.swap.i32.p0i32(i32*, i32, i32) nounwind
+
+; Lock-style sequence: atomically swap 1 into every global (storing the old
+; value back), issue a full memory barrier, then release with volatile stores
+; of 0.  Presumably generated from __sync_lock_test_and_set /
+; __sync_lock_release in Atomics.c -- TODO confirm against the C source.
+define void @test_lock() nounwind {
+entry:
+	call i8 @llvm.atomic.swap.i8.p0i8( i8* @sc, i8 1 )		; <i8>:0 [#uses=1]
+	store i8 %0, i8* @sc, align 1
+	call i8 @llvm.atomic.swap.i8.p0i8( i8* @uc, i8 1 )		; <i8>:1 [#uses=1]
+	store i8 %1, i8* @uc, align 1
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:2 [#uses=1]
+	call i16 @llvm.atomic.swap.i16.p0i16( i16* %2, i16 1 )		; <i16>:3 [#uses=1]
+	store i16 %3, i16* @ss, align 2
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:4 [#uses=1]
+	call i16 @llvm.atomic.swap.i16.p0i16( i16* %4, i16 1 )		; <i16>:5 [#uses=1]
+	store i16 %5, i16* @us, align 2
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:6 [#uses=1]
+	call i32 @llvm.atomic.swap.i32.p0i32( i32* %6, i32 1 )		; <i32>:7 [#uses=1]
+	store i32 %7, i32* @si, align 4
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:8 [#uses=1]
+	call i32 @llvm.atomic.swap.i32.p0i32( i32* %8, i32 1 )		; <i32>:9 [#uses=1]
+	store i32 %9, i32* @ui, align 4
+	bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*>:10 [#uses=1]
+	call i32 @llvm.atomic.swap.i32.p0i32( i32* %10, i32 1 )		; <i32>:11 [#uses=1]
+	store i32 %11, i32* @sl, align 4
+	bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*>:12 [#uses=1]
+	call i32 @llvm.atomic.swap.i32.p0i32( i32* %12, i32 1 )		; <i32>:13 [#uses=1]
+	store i32 %13, i32* @ul, align 4
+	; Full barrier (loads & stores, both before and after; last arg i1 false
+	; = not device I/O), then release each "lock" with a volatile zero store.
+	call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 true, i1 false )
+	volatile store i8 0, i8* @sc, align 1
+	volatile store i8 0, i8* @uc, align 1
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:14 [#uses=1]
+	volatile store i16 0, i16* %14, align 2
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:15 [#uses=1]
+	volatile store i16 0, i16* %15, align 2
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:16 [#uses=1]
+	volatile store i32 0, i32* %16, align 4
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:17 [#uses=1]
+	volatile store i32 0, i32* %17, align 4
+	bitcast i8* bitcast (i32* @sl to i8*) to i32*		; <i32*>:18 [#uses=1]
+	volatile store i32 0, i32* %18, align 4
+	bitcast i8* bitcast (i32* @ul to i8*) to i32*		; <i32*>:19 [#uses=1]
+	volatile store i32 0, i32* %19, align 4
+	br label %return
+
+return:		; preds = %entry
+	ret void
+}
+
+; Pre-2.9 atomic exchange intrinsics ((ptr, new) -> old value) and the
+; five-flag memory barrier (load-load, load-store, store-load, store-store,
+; device).
+declare i8 @llvm.atomic.swap.i8.p0i8(i8*, i8) nounwind
+
+declare i16 @llvm.atomic.swap.i16.p0i16(i16*, i16) nounwind
+
+declare i32 @llvm.atomic.swap.i32.p0i32(i32*, i32) nounwind
+
+declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind
diff --git a/test/CodeGen/X86/Atomics-64.ll b/test/CodeGen/X86/Atomics-64.ll
new file mode 100644
index 0000000..ac174b9
--- /dev/null
+++ b/test/CodeGen/X86/Atomics-64.ll
@@ -0,0 +1,1015 @@
+; RUN: llc < %s -march=x86-64 > %t
+; ModuleID = 'Atomics.c'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin8"
+; Signed/unsigned global pairs at each width the 64-bit atomic tests operate
+; on: char (i8), short (i16), int (i32), long and long long (both i64 on
+; x86_64-darwin).
+@sc = common global i8 0		; <i8*> [#uses=56]
+@uc = common global i8 0		; <i8*> [#uses=116]
+@ss = common global i16 0		; <i16*> [#uses=15]
+@us = common global i16 0		; <i16*> [#uses=15]
+@si = common global i32 0		; <i32*> [#uses=15]
+@ui = common global i32 0		; <i32*> [#uses=25]
+@sl = common global i64 0		; <i64*> [#uses=15]
+@ul = common global i64 0		; <i64*> [#uses=15]
+@sll = common global i64 0		; <i64*> [#uses=15]
+@ull = common global i64 0		; <i64*> [#uses=15]
+
+; Atomic read-modify-write (add, sub, or, xor, and, nand) at i8/i16/i32/i64
+; with every result unused ([#uses=0]) -- exercises codegen of the RMW
+; intrinsics when the fetched value is discarded.
+define void @test_op_ignore() nounwind {
+entry:
+	call i8 @llvm.atomic.load.add.i8.p0i8( i8* @sc, i8 1 )		; <i8>:0 [#uses=0]
+	call i8 @llvm.atomic.load.add.i8.p0i8( i8* @uc, i8 1 )		; <i8>:1 [#uses=0]
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:2 [#uses=1]
+	call i16 @llvm.atomic.load.add.i16.p0i16( i16* %2, i16 1 )		; <i16>:3 [#uses=0]
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:4 [#uses=1]
+	call i16 @llvm.atomic.load.add.i16.p0i16( i16* %4, i16 1 )		; <i16>:5 [#uses=0]
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:6 [#uses=1]
+	call i32 @llvm.atomic.load.add.i32.p0i32( i32* %6, i32 1 )		; <i32>:7 [#uses=0]
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:8 [#uses=1]
+	call i32 @llvm.atomic.load.add.i32.p0i32( i32* %8, i32 1 )		; <i32>:9 [#uses=0]
+	bitcast i8* bitcast (i64* @sl to i8*) to i64*		; <i64*>:10 [#uses=1]
+	call i64 @llvm.atomic.load.add.i64.p0i64( i64* %10, i64 1 )		; <i64>:11 [#uses=0]
+	bitcast i8* bitcast (i64* @ul to i8*) to i64*		; <i64*>:12 [#uses=1]
+	call i64 @llvm.atomic.load.add.i64.p0i64( i64* %12, i64 1 )		; <i64>:13 [#uses=0]
+	bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*>:14 [#uses=1]
+	call i64 @llvm.atomic.load.add.i64.p0i64( i64* %14, i64 1 )		; <i64>:15 [#uses=0]
+	bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*>:16 [#uses=1]
+	call i64 @llvm.atomic.load.add.i64.p0i64( i64* %16, i64 1 )		; <i64>:17 [#uses=0]
+	call i8 @llvm.atomic.load.sub.i8.p0i8( i8* @sc, i8 1 )		; <i8>:18 [#uses=0]
+	call i8 @llvm.atomic.load.sub.i8.p0i8( i8* @uc, i8 1 )		; <i8>:19 [#uses=0]
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:20 [#uses=1]
+	call i16 @llvm.atomic.load.sub.i16.p0i16( i16* %20, i16 1 )		; <i16>:21 [#uses=0]
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:22 [#uses=1]
+	call i16 @llvm.atomic.load.sub.i16.p0i16( i16* %22, i16 1 )		; <i16>:23 [#uses=0]
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:24 [#uses=1]
+	call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %24, i32 1 )		; <i32>:25 [#uses=0]
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:26 [#uses=1]
+	call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %26, i32 1 )		; <i32>:27 [#uses=0]
+	bitcast i8* bitcast (i64* @sl to i8*) to i64*		; <i64*>:28 [#uses=1]
+	call i64 @llvm.atomic.load.sub.i64.p0i64( i64* %28, i64 1 )		; <i64>:29 [#uses=0]
+	bitcast i8* bitcast (i64* @ul to i8*) to i64*		; <i64*>:30 [#uses=1]
+	call i64 @llvm.atomic.load.sub.i64.p0i64( i64* %30, i64 1 )		; <i64>:31 [#uses=0]
+	bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*>:32 [#uses=1]
+	call i64 @llvm.atomic.load.sub.i64.p0i64( i64* %32, i64 1 )		; <i64>:33 [#uses=0]
+	bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*>:34 [#uses=1]
+	call i64 @llvm.atomic.load.sub.i64.p0i64( i64* %34, i64 1 )		; <i64>:35 [#uses=0]
+	call i8 @llvm.atomic.load.or.i8.p0i8( i8* @sc, i8 1 )		; <i8>:36 [#uses=0]
+	call i8 @llvm.atomic.load.or.i8.p0i8( i8* @uc, i8 1 )		; <i8>:37 [#uses=0]
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:38 [#uses=1]
+	call i16 @llvm.atomic.load.or.i16.p0i16( i16* %38, i16 1 )		; <i16>:39 [#uses=0]
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:40 [#uses=1]
+	call i16 @llvm.atomic.load.or.i16.p0i16( i16* %40, i16 1 )		; <i16>:41 [#uses=0]
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:42 [#uses=1]
+	call i32 @llvm.atomic.load.or.i32.p0i32( i32* %42, i32 1 )		; <i32>:43 [#uses=0]
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:44 [#uses=1]
+	call i32 @llvm.atomic.load.or.i32.p0i32( i32* %44, i32 1 )		; <i32>:45 [#uses=0]
+	bitcast i8* bitcast (i64* @sl to i8*) to i64*		; <i64*>:46 [#uses=1]
+	call i64 @llvm.atomic.load.or.i64.p0i64( i64* %46, i64 1 )		; <i64>:47 [#uses=0]
+	bitcast i8* bitcast (i64* @ul to i8*) to i64*		; <i64*>:48 [#uses=1]
+	call i64 @llvm.atomic.load.or.i64.p0i64( i64* %48, i64 1 )		; <i64>:49 [#uses=0]
+	bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*>:50 [#uses=1]
+	call i64 @llvm.atomic.load.or.i64.p0i64( i64* %50, i64 1 )		; <i64>:51 [#uses=0]
+	bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*>:52 [#uses=1]
+	call i64 @llvm.atomic.load.or.i64.p0i64( i64* %52, i64 1 )		; <i64>:53 [#uses=0]
+	call i8 @llvm.atomic.load.xor.i8.p0i8( i8* @sc, i8 1 )		; <i8>:54 [#uses=0]
+	call i8 @llvm.atomic.load.xor.i8.p0i8( i8* @uc, i8 1 )		; <i8>:55 [#uses=0]
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:56 [#uses=1]
+	call i16 @llvm.atomic.load.xor.i16.p0i16( i16* %56, i16 1 )		; <i16>:57 [#uses=0]
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:58 [#uses=1]
+	call i16 @llvm.atomic.load.xor.i16.p0i16( i16* %58, i16 1 )		; <i16>:59 [#uses=0]
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:60 [#uses=1]
+	call i32 @llvm.atomic.load.xor.i32.p0i32( i32* %60, i32 1 )		; <i32>:61 [#uses=0]
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:62 [#uses=1]
+	call i32 @llvm.atomic.load.xor.i32.p0i32( i32* %62, i32 1 )		; <i32>:63 [#uses=0]
+	bitcast i8* bitcast (i64* @sl to i8*) to i64*		; <i64*>:64 [#uses=1]
+	call i64 @llvm.atomic.load.xor.i64.p0i64( i64* %64, i64 1 )		; <i64>:65 [#uses=0]
+	bitcast i8* bitcast (i64* @ul to i8*) to i64*		; <i64*>:66 [#uses=1]
+	call i64 @llvm.atomic.load.xor.i64.p0i64( i64* %66, i64 1 )		; <i64>:67 [#uses=0]
+	bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*>:68 [#uses=1]
+	call i64 @llvm.atomic.load.xor.i64.p0i64( i64* %68, i64 1 )		; <i64>:69 [#uses=0]
+	bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*>:70 [#uses=1]
+	call i64 @llvm.atomic.load.xor.i64.p0i64( i64* %70, i64 1 )		; <i64>:71 [#uses=0]
+	call i8 @llvm.atomic.load.and.i8.p0i8( i8* @sc, i8 1 )		; <i8>:72 [#uses=0]
+	call i8 @llvm.atomic.load.and.i8.p0i8( i8* @uc, i8 1 )		; <i8>:73 [#uses=0]
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:74 [#uses=1]
+	call i16 @llvm.atomic.load.and.i16.p0i16( i16* %74, i16 1 )		; <i16>:75 [#uses=0]
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:76 [#uses=1]
+	call i16 @llvm.atomic.load.and.i16.p0i16( i16* %76, i16 1 )		; <i16>:77 [#uses=0]
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:78 [#uses=1]
+	call i32 @llvm.atomic.load.and.i32.p0i32( i32* %78, i32 1 )		; <i32>:79 [#uses=0]
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:80 [#uses=1]
+	call i32 @llvm.atomic.load.and.i32.p0i32( i32* %80, i32 1 )		; <i32>:81 [#uses=0]
+	bitcast i8* bitcast (i64* @sl to i8*) to i64*		; <i64*>:82 [#uses=1]
+	call i64 @llvm.atomic.load.and.i64.p0i64( i64* %82, i64 1 )		; <i64>:83 [#uses=0]
+	bitcast i8* bitcast (i64* @ul to i8*) to i64*		; <i64*>:84 [#uses=1]
+	call i64 @llvm.atomic.load.and.i64.p0i64( i64* %84, i64 1 )		; <i64>:85 [#uses=0]
+	bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*>:86 [#uses=1]
+	call i64 @llvm.atomic.load.and.i64.p0i64( i64* %86, i64 1 )		; <i64>:87 [#uses=0]
+	bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*>:88 [#uses=1]
+	call i64 @llvm.atomic.load.and.i64.p0i64( i64* %88, i64 1 )		; <i64>:89 [#uses=0]
+	call i8 @llvm.atomic.load.nand.i8.p0i8( i8* @sc, i8 1 )		; <i8>:90 [#uses=0]
+	call i8 @llvm.atomic.load.nand.i8.p0i8( i8* @uc, i8 1 )		; <i8>:91 [#uses=0]
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:92 [#uses=1]
+	call i16 @llvm.atomic.load.nand.i16.p0i16( i16* %92, i16 1 )		; <i16>:93 [#uses=0]
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:94 [#uses=1]
+	call i16 @llvm.atomic.load.nand.i16.p0i16( i16* %94, i16 1 )		; <i16>:95 [#uses=0]
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:96 [#uses=1]
+	call i32 @llvm.atomic.load.nand.i32.p0i32( i32* %96, i32 1 )		; <i32>:97 [#uses=0]
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:98 [#uses=1]
+	call i32 @llvm.atomic.load.nand.i32.p0i32( i32* %98, i32 1 )		; <i32>:99 [#uses=0]
+	bitcast i8* bitcast (i64* @sl to i8*) to i64*		; <i64*>:100 [#uses=1]
+	call i64 @llvm.atomic.load.nand.i64.p0i64( i64* %100, i64 1 )		; <i64>:101 [#uses=0]
+	bitcast i8* bitcast (i64* @ul to i8*) to i64*		; <i64*>:102 [#uses=1]
+	call i64 @llvm.atomic.load.nand.i64.p0i64( i64* %102, i64 1 )		; <i64>:103 [#uses=0]
+	bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*>:104 [#uses=1]
+	call i64 @llvm.atomic.load.nand.i64.p0i64( i64* %104, i64 1 )		; <i64>:105 [#uses=0]
+	bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*>:106 [#uses=1]
+	call i64 @llvm.atomic.load.nand.i64.p0i64( i64* %106, i64 1 )		; <i64>:107 [#uses=0]
+	br label %return
+
+return:		; preds = %entry
+	ret void
+}
+
+; Pre-2.9 atomic fetch-and-op intrinsics ((ptr, operand) -> old value at ptr),
+; one family per operation (add, sub, or, xor, and, nand), each overloaded at
+; i8/i16/i32/i64.
+declare i8 @llvm.atomic.load.add.i8.p0i8(i8*, i8) nounwind
+
+declare i16 @llvm.atomic.load.add.i16.p0i16(i16*, i16) nounwind
+
+declare i32 @llvm.atomic.load.add.i32.p0i32(i32*, i32) nounwind
+
+declare i64 @llvm.atomic.load.add.i64.p0i64(i64*, i64) nounwind
+
+declare i8 @llvm.atomic.load.sub.i8.p0i8(i8*, i8) nounwind
+
+declare i16 @llvm.atomic.load.sub.i16.p0i16(i16*, i16) nounwind
+
+declare i32 @llvm.atomic.load.sub.i32.p0i32(i32*, i32) nounwind
+
+declare i64 @llvm.atomic.load.sub.i64.p0i64(i64*, i64) nounwind
+
+declare i8 @llvm.atomic.load.or.i8.p0i8(i8*, i8) nounwind
+
+declare i16 @llvm.atomic.load.or.i16.p0i16(i16*, i16) nounwind
+
+declare i32 @llvm.atomic.load.or.i32.p0i32(i32*, i32) nounwind
+
+declare i64 @llvm.atomic.load.or.i64.p0i64(i64*, i64) nounwind
+
+declare i8 @llvm.atomic.load.xor.i8.p0i8(i8*, i8) nounwind
+
+declare i16 @llvm.atomic.load.xor.i16.p0i16(i16*, i16) nounwind
+
+declare i32 @llvm.atomic.load.xor.i32.p0i32(i32*, i32) nounwind
+
+declare i64 @llvm.atomic.load.xor.i64.p0i64(i64*, i64) nounwind
+
+declare i8 @llvm.atomic.load.and.i8.p0i8(i8*, i8) nounwind
+
+declare i16 @llvm.atomic.load.and.i16.p0i16(i16*, i16) nounwind
+
+declare i32 @llvm.atomic.load.and.i32.p0i32(i32*, i32) nounwind
+
+declare i64 @llvm.atomic.load.and.i64.p0i64(i64*, i64) nounwind
+
+declare i8 @llvm.atomic.load.nand.i8.p0i8(i8*, i8) nounwind
+
+declare i16 @llvm.atomic.load.nand.i16.p0i16(i16*, i16) nounwind
+
+declare i32 @llvm.atomic.load.nand.i32.p0i32(i32*, i32) nounwind
+
+declare i64 @llvm.atomic.load.nand.i64.p0i64(i64*, i64) nounwind
+
+define void @test_fetch_and_op() nounwind {
+entry:
+	call i8 @llvm.atomic.load.add.i8.p0i8( i8* @sc, i8 11 )		; <i8>:0 [#uses=1]
+	store i8 %0, i8* @sc, align 1
+	call i8 @llvm.atomic.load.add.i8.p0i8( i8* @uc, i8 11 )		; <i8>:1 [#uses=1]
+	store i8 %1, i8* @uc, align 1
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:2 [#uses=1]
+	call i16 @llvm.atomic.load.add.i16.p0i16( i16* %2, i16 11 )		; <i16>:3 [#uses=1]
+	store i16 %3, i16* @ss, align 2
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:4 [#uses=1]
+	call i16 @llvm.atomic.load.add.i16.p0i16( i16* %4, i16 11 )		; <i16>:5 [#uses=1]
+	store i16 %5, i16* @us, align 2
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:6 [#uses=1]
+	call i32 @llvm.atomic.load.add.i32.p0i32( i32* %6, i32 11 )		; <i32>:7 [#uses=1]
+	store i32 %7, i32* @si, align 4
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:8 [#uses=1]
+	call i32 @llvm.atomic.load.add.i32.p0i32( i32* %8, i32 11 )		; <i32>:9 [#uses=1]
+	store i32 %9, i32* @ui, align 4
+	bitcast i8* bitcast (i64* @sl to i8*) to i64*		; <i64*>:10 [#uses=1]
+	call i64 @llvm.atomic.load.add.i64.p0i64( i64* %10, i64 11 )		; <i64>:11 [#uses=1]
+	store i64 %11, i64* @sl, align 8
+	bitcast i8* bitcast (i64* @ul to i8*) to i64*		; <i64*>:12 [#uses=1]
+	call i64 @llvm.atomic.load.add.i64.p0i64( i64* %12, i64 11 )		; <i64>:13 [#uses=1]
+	store i64 %13, i64* @ul, align 8
+	bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*>:14 [#uses=1]
+	call i64 @llvm.atomic.load.add.i64.p0i64( i64* %14, i64 11 )		; <i64>:15 [#uses=1]
+	store i64 %15, i64* @sll, align 8
+	bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*>:16 [#uses=1]
+	call i64 @llvm.atomic.load.add.i64.p0i64( i64* %16, i64 11 )		; <i64>:17 [#uses=1]
+	store i64 %17, i64* @ull, align 8
+	call i8 @llvm.atomic.load.sub.i8.p0i8( i8* @sc, i8 11 )		; <i8>:18 [#uses=1]
+	store i8 %18, i8* @sc, align 1
+	call i8 @llvm.atomic.load.sub.i8.p0i8( i8* @uc, i8 11 )		; <i8>:19 [#uses=1]
+	store i8 %19, i8* @uc, align 1
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:20 [#uses=1]
+	call i16 @llvm.atomic.load.sub.i16.p0i16( i16* %20, i16 11 )		; <i16>:21 [#uses=1]
+	store i16 %21, i16* @ss, align 2
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:22 [#uses=1]
+	call i16 @llvm.atomic.load.sub.i16.p0i16( i16* %22, i16 11 )		; <i16>:23 [#uses=1]
+	store i16 %23, i16* @us, align 2
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:24 [#uses=1]
+	call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %24, i32 11 )		; <i32>:25 [#uses=1]
+	store i32 %25, i32* @si, align 4
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:26 [#uses=1]
+	call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %26, i32 11 )		; <i32>:27 [#uses=1]
+	store i32 %27, i32* @ui, align 4
+	bitcast i8* bitcast (i64* @sl to i8*) to i64*		; <i64*>:28 [#uses=1]
+	call i64 @llvm.atomic.load.sub.i64.p0i64( i64* %28, i64 11 )		; <i64>:29 [#uses=1]
+	store i64 %29, i64* @sl, align 8
+	bitcast i8* bitcast (i64* @ul to i8*) to i64*		; <i64*>:30 [#uses=1]
+	call i64 @llvm.atomic.load.sub.i64.p0i64( i64* %30, i64 11 )		; <i64>:31 [#uses=1]
+	store i64 %31, i64* @ul, align 8
+	bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*>:32 [#uses=1]
+	call i64 @llvm.atomic.load.sub.i64.p0i64( i64* %32, i64 11 )		; <i64>:33 [#uses=1]
+	store i64 %33, i64* @sll, align 8
+	bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*>:34 [#uses=1]
+	call i64 @llvm.atomic.load.sub.i64.p0i64( i64* %34, i64 11 )		; <i64>:35 [#uses=1]
+	store i64 %35, i64* @ull, align 8
+	call i8 @llvm.atomic.load.or.i8.p0i8( i8* @sc, i8 11 )		; <i8>:36 [#uses=1]
+	store i8 %36, i8* @sc, align 1
+	call i8 @llvm.atomic.load.or.i8.p0i8( i8* @uc, i8 11 )		; <i8>:37 [#uses=1]
+	store i8 %37, i8* @uc, align 1
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:38 [#uses=1]
+	call i16 @llvm.atomic.load.or.i16.p0i16( i16* %38, i16 11 )		; <i16>:39 [#uses=1]
+	store i16 %39, i16* @ss, align 2
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:40 [#uses=1]
+	call i16 @llvm.atomic.load.or.i16.p0i16( i16* %40, i16 11 )		; <i16>:41 [#uses=1]
+	store i16 %41, i16* @us, align 2
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:42 [#uses=1]
+	call i32 @llvm.atomic.load.or.i32.p0i32( i32* %42, i32 11 )		; <i32>:43 [#uses=1]
+	store i32 %43, i32* @si, align 4
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:44 [#uses=1]
+	call i32 @llvm.atomic.load.or.i32.p0i32( i32* %44, i32 11 )		; <i32>:45 [#uses=1]
+	store i32 %45, i32* @ui, align 4
+	bitcast i8* bitcast (i64* @sl to i8*) to i64*		; <i64*>:46 [#uses=1]
+	call i64 @llvm.atomic.load.or.i64.p0i64( i64* %46, i64 11 )		; <i64>:47 [#uses=1]
+	store i64 %47, i64* @sl, align 8
+	bitcast i8* bitcast (i64* @ul to i8*) to i64*		; <i64*>:48 [#uses=1]
+	call i64 @llvm.atomic.load.or.i64.p0i64( i64* %48, i64 11 )		; <i64>:49 [#uses=1]
+	store i64 %49, i64* @ul, align 8
+	bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*>:50 [#uses=1]
+	call i64 @llvm.atomic.load.or.i64.p0i64( i64* %50, i64 11 )		; <i64>:51 [#uses=1]
+	store i64 %51, i64* @sll, align 8
+	bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*>:52 [#uses=1]
+	call i64 @llvm.atomic.load.or.i64.p0i64( i64* %52, i64 11 )		; <i64>:53 [#uses=1]
+	store i64 %53, i64* @ull, align 8
+	call i8 @llvm.atomic.load.xor.i8.p0i8( i8* @sc, i8 11 )		; <i8>:54 [#uses=1]
+	store i8 %54, i8* @sc, align 1
+	call i8 @llvm.atomic.load.xor.i8.p0i8( i8* @uc, i8 11 )		; <i8>:55 [#uses=1]
+	store i8 %55, i8* @uc, align 1
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:56 [#uses=1]
+	call i16 @llvm.atomic.load.xor.i16.p0i16( i16* %56, i16 11 )		; <i16>:57 [#uses=1]
+	store i16 %57, i16* @ss, align 2
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:58 [#uses=1]
+	call i16 @llvm.atomic.load.xor.i16.p0i16( i16* %58, i16 11 )		; <i16>:59 [#uses=1]
+	store i16 %59, i16* @us, align 2
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:60 [#uses=1]
+	call i32 @llvm.atomic.load.xor.i32.p0i32( i32* %60, i32 11 )		; <i32>:61 [#uses=1]
+	store i32 %61, i32* @si, align 4
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:62 [#uses=1]
+	call i32 @llvm.atomic.load.xor.i32.p0i32( i32* %62, i32 11 )		; <i32>:63 [#uses=1]
+	store i32 %63, i32* @ui, align 4
+	bitcast i8* bitcast (i64* @sl to i8*) to i64*		; <i64*>:64 [#uses=1]
+	call i64 @llvm.atomic.load.xor.i64.p0i64( i64* %64, i64 11 )		; <i64>:65 [#uses=1]
+	store i64 %65, i64* @sl, align 8
+	bitcast i8* bitcast (i64* @ul to i8*) to i64*		; <i64*>:66 [#uses=1]
+	call i64 @llvm.atomic.load.xor.i64.p0i64( i64* %66, i64 11 )		; <i64>:67 [#uses=1]
+	store i64 %67, i64* @ul, align 8
+	bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*>:68 [#uses=1]
+	call i64 @llvm.atomic.load.xor.i64.p0i64( i64* %68, i64 11 )		; <i64>:69 [#uses=1]
+	store i64 %69, i64* @sll, align 8
+	bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*>:70 [#uses=1]
+	call i64 @llvm.atomic.load.xor.i64.p0i64( i64* %70, i64 11 )		; <i64>:71 [#uses=1]
+	store i64 %71, i64* @ull, align 8
+	call i8 @llvm.atomic.load.and.i8.p0i8( i8* @sc, i8 11 )		; <i8>:72 [#uses=1]
+	store i8 %72, i8* @sc, align 1
+	call i8 @llvm.atomic.load.and.i8.p0i8( i8* @uc, i8 11 )		; <i8>:73 [#uses=1]
+	store i8 %73, i8* @uc, align 1
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:74 [#uses=1]
+	call i16 @llvm.atomic.load.and.i16.p0i16( i16* %74, i16 11 )		; <i16>:75 [#uses=1]
+	store i16 %75, i16* @ss, align 2
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:76 [#uses=1]
+	call i16 @llvm.atomic.load.and.i16.p0i16( i16* %76, i16 11 )		; <i16>:77 [#uses=1]
+	store i16 %77, i16* @us, align 2
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:78 [#uses=1]
+	call i32 @llvm.atomic.load.and.i32.p0i32( i32* %78, i32 11 )		; <i32>:79 [#uses=1]
+	store i32 %79, i32* @si, align 4
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:80 [#uses=1]
+	call i32 @llvm.atomic.load.and.i32.p0i32( i32* %80, i32 11 )		; <i32>:81 [#uses=1]
+	store i32 %81, i32* @ui, align 4
+	bitcast i8* bitcast (i64* @sl to i8*) to i64*		; <i64*>:82 [#uses=1]
+	call i64 @llvm.atomic.load.and.i64.p0i64( i64* %82, i64 11 )		; <i64>:83 [#uses=1]
+	store i64 %83, i64* @sl, align 8
+	bitcast i8* bitcast (i64* @ul to i8*) to i64*		; <i64*>:84 [#uses=1]
+	call i64 @llvm.atomic.load.and.i64.p0i64( i64* %84, i64 11 )		; <i64>:85 [#uses=1]
+	store i64 %85, i64* @ul, align 8
+	bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*>:86 [#uses=1]
+	call i64 @llvm.atomic.load.and.i64.p0i64( i64* %86, i64 11 )		; <i64>:87 [#uses=1]
+	store i64 %87, i64* @sll, align 8
+	bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*>:88 [#uses=1]
+	call i64 @llvm.atomic.load.and.i64.p0i64( i64* %88, i64 11 )		; <i64>:89 [#uses=1]
+	store i64 %89, i64* @ull, align 8
+	call i8 @llvm.atomic.load.nand.i8.p0i8( i8* @sc, i8 11 )		; <i8>:90 [#uses=1]
+	store i8 %90, i8* @sc, align 1
+	call i8 @llvm.atomic.load.nand.i8.p0i8( i8* @uc, i8 11 )		; <i8>:91 [#uses=1]
+	store i8 %91, i8* @uc, align 1
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:92 [#uses=1]
+	call i16 @llvm.atomic.load.nand.i16.p0i16( i16* %92, i16 11 )		; <i16>:93 [#uses=1]
+	store i16 %93, i16* @ss, align 2
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:94 [#uses=1]
+	call i16 @llvm.atomic.load.nand.i16.p0i16( i16* %94, i16 11 )		; <i16>:95 [#uses=1]
+	store i16 %95, i16* @us, align 2
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:96 [#uses=1]
+	call i32 @llvm.atomic.load.nand.i32.p0i32( i32* %96, i32 11 )		; <i32>:97 [#uses=1]
+	store i32 %97, i32* @si, align 4
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:98 [#uses=1]
+	call i32 @llvm.atomic.load.nand.i32.p0i32( i32* %98, i32 11 )		; <i32>:99 [#uses=1]
+	store i32 %99, i32* @ui, align 4
+	bitcast i8* bitcast (i64* @sl to i8*) to i64*		; <i64*>:100 [#uses=1]
+	call i64 @llvm.atomic.load.nand.i64.p0i64( i64* %100, i64 11 )		; <i64>:101 [#uses=1]
+	store i64 %101, i64* @sl, align 8
+	bitcast i8* bitcast (i64* @ul to i8*) to i64*		; <i64*>:102 [#uses=1]
+	call i64 @llvm.atomic.load.nand.i64.p0i64( i64* %102, i64 11 )		; <i64>:103 [#uses=1]
+	store i64 %103, i64* @ul, align 8
+	bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*>:104 [#uses=1]
+	call i64 @llvm.atomic.load.nand.i64.p0i64( i64* %104, i64 11 )		; <i64>:105 [#uses=1]
+	store i64 %105, i64* @sll, align 8
+	bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*>:106 [#uses=1]
+	call i64 @llvm.atomic.load.nand.i64.p0i64( i64* %106, i64 11 )		; <i64>:107 [#uses=1]
+	store i64 %107, i64* @ull, align 8
+	br label %return
+
+return:		; preds = %entry
+	ret void
+}
+
+define void @test_op_and_fetch() nounwind {	; each sequence: atomic RMW intrinsic (returns fetched old value), recompute the post-op value, store it back
+entry:
+	load i8* @uc, align 1		; <i8>:0 [#uses=1] ; add group: @sc, @uc, @ss, @us, @si, @ui, @sl, @ul, @sll, @ull
+	zext i8 %0 to i32		; <i32>:1 [#uses=1]
+	trunc i32 %1 to i8		; <i8>:2 [#uses=2]
+	call i8 @llvm.atomic.load.add.i8.p0i8( i8* @sc, i8 %2 )		; <i8>:3 [#uses=1]
+	add i8 %3, %2		; <i8>:4 [#uses=1]
+	store i8 %4, i8* @sc, align 1
+	load i8* @uc, align 1		; <i8>:5 [#uses=1]
+	zext i8 %5 to i32		; <i32>:6 [#uses=1]
+	trunc i32 %6 to i8		; <i8>:7 [#uses=2]
+	call i8 @llvm.atomic.load.add.i8.p0i8( i8* @uc, i8 %7 )		; <i8>:8 [#uses=1]
+	add i8 %8, %7		; <i8>:9 [#uses=1]
+	store i8 %9, i8* @uc, align 1
+	load i8* @uc, align 1		; <i8>:10 [#uses=1]
+	zext i8 %10 to i32		; <i32>:11 [#uses=1]
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:12 [#uses=1]
+	trunc i32 %11 to i16		; <i16>:13 [#uses=2]
+	call i16 @llvm.atomic.load.add.i16.p0i16( i16* %12, i16 %13 )		; <i16>:14 [#uses=1]
+	add i16 %14, %13		; <i16>:15 [#uses=1]
+	store i16 %15, i16* @ss, align 2
+	load i8* @uc, align 1		; <i8>:16 [#uses=1]
+	zext i8 %16 to i32		; <i32>:17 [#uses=1]
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:18 [#uses=1]
+	trunc i32 %17 to i16		; <i16>:19 [#uses=2]
+	call i16 @llvm.atomic.load.add.i16.p0i16( i16* %18, i16 %19 )		; <i16>:20 [#uses=1]
+	add i16 %20, %19		; <i16>:21 [#uses=1]
+	store i16 %21, i16* @us, align 2
+	load i8* @uc, align 1		; <i8>:22 [#uses=1]
+	zext i8 %22 to i32		; <i32>:23 [#uses=2]
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:24 [#uses=1]
+	call i32 @llvm.atomic.load.add.i32.p0i32( i32* %24, i32 %23 )		; <i32>:25 [#uses=1]
+	add i32 %25, %23		; <i32>:26 [#uses=1]
+	store i32 %26, i32* @si, align 4
+	load i8* @uc, align 1		; <i8>:27 [#uses=1]
+	zext i8 %27 to i32		; <i32>:28 [#uses=2]
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:29 [#uses=1]
+	call i32 @llvm.atomic.load.add.i32.p0i32( i32* %29, i32 %28 )		; <i32>:30 [#uses=1]
+	add i32 %30, %28		; <i32>:31 [#uses=1]
+	store i32 %31, i32* @ui, align 4
+	load i8* @uc, align 1		; <i8>:32 [#uses=1]
+	zext i8 %32 to i64		; <i64>:33 [#uses=2]
+	bitcast i8* bitcast (i64* @sl to i8*) to i64*		; <i64*>:34 [#uses=1]
+	call i64 @llvm.atomic.load.add.i64.p0i64( i64* %34, i64 %33 )		; <i64>:35 [#uses=1]
+	add i64 %35, %33		; <i64>:36 [#uses=1]
+	store i64 %36, i64* @sl, align 8
+	load i8* @uc, align 1		; <i8>:37 [#uses=1]
+	zext i8 %37 to i64		; <i64>:38 [#uses=2]
+	bitcast i8* bitcast (i64* @ul to i8*) to i64*		; <i64*>:39 [#uses=1]
+	call i64 @llvm.atomic.load.add.i64.p0i64( i64* %39, i64 %38 )		; <i64>:40 [#uses=1]
+	add i64 %40, %38		; <i64>:41 [#uses=1]
+	store i64 %41, i64* @ul, align 8
+	load i8* @uc, align 1		; <i8>:42 [#uses=1]
+	zext i8 %42 to i64		; <i64>:43 [#uses=2]
+	bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*>:44 [#uses=1]
+	call i64 @llvm.atomic.load.add.i64.p0i64( i64* %44, i64 %43 )		; <i64>:45 [#uses=1]
+	add i64 %45, %43		; <i64>:46 [#uses=1]
+	store i64 %46, i64* @sll, align 8
+	load i8* @uc, align 1		; <i8>:47 [#uses=1]
+	zext i8 %47 to i64		; <i64>:48 [#uses=2]
+	bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*>:49 [#uses=1]
+	call i64 @llvm.atomic.load.add.i64.p0i64( i64* %49, i64 %48 )		; <i64>:50 [#uses=1]
+	add i64 %50, %48		; <i64>:51 [#uses=1]
+	store i64 %51, i64* @ull, align 8
+	load i8* @uc, align 1		; <i8>:52 [#uses=1] ; sub group
+	zext i8 %52 to i32		; <i32>:53 [#uses=1]
+	trunc i32 %53 to i8		; <i8>:54 [#uses=2]
+	call i8 @llvm.atomic.load.sub.i8.p0i8( i8* @sc, i8 %54 )		; <i8>:55 [#uses=1]
+	sub i8 %55, %54		; <i8>:56 [#uses=1]
+	store i8 %56, i8* @sc, align 1
+	load i8* @uc, align 1		; <i8>:57 [#uses=1]
+	zext i8 %57 to i32		; <i32>:58 [#uses=1]
+	trunc i32 %58 to i8		; <i8>:59 [#uses=2]
+	call i8 @llvm.atomic.load.sub.i8.p0i8( i8* @uc, i8 %59 )		; <i8>:60 [#uses=1]
+	sub i8 %60, %59		; <i8>:61 [#uses=1]
+	store i8 %61, i8* @uc, align 1
+	load i8* @uc, align 1		; <i8>:62 [#uses=1]
+	zext i8 %62 to i32		; <i32>:63 [#uses=1]
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:64 [#uses=1]
+	trunc i32 %63 to i16		; <i16>:65 [#uses=2]
+	call i16 @llvm.atomic.load.sub.i16.p0i16( i16* %64, i16 %65 )		; <i16>:66 [#uses=1]
+	sub i16 %66, %65		; <i16>:67 [#uses=1]
+	store i16 %67, i16* @ss, align 2
+	load i8* @uc, align 1		; <i8>:68 [#uses=1]
+	zext i8 %68 to i32		; <i32>:69 [#uses=1]
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:70 [#uses=1]
+	trunc i32 %69 to i16		; <i16>:71 [#uses=2]
+	call i16 @llvm.atomic.load.sub.i16.p0i16( i16* %70, i16 %71 )		; <i16>:72 [#uses=1]
+	sub i16 %72, %71		; <i16>:73 [#uses=1]
+	store i16 %73, i16* @us, align 2
+	load i8* @uc, align 1		; <i8>:74 [#uses=1]
+	zext i8 %74 to i32		; <i32>:75 [#uses=2]
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:76 [#uses=1]
+	call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %76, i32 %75 )		; <i32>:77 [#uses=1]
+	sub i32 %77, %75		; <i32>:78 [#uses=1]
+	store i32 %78, i32* @si, align 4
+	load i8* @uc, align 1		; <i8>:79 [#uses=1]
+	zext i8 %79 to i32		; <i32>:80 [#uses=2]
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:81 [#uses=1]
+	call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %81, i32 %80 )		; <i32>:82 [#uses=1]
+	sub i32 %82, %80		; <i32>:83 [#uses=1]
+	store i32 %83, i32* @ui, align 4
+	load i8* @uc, align 1		; <i8>:84 [#uses=1]
+	zext i8 %84 to i64		; <i64>:85 [#uses=2]
+	bitcast i8* bitcast (i64* @sl to i8*) to i64*		; <i64*>:86 [#uses=1]
+	call i64 @llvm.atomic.load.sub.i64.p0i64( i64* %86, i64 %85 )		; <i64>:87 [#uses=1]
+	sub i64 %87, %85		; <i64>:88 [#uses=1]
+	store i64 %88, i64* @sl, align 8
+	load i8* @uc, align 1		; <i8>:89 [#uses=1]
+	zext i8 %89 to i64		; <i64>:90 [#uses=2]
+	bitcast i8* bitcast (i64* @ul to i8*) to i64*		; <i64*>:91 [#uses=1]
+	call i64 @llvm.atomic.load.sub.i64.p0i64( i64* %91, i64 %90 )		; <i64>:92 [#uses=1]
+	sub i64 %92, %90		; <i64>:93 [#uses=1]
+	store i64 %93, i64* @ul, align 8
+	load i8* @uc, align 1		; <i8>:94 [#uses=1]
+	zext i8 %94 to i64		; <i64>:95 [#uses=2]
+	bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*>:96 [#uses=1]
+	call i64 @llvm.atomic.load.sub.i64.p0i64( i64* %96, i64 %95 )		; <i64>:97 [#uses=1]
+	sub i64 %97, %95		; <i64>:98 [#uses=1]
+	store i64 %98, i64* @sll, align 8
+	load i8* @uc, align 1		; <i8>:99 [#uses=1]
+	zext i8 %99 to i64		; <i64>:100 [#uses=2]
+	bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*>:101 [#uses=1]
+	call i64 @llvm.atomic.load.sub.i64.p0i64( i64* %101, i64 %100 )		; <i64>:102 [#uses=1]
+	sub i64 %102, %100		; <i64>:103 [#uses=1]
+	store i64 %103, i64* @ull, align 8
+	load i8* @uc, align 1		; <i8>:104 [#uses=1] ; or group
+	zext i8 %104 to i32		; <i32>:105 [#uses=1]
+	trunc i32 %105 to i8		; <i8>:106 [#uses=2]
+	call i8 @llvm.atomic.load.or.i8.p0i8( i8* @sc, i8 %106 )		; <i8>:107 [#uses=1]
+	or i8 %107, %106		; <i8>:108 [#uses=1]
+	store i8 %108, i8* @sc, align 1
+	load i8* @uc, align 1		; <i8>:109 [#uses=1]
+	zext i8 %109 to i32		; <i32>:110 [#uses=1]
+	trunc i32 %110 to i8		; <i8>:111 [#uses=2]
+	call i8 @llvm.atomic.load.or.i8.p0i8( i8* @uc, i8 %111 )		; <i8>:112 [#uses=1]
+	or i8 %112, %111		; <i8>:113 [#uses=1]
+	store i8 %113, i8* @uc, align 1
+	load i8* @uc, align 1		; <i8>:114 [#uses=1]
+	zext i8 %114 to i32		; <i32>:115 [#uses=1]
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:116 [#uses=1]
+	trunc i32 %115 to i16		; <i16>:117 [#uses=2]
+	call i16 @llvm.atomic.load.or.i16.p0i16( i16* %116, i16 %117 )		; <i16>:118 [#uses=1]
+	or i16 %118, %117		; <i16>:119 [#uses=1]
+	store i16 %119, i16* @ss, align 2
+	load i8* @uc, align 1		; <i8>:120 [#uses=1]
+	zext i8 %120 to i32		; <i32>:121 [#uses=1]
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:122 [#uses=1]
+	trunc i32 %121 to i16		; <i16>:123 [#uses=2]
+	call i16 @llvm.atomic.load.or.i16.p0i16( i16* %122, i16 %123 )		; <i16>:124 [#uses=1]
+	or i16 %124, %123		; <i16>:125 [#uses=1]
+	store i16 %125, i16* @us, align 2
+	load i8* @uc, align 1		; <i8>:126 [#uses=1]
+	zext i8 %126 to i32		; <i32>:127 [#uses=2]
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:128 [#uses=1]
+	call i32 @llvm.atomic.load.or.i32.p0i32( i32* %128, i32 %127 )		; <i32>:129 [#uses=1]
+	or i32 %129, %127		; <i32>:130 [#uses=1]
+	store i32 %130, i32* @si, align 4
+	load i8* @uc, align 1		; <i8>:131 [#uses=1]
+	zext i8 %131 to i32		; <i32>:132 [#uses=2]
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:133 [#uses=1]
+	call i32 @llvm.atomic.load.or.i32.p0i32( i32* %133, i32 %132 )		; <i32>:134 [#uses=1]
+	or i32 %134, %132		; <i32>:135 [#uses=1]
+	store i32 %135, i32* @ui, align 4
+	load i8* @uc, align 1		; <i8>:136 [#uses=1]
+	zext i8 %136 to i64		; <i64>:137 [#uses=2]
+	bitcast i8* bitcast (i64* @sl to i8*) to i64*		; <i64*>:138 [#uses=1]
+	call i64 @llvm.atomic.load.or.i64.p0i64( i64* %138, i64 %137 )		; <i64>:139 [#uses=1]
+	or i64 %139, %137		; <i64>:140 [#uses=1]
+	store i64 %140, i64* @sl, align 8
+	load i8* @uc, align 1		; <i8>:141 [#uses=1]
+	zext i8 %141 to i64		; <i64>:142 [#uses=2]
+	bitcast i8* bitcast (i64* @ul to i8*) to i64*		; <i64*>:143 [#uses=1]
+	call i64 @llvm.atomic.load.or.i64.p0i64( i64* %143, i64 %142 )		; <i64>:144 [#uses=1]
+	or i64 %144, %142		; <i64>:145 [#uses=1]
+	store i64 %145, i64* @ul, align 8
+	load i8* @uc, align 1		; <i8>:146 [#uses=1]
+	zext i8 %146 to i64		; <i64>:147 [#uses=2]
+	bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*>:148 [#uses=1]
+	call i64 @llvm.atomic.load.or.i64.p0i64( i64* %148, i64 %147 )		; <i64>:149 [#uses=1]
+	or i64 %149, %147		; <i64>:150 [#uses=1]
+	store i64 %150, i64* @sll, align 8
+	load i8* @uc, align 1		; <i8>:151 [#uses=1]
+	zext i8 %151 to i64		; <i64>:152 [#uses=2]
+	bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*>:153 [#uses=1]
+	call i64 @llvm.atomic.load.or.i64.p0i64( i64* %153, i64 %152 )		; <i64>:154 [#uses=1]
+	or i64 %154, %152		; <i64>:155 [#uses=1]
+	store i64 %155, i64* @ull, align 8
+	load i8* @uc, align 1		; <i8>:156 [#uses=1] ; xor group
+	zext i8 %156 to i32		; <i32>:157 [#uses=1]
+	trunc i32 %157 to i8		; <i8>:158 [#uses=2]
+	call i8 @llvm.atomic.load.xor.i8.p0i8( i8* @sc, i8 %158 )		; <i8>:159 [#uses=1]
+	xor i8 %159, %158		; <i8>:160 [#uses=1]
+	store i8 %160, i8* @sc, align 1
+	load i8* @uc, align 1		; <i8>:161 [#uses=1]
+	zext i8 %161 to i32		; <i32>:162 [#uses=1]
+	trunc i32 %162 to i8		; <i8>:163 [#uses=2]
+	call i8 @llvm.atomic.load.xor.i8.p0i8( i8* @uc, i8 %163 )		; <i8>:164 [#uses=1]
+	xor i8 %164, %163		; <i8>:165 [#uses=1]
+	store i8 %165, i8* @uc, align 1
+	load i8* @uc, align 1		; <i8>:166 [#uses=1]
+	zext i8 %166 to i32		; <i32>:167 [#uses=1]
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:168 [#uses=1]
+	trunc i32 %167 to i16		; <i16>:169 [#uses=2]
+	call i16 @llvm.atomic.load.xor.i16.p0i16( i16* %168, i16 %169 )		; <i16>:170 [#uses=1]
+	xor i16 %170, %169		; <i16>:171 [#uses=1]
+	store i16 %171, i16* @ss, align 2
+	load i8* @uc, align 1		; <i8>:172 [#uses=1]
+	zext i8 %172 to i32		; <i32>:173 [#uses=1]
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:174 [#uses=1]
+	trunc i32 %173 to i16		; <i16>:175 [#uses=2]
+	call i16 @llvm.atomic.load.xor.i16.p0i16( i16* %174, i16 %175 )		; <i16>:176 [#uses=1]
+	xor i16 %176, %175		; <i16>:177 [#uses=1]
+	store i16 %177, i16* @us, align 2
+	load i8* @uc, align 1		; <i8>:178 [#uses=1]
+	zext i8 %178 to i32		; <i32>:179 [#uses=2]
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:180 [#uses=1]
+	call i32 @llvm.atomic.load.xor.i32.p0i32( i32* %180, i32 %179 )		; <i32>:181 [#uses=1]
+	xor i32 %181, %179		; <i32>:182 [#uses=1]
+	store i32 %182, i32* @si, align 4
+	load i8* @uc, align 1		; <i8>:183 [#uses=1]
+	zext i8 %183 to i32		; <i32>:184 [#uses=2]
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:185 [#uses=1]
+	call i32 @llvm.atomic.load.xor.i32.p0i32( i32* %185, i32 %184 )		; <i32>:186 [#uses=1]
+	xor i32 %186, %184		; <i32>:187 [#uses=1]
+	store i32 %187, i32* @ui, align 4
+	load i8* @uc, align 1		; <i8>:188 [#uses=1]
+	zext i8 %188 to i64		; <i64>:189 [#uses=2]
+	bitcast i8* bitcast (i64* @sl to i8*) to i64*		; <i64*>:190 [#uses=1]
+	call i64 @llvm.atomic.load.xor.i64.p0i64( i64* %190, i64 %189 )		; <i64>:191 [#uses=1]
+	xor i64 %191, %189		; <i64>:192 [#uses=1]
+	store i64 %192, i64* @sl, align 8
+	load i8* @uc, align 1		; <i8>:193 [#uses=1]
+	zext i8 %193 to i64		; <i64>:194 [#uses=2]
+	bitcast i8* bitcast (i64* @ul to i8*) to i64*		; <i64*>:195 [#uses=1]
+	call i64 @llvm.atomic.load.xor.i64.p0i64( i64* %195, i64 %194 )		; <i64>:196 [#uses=1]
+	xor i64 %196, %194		; <i64>:197 [#uses=1]
+	store i64 %197, i64* @ul, align 8
+	load i8* @uc, align 1		; <i8>:198 [#uses=1]
+	zext i8 %198 to i64		; <i64>:199 [#uses=2]
+	bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*>:200 [#uses=1]
+	call i64 @llvm.atomic.load.xor.i64.p0i64( i64* %200, i64 %199 )		; <i64>:201 [#uses=1]
+	xor i64 %201, %199		; <i64>:202 [#uses=1]
+	store i64 %202, i64* @sll, align 8
+	load i8* @uc, align 1		; <i8>:203 [#uses=1]
+	zext i8 %203 to i64		; <i64>:204 [#uses=2]
+	bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*>:205 [#uses=1]
+	call i64 @llvm.atomic.load.xor.i64.p0i64( i64* %205, i64 %204 )		; <i64>:206 [#uses=1]
+	xor i64 %206, %204		; <i64>:207 [#uses=1]
+	store i64 %207, i64* @ull, align 8
+	load i8* @uc, align 1		; <i8>:208 [#uses=1] ; and group
+	zext i8 %208 to i32		; <i32>:209 [#uses=1]
+	trunc i32 %209 to i8		; <i8>:210 [#uses=2]
+	call i8 @llvm.atomic.load.and.i8.p0i8( i8* @sc, i8 %210 )		; <i8>:211 [#uses=1]
+	and i8 %211, %210		; <i8>:212 [#uses=1]
+	store i8 %212, i8* @sc, align 1
+	load i8* @uc, align 1		; <i8>:213 [#uses=1]
+	zext i8 %213 to i32		; <i32>:214 [#uses=1]
+	trunc i32 %214 to i8		; <i8>:215 [#uses=2]
+	call i8 @llvm.atomic.load.and.i8.p0i8( i8* @uc, i8 %215 )		; <i8>:216 [#uses=1]
+	and i8 %216, %215		; <i8>:217 [#uses=1]
+	store i8 %217, i8* @uc, align 1
+	load i8* @uc, align 1		; <i8>:218 [#uses=1]
+	zext i8 %218 to i32		; <i32>:219 [#uses=1]
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:220 [#uses=1]
+	trunc i32 %219 to i16		; <i16>:221 [#uses=2]
+	call i16 @llvm.atomic.load.and.i16.p0i16( i16* %220, i16 %221 )		; <i16>:222 [#uses=1]
+	and i16 %222, %221		; <i16>:223 [#uses=1]
+	store i16 %223, i16* @ss, align 2
+	load i8* @uc, align 1		; <i8>:224 [#uses=1]
+	zext i8 %224 to i32		; <i32>:225 [#uses=1]
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:226 [#uses=1]
+	trunc i32 %225 to i16		; <i16>:227 [#uses=2]
+	call i16 @llvm.atomic.load.and.i16.p0i16( i16* %226, i16 %227 )		; <i16>:228 [#uses=1]
+	and i16 %228, %227		; <i16>:229 [#uses=1]
+	store i16 %229, i16* @us, align 2
+	load i8* @uc, align 1		; <i8>:230 [#uses=1]
+	zext i8 %230 to i32		; <i32>:231 [#uses=2]
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:232 [#uses=1]
+	call i32 @llvm.atomic.load.and.i32.p0i32( i32* %232, i32 %231 )		; <i32>:233 [#uses=1]
+	and i32 %233, %231		; <i32>:234 [#uses=1]
+	store i32 %234, i32* @si, align 4
+	load i8* @uc, align 1		; <i8>:235 [#uses=1]
+	zext i8 %235 to i32		; <i32>:236 [#uses=2]
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:237 [#uses=1]
+	call i32 @llvm.atomic.load.and.i32.p0i32( i32* %237, i32 %236 )		; <i32>:238 [#uses=1]
+	and i32 %238, %236		; <i32>:239 [#uses=1]
+	store i32 %239, i32* @ui, align 4
+	load i8* @uc, align 1		; <i8>:240 [#uses=1]
+	zext i8 %240 to i64		; <i64>:241 [#uses=2]
+	bitcast i8* bitcast (i64* @sl to i8*) to i64*		; <i64*>:242 [#uses=1]
+	call i64 @llvm.atomic.load.and.i64.p0i64( i64* %242, i64 %241 )		; <i64>:243 [#uses=1]
+	and i64 %243, %241		; <i64>:244 [#uses=1]
+	store i64 %244, i64* @sl, align 8
+	load i8* @uc, align 1		; <i8>:245 [#uses=1]
+	zext i8 %245 to i64		; <i64>:246 [#uses=2]
+	bitcast i8* bitcast (i64* @ul to i8*) to i64*		; <i64*>:247 [#uses=1]
+	call i64 @llvm.atomic.load.and.i64.p0i64( i64* %247, i64 %246 )		; <i64>:248 [#uses=1]
+	and i64 %248, %246		; <i64>:249 [#uses=1]
+	store i64 %249, i64* @ul, align 8
+	load i8* @uc, align 1		; <i8>:250 [#uses=1]
+	zext i8 %250 to i64		; <i64>:251 [#uses=2]
+	bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*>:252 [#uses=1]
+	call i64 @llvm.atomic.load.and.i64.p0i64( i64* %252, i64 %251 )		; <i64>:253 [#uses=1]
+	and i64 %253, %251		; <i64>:254 [#uses=1]
+	store i64 %254, i64* @sll, align 8
+	load i8* @uc, align 1		; <i8>:255 [#uses=1]
+	zext i8 %255 to i64		; <i64>:256 [#uses=2]
+	bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*>:257 [#uses=1]
+	call i64 @llvm.atomic.load.and.i64.p0i64( i64* %257, i64 %256 )		; <i64>:258 [#uses=1]
+	and i64 %258, %256		; <i64>:259 [#uses=1]
+	store i64 %259, i64* @ull, align 8
+	load i8* @uc, align 1		; <i8>:260 [#uses=1] ; nand group: stored value is (~fetched) & operand
+	zext i8 %260 to i32		; <i32>:261 [#uses=1]
+	trunc i32 %261 to i8		; <i8>:262 [#uses=2]
+	call i8 @llvm.atomic.load.nand.i8.p0i8( i8* @sc, i8 %262 )		; <i8>:263 [#uses=1]
+	xor i8 %263, -1		; <i8>:264 [#uses=1] ; ~fetched
+	and i8 %264, %262		; <i8>:265 [#uses=1] ; (~fetched) & operand
+	store i8 %265, i8* @sc, align 1
+	load i8* @uc, align 1		; <i8>:266 [#uses=1]
+	zext i8 %266 to i32		; <i32>:267 [#uses=1]
+	trunc i32 %267 to i8		; <i8>:268 [#uses=2]
+	call i8 @llvm.atomic.load.nand.i8.p0i8( i8* @uc, i8 %268 )		; <i8>:269 [#uses=1]
+	xor i8 %269, -1		; <i8>:270 [#uses=1]
+	and i8 %270, %268		; <i8>:271 [#uses=1]
+	store i8 %271, i8* @uc, align 1
+	load i8* @uc, align 1		; <i8>:272 [#uses=1]
+	zext i8 %272 to i32		; <i32>:273 [#uses=1]
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:274 [#uses=1]
+	trunc i32 %273 to i16		; <i16>:275 [#uses=2]
+	call i16 @llvm.atomic.load.nand.i16.p0i16( i16* %274, i16 %275 )		; <i16>:276 [#uses=1]
+	xor i16 %276, -1		; <i16>:277 [#uses=1]
+	and i16 %277, %275		; <i16>:278 [#uses=1]
+	store i16 %278, i16* @ss, align 2
+	load i8* @uc, align 1		; <i8>:279 [#uses=1]
+	zext i8 %279 to i32		; <i32>:280 [#uses=1]
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:281 [#uses=1]
+	trunc i32 %280 to i16		; <i16>:282 [#uses=2]
+	call i16 @llvm.atomic.load.nand.i16.p0i16( i16* %281, i16 %282 )		; <i16>:283 [#uses=1]
+	xor i16 %283, -1		; <i16>:284 [#uses=1]
+	and i16 %284, %282		; <i16>:285 [#uses=1]
+	store i16 %285, i16* @us, align 2
+	load i8* @uc, align 1		; <i8>:286 [#uses=1]
+	zext i8 %286 to i32		; <i32>:287 [#uses=2]
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:288 [#uses=1]
+	call i32 @llvm.atomic.load.nand.i32.p0i32( i32* %288, i32 %287 )		; <i32>:289 [#uses=1]
+	xor i32 %289, -1		; <i32>:290 [#uses=1]
+	and i32 %290, %287		; <i32>:291 [#uses=1]
+	store i32 %291, i32* @si, align 4
+	load i8* @uc, align 1		; <i8>:292 [#uses=1]
+	zext i8 %292 to i32		; <i32>:293 [#uses=2]
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:294 [#uses=1]
+	call i32 @llvm.atomic.load.nand.i32.p0i32( i32* %294, i32 %293 )		; <i32>:295 [#uses=1]
+	xor i32 %295, -1		; <i32>:296 [#uses=1]
+	and i32 %296, %293		; <i32>:297 [#uses=1]
+	store i32 %297, i32* @ui, align 4
+	load i8* @uc, align 1		; <i8>:298 [#uses=1]
+	zext i8 %298 to i64		; <i64>:299 [#uses=2]
+	bitcast i8* bitcast (i64* @sl to i8*) to i64*		; <i64*>:300 [#uses=1]
+	call i64 @llvm.atomic.load.nand.i64.p0i64( i64* %300, i64 %299 )		; <i64>:301 [#uses=1]
+	xor i64 %301, -1		; <i64>:302 [#uses=1]
+	and i64 %302, %299		; <i64>:303 [#uses=1]
+	store i64 %303, i64* @sl, align 8
+	load i8* @uc, align 1		; <i8>:304 [#uses=1]
+	zext i8 %304 to i64		; <i64>:305 [#uses=2]
+	bitcast i8* bitcast (i64* @ul to i8*) to i64*		; <i64*>:306 [#uses=1]
+	call i64 @llvm.atomic.load.nand.i64.p0i64( i64* %306, i64 %305 )		; <i64>:307 [#uses=1]
+	xor i64 %307, -1		; <i64>:308 [#uses=1]
+	and i64 %308, %305		; <i64>:309 [#uses=1]
+	store i64 %309, i64* @ul, align 8
+	load i8* @uc, align 1		; <i8>:310 [#uses=1]
+	zext i8 %310 to i64		; <i64>:311 [#uses=2]
+	bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*>:312 [#uses=1]
+	call i64 @llvm.atomic.load.nand.i64.p0i64( i64* %312, i64 %311 )		; <i64>:313 [#uses=1]
+	xor i64 %313, -1		; <i64>:314 [#uses=1]
+	and i64 %314, %311		; <i64>:315 [#uses=1]
+	store i64 %315, i64* @sll, align 8
+	load i8* @uc, align 1		; <i8>:316 [#uses=1]
+	zext i8 %316 to i64		; <i64>:317 [#uses=2]
+	bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*>:318 [#uses=1]
+	call i64 @llvm.atomic.load.nand.i64.p0i64( i64* %318, i64 %317 )		; <i64>:319 [#uses=1]
+	xor i64 %319, -1		; <i64>:320 [#uses=1]
+	and i64 %320, %317		; <i64>:321 [#uses=1]
+	store i64 %321, i64* @ull, align 8
+	br label %return
+
+return:		; preds = %entry
+	ret void
+}
+
+define void @test_compare_and_swap() nounwind {
+entry:
+	load i8* @sc, align 1		; <i8>:0 [#uses=1]
+	zext i8 %0 to i32		; <i32>:1 [#uses=1]
+	load i8* @uc, align 1		; <i8>:2 [#uses=1]
+	zext i8 %2 to i32		; <i32>:3 [#uses=1]
+	trunc i32 %3 to i8		; <i8>:4 [#uses=1]
+	trunc i32 %1 to i8		; <i8>:5 [#uses=1]
+	call i8 @llvm.atomic.cmp.swap.i8.p0i8( i8* @sc, i8 %4, i8 %5 )		; <i8>:6 [#uses=1]
+	store i8 %6, i8* @sc, align 1
+	load i8* @sc, align 1		; <i8>:7 [#uses=1]
+	zext i8 %7 to i32		; <i32>:8 [#uses=1]
+	load i8* @uc, align 1		; <i8>:9 [#uses=1]
+	zext i8 %9 to i32		; <i32>:10 [#uses=1]
+	trunc i32 %10 to i8		; <i8>:11 [#uses=1]
+	trunc i32 %8 to i8		; <i8>:12 [#uses=1]
+	call i8 @llvm.atomic.cmp.swap.i8.p0i8( i8* @uc, i8 %11, i8 %12 )		; <i8>:13 [#uses=1]
+	store i8 %13, i8* @uc, align 1
+	load i8* @sc, align 1		; <i8>:14 [#uses=1]
+	sext i8 %14 to i16		; <i16>:15 [#uses=1]
+	zext i16 %15 to i32		; <i32>:16 [#uses=1]
+	load i8* @uc, align 1		; <i8>:17 [#uses=1]
+	zext i8 %17 to i32		; <i32>:18 [#uses=1]
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:19 [#uses=1]
+	trunc i32 %18 to i16		; <i16>:20 [#uses=1]
+	trunc i32 %16 to i16		; <i16>:21 [#uses=1]
+	call i16 @llvm.atomic.cmp.swap.i16.p0i16( i16* %19, i16 %20, i16 %21 )		; <i16>:22 [#uses=1]
+	store i16 %22, i16* @ss, align 2
+	load i8* @sc, align 1		; <i8>:23 [#uses=1]
+	sext i8 %23 to i16		; <i16>:24 [#uses=1]
+	zext i16 %24 to i32		; <i32>:25 [#uses=1]
+	load i8* @uc, align 1		; <i8>:26 [#uses=1]
+	zext i8 %26 to i32		; <i32>:27 [#uses=1]
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:28 [#uses=1]
+	trunc i32 %27 to i16		; <i16>:29 [#uses=1]
+	trunc i32 %25 to i16		; <i16>:30 [#uses=1]
+	call i16 @llvm.atomic.cmp.swap.i16.p0i16( i16* %28, i16 %29, i16 %30 )		; <i16>:31 [#uses=1]
+	store i16 %31, i16* @us, align 2
+	load i8* @sc, align 1		; <i8>:32 [#uses=1]
+	sext i8 %32 to i32		; <i32>:33 [#uses=1]
+	load i8* @uc, align 1		; <i8>:34 [#uses=1]
+	zext i8 %34 to i32		; <i32>:35 [#uses=1]
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:36 [#uses=1]
+	call i32 @llvm.atomic.cmp.swap.i32.p0i32( i32* %36, i32 %35, i32 %33 )		; <i32>:37 [#uses=1]
+	store i32 %37, i32* @si, align 4
+	load i8* @sc, align 1		; <i8>:38 [#uses=1]
+	sext i8 %38 to i32		; <i32>:39 [#uses=1]
+	load i8* @uc, align 1		; <i8>:40 [#uses=1]
+	zext i8 %40 to i32		; <i32>:41 [#uses=1]
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:42 [#uses=1]
+	call i32 @llvm.atomic.cmp.swap.i32.p0i32( i32* %42, i32 %41, i32 %39 )		; <i32>:43 [#uses=1]
+	store i32 %43, i32* @ui, align 4
+	load i8* @sc, align 1		; <i8>:44 [#uses=1]
+	sext i8 %44 to i64		; <i64>:45 [#uses=1]
+	load i8* @uc, align 1		; <i8>:46 [#uses=1]
+	zext i8 %46 to i64		; <i64>:47 [#uses=1]
+	bitcast i8* bitcast (i64* @sl to i8*) to i64*		; <i64*>:48 [#uses=1]
+	call i64 @llvm.atomic.cmp.swap.i64.p0i64( i64* %48, i64 %47, i64 %45 )		; <i64>:49 [#uses=1]
+	store i64 %49, i64* @sl, align 8
+	load i8* @sc, align 1		; <i8>:50 [#uses=1]
+	sext i8 %50 to i64		; <i64>:51 [#uses=1]
+	load i8* @uc, align 1		; <i8>:52 [#uses=1]
+	zext i8 %52 to i64		; <i64>:53 [#uses=1]
+	bitcast i8* bitcast (i64* @ul to i8*) to i64*		; <i64*>:54 [#uses=1]
+	call i64 @llvm.atomic.cmp.swap.i64.p0i64( i64* %54, i64 %53, i64 %51 )		; <i64>:55 [#uses=1]
+	store i64 %55, i64* @ul, align 8
+	load i8* @sc, align 1		; <i8>:56 [#uses=1]
+	sext i8 %56 to i64		; <i64>:57 [#uses=1]
+	load i8* @uc, align 1		; <i8>:58 [#uses=1]
+	zext i8 %58 to i64		; <i64>:59 [#uses=1]
+	bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*>:60 [#uses=1]
+	call i64 @llvm.atomic.cmp.swap.i64.p0i64( i64* %60, i64 %59, i64 %57 )		; <i64>:61 [#uses=1]
+	store i64 %61, i64* @sll, align 8
+	load i8* @sc, align 1		; <i8>:62 [#uses=1]
+	sext i8 %62 to i64		; <i64>:63 [#uses=1]
+	load i8* @uc, align 1		; <i8>:64 [#uses=1]
+	zext i8 %64 to i64		; <i64>:65 [#uses=1]
+	bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*>:66 [#uses=1]
+	call i64 @llvm.atomic.cmp.swap.i64.p0i64( i64* %66, i64 %65, i64 %63 )		; <i64>:67 [#uses=1]
+	store i64 %67, i64* @ull, align 8
+	load i8* @sc, align 1		; <i8>:68 [#uses=1]
+	zext i8 %68 to i32		; <i32>:69 [#uses=1]
+	load i8* @uc, align 1		; <i8>:70 [#uses=1]
+	zext i8 %70 to i32		; <i32>:71 [#uses=1]
+	trunc i32 %71 to i8		; <i8>:72 [#uses=2]
+	trunc i32 %69 to i8		; <i8>:73 [#uses=1]
+	call i8 @llvm.atomic.cmp.swap.i8.p0i8( i8* @sc, i8 %72, i8 %73 )		; <i8>:74 [#uses=1]
+	icmp eq i8 %74, %72		; <i1>:75 [#uses=1]
+	zext i1 %75 to i8		; <i8>:76 [#uses=1]
+	zext i8 %76 to i32		; <i32>:77 [#uses=1]
+	store i32 %77, i32* @ui, align 4
+	load i8* @sc, align 1		; <i8>:78 [#uses=1]
+	zext i8 %78 to i32		; <i32>:79 [#uses=1]
+	load i8* @uc, align 1		; <i8>:80 [#uses=1]
+	zext i8 %80 to i32		; <i32>:81 [#uses=1]
+	trunc i32 %81 to i8		; <i8>:82 [#uses=2]
+	trunc i32 %79 to i8		; <i8>:83 [#uses=1]
+	call i8 @llvm.atomic.cmp.swap.i8.p0i8( i8* @uc, i8 %82, i8 %83 )		; <i8>:84 [#uses=1]
+	icmp eq i8 %84, %82		; <i1>:85 [#uses=1]
+	zext i1 %85 to i8		; <i8>:86 [#uses=1]
+	zext i8 %86 to i32		; <i32>:87 [#uses=1]
+	store i32 %87, i32* @ui, align 4
+	load i8* @sc, align 1		; <i8>:88 [#uses=1]
+	sext i8 %88 to i16		; <i16>:89 [#uses=1]
+	zext i16 %89 to i32		; <i32>:90 [#uses=1]
+	load i8* @uc, align 1		; <i8>:91 [#uses=1]
+	zext i8 %91 to i32		; <i32>:92 [#uses=1]
+	trunc i32 %92 to i8		; <i8>:93 [#uses=2]
+	trunc i32 %90 to i8		; <i8>:94 [#uses=1]
+	call i8 @llvm.atomic.cmp.swap.i8.p0i8( i8* bitcast (i16* @ss to i8*), i8 %93, i8 %94 )		; <i8>:95 [#uses=1]
+	icmp eq i8 %95, %93		; <i1>:96 [#uses=1]
+	zext i1 %96 to i8		; <i8>:97 [#uses=1]
+	zext i8 %97 to i32		; <i32>:98 [#uses=1]
+	store i32 %98, i32* @ui, align 4
+	load i8* @sc, align 1		; <i8>:99 [#uses=1]
+	sext i8 %99 to i16		; <i16>:100 [#uses=1]
+	zext i16 %100 to i32		; <i32>:101 [#uses=1]
+	load i8* @uc, align 1		; <i8>:102 [#uses=1]
+	zext i8 %102 to i32		; <i32>:103 [#uses=1]
+	trunc i32 %103 to i8		; <i8>:104 [#uses=2]
+	trunc i32 %101 to i8		; <i8>:105 [#uses=1]
+	call i8 @llvm.atomic.cmp.swap.i8.p0i8( i8* bitcast (i16* @us to i8*), i8 %104, i8 %105 )		; <i8>:106 [#uses=1]
+	icmp eq i8 %106, %104		; <i1>:107 [#uses=1]
+	zext i1 %107 to i8		; <i8>:108 [#uses=1]
+	zext i8 %108 to i32		; <i32>:109 [#uses=1]
+	store i32 %109, i32* @ui, align 4
+	load i8* @sc, align 1		; <i8>:110 [#uses=1]
+	sext i8 %110 to i32		; <i32>:111 [#uses=1]
+	load i8* @uc, align 1		; <i8>:112 [#uses=1]
+	zext i8 %112 to i32		; <i32>:113 [#uses=1]
+	trunc i32 %113 to i8		; <i8>:114 [#uses=2]
+	trunc i32 %111 to i8		; <i8>:115 [#uses=1]
+	call i8 @llvm.atomic.cmp.swap.i8.p0i8( i8* bitcast (i32* @si to i8*), i8 %114, i8 %115 )		; <i8>:116 [#uses=1]
+	icmp eq i8 %116, %114		; <i1>:117 [#uses=1]
+	zext i1 %117 to i8		; <i8>:118 [#uses=1]
+	zext i8 %118 to i32		; <i32>:119 [#uses=1]
+	store i32 %119, i32* @ui, align 4
+	load i8* @sc, align 1		; <i8>:120 [#uses=1]
+	sext i8 %120 to i32		; <i32>:121 [#uses=1]
+	load i8* @uc, align 1		; <i8>:122 [#uses=1]
+	zext i8 %122 to i32		; <i32>:123 [#uses=1]
+	trunc i32 %123 to i8		; <i8>:124 [#uses=2]
+	trunc i32 %121 to i8		; <i8>:125 [#uses=1]
+	call i8 @llvm.atomic.cmp.swap.i8.p0i8( i8* bitcast (i32* @ui to i8*), i8 %124, i8 %125 )		; <i8>:126 [#uses=1]
+	icmp eq i8 %126, %124		; <i1>:127 [#uses=1]
+	zext i1 %127 to i8		; <i8>:128 [#uses=1]
+	zext i8 %128 to i32		; <i32>:129 [#uses=1]
+	store i32 %129, i32* @ui, align 4
+	load i8* @sc, align 1		; <i8>:130 [#uses=1]
+	sext i8 %130 to i64		; <i64>:131 [#uses=1]
+	load i8* @uc, align 1		; <i8>:132 [#uses=1]
+	zext i8 %132 to i64		; <i64>:133 [#uses=1]
+	trunc i64 %133 to i8		; <i8>:134 [#uses=2]
+	trunc i64 %131 to i8		; <i8>:135 [#uses=1]
+	call i8 @llvm.atomic.cmp.swap.i8.p0i8( i8* bitcast (i64* @sl to i8*), i8 %134, i8 %135 )		; <i8>:136 [#uses=1]
+	icmp eq i8 %136, %134		; <i1>:137 [#uses=1]
+	zext i1 %137 to i8		; <i8>:138 [#uses=1]
+	zext i8 %138 to i32		; <i32>:139 [#uses=1]
+	store i32 %139, i32* @ui, align 4
+	load i8* @sc, align 1		; <i8>:140 [#uses=1]
+	sext i8 %140 to i64		; <i64>:141 [#uses=1]
+	load i8* @uc, align 1		; <i8>:142 [#uses=1]
+	zext i8 %142 to i64		; <i64>:143 [#uses=1]
+	trunc i64 %143 to i8		; <i8>:144 [#uses=2]
+	trunc i64 %141 to i8		; <i8>:145 [#uses=1]
+	call i8 @llvm.atomic.cmp.swap.i8.p0i8( i8* bitcast (i64* @ul to i8*), i8 %144, i8 %145 )		; <i8>:146 [#uses=1]
+	icmp eq i8 %146, %144		; <i1>:147 [#uses=1]
+	zext i1 %147 to i8		; <i8>:148 [#uses=1]
+	zext i8 %148 to i32		; <i32>:149 [#uses=1]
+	store i32 %149, i32* @ui, align 4
+	load i8* @sc, align 1		; <i8>:150 [#uses=1]
+	sext i8 %150 to i64		; <i64>:151 [#uses=1]
+	load i8* @uc, align 1		; <i8>:152 [#uses=1]
+	zext i8 %152 to i64		; <i64>:153 [#uses=1]
+	trunc i64 %153 to i8		; <i8>:154 [#uses=2]
+	trunc i64 %151 to i8		; <i8>:155 [#uses=1]
+	call i8 @llvm.atomic.cmp.swap.i8.p0i8( i8* bitcast (i64* @sll to i8*), i8 %154, i8 %155 )		; <i8>:156 [#uses=1]
+	icmp eq i8 %156, %154		; <i1>:157 [#uses=1]
+	zext i1 %157 to i8		; <i8>:158 [#uses=1]
+	zext i8 %158 to i32		; <i32>:159 [#uses=1]
+	store i32 %159, i32* @ui, align 4
+	load i8* @sc, align 1		; <i8>:160 [#uses=1]
+	sext i8 %160 to i64		; <i64>:161 [#uses=1]
+	load i8* @uc, align 1		; <i8>:162 [#uses=1]
+	zext i8 %162 to i64		; <i64>:163 [#uses=1]
+	trunc i64 %163 to i8		; <i8>:164 [#uses=2]
+	trunc i64 %161 to i8		; <i8>:165 [#uses=1]
+	call i8 @llvm.atomic.cmp.swap.i8.p0i8( i8* bitcast (i64* @ull to i8*), i8 %164, i8 %165 )		; <i8>:166 [#uses=1]
+	icmp eq i8 %166, %164		; <i1>:167 [#uses=1]
+	zext i1 %167 to i8		; <i8>:168 [#uses=1]
+	zext i8 %168 to i32		; <i32>:169 [#uses=1]
+	store i32 %169, i32* @ui, align 4
+	br label %return
+
+return:		; preds = %entry
+	ret void
+}
+
+declare i8 @llvm.atomic.cmp.swap.i8.p0i8(i8*, i8, i8) nounwind
+
+declare i16 @llvm.atomic.cmp.swap.i16.p0i16(i16*, i16, i16) nounwind
+
+declare i32 @llvm.atomic.cmp.swap.i32.p0i32(i32*, i32, i32) nounwind
+
+declare i64 @llvm.atomic.cmp.swap.i64.p0i64(i64*, i64, i64) nounwind
+
+define void @test_lock() nounwind {
+entry:
+	call i8 @llvm.atomic.swap.i8.p0i8( i8* @sc, i8 1 )		; <i8>:0 [#uses=1]
+	store i8 %0, i8* @sc, align 1
+	call i8 @llvm.atomic.swap.i8.p0i8( i8* @uc, i8 1 )		; <i8>:1 [#uses=1]
+	store i8 %1, i8* @uc, align 1
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:2 [#uses=1]
+	call i16 @llvm.atomic.swap.i16.p0i16( i16* %2, i16 1 )		; <i16>:3 [#uses=1]
+	store i16 %3, i16* @ss, align 2
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:4 [#uses=1]
+	call i16 @llvm.atomic.swap.i16.p0i16( i16* %4, i16 1 )		; <i16>:5 [#uses=1]
+	store i16 %5, i16* @us, align 2
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:6 [#uses=1]
+	call i32 @llvm.atomic.swap.i32.p0i32( i32* %6, i32 1 )		; <i32>:7 [#uses=1]
+	store i32 %7, i32* @si, align 4
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:8 [#uses=1]
+	call i32 @llvm.atomic.swap.i32.p0i32( i32* %8, i32 1 )		; <i32>:9 [#uses=1]
+	store i32 %9, i32* @ui, align 4
+	bitcast i8* bitcast (i64* @sl to i8*) to i64*		; <i64*>:10 [#uses=1]
+	call i64 @llvm.atomic.swap.i64.p0i64( i64* %10, i64 1 )		; <i64>:11 [#uses=1]
+	store i64 %11, i64* @sl, align 8
+	bitcast i8* bitcast (i64* @ul to i8*) to i64*		; <i64*>:12 [#uses=1]
+	call i64 @llvm.atomic.swap.i64.p0i64( i64* %12, i64 1 )		; <i64>:13 [#uses=1]
+	store i64 %13, i64* @ul, align 8
+	bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*>:14 [#uses=1]
+	call i64 @llvm.atomic.swap.i64.p0i64( i64* %14, i64 1 )		; <i64>:15 [#uses=1]
+	store i64 %15, i64* @sll, align 8
+	bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*>:16 [#uses=1]
+	call i64 @llvm.atomic.swap.i64.p0i64( i64* %16, i64 1 )		; <i64>:17 [#uses=1]
+	store i64 %17, i64* @ull, align 8
+	call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 true, i1 false )
+	volatile store i8 0, i8* @sc, align 1
+	volatile store i8 0, i8* @uc, align 1
+	bitcast i8* bitcast (i16* @ss to i8*) to i16*		; <i16*>:18 [#uses=1]
+	volatile store i16 0, i16* %18, align 2
+	bitcast i8* bitcast (i16* @us to i8*) to i16*		; <i16*>:19 [#uses=1]
+	volatile store i16 0, i16* %19, align 2
+	bitcast i8* bitcast (i32* @si to i8*) to i32*		; <i32*>:20 [#uses=1]
+	volatile store i32 0, i32* %20, align 4
+	bitcast i8* bitcast (i32* @ui to i8*) to i32*		; <i32*>:21 [#uses=1]
+	volatile store i32 0, i32* %21, align 4
+	bitcast i8* bitcast (i64* @sl to i8*) to i64*		; <i64*>:22 [#uses=1]
+	volatile store i64 0, i64* %22, align 8
+	bitcast i8* bitcast (i64* @ul to i8*) to i64*		; <i64*>:23 [#uses=1]
+	volatile store i64 0, i64* %23, align 8
+	bitcast i8* bitcast (i64* @sll to i8*) to i64*		; <i64*>:24 [#uses=1]
+	volatile store i64 0, i64* %24, align 8
+	bitcast i8* bitcast (i64* @ull to i8*) to i64*		; <i64*>:25 [#uses=1]
+	volatile store i64 0, i64* %25, align 8
+	br label %return
+
+return:		; preds = %entry
+	ret void
+}
+
+declare i8 @llvm.atomic.swap.i8.p0i8(i8*, i8) nounwind
+
+declare i16 @llvm.atomic.swap.i16.p0i16(i16*, i16) nounwind
+
+declare i32 @llvm.atomic.swap.i32.p0i32(i32*, i32) nounwind
+
+declare i64 @llvm.atomic.swap.i64.p0i64(i64*, i64) nounwind
+
+declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind
diff --git a/test/CodeGen/X86/SwitchLowering.ll b/test/CodeGen/X86/SwitchLowering.ll
new file mode 100644
index 0000000..29a0e82
--- /dev/null
+++ b/test/CodeGen/X86/SwitchLowering.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=x86 | grep cmp | count 1
+; PR964
+
+define i8* @FindChar(i8* %CurPtr) {
+entry:
+        br label %bb
+
+bb:             ; preds = %bb, %entry
+        %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]          ; <i32> [#uses=3]
+        %CurPtr_addr.0.rec = bitcast i32 %indvar to i32         ; <i32> [#uses=1]
+        %gep.upgrd.1 = zext i32 %indvar to i64          ; <i64> [#uses=1]
+        %CurPtr_addr.0 = getelementptr i8* %CurPtr, i64 %gep.upgrd.1            ; <i8*> [#uses=1]
+        %tmp = load i8* %CurPtr_addr.0          ; <i8> [#uses=3]
+        %tmp2.rec = add i32 %CurPtr_addr.0.rec, 1               ; <i32> [#uses=1]
+        %tmp2 = getelementptr i8* %CurPtr, i32 %tmp2.rec                ; <i8*> [#uses=1]
+        %indvar.next = add i32 %indvar, 1               ; <i32> [#uses=1]
+        switch i8 %tmp, label %bb [
+                 i8 0, label %bb7
+                 i8 120, label %bb7
+        ]
+
+bb7:            ; preds = %bb, %bb
+        tail call void @foo( i8 %tmp )
+        ret i8* %tmp2
+}
+
+declare void @foo(i8)
+
diff --git a/test/CodeGen/X86/abi-isel.ll b/test/CodeGen/X86/abi-isel.ll
new file mode 100644
index 0000000..9208738
--- /dev/null
+++ b/test/CodeGen/X86/abi-isel.ll
@@ -0,0 +1,9646 @@
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX-32-PIC
+
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX-64-PIC
+
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=DARWIN-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=DARWIN-32-PIC
+
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=DARWIN-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small -post-RA-scheduler=false | FileCheck %s -check-prefix=DARWIN-64-PIC
+
+@src = external global [131072 x i32]
+@dst = external global [131072 x i32]
+@xsrc = external global [32 x i32]
+@xdst = external global [32 x i32]
+@ptr = external global i32*
+@dsrc = global [131072 x i32] zeroinitializer, align 32
+@ddst = global [131072 x i32] zeroinitializer, align 32
+@dptr = global i32* null
+@lsrc = internal global [131072 x i32] zeroinitializer
+@ldst = internal global [131072 x i32] zeroinitializer
+@lptr = internal global i32* null
+@ifunc = external global void ()*
+@difunc = global void ()* null
+@lifunc = internal global void ()* null
+@lxsrc = internal global [32 x i32] zeroinitializer, align 32
+@lxdst = internal global [32 x i32] zeroinitializer, align 32
+@dxsrc = global [32 x i32] zeroinitializer, align 32
+@dxdst = global [32 x i32] zeroinitializer, align 32
+
+define void @foo00() nounwind {
+entry:
+	%0 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 0), align 4
+	store i32 %0, i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 0), align 4
+	ret void
+
+; LINUX-64-STATIC: foo00:
+; LINUX-64-STATIC: movl	src(%rip), %eax
+; LINUX-64-STATIC: movl	%eax, dst
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo00:
+; LINUX-32-STATIC: 	movl	src, %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, dst
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: foo00:
+; LINUX-32-PIC: 	movl	src, %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, dst
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: foo00:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _foo00:
+; DARWIN-32-STATIC: 	movl	_src, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _dst
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _foo00:
+; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _foo00:
+; DARWIN-32-PIC: 	call	L1$pb
+; DARWIN-32-PIC-NEXT: L1$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L1$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L1$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _foo00:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	(%rax), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _foo00:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _foo00:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	(%rax), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @fxo00() nounwind {
+entry:
+	%0 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 0), align 4
+	store i32 %0, i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 0), align 4
+	ret void
+
+; LINUX-64-STATIC: fxo00:
+; LINUX-64-STATIC: movl	xsrc(%rip), %eax
+; LINUX-64-STATIC: movl	%eax, xdst
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: fxo00:
+; LINUX-32-STATIC: 	movl	xsrc, %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, xdst
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: fxo00:
+; LINUX-32-PIC: 	movl	xsrc, %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, xdst
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: fxo00:
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	xdst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _fxo00:
+; DARWIN-32-STATIC: 	movl	_xsrc, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _xdst
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _fxo00:
+; DARWIN-32-DYNAMIC: 	movl	L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _fxo00:
+; DARWIN-32-PIC: 	call	L2$pb
+; DARWIN-32-PIC-NEXT: L2$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L2$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L2$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _fxo00:
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	(%rax), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _fxo00:
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _fxo00:
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	(%rax), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @foo01() nounwind {
+entry:
+	store i32* getelementptr ([131072 x i32]* @dst, i32 0, i32 0), i32** @ptr, align 8
+	ret void
+; LINUX-64-STATIC: foo01:
+; LINUX-64-STATIC: movq	$dst, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo01:
+; LINUX-32-STATIC: 	movl	$dst, ptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: foo01:
+; LINUX-32-PIC: 	movl	$dst, ptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: foo01:
+; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _foo01:
+; DARWIN-32-STATIC: 	movl	$_dst, _ptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _foo01:
+; DARWIN-32-DYNAMIC: 	movl	L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _foo01:
+; DARWIN-32-PIC: 	call	L3$pb
+; DARWIN-32-PIC-NEXT: L3$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L3$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L3$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _foo01:
+; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _foo01:
+; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _foo01:
+; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @fxo01() nounwind {
+entry:
+	store i32* getelementptr ([32 x i32]* @xdst, i32 0, i32 0), i32** @ptr, align 8
+	ret void
+; LINUX-64-STATIC: fxo01:
+; LINUX-64-STATIC: movq	$xdst, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: fxo01:
+; LINUX-32-STATIC: 	movl	$xdst, ptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: fxo01:
+; LINUX-32-PIC: 	movl	$xdst, ptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: fxo01:
+; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _fxo01:
+; DARWIN-32-STATIC: 	movl	$_xdst, _ptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _fxo01:
+; DARWIN-32-DYNAMIC: 	movl	L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _fxo01:
+; DARWIN-32-PIC: 	call	L4$pb
+; DARWIN-32-PIC-NEXT: L4$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L4$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L4$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _fxo01:
+; DARWIN-64-STATIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _fxo01:
+; DARWIN-64-DYNAMIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _fxo01:
+; DARWIN-64-PIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @foo02() nounwind {
+entry:
+	%0 = load i32** @ptr, align 8
+	%1 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 0), align 4
+	store i32 %1, i32* %0, align 4
+	ret void
+; LINUX-64-STATIC: foo02:
+; LINUX-64-STATIC: movl    src(%rip), %
+; LINUX-64-STATIC: movq    ptr(%rip), %
+; LINUX-64-STATIC: movl
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo02:
+; LINUX-32-STATIC: 	movl	src, %eax
+; LINUX-32-STATIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%eax, (%ecx)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: foo02:
+; LINUX-32-PIC: 	movl	src, %eax
+; LINUX-32-PIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-PIC-NEXT: 	movl	%eax, (%ecx)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: foo02:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _foo02:
+; DARWIN-32-STATIC: 	movl	_src, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _foo02:
+; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%ecx), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _foo02:
+; DARWIN-32-PIC: 	call	L5$pb
+; DARWIN-32-PIC-NEXT: L5$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L5$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L5$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _foo02:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	(%rax), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _foo02:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _foo02:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	(%rax), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @fxo02() nounwind {
+entry:
+	%0 = load i32** @ptr, align 8
+	%1 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 0), align 4
+	store i32 %1, i32* %0, align 4
+; LINUX-64-STATIC: fxo02:
+; LINUX-64-STATIC: movl    xsrc(%rip), %
+; LINUX-64-STATIC: movq    ptr(%rip), %
+; LINUX-64-STATIC: movl
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: fxo02:
+; LINUX-32-STATIC: 	movl	xsrc, %eax
+; LINUX-32-STATIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%eax, (%ecx)
+; LINUX-32-STATIC-NEXT: 	ret
+	ret void
+
+; LINUX-32-PIC: fxo02:
+; LINUX-32-PIC: 	movl	xsrc, %eax
+; LINUX-32-PIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-PIC-NEXT: 	movl	%eax, (%ecx)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: fxo02:
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _fxo02:
+; DARWIN-32-STATIC: 	movl	_xsrc, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _fxo02:
+; DARWIN-32-DYNAMIC: 	movl	L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%ecx), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _fxo02:
+; DARWIN-32-PIC: 	call	L6$pb
+; DARWIN-32-PIC-NEXT: L6$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L6$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L6$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _fxo02:
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	(%rax), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _fxo02:
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _fxo02:
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	(%rax), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @foo03() nounwind {
+entry:
+	%0 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 0), align 32
+	store i32 %0, i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 0), align 32
+	ret void
+; LINUX-64-STATIC: foo03:
+; LINUX-64-STATIC: movl    dsrc(%rip), %eax
+; LINUX-64-STATIC: movl    %eax, ddst
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo03:
+; LINUX-32-STATIC: 	movl	dsrc, %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, ddst
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: foo03:
+; LINUX-32-PIC: 	movl	dsrc, %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, ddst
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: foo03:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _foo03:
+; DARWIN-32-STATIC: 	movl	_dsrc, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ddst
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _foo03:
+; DARWIN-32-DYNAMIC: 	movl	_dsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _ddst
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _foo03:
+; DARWIN-32-PIC: 	call	L7$pb
+; DARWIN-32-PIC-NEXT: L7$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	_dsrc-L7$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _ddst-L7$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _foo03:
+; DARWIN-64-STATIC: 	movl	_dsrc(%rip), %eax
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, _ddst(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _foo03:
+; DARWIN-64-DYNAMIC: 	movl	_dsrc(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, _ddst(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _foo03:
+; DARWIN-64-PIC: 	movl	_dsrc(%rip), %eax
+; DARWIN-64-PIC-NEXT: 	movl	%eax, _ddst(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @foo04() nounwind {
+entry:
+	store i32* getelementptr ([131072 x i32]* @ddst, i32 0, i32 0), i32** @dptr, align 8
+	ret void
+; LINUX-64-STATIC: foo04:
+; LINUX-64-STATIC: movq    $ddst, dptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo04:
+; LINUX-32-STATIC: 	movl	$ddst, dptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: foo04:
+; LINUX-32-PIC: 	movl	$ddst, dptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: foo04:
+; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _foo04:
+; DARWIN-32-STATIC: 	movl	$_ddst, _dptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _foo04:
+; DARWIN-32-DYNAMIC: 	movl	$_ddst, _dptr
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _foo04:
+; DARWIN-32-PIC: 	call	L8$pb
+; DARWIN-32-PIC-NEXT: L8$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_ddst-L8$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _dptr-L8$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _foo04:
+; DARWIN-64-STATIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _foo04:
+; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _foo04:
+; DARWIN-64-PIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @foo05() nounwind {
+entry:
+	%0 = load i32** @dptr, align 8
+	%1 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 0), align 32
+	store i32 %1, i32* %0, align 4
+	ret void
+; LINUX-64-STATIC: foo05:
+; LINUX-64-STATIC: movl    dsrc(%rip), %
+; LINUX-64-STATIC: movq    dptr(%rip), %
+; LINUX-64-STATIC: movl
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo05:
+; LINUX-32-STATIC: 	movl	dsrc, %eax
+; LINUX-32-STATIC-NEXT: 	movl	dptr, %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%eax, (%ecx)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: foo05:
+; LINUX-32-PIC: 	movl	dsrc, %eax
+; LINUX-32-PIC-NEXT: 	movl	dptr, %ecx
+; LINUX-32-PIC-NEXT: 	movl	%eax, (%ecx)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: foo05:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _foo05:
+; DARWIN-32-STATIC: 	movl	_dsrc, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_dptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _foo05:
+; DARWIN-32-DYNAMIC: 	movl	_dsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _foo05:
+; DARWIN-32-PIC: 	call	L9$pb
+; DARWIN-32-PIC-NEXT: L9$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	_dsrc-L9$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L9$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _foo05:
+; DARWIN-64-STATIC: 	movl	_dsrc(%rip), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _foo05:
+; DARWIN-64-DYNAMIC: 	movl	_dsrc(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _foo05:
+; DARWIN-64-PIC: 	movl	_dsrc(%rip), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+; foo06: ldst[0] = lsrc[0].  The CHECK lines expect direct, non-GOT
+; references to the l-prefixed globals in every reloc model (darwin
+; 32-bit PIC still computes them relative to the L10$pb picbase) --
+; NOTE(review): consistent with internal linkage; confirm against the
+; globals' declarations earlier in the file.
+define void @foo06() nounwind {
+entry:
+	%0 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 0), align 4
+	store i32 %0, i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 0), align 4
+	ret void
+; LINUX-64-STATIC: foo06:
+; LINUX-64-STATIC: movl    lsrc(%rip), %eax
+; LINUX-64-STATIC: movl    %eax, ldst(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo06:
+; LINUX-32-STATIC: 	movl	lsrc, %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, ldst
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: foo06:
+; LINUX-32-PIC: 	movl	lsrc, %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, ldst
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: foo06:
+; LINUX-64-PIC: 	movl	lsrc(%rip), %eax
+; LINUX-64-PIC-NEXT: 	movl	%eax, ldst(%rip)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _foo06:
+; DARWIN-32-STATIC: 	movl	_lsrc, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ldst
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _foo06:
+; DARWIN-32-DYNAMIC: 	movl	_lsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _ldst
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _foo06:
+; DARWIN-32-PIC: 	call	L10$pb
+; DARWIN-32-PIC-NEXT: L10$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	_lsrc-L10$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _ldst-L10$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _foo06:
+; DARWIN-64-STATIC: 	movl	_lsrc(%rip), %eax
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, _ldst(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _foo06:
+; DARWIN-64-DYNAMIC: 	movl	_lsrc(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, _ldst(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _foo06:
+; DARWIN-64-PIC: 	movl	_lsrc(%rip), %eax
+; DARWIN-64-PIC-NEXT: 	movl	%eax, _ldst(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+; foo07: store the address of ldst[0] into @lptr.  Checks how a local
+; global's address is materialized: a $_ldst absolute immediate in the
+; 32-bit static/dynamic models, a RIP-relative leaq on 64-bit, and a
+; picbase-relative leal under darwin 32-bit PIC.
+define void @foo07() nounwind {
+entry:
+	store i32* getelementptr ([131072 x i32]* @ldst, i32 0, i32 0), i32** @lptr, align 8
+	ret void
+; LINUX-64-STATIC: foo07:
+; LINUX-64-STATIC: movq    $ldst, lptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo07:
+; LINUX-32-STATIC: 	movl	$ldst, lptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: foo07:
+; LINUX-32-PIC: 	movl	$ldst, lptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: foo07:
+; LINUX-64-PIC: 	leaq	ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	%rax, lptr(%rip)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _foo07:
+; DARWIN-32-STATIC: 	movl	$_ldst, _lptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _foo07:
+; DARWIN-32-DYNAMIC: 	movl	$_ldst, _lptr
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _foo07:
+; DARWIN-32-PIC: 	call	L11$pb
+; DARWIN-32-PIC-NEXT: L11$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_ldst-L11$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _lptr-L11$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _foo07:
+; DARWIN-64-STATIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _foo07:
+; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _foo07:
+; DARWIN-64-PIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+; foo08: *(@lptr) = lsrc[0] -- load the pointer from the global, then
+; store through it.  The LINUX-64-STATIC patterns deliberately leave the
+; destination register unspecified (trailing bare "%" / lone "movl") so
+; they only pin the addressing modes, not register allocation.
+define void @foo08() nounwind {
+entry:
+	%0 = load i32** @lptr, align 8
+	%1 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 0), align 4
+	store i32 %1, i32* %0, align 4
+	ret void
+; LINUX-64-STATIC: foo08:
+; LINUX-64-STATIC: movl    lsrc(%rip), %
+; LINUX-64-STATIC: movq    lptr(%rip), %
+; LINUX-64-STATIC: movl
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: foo08:
+; LINUX-32-STATIC: 	movl	lsrc, %eax
+; LINUX-32-STATIC-NEXT: 	movl	lptr, %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%eax, (%ecx)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: foo08:
+; LINUX-32-PIC: 	movl	lsrc, %eax
+; LINUX-32-PIC-NEXT: 	movl	lptr, %ecx
+; LINUX-32-PIC-NEXT: 	movl	%eax, (%ecx)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: foo08:
+; LINUX-64-PIC: 	movl	lsrc(%rip), %eax
+; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _foo08:
+; DARWIN-32-STATIC: 	movl	_lsrc, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_lptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _foo08:
+; DARWIN-32-DYNAMIC: 	movl	_lsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _foo08:
+; DARWIN-32-PIC: 	call	L12$pb
+; DARWIN-32-PIC-NEXT: L12$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	_lsrc-L12$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L12$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _foo08:
+; DARWIN-64-STATIC: 	movl	_lsrc(%rip), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _foo08:
+; DARWIN-64-DYNAMIC: 	movl	_lsrc(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _foo08:
+; DARWIN-64-PIC: 	movl	_lsrc(%rip), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+; qux00: dst[16] = src[16].  The constant element offset (16*4 = 64
+; bytes) must fold into the addressing mode ("src+64" / "64(%rax)").
+; The default-visibility globals go through @GOTPCREL or a
+; L_..$non_lazy_ptr stub in the non-static darwin/64-bit PIC models.
+define void @qux00() nounwind {
+entry:
+	%0 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 16), align 4
+	store i32 %0, i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 16), align 4
+	ret void
+; LINUX-64-STATIC: qux00:
+; LINUX-64-STATIC: movl    src+64(%rip), %eax
+; LINUX-64-STATIC: movl    %eax, dst+64(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux00:
+; LINUX-32-STATIC: 	movl	src+64, %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, dst+64
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: qux00:
+; LINUX-32-PIC: 	movl	src+64, %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, dst+64
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: qux00:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _qux00:
+; DARWIN-32-STATIC: 	movl	_src+64, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _dst+64
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _qux00:
+; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	64(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _qux00:
+; DARWIN-32-PIC: 	call	L13$pb
+; DARWIN-32-PIC-NEXT: L13$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L13$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	64(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L13$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _qux00:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	64(%rax), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _qux00:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _qux00:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	64(%rax), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+; qxx00: same pattern as qux00 but on the small [32 x i32] arrays
+; @xsrc/@xdst -- expected codegen (folded +64 offset, GOT / non-lazy
+; pointer indirection when not static) is identical in shape.
+define void @qxx00() nounwind {
+entry:
+	%0 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 16), align 4
+	store i32 %0, i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 16), align 4
+	ret void
+; LINUX-64-STATIC: qxx00:
+; LINUX-64-STATIC: movl    xsrc+64(%rip), %eax
+; LINUX-64-STATIC: movl    %eax, xdst+64(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qxx00:
+; LINUX-32-STATIC: 	movl	xsrc+64, %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, xdst+64
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: qxx00:
+; LINUX-32-PIC: 	movl	xsrc+64, %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, xdst+64
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: qxx00:
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	xdst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _qxx00:
+; DARWIN-32-STATIC: 	movl	_xsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _xdst+64
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _qxx00:
+; DARWIN-32-DYNAMIC: 	movl	L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	64(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _qxx00:
+; DARWIN-32-PIC: 	call	L14$pb
+; DARWIN-32-PIC-NEXT: L14$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L14$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	64(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L14$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _qxx00:
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	64(%rax), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _qxx00:
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _qxx00:
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	64(%rax), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+; qux01: @ptr = &dst[16].  Static models fold the whole address into a
+; "$dst+64" immediate store; PIC/GOT models must load the base address
+; from the GOT (or non-lazy pointer) and add the +64 offset separately.
+define void @qux01() nounwind {
+entry:
+	store i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 16), i32** @ptr, align 8
+	ret void
+; LINUX-64-STATIC: qux01:
+; LINUX-64-STATIC: movq    $dst+64, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux01:
+; LINUX-32-STATIC: 	movl	$dst+64, ptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: qux01:
+; LINUX-32-PIC: 	movl	$dst+64, ptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: qux01:
+; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	addq	$64, %rax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _qux01:
+; DARWIN-32-STATIC: 	movl	$_dst+64, _ptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _qux01:
+; DARWIN-32-DYNAMIC: 	movl	L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _qux01:
+; DARWIN-32-PIC: 	call	L15$pb
+; DARWIN-32-PIC-NEXT: L15$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L15$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	addl	$64, %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L15$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _qux01:
+; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _qux01:
+; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _qux01:
+; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+; qxx01: same as qux01 but taking &xdst[16] from the small [32 x i32]
+; array: "$xdst+64" immediate when static, GOT/non-lazy load plus
+; add $64 otherwise.
+define void @qxx01() nounwind {
+entry:
+	store i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 16), i32** @ptr, align 8
+	ret void
+; LINUX-64-STATIC: qxx01:
+; LINUX-64-STATIC: movq    $xdst+64, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qxx01:
+; LINUX-32-STATIC: 	movl	$xdst+64, ptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: qxx01:
+; LINUX-32-PIC: 	movl	$xdst+64, ptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: qxx01:
+; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	addq	$64, %rax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _qxx01:
+; DARWIN-32-STATIC: 	movl	$_xdst+64, _ptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _qxx01:
+; DARWIN-32-DYNAMIC: 	movl	L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _qxx01:
+; DARWIN-32-PIC: 	call	L16$pb
+; DARWIN-32-PIC-NEXT: L16$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L16$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	addl	$64, %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L16$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _qxx01:
+; DARWIN-64-STATIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _qxx01:
+; DARWIN-64-DYNAMIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _qxx01:
+; DARWIN-64-PIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+; qux02: (@ptr)[16] = src[16] -- an extra pointer dereference compared
+; to qux00, so PIC models do a GOT load *and* a load of the pointer
+; value.  Note the IR's `ret void` sits after the first CHECK block;
+; that is legal since CHECK lines are plain comments to the IR parser.
+define void @qux02() nounwind {
+entry:
+	%0 = load i32** @ptr, align 8
+	%1 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 16), align 4
+	%2 = getelementptr i32* %0, i64 16
+	store i32 %1, i32* %2, align 4
+; LINUX-64-STATIC: qux02:
+; LINUX-64-STATIC: movl    src+64(%rip), %eax
+; LINUX-64-STATIC: movq    ptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 64(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux02:
+; LINUX-32-STATIC: 	movl	src+64, %eax
+; LINUX-32-STATIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%eax, 64(%ecx)
+; LINUX-32-STATIC-NEXT: 	ret
+	ret void
+
+; LINUX-32-PIC: qux02:
+; LINUX-32-PIC: 	movl	src+64, %eax
+; LINUX-32-PIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-PIC-NEXT: 	movl	%eax, 64(%ecx)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: qux02:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _qux02:
+; DARWIN-32-STATIC: 	movl	_src+64, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _qux02:
+; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	64(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%ecx), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _qux02:
+; DARWIN-32-PIC: 	call	L17$pb
+; DARWIN-32-PIC-NEXT: L17$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L17$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	64(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L17$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _qux02:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	64(%rax), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _qux02:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _qux02:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	64(%rax), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+; qxx02: (@ptr)[16] = xsrc[16] -- qux02's pattern on the small
+; [32 x i32] source array.  As in qux02, the `ret void` follows the
+; first CHECK block; CHECK lines are inert comments to the IR parser.
+define void @qxx02() nounwind {
+entry:
+	%0 = load i32** @ptr, align 8
+	%1 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 16), align 4
+	%2 = getelementptr i32* %0, i64 16
+	store i32 %1, i32* %2, align 4
+; LINUX-64-STATIC: qxx02:
+; LINUX-64-STATIC: movl    xsrc+64(%rip), %eax
+; LINUX-64-STATIC: movq    ptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 64(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qxx02:
+; LINUX-32-STATIC: 	movl	xsrc+64, %eax
+; LINUX-32-STATIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%eax, 64(%ecx)
+; LINUX-32-STATIC-NEXT: 	ret
+	ret void
+
+; LINUX-32-PIC: qxx02:
+; LINUX-32-PIC: 	movl	xsrc+64, %eax
+; LINUX-32-PIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-PIC-NEXT: 	movl	%eax, 64(%ecx)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: qxx02:
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _qxx02:
+; DARWIN-32-STATIC: 	movl	_xsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _qxx02:
+; DARWIN-32-DYNAMIC: 	movl	L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	64(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%ecx), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _qxx02:
+; DARWIN-32-PIC: 	call	L18$pb
+; DARWIN-32-PIC-NEXT: L18$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L18$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	64(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L18$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _qxx02:
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	64(%rax), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _qxx02:
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _qxx02:
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	64(%rax), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+; qux03: ddst[16] = dsrc[16].  The d-prefixed globals are accessed
+; directly (no GOT/non-lazy pointer) even in the darwin dynamic and
+; 64-bit PIC models; darwin 32-bit PIC folds the offset into the
+; picbase expression "(_dsrc-L19$pb)+64".  NOTE(review): presumably
+; hidden/protected visibility -- confirm against the declarations
+; earlier in the file.
+define void @qux03() nounwind {
+entry:
+	%0 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 16), align 32
+	store i32 %0, i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 16), align 32
+	ret void
+; LINUX-64-STATIC: qux03:
+; LINUX-64-STATIC: movl    dsrc+64(%rip), %eax
+; LINUX-64-STATIC: movl    %eax, ddst+64(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux03:
+; LINUX-32-STATIC: 	movl	dsrc+64, %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, ddst+64
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: qux03:
+; LINUX-32-PIC: 	movl	dsrc+64, %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, ddst+64
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: qux03:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _qux03:
+; DARWIN-32-STATIC: 	movl	_dsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ddst+64
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _qux03:
+; DARWIN-32-DYNAMIC: 	movl	_dsrc+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _ddst+64
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _qux03:
+; DARWIN-32-PIC: 	call	L19$pb
+; DARWIN-32-PIC-NEXT: L19$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L19$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (_ddst-L19$pb)+64(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _qux03:
+; DARWIN-64-STATIC: 	movl	_dsrc+64(%rip), %eax
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, _ddst+64(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _qux03:
+; DARWIN-64-DYNAMIC: 	movl	_dsrc+64(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, _ddst+64(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _qux03:
+; DARWIN-64-PIC: 	movl	_dsrc+64(%rip), %eax
+; DARWIN-64-PIC-NEXT: 	movl	%eax, _ddst+64(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+; qux04: @dptr = &ddst[16].  Static models emit a "$ddst+64" immediate;
+; 64-bit models use a RIP-relative leaq with the +64 folded in; darwin
+; 32-bit PIC folds it into the picbase leal "(_ddst-L20$pb)+64".
+define void @qux04() nounwind {
+entry:
+	store i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 16), i32** @dptr, align 8
+	ret void
+; LINUX-64-STATIC: qux04:
+; LINUX-64-STATIC: movq    $ddst+64, dptr(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux04:
+; LINUX-32-STATIC: 	movl	$ddst+64, dptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: qux04:
+; LINUX-32-PIC: 	movl	$ddst+64, dptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: qux04:
+; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	addq	$64, %rax
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _qux04:
+; DARWIN-32-STATIC: 	movl	$_ddst+64, _dptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _qux04:
+; DARWIN-32-DYNAMIC: 	movl	$_ddst+64, _dptr
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _qux04:
+; DARWIN-32-PIC: 	call	L20$pb
+; DARWIN-32-PIC-NEXT: L20$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L20$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _dptr-L20$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _qux04:
+; DARWIN-64-STATIC: 	leaq	_ddst+64(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _qux04:
+; DARWIN-64-DYNAMIC: 	leaq	_ddst+64(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _qux04:
+; DARWIN-64-PIC: 	leaq	_ddst+64(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+; qux05: (@dptr)[16] = dsrc[16] -- indirect store through the pointer
+; global with the +64 offset folded into the final store.  As in qux02,
+; the `ret void` appears after the first CHECK block (CHECK lines are
+; comments to the IR parser).
+define void @qux05() nounwind {
+entry:
+	%0 = load i32** @dptr, align 8
+	%1 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 16), align 32
+	%2 = getelementptr i32* %0, i64 16
+	store i32 %1, i32* %2, align 4
+; LINUX-64-STATIC: qux05:
+; LINUX-64-STATIC: movl    dsrc+64(%rip), %eax
+; LINUX-64-STATIC: movq    dptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 64(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux05:
+; LINUX-32-STATIC: 	movl	dsrc+64, %eax
+; LINUX-32-STATIC-NEXT: 	movl	dptr, %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%eax, 64(%ecx)
+; LINUX-32-STATIC-NEXT: 	ret
+	ret void
+
+; LINUX-32-PIC: qux05:
+; LINUX-32-PIC: 	movl	dsrc+64, %eax
+; LINUX-32-PIC-NEXT: 	movl	dptr, %ecx
+; LINUX-32-PIC-NEXT: 	movl	%eax, 64(%ecx)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: qux05:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _qux05:
+; DARWIN-32-STATIC: 	movl	_dsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_dptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _qux05:
+; DARWIN-32-DYNAMIC: 	movl	_dsrc+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _qux05:
+; DARWIN-32-PIC: 	call	L21$pb
+; DARWIN-32-PIC-NEXT: L21$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L21$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L21$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _qux05:
+; DARWIN-64-STATIC: 	movl	_dsrc+64(%rip), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _qux05:
+; DARWIN-64-DYNAMIC: 	movl	_dsrc+64(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _qux05:
+; DARWIN-64-PIC: 	movl	_dsrc+64(%rip), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+; qux06: ldst[16] = lsrc[16].  Like foo06 but with a folded +64
+; constant offset; the l-prefixed globals are accessed directly in
+; every model, including "(_lsrc-L22$pb)+64" under darwin 32-bit PIC.
+define void @qux06() nounwind {
+entry:
+	%0 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 16), align 4
+	store i32 %0, i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 16), align 4
+	ret void
+; LINUX-64-STATIC: qux06:
+; LINUX-64-STATIC: movl    lsrc+64(%rip), %eax
+; LINUX-64-STATIC: movl    %eax, ldst+64
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux06:
+; LINUX-32-STATIC: 	movl	lsrc+64, %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, ldst+64
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: qux06:
+; LINUX-32-PIC: 	movl	lsrc+64, %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, ldst+64
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: qux06:
+; LINUX-64-PIC: 	movl	lsrc+64(%rip), %eax
+; LINUX-64-PIC-NEXT: 	movl	%eax, ldst+64(%rip)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _qux06:
+; DARWIN-32-STATIC: 	movl	_lsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ldst+64
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _qux06:
+; DARWIN-32-DYNAMIC: 	movl	_lsrc+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _ldst+64
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _qux06:
+; DARWIN-32-PIC: 	call	L22$pb
+; DARWIN-32-PIC-NEXT: L22$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L22$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (_ldst-L22$pb)+64(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _qux06:
+; DARWIN-64-STATIC: 	movl	_lsrc+64(%rip), %eax
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, _ldst+64(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _qux06:
+; DARWIN-64-DYNAMIC: 	movl	_lsrc+64(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, _ldst+64(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _qux06:
+; DARWIN-64-PIC: 	movl	_lsrc+64(%rip), %eax
+; DARWIN-64-PIC-NEXT: 	movl	%eax, _ldst+64(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+; qux07: @lptr = &ldst[16].  Address-of-local-global with offset:
+; "$ldst+64" immediate (static), leaq with folded +64 (64-bit),
+; picbase-relative leal "(_ldst-L23$pb)+64" (darwin 32-bit PIC).
+define void @qux07() nounwind {
+entry:
+	store i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 16), i32** @lptr, align 8
+	ret void
+; LINUX-64-STATIC: qux07:
+; LINUX-64-STATIC: movq    $ldst+64, lptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux07:
+; LINUX-32-STATIC: 	movl	$ldst+64, lptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: qux07:
+; LINUX-32-PIC: 	movl	$ldst+64, lptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: qux07:
+; LINUX-64-PIC: 	leaq	ldst+64(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	%rax, lptr(%rip)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _qux07:
+; DARWIN-32-STATIC: 	movl	$_ldst+64, _lptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _qux07:
+; DARWIN-32-DYNAMIC: 	movl	$_ldst+64, _lptr
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _qux07:
+; DARWIN-32-PIC: 	call	L23$pb
+; DARWIN-32-PIC-NEXT: L23$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L23$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _lptr-L23$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _qux07:
+; DARWIN-64-STATIC: 	leaq	_ldst+64(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _qux07:
+; DARWIN-64-DYNAMIC: 	leaq	_ldst+64(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _qux07:
+; DARWIN-64-PIC: 	leaq	_ldst+64(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+; qux08: (@lptr)[16] = lsrc[16] -- indirect store through the local
+; pointer global.  As in qux02/qux05, the `ret void` follows the first
+; CHECK block; CHECK lines are inert comments to the IR parser.
+define void @qux08() nounwind {
+entry:
+	%0 = load i32** @lptr, align 8
+	%1 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 16), align 4
+	%2 = getelementptr i32* %0, i64 16
+	store i32 %1, i32* %2, align 4
+; LINUX-64-STATIC: qux08:
+; LINUX-64-STATIC: movl    lsrc+64(%rip), %eax
+; LINUX-64-STATIC: movq    lptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 64(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: qux08:
+; LINUX-32-STATIC: 	movl	lsrc+64, %eax
+; LINUX-32-STATIC-NEXT: 	movl	lptr, %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%eax, 64(%ecx)
+; LINUX-32-STATIC-NEXT: 	ret
+	ret void
+
+; LINUX-32-PIC: qux08:
+; LINUX-32-PIC: 	movl	lsrc+64, %eax
+; LINUX-32-PIC-NEXT: 	movl	lptr, %ecx
+; LINUX-32-PIC-NEXT: 	movl	%eax, 64(%ecx)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: qux08:
+; LINUX-64-PIC: 	movl	lsrc+64(%rip), %eax
+; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _qux08:
+; DARWIN-32-STATIC: 	movl	_lsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_lptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _qux08:
+; DARWIN-32-DYNAMIC: 	movl	_lsrc+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 64(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _qux08:
+; DARWIN-32-PIC: 	call	L24$pb
+; DARWIN-32-PIC-NEXT: L24$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L24$pb)+64(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L24$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, 64(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _qux08:
+; DARWIN-64-STATIC: 	movl	_lsrc+64(%rip), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _qux08:
+; DARWIN-64-DYNAMIC: 	movl	_lsrc+64(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _qux08:
+; DARWIN-64-PIC: 	movl	_lsrc+64(%rip), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+; ind00: dst[i] = src[i] with a runtime i64 index.  Checks scaled-index
+; addressing: static models fold the symbol as "src(,%reg,4)", while
+; PIC/GOT models must first load the base address into a register
+; (RIP-relative addressing cannot combine with an index register) and
+; then use "(%rax,%rdi,4)".
+define void @ind00(i64 %i) nounwind {
+entry:
+	%0 = getelementptr [131072 x i32]* @src, i64 0, i64 %i
+	%1 = load i32* %0, align 4
+	%2 = getelementptr [131072 x i32]* @dst, i64 0, i64 %i
+	store i32 %1, i32* %2, align 4
+	ret void
+; LINUX-64-STATIC: ind00:
+; LINUX-64-STATIC: movl    src(,%rdi,4), %eax
+; LINUX-64-STATIC: movl    %eax, dst(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind00:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	src(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, dst(,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ind00:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	src(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, dst(,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ind00:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ind00:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_src(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _dst(,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ind00:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ind00:
+; DARWIN-32-PIC: 	call	L25$pb
+; DARWIN-32-PIC-NEXT: L25$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L25$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: 	movl	(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L25$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ind00:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ind00:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ind00:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+; ixd00: xdst[i] = xsrc[i] — same pattern as ind00 but indexing the small
+; [32 x i32] globals @xsrc/@xdst (declared outside this chunk; presumably a
+; different linkage/visibility than @src/@dst — confirm at the file top).
+define void @ixd00(i64 %i) nounwind {
+entry:
+	%0 = getelementptr [32 x i32]* @xsrc, i64 0, i64 %i
+	%1 = load i32* %0, align 4
+	%2 = getelementptr [32 x i32]* @xdst, i64 0, i64 %i
+	store i32 %1, i32* %2, align 4
+	ret void
+; LINUX-64-STATIC: ixd00:
+; LINUX-64-STATIC: movl    xsrc(,%rdi,4), %eax
+; LINUX-64-STATIC: movl    %eax, xdst(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ixd00:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	xsrc(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, xdst(,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ixd00:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	xsrc(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, xdst(,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ixd00:
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	xdst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ixd00:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_xsrc(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _xdst(,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ixd00:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xsrc$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ixd00:
+; DARWIN-32-PIC: 	call	L26$pb
+; DARWIN-32-PIC-NEXT: L26$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L26$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: 	movl	(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L26$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ixd00:
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ixd00:
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ixd00:
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+; ind01: ptr = &dst[i] — stores the address of an indexed element rather than
+; its value, so the PIC variants must materialize the base with shl+add
+; instead of folding it into one addressing mode (see CHECK lines below).
+define void @ind01(i64 %i) nounwind {
+entry:
+	%0 = getelementptr [131072 x i32]* @dst, i64 0, i64 %i
+	store i32* %0, i32** @ptr, align 8
+	ret void
+; LINUX-64-STATIC: ind01:
+; LINUX-64-STATIC: leaq    dst(,%rdi,4), %rax
+; LINUX-64-STATIC: movq    %rax, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind01:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	dst(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ind01:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	dst(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ind01:
+; LINUX-64-PIC: 	shlq	$2, %rdi
+; LINUX-64-PIC-NEXT: 	addq	dst@GOTPCREL(%rip), %rdi
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	%rdi, (%rax)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ind01:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_dst(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ind01:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	shll	$2, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ind01:
+; DARWIN-32-PIC: 	call	L27$pb
+; DARWIN-32-PIC-NEXT: L27$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	shll	$2, %ecx
+; DARWIN-32-PIC-NEXT: 	addl	L_dst$non_lazy_ptr-L27$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L27$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ind01:
+; DARWIN-64-STATIC: 	shlq	$2, %rdi
+; DARWIN-64-STATIC-NEXT: 	addq	_dst@GOTPCREL(%rip), %rdi
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rdi, (%rax)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ind01:
+; DARWIN-64-DYNAMIC: 	shlq	$2, %rdi
+; DARWIN-64-DYNAMIC-NEXT: 	addq	_dst@GOTPCREL(%rip), %rdi
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rdi, (%rax)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ind01:
+; DARWIN-64-PIC: 	shlq	$2, %rdi
+; DARWIN-64-PIC-NEXT: 	addq	_dst@GOTPCREL(%rip), %rdi
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rdi, (%rax)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+; ixd01: ptr = &xdst[i] — address-of-element store, like ind01 but against the
+; [32 x i32] global @xdst.
+define void @ixd01(i64 %i) nounwind {
+entry:
+	%0 = getelementptr [32 x i32]* @xdst, i64 0, i64 %i
+	store i32* %0, i32** @ptr, align 8
+	ret void
+; LINUX-64-STATIC: ixd01:
+; LINUX-64-STATIC: leaq    xdst(,%rdi,4), %rax
+; LINUX-64-STATIC: movq    %rax, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ixd01:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	xdst(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ixd01:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	xdst(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ixd01:
+; LINUX-64-PIC: 	shlq	$2, %rdi
+; LINUX-64-PIC-NEXT: 	addq	xdst@GOTPCREL(%rip), %rdi
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	%rdi, (%rax)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ixd01:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_xdst(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ixd01:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	shll	$2, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ixd01:
+; DARWIN-32-PIC: 	call	L28$pb
+; DARWIN-32-PIC-NEXT: L28$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	shll	$2, %ecx
+; DARWIN-32-PIC-NEXT: 	addl	L_xdst$non_lazy_ptr-L28$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L28$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ixd01:
+; DARWIN-64-STATIC: 	shlq	$2, %rdi
+; DARWIN-64-STATIC-NEXT: 	addq	_xdst@GOTPCREL(%rip), %rdi
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rdi, (%rax)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ixd01:
+; DARWIN-64-DYNAMIC: 	shlq	$2, %rdi
+; DARWIN-64-DYNAMIC-NEXT: 	addq	_xdst@GOTPCREL(%rip), %rdi
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rdi, (%rax)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ixd01:
+; DARWIN-64-PIC: 	shlq	$2, %rdi
+; DARWIN-64-PIC-NEXT: 	addq	_xdst@GOTPCREL(%rip), %rdi
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rdi, (%rax)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+; ind02: (*ptr)[i] = src[i] — loads the destination base pointer from the
+; global @ptr first, so every PIC variant needs an extra pointer dereference
+; on the store side (movq (%rcx), %rcx in the 64-bit CHECK lines).
+define void @ind02(i64 %i) nounwind {
+entry:
+	%0 = load i32** @ptr, align 8
+	%1 = getelementptr [131072 x i32]* @src, i64 0, i64 %i
+	%2 = load i32* %1, align 4
+	%3 = getelementptr i32* %0, i64 %i
+	store i32 %2, i32* %3, align 4
+	ret void
+; LINUX-64-STATIC: ind02:
+; LINUX-64-STATIC: movl    src(,%rdi,4), %eax
+; LINUX-64-STATIC: movq    ptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, (%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind02:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	src(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	ptr, %edx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ind02:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	src(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	ptr, %edx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ind02:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ind02:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_src(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %edx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ind02:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%edx), %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ind02:
+; DARWIN-32-PIC: 	call	L29$pb
+; DARWIN-32-PIC-NEXT: L29$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L29$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: 	movl	(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L29$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ind02:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ind02:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ind02:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+; ixd02: (*ptr)[i] = xsrc[i] — same shape as ind02, reading from the
+; [32 x i32] global @xsrc.
+define void @ixd02(i64 %i) nounwind {
+entry:
+	%0 = load i32** @ptr, align 8
+	%1 = getelementptr [32 x i32]* @xsrc, i64 0, i64 %i
+	%2 = load i32* %1, align 4
+	%3 = getelementptr i32* %0, i64 %i
+	store i32 %2, i32* %3, align 4
+	ret void
+; LINUX-64-STATIC: ixd02:
+; LINUX-64-STATIC: movl    xsrc(,%rdi,4), %eax
+; LINUX-64-STATIC: movq    ptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, (%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ixd02:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	xsrc(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	ptr, %edx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ixd02:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	xsrc(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	ptr, %edx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ixd02:
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ixd02:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_xsrc(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %edx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ixd02:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xsrc$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%edx), %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ixd02:
+; DARWIN-32-PIC: 	call	L30$pb
+; DARWIN-32-PIC-NEXT: L30$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L30$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: 	movl	(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L30$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ixd02:
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ixd02:
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ixd02:
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+; ind03: ddst[i] = dsrc[i] — indexed copy between the @dsrc/@ddst globals
+; (declared outside this chunk; presumably non-preemptible linkage, since the
+; Darwin-64 CHECK lines use direct leaq(%rip) instead of GOTPCREL — confirm
+; against the global definitions at the top of the file).
+define void @ind03(i64 %i) nounwind {
+entry:
+	%0 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %i
+	%1 = load i32* %0, align 4
+	%2 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %i
+	store i32 %1, i32* %2, align 4
+	ret void
+; LINUX-64-STATIC: ind03:
+; LINUX-64-STATIC: movl    dsrc(,%rdi,4), %eax
+; LINUX-64-STATIC: movl    %eax, ddst(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind03:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	dsrc(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, ddst(,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ind03:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	dsrc(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, ddst(,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ind03:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ind03:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_dsrc(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _ddst(,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ind03:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dsrc(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, _ddst(,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ind03:
+; DARWIN-32-PIC: 	call	L31$pb
+; DARWIN-32-PIC-NEXT: L31$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_dsrc-L31$pb(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	%edx, _ddst-L31$pb(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ind03:
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	leaq	_ddst(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ind03:
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	_ddst(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ind03:
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	leaq	_ddst(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+; ind04: dptr = &ddst[i] — address-of-element store through the @ddst/@dptr
+; globals; Darwin-64 variants materialize the base with leaq(%rip) then index.
+define void @ind04(i64 %i) nounwind {
+entry:
+	%0 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %i
+	store i32* %0, i32** @dptr, align 8
+	ret void
+; LINUX-64-STATIC: ind04:
+; LINUX-64-STATIC: leaq    ddst(,%rdi,4), %rax
+; LINUX-64-STATIC: movq    %rax, dptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind04:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	ddst(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, dptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ind04:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	ddst(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, dptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ind04:
+; LINUX-64-PIC: 	shlq	$2, %rdi
+; LINUX-64-PIC-NEXT: 	addq	ddst@GOTPCREL(%rip), %rdi
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	%rdi, (%rax)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ind04:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_ddst(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _dptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ind04:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ddst(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _dptr
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ind04:
+; DARWIN-32-PIC: 	call	L32$pb
+; DARWIN-32-PIC-NEXT: L32$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	_ddst-L32$pb(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _dptr-L32$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ind04:
+; DARWIN-64-STATIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ind04:
+; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ind04:
+; DARWIN-64-PIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+; ind05: (*dptr)[i] = dsrc[i] — indirect destination loaded from @dptr,
+; source read from @dsrc; combines the ind02 (pointer-load) and ind03
+; (d-prefixed globals) patterns.
+define void @ind05(i64 %i) nounwind {
+entry:
+	%0 = load i32** @dptr, align 8
+	%1 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %i
+	%2 = load i32* %1, align 4
+	%3 = getelementptr i32* %0, i64 %i
+	store i32 %2, i32* %3, align 4
+	ret void
+; LINUX-64-STATIC: ind05:
+; LINUX-64-STATIC: movl    dsrc(,%rdi,4), %eax
+; LINUX-64-STATIC: movq    dptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, (%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind05:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	dsrc(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	dptr, %edx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ind05:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	dsrc(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	dptr, %edx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ind05:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ind05:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_dsrc(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	_dptr, %edx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ind05:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dsrc(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ind05:
+; DARWIN-32-PIC: 	call	L33$pb
+; DARWIN-32-PIC-NEXT: L33$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_dsrc-L33$pb(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L33$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ind05:
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ind05:
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ind05:
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+; ind06: ldst[i] = lsrc[i] — indexed copy between the @lsrc/@ldst globals
+; (declared outside this chunk; the l-prefix presumably means local/internal
+; linkage, matching the leaq(%rip) sequences in the 64-bit PIC CHECK lines —
+; confirm against the global definitions at the top of the file).
+define void @ind06(i64 %i) nounwind {
+entry:
+	%0 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %i
+	%1 = load i32* %0, align 4
+	%2 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %i
+	store i32 %1, i32* %2, align 4
+	ret void
+; LINUX-64-STATIC: ind06:
+; LINUX-64-STATIC: movl    lsrc(,%rdi,4), %eax
+; LINUX-64-STATIC: movl    %eax, ldst(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind06:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	lsrc(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, ldst(,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ind06:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	lsrc(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, ldst(,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ind06:
+; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	leaq	ldst(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ind06:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_lsrc(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _ldst(,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ind06:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lsrc(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, _ldst(,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ind06:
+; DARWIN-32-PIC: 	call	L34$pb
+; DARWIN-32-PIC-NEXT: L34$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_lsrc-L34$pb(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	%edx, _ldst-L34$pb(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ind06:
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	leaq	_ldst(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ind06:
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	_ldst(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ind06:
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	leaq	_ldst(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+; ind07: lptr = &ldst[i] — address-of-element store via the l-prefixed
+; globals; note LINUX-64-PIC gets plain leaq(%rip) here, no GOT indirection.
+define void @ind07(i64 %i) nounwind {
+entry:
+	%0 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %i
+	store i32* %0, i32** @lptr, align 8
+	ret void
+; LINUX-64-STATIC: ind07:
+; LINUX-64-STATIC: leaq    ldst(,%rdi,4), %rax
+; LINUX-64-STATIC: movq    %rax, lptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind07:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	ldst(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, lptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ind07:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	ldst(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, lptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ind07:
+; LINUX-64-PIC: 	leaq	ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	movq	%rax, lptr(%rip)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ind07:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_ldst(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _lptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ind07:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ldst(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _lptr
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ind07:
+; DARWIN-32-PIC: 	call	L35$pb
+; DARWIN-32-PIC-NEXT: L35$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	_ldst-L35$pb(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _lptr-L35$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ind07:
+; DARWIN-64-STATIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ind07:
+; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ind07:
+; DARWIN-64-PIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+; ind08: (*lptr)[i] = lsrc[i] — indirect destination loaded from @lptr,
+; source read from @lsrc; l-prefixed counterpart of ind02/ind05.
+define void @ind08(i64 %i) nounwind {
+entry:
+	%0 = load i32** @lptr, align 8
+	%1 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %i
+	%2 = load i32* %1, align 4
+	%3 = getelementptr i32* %0, i64 %i
+	store i32 %2, i32* %3, align 4
+	ret void
+; LINUX-64-STATIC: ind08:
+; LINUX-64-STATIC: movl    lsrc(,%rdi,4), %eax
+; LINUX-64-STATIC: movq    lptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, (%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ind08:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	lsrc(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	lptr, %edx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ind08:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	lsrc(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	lptr, %edx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ind08:
+; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ind08:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_lsrc(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	_lptr, %edx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ind08:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lsrc(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, (%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ind08:
+; DARWIN-32-PIC: 	call	L36$pb
+; DARWIN-32-PIC-NEXT: L36$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_lsrc-L36$pb(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L36$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, (%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ind08:
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ind08:
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ind08:
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, (%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @off00(i64 %i) nounwind { ; dst[i+16] = src[i+16] -- external arrays, constant-offset indexed copy
+entry:
+	%0 = add i64 %i, 16	; element index i+16; *4 bytes/elt = the "+64"/"64(...)" displacement in the checks
+	%1 = getelementptr [131072 x i32]* @src, i64 0, i64 %0
+	%2 = load i32* %1, align 4
+	%3 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0
+	store i32 %2, i32* %3, align 4
+	ret void
+; LINUX-64-STATIC: off00:
+; LINUX-64-STATIC: movl    src+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movl    %eax, dst+64(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off00:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	src+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, dst+64(,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: off00:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	src+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, dst+64(,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: off00:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _off00:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_src+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _dst+64(,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _off00:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	64(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _off00:
+; DARWIN-32-PIC: 	call	L37$pb
+; DARWIN-32-PIC-NEXT: L37$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L37$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: 	movl	64(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L37$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _off00:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _off00:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _off00:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @oxf00(i64 %i) nounwind { ; xdst[i+16] = xsrc[i+16] -- same as off00 but on the x-prefixed arrays (declared earlier in the file)
+entry:
+	%0 = add i64 %i, 16	; i+16 elements of i32 = 64-byte displacement in the checks
+	%1 = getelementptr [32 x i32]* @xsrc, i64 0, i64 %0
+	%2 = load i32* %1, align 4
+	%3 = getelementptr [32 x i32]* @xdst, i64 0, i64 %0
+	store i32 %2, i32* %3, align 4
+	ret void
+; LINUX-64-STATIC: oxf00:
+; LINUX-64-STATIC: movl    xsrc+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movl    %eax, xdst+64(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: oxf00:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	xsrc+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, xdst+64(,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: oxf00:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	xsrc+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, xdst+64(,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: oxf00:
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	xdst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _oxf00:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_xsrc+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _xdst+64(,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _oxf00:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xsrc$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	64(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _oxf00:
+; DARWIN-32-PIC: 	call	L38$pb
+; DARWIN-32-PIC-NEXT: L38$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L38$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: 	movl	64(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L38$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _oxf00:
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _oxf00:
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _oxf00:
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_xdst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @off01(i64 %i) nounwind { ; ptr = &dst[i+16] -- address computation only, expects lea in the checks
+entry:
+	%.sum = add i64 %i, 16	; i+16 elements of i32 = the 64-byte displacement below
+	%0 = getelementptr [131072 x i32]* @dst, i64 0, i64 %.sum
+	store i32* %0, i32** @ptr, align 8
+	ret void
+; LINUX-64-STATIC: off01:
+; LINUX-64-STATIC: leaq    dst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: movq    %rax, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off01:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	dst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: off01:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	dst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: off01:
+; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _off01:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_dst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _off01:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _off01:
+; DARWIN-32-PIC: 	call	L39$pb
+; DARWIN-32-PIC-NEXT: L39$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L39$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: 	leal	64(%edx,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L39$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _off01:
+; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _off01:
+; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _off01:
+; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @oxf01(i64 %i) nounwind { ; ptr = &xdst[i+16] -- off01 variant using the x-prefixed array
+entry:
+	%.sum = add i64 %i, 16	; i+16 elements of i32 = the 64-byte displacement below
+	%0 = getelementptr [32 x i32]* @xdst, i64 0, i64 %.sum
+	store i32* %0, i32** @ptr, align 8
+	ret void
+; LINUX-64-STATIC: oxf01:
+; LINUX-64-STATIC: leaq    xdst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: movq    %rax, ptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: oxf01:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	xdst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: oxf01:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	xdst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: oxf01:
+; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _oxf01:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_xdst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _oxf01:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _oxf01:
+; DARWIN-32-PIC: 	call	L40$pb
+; DARWIN-32-PIC-NEXT: L40$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L40$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: 	leal	64(%edx,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L40$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _oxf01:
+; DARWIN-64-STATIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _oxf01:
+; DARWIN-64-DYNAMIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _oxf01:
+; DARWIN-64-PIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @off02(i64 %i) nounwind { ; (*ptr)[i+16] = src[i+16] -- indexed store through a loaded pointer
+entry:
+	%0 = load i32** @ptr, align 8	; destination base comes from the global pointer
+	%1 = add i64 %i, 16	; i+16 elements of i32 = the 64-byte displacement below
+	%2 = getelementptr [131072 x i32]* @src, i64 0, i64 %1
+	%3 = load i32* %2, align 4
+	%4 = getelementptr i32* %0, i64 %1
+	store i32 %3, i32* %4, align 4
+	ret void
+; LINUX-64-STATIC: off02:
+; LINUX-64-STATIC: movl    src+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movq    ptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 64(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off02:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	src+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	ptr, %edx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: off02:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	src+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	ptr, %edx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: off02:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _off02:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_src+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %edx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _off02:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	64(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%edx), %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _off02:
+; DARWIN-32-PIC: 	call	L41$pb
+; DARWIN-32-PIC-NEXT: L41$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L41$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: 	movl	64(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L41$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _off02:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _off02:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _off02:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @oxf02(i64 %i) nounwind { ; (*ptr)[i+16] = xsrc[i+16] -- off02 variant using the x-prefixed source array
+entry:
+	%0 = load i32** @ptr, align 8	; destination base comes from the global pointer
+	%1 = add i64 %i, 16	; i+16 elements of i32 = the 64-byte displacement below
+	%2 = getelementptr [32 x i32]* @xsrc, i64 0, i64 %1
+	%3 = load i32* %2, align 4
+	%4 = getelementptr i32* %0, i64 %1
+	store i32 %3, i32* %4, align 4
+	ret void
+; LINUX-64-STATIC: oxf02:
+; LINUX-64-STATIC: movl    xsrc+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movq    ptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 64(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: oxf02:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	xsrc+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	ptr, %edx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: oxf02:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	xsrc+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	ptr, %edx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: oxf02:
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _oxf02:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_xsrc+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %edx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _oxf02:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xsrc$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	64(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%edx), %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _oxf02:
+; DARWIN-32-PIC: 	call	L42$pb
+; DARWIN-32-PIC-NEXT: L42$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L42$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: 	movl	64(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L42$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _oxf02:
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _oxf02:
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _oxf02:
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @off03(i64 %i) nounwind { ; ddst[i+16] = dsrc[i+16] -- d-prefixed globals (presumably hidden/default-visibility definitions declared earlier; note no GOT indirection in the Darwin checks)
+entry:
+	%0 = add i64 %i, 16	; i+16 elements of i32 = the 64-byte displacement below
+	%1 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %0
+	%2 = load i32* %1, align 4
+	%3 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0
+	store i32 %2, i32* %3, align 4
+	ret void
+; LINUX-64-STATIC: off03:
+; LINUX-64-STATIC: movl    dsrc+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movl    %eax, ddst+64(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off03:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	dsrc+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, ddst+64(,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: off03:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	dsrc+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, ddst+64(,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: off03:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _off03:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_dsrc+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _ddst+64(,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _off03:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dsrc+64(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, _ddst+64(,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _off03:
+; DARWIN-32-PIC: 	call	L43$pb
+; DARWIN-32-PIC-NEXT: L43$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L43$pb)+64(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	%edx, (_ddst-L43$pb)+64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _off03:
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	leaq	_ddst(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _off03:
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	_ddst(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _off03:
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	leaq	_ddst(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @off04(i64 %i) nounwind { ; dptr = &ddst[i+16] -- address computation into the d-prefixed pointer global
+entry:
+	%.sum = add i64 %i, 16	; i+16 elements of i32 = the 64-byte displacement below
+	%0 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %.sum
+	store i32* %0, i32** @dptr, align 8
+	ret void
+; LINUX-64-STATIC: off04:
+; LINUX-64-STATIC: leaq    ddst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: movq    %rax, dptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off04:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	ddst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, dptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: off04:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	ddst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, dptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: off04:
+; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _off04:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_ddst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _dptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _off04:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ddst+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _dptr
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _off04:
+; DARWIN-32-PIC: 	call	L44$pb
+; DARWIN-32-PIC-NEXT: L44$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L44$pb)+64(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _dptr-L44$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _off04:
+; DARWIN-64-STATIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _off04:
+; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _off04:
+; DARWIN-64-PIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @off05(i64 %i) nounwind { ; (*dptr)[i+16] = dsrc[i+16] -- indexed store through the d-prefixed pointer
+entry:
+	%0 = load i32** @dptr, align 8	; destination base comes from the global pointer
+	%1 = add i64 %i, 16	; i+16 elements of i32 = the 64-byte displacement below
+	%2 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %1
+	%3 = load i32* %2, align 4
+	%4 = getelementptr i32* %0, i64 %1
+	store i32 %3, i32* %4, align 4
+	ret void
+; LINUX-64-STATIC: off05:
+; LINUX-64-STATIC: movl    dsrc+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movq    dptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 64(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off05:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	dsrc+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	dptr, %edx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: off05:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	dsrc+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	dptr, %edx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: off05:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _off05:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_dsrc+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	_dptr, %edx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _off05:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dsrc+64(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _off05:
+; DARWIN-32-PIC: 	call	L45$pb
+; DARWIN-32-PIC-NEXT: L45$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L45$pb)+64(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L45$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _off05:
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _off05:
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _off05:
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @off06(i64 %i) nounwind { ; ldst[i+16] = lsrc[i+16] -- l-prefixed globals (presumably internal linkage; note direct RIP-relative access, no GOT, even in PIC checks)
+entry:
+	%0 = add i64 %i, 16	; i+16 elements of i32 = the 64-byte displacement below
+	%1 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %0
+	%2 = load i32* %1, align 4
+	%3 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0
+	store i32 %2, i32* %3, align 4
+	ret void
+; LINUX-64-STATIC: off06:
+; LINUX-64-STATIC: movl    lsrc+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movl    %eax, ldst+64(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off06:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	lsrc+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, ldst+64(,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: off06:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	lsrc+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, ldst+64(,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: off06:
+; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	leaq	ldst(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _off06:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_lsrc+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _ldst+64(,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _off06:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lsrc+64(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, _ldst+64(,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _off06:
+; DARWIN-32-PIC: 	call	L46$pb
+; DARWIN-32-PIC-NEXT: L46$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L46$pb)+64(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	%edx, (_ldst-L46$pb)+64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _off06:
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	leaq	_ldst(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _off06:
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	_ldst(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _off06:
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	leaq	_ldst(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @off07(i64 %i) nounwind { ; lptr = &ldst[i+16] -- address computation into the l-prefixed pointer global
+entry:
+	%.sum = add i64 %i, 16	; i+16 elements of i32 = the 64-byte displacement below
+	%0 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %.sum
+	store i32* %0, i32** @lptr, align 8
+	ret void
+; LINUX-64-STATIC: off07:
+; LINUX-64-STATIC: leaq    ldst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: movq    %rax, lptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off07:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	ldst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, lptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: off07:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	ldst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, lptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: off07:
+; LINUX-64-PIC: 	leaq	ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	movq	%rax, lptr(%rip)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _off07:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_ldst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _lptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _off07:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ldst+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _lptr
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _off07:
+; DARWIN-32-PIC: 	call	L47$pb
+; DARWIN-32-PIC-NEXT: L47$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L47$pb)+64(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _lptr-L47$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _off07:
+; DARWIN-64-STATIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _off07:
+; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _off07:
+; DARWIN-64-PIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @off08(i64 %i) nounwind { ; (*lptr)[i+16] = lsrc[i+16] -- indexed store through the l-prefixed pointer
+entry:
+	%0 = load i32** @lptr, align 8	; destination base comes from the global pointer
+	%1 = add i64 %i, 16	; i+16 elements of i32 = the 64-byte displacement below
+	%2 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %1
+	%3 = load i32* %2, align 4
+	%4 = getelementptr i32* %0, i64 %1
+	store i32 %3, i32* %4, align 4
+	ret void
+; LINUX-64-STATIC: off08:
+; LINUX-64-STATIC: movl    lsrc+64(,%rdi,4), %eax
+; LINUX-64-STATIC: movq    lptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 64(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: off08:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	lsrc+64(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	lptr, %edx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: off08:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	lsrc+64(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	lptr, %edx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: off08:
+; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _off08:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_lsrc+64(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	_lptr, %edx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _off08:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lsrc+64(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 64(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _off08:
+; DARWIN-32-PIC: 	call	L48$pb
+; DARWIN-32-PIC-NEXT: L48$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L48$pb)+64(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L48$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, 64(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _off08:
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _off08:
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _off08:
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	64(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 64(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @moo00(i64 %i) nounwind {
+entry:
+	%0 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 65536), align 4
+	store i32 %0, i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 65536), align 4
+	ret void
+; LINUX-64-STATIC: moo00:
+; LINUX-64-STATIC: movl    src+262144(%rip), %eax
+; LINUX-64-STATIC: movl    %eax, dst+262144(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo00:
+; LINUX-32-STATIC: 	movl	src+262144, %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, dst+262144
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: moo00:
+; LINUX-32-PIC: 	movl	src+262144, %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, dst+262144
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: moo00:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	262144(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _moo00:
+; DARWIN-32-STATIC: 	movl	_src+262144, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _dst+262144
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _moo00:
+; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	262144(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 262144(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _moo00:
+; DARWIN-32-PIC: 	call	L49$pb
+; DARWIN-32-PIC-NEXT: L49$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L49$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	262144(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L49$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, 262144(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _moo00:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	262144(%rax), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _moo00:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	262144(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _moo00:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	262144(%rax), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @moo01(i64 %i) nounwind {
+entry:
+	store i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 65536), i32** @ptr, align 8
+	ret void
+; LINUX-64-STATIC: moo01:
+; LINUX-64-STATIC: movq    $dst+262144, ptr(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo01:
+; LINUX-32-STATIC: 	movl	$dst+262144, ptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: moo01:
+; LINUX-32-PIC: 	movl	$dst+262144, ptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: moo01:
+; LINUX-64-PIC: 	movl	$262144, %eax
+; LINUX-64-PIC-NEXT: 	addq	dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _moo01:
+; DARWIN-32-STATIC: 	movl	$_dst+262144, _ptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _moo01:
+; DARWIN-32-DYNAMIC: 	movl	$262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _moo01:
+; DARWIN-32-PIC: 	call	L50$pb
+; DARWIN-32-PIC-NEXT: L50$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	$262144, %ecx
+; DARWIN-32-PIC-NEXT: 	addl	L_dst$non_lazy_ptr-L50$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L50$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _moo01:
+; DARWIN-64-STATIC: 	movl	$262144, %eax
+; DARWIN-64-STATIC-NEXT: 	addq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _moo01:
+; DARWIN-64-DYNAMIC: 	movl	$262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _moo01:
+; DARWIN-64-PIC: 	movl	$262144, %eax
+; DARWIN-64-PIC-NEXT: 	addq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @moo02(i64 %i) nounwind {
+entry:
+	%0 = load i32** @ptr, align 8
+	%1 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 65536), align 4
+	%2 = getelementptr i32* %0, i64 65536
+	store i32 %1, i32* %2, align 4
+	ret void
+; LINUX-64-STATIC: moo02:
+; LINUX-64-STATIC: movl    src+262144(%rip), %eax
+; LINUX-64-STATIC: movq    ptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 262144(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo02:
+; LINUX-32-STATIC: 	movl	src+262144, %eax
+; LINUX-32-STATIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%eax, 262144(%ecx)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: moo02:
+; LINUX-32-PIC: 	movl	src+262144, %eax
+; LINUX-32-PIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-PIC-NEXT: 	movl	%eax, 262144(%ecx)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: moo02:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	262144(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _moo02:
+; DARWIN-32-STATIC: 	movl	_src+262144, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, 262144(%ecx)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _moo02:
+; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	262144(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%ecx), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 262144(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _moo02:
+; DARWIN-32-PIC: 	call	L51$pb
+; DARWIN-32-PIC-NEXT: L51$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L51$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	262144(%ecx), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L51$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, 262144(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _moo02:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	262144(%rax), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _moo02:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	262144(%rax), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _moo02:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	262144(%rax), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @moo03(i64 %i) nounwind {
+entry:
+	%0 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 65536), align 32
+	store i32 %0, i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 65536), align 32
+	ret void
+; LINUX-64-STATIC: moo03:
+; LINUX-64-STATIC: movl    dsrc+262144(%rip), %eax
+; LINUX-64-STATIC: movl    %eax, ddst+262144(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo03:
+; LINUX-32-STATIC: 	movl	dsrc+262144, %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, ddst+262144
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: moo03:
+; LINUX-32-PIC: 	movl	dsrc+262144, %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, ddst+262144
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: moo03:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	262144(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _moo03:
+; DARWIN-32-STATIC: 	movl	_dsrc+262144, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ddst+262144
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _moo03:
+; DARWIN-32-DYNAMIC: 	movl	_dsrc+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _ddst+262144
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _moo03:
+; DARWIN-32-PIC: 	call	L52$pb
+; DARWIN-32-PIC-NEXT: L52$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L52$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (_ddst-L52$pb)+262144(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _moo03:
+; DARWIN-64-STATIC: 	movl	_dsrc+262144(%rip), %eax
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, _ddst+262144(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _moo03:
+; DARWIN-64-DYNAMIC: 	movl	_dsrc+262144(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, _ddst+262144(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _moo03:
+; DARWIN-64-PIC: 	movl	_dsrc+262144(%rip), %eax
+; DARWIN-64-PIC-NEXT: 	movl	%eax, _ddst+262144(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @moo04(i64 %i) nounwind {
+entry:
+	store i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 65536), i32** @dptr, align 8
+	ret void
+; LINUX-64-STATIC: moo04:
+; LINUX-64-STATIC: movq    $ddst+262144, dptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo04:
+; LINUX-32-STATIC: 	movl	$ddst+262144, dptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: moo04:
+; LINUX-32-PIC: 	movl	$ddst+262144, dptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: moo04:
+; LINUX-64-PIC: 	movl	$262144, %eax
+; LINUX-64-PIC-NEXT: 	addq	ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _moo04:
+; DARWIN-32-STATIC: 	movl	$_ddst+262144, _dptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _moo04:
+; DARWIN-32-DYNAMIC: 	movl	$_ddst+262144, _dptr
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _moo04:
+; DARWIN-32-PIC: 	call	L53$pb
+; DARWIN-32-PIC-NEXT: L53$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L53$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _dptr-L53$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _moo04:
+; DARWIN-64-STATIC: 	leaq	_ddst+262144(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _moo04:
+; DARWIN-64-DYNAMIC: 	leaq	_ddst+262144(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _moo04:
+; DARWIN-64-PIC: 	leaq	_ddst+262144(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @moo05(i64 %i) nounwind {
+entry:
+	%0 = load i32** @dptr, align 8
+	%1 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 65536), align 32
+	%2 = getelementptr i32* %0, i64 65536
+	store i32 %1, i32* %2, align 4
+	ret void
+; LINUX-64-STATIC: moo05:
+; LINUX-64-STATIC: movl    dsrc+262144(%rip), %eax
+; LINUX-64-STATIC: movq    dptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 262144(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo05:
+; LINUX-32-STATIC: 	movl	dsrc+262144, %eax
+; LINUX-32-STATIC-NEXT: 	movl	dptr, %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%eax, 262144(%ecx)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: moo05:
+; LINUX-32-PIC: 	movl	dsrc+262144, %eax
+; LINUX-32-PIC-NEXT: 	movl	dptr, %ecx
+; LINUX-32-PIC-NEXT: 	movl	%eax, 262144(%ecx)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: moo05:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	262144(%rax), %eax
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _moo05:
+; DARWIN-32-STATIC: 	movl	_dsrc+262144, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_dptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, 262144(%ecx)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _moo05:
+; DARWIN-32-DYNAMIC: 	movl	_dsrc+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 262144(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _moo05:
+; DARWIN-32-PIC: 	call	L54$pb
+; DARWIN-32-PIC-NEXT: L54$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L54$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L54$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, 262144(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _moo05:
+; DARWIN-64-STATIC: 	movl	_dsrc+262144(%rip), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _moo05:
+; DARWIN-64-DYNAMIC: 	movl	_dsrc+262144(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _moo05:
+; DARWIN-64-PIC: 	movl	_dsrc+262144(%rip), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @moo06(i64 %i) nounwind {
+entry:
+	%0 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 65536), align 4
+	store i32 %0, i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 65536), align 4
+	ret void
+; LINUX-64-STATIC: moo06:
+; LINUX-64-STATIC: movl    lsrc+262144(%rip), %eax
+; LINUX-64-STATIC: movl    %eax, ldst+262144(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo06:
+; LINUX-32-STATIC: 	movl	lsrc+262144, %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, ldst+262144
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: moo06:
+; LINUX-32-PIC: 	movl	lsrc+262144, %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, ldst+262144
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: moo06:
+; LINUX-64-PIC: 	movl	lsrc+262144(%rip), %eax
+; LINUX-64-PIC-NEXT: 	movl	%eax, ldst+262144(%rip)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _moo06:
+; DARWIN-32-STATIC: 	movl	_lsrc+262144, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ldst+262144
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _moo06:
+; DARWIN-32-DYNAMIC: 	movl	_lsrc+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _ldst+262144
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _moo06:
+; DARWIN-32-PIC: 	call	L55$pb
+; DARWIN-32-PIC-NEXT: L55$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L55$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (_ldst-L55$pb)+262144(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _moo06:
+; DARWIN-64-STATIC: 	movl	_lsrc+262144(%rip), %eax
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, _ldst+262144(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _moo06:
+; DARWIN-64-DYNAMIC: 	movl	_lsrc+262144(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, _ldst+262144(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _moo06:
+; DARWIN-64-PIC: 	movl	_lsrc+262144(%rip), %eax
+; DARWIN-64-PIC-NEXT: 	movl	%eax, _ldst+262144(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @moo07(i64 %i) nounwind {
+entry:
+	store i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 65536), i32** @lptr, align 8
+	ret void
+; LINUX-64-STATIC: moo07:
+; LINUX-64-STATIC: movq    $ldst+262144, lptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo07:
+; LINUX-32-STATIC: 	movl	$ldst+262144, lptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: moo07:
+; LINUX-32-PIC: 	movl	$ldst+262144, lptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: moo07:
+; LINUX-64-PIC: 	leaq	ldst+262144(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	%rax, lptr(%rip)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _moo07:
+; DARWIN-32-STATIC: 	movl	$_ldst+262144, _lptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _moo07:
+; DARWIN-32-DYNAMIC: 	movl	$_ldst+262144, _lptr
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _moo07:
+; DARWIN-32-PIC: 	call	L56$pb
+; DARWIN-32-PIC-NEXT: L56$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L56$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _lptr-L56$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _moo07:
+; DARWIN-64-STATIC: 	leaq	_ldst+262144(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _moo07:
+; DARWIN-64-DYNAMIC: 	leaq	_ldst+262144(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _moo07:
+; DARWIN-64-PIC: 	leaq	_ldst+262144(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @moo08(i64 %i) nounwind {
+entry:
+	%0 = load i32** @lptr, align 8
+	%1 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 65536), align 4
+	%2 = getelementptr i32* %0, i64 65536
+	store i32 %1, i32* %2, align 4
+	ret void
+; LINUX-64-STATIC: moo08:
+; LINUX-64-STATIC: movl    lsrc+262144(%rip), %eax
+; LINUX-64-STATIC: movq    lptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 262144(%rcx)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: moo08:
+; LINUX-32-STATIC: 	movl	lsrc+262144, %eax
+; LINUX-32-STATIC-NEXT: 	movl	lptr, %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%eax, 262144(%ecx)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: moo08:
+; LINUX-32-PIC: 	movl	lsrc+262144, %eax
+; LINUX-32-PIC-NEXT: 	movl	lptr, %ecx
+; LINUX-32-PIC-NEXT: 	movl	%eax, 262144(%ecx)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: moo08:
+; LINUX-64-PIC: 	movl	lsrc+262144(%rip), %eax
+; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _moo08:
+; DARWIN-32-STATIC: 	movl	_lsrc+262144, %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_lptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, 262144(%ecx)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _moo08:
+; DARWIN-32-DYNAMIC: 	movl	_lsrc+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, 262144(%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _moo08:
+; DARWIN-32-PIC: 	call	L57$pb
+; DARWIN-32-PIC-NEXT: L57$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L57$pb)+262144(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L57$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, 262144(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _moo08:
+; DARWIN-64-STATIC: 	movl	_lsrc+262144(%rip), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _moo08:
+; DARWIN-64-DYNAMIC: 	movl	_lsrc+262144(%rip), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _moo08:
+; DARWIN-64-PIC: 	movl	_lsrc+262144(%rip), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @big00(i64 %i) nounwind {
+entry:
+	%0 = add i64 %i, 65536
+	%1 = getelementptr [131072 x i32]* @src, i64 0, i64 %0
+	%2 = load i32* %1, align 4
+	%3 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0
+	store i32 %2, i32* %3, align 4
+	ret void
+; LINUX-64-STATIC: big00:
+; LINUX-64-STATIC: movl    src+262144(,%rdi,4), %eax
+; LINUX-64-STATIC: movl    %eax, dst+262144(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big00:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	src+262144(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, dst+262144(,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: big00:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	src+262144(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, dst+262144(,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: big00:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	dst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _big00:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_src+262144(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _dst+262144(,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _big00:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	262144(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _big00:
+; DARWIN-32-PIC: 	call	L58$pb
+; DARWIN-32-PIC-NEXT: L58$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L58$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: 	movl	262144(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L58$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, 262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _big00:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _big00:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _big00:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_dst@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @big01(i64 %i) nounwind {
+entry:
+	%.sum = add i64 %i, 65536
+	%0 = getelementptr [131072 x i32]* @dst, i64 0, i64 %.sum
+	store i32* %0, i32** @ptr, align 8
+	ret void
+; LINUX-64-STATIC: big01:
+; LINUX-64-STATIC: leaq    dst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: movq    %rax, ptr(%rip)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big01:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	dst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: big01:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	dst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, ptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: big01:
+; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _big01:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_dst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _ptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _big01:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, (%ecx)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _big01:
+; DARWIN-32-PIC: 	call	L59$pb
+; DARWIN-32-PIC-NEXT: L59$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L59$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: 	leal	262144(%edx,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L59$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, (%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _big01:
+; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _big01:
+; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _big01:
+; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @big02(i64 %i) nounwind {
+entry:
+	%0 = load i32** @ptr, align 8
+	%1 = add i64 %i, 65536
+	%2 = getelementptr [131072 x i32]* @src, i64 0, i64 %1
+	%3 = load i32* %2, align 4
+	%4 = getelementptr i32* %0, i64 %1
+	store i32 %3, i32* %4, align 4
+	ret void
+; LINUX-64-STATIC: big02:
+; LINUX-64-STATIC: movl    src+262144(,%rdi,4), %eax
+; LINUX-64-STATIC: movq    ptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 262144(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big02:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	src+262144(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	ptr, %edx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: big02:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	src+262144(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	ptr, %edx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: big02:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _big02:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_src+262144(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %edx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _big02:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	262144(%ecx,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ptr$non_lazy_ptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%edx), %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _big02:
+; DARWIN-32-PIC: 	call	L60$pb
+; DARWIN-32-PIC-NEXT: L60$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L60$pb(%eax), %edx
+; DARWIN-32-PIC-NEXT: 	movl	262144(%edx,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L60$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, 262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _big02:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _big02:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _big02:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @big03(i64 %i) nounwind {
+entry:
+	%0 = add i64 %i, 65536
+	%1 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %0
+	%2 = load i32* %1, align 4
+	%3 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0
+	store i32 %2, i32* %3, align 4
+	ret void
+; LINUX-64-STATIC: big03:
+; LINUX-64-STATIC: movl    dsrc+262144(,%rdi,4), %eax
+; LINUX-64-STATIC: movl    %eax, ddst+262144(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big03:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	dsrc+262144(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, ddst+262144(,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: big03:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	dsrc+262144(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, ddst+262144(,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: big03:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	ddst@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _big03:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_dsrc+262144(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _ddst+262144(,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _big03:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dsrc+262144(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, _ddst+262144(,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _big03:
+; DARWIN-32-PIC: 	call	L61$pb
+; DARWIN-32-PIC-NEXT: L61$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L61$pb)+262144(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	%edx, (_ddst-L61$pb)+262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _big03:
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	leaq	_ddst(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _big03:
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	_ddst(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _big03:
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	leaq	_ddst(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @big04(i64 %i) nounwind {
+entry:
+	%.sum = add i64 %i, 65536
+	%0 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %.sum
+	store i32* %0, i32** @dptr, align 8
+	ret void
+; LINUX-64-STATIC: big04:
+; LINUX-64-STATIC: leaq    ddst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: movq    %rax, dptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big04:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	ddst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, dptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: big04:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	ddst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, dptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: big04:
+; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	%rax, (%rcx)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _big04:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_ddst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _dptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _big04:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ddst+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _dptr
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _big04:
+; DARWIN-32-PIC: 	call	L62$pb
+; DARWIN-32-PIC-NEXT: L62$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L62$pb)+262144(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _dptr-L62$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _big04:
+; DARWIN-64-STATIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _big04:
+; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _big04:
+; DARWIN-64-PIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rax, _dptr(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @big05(i64 %i) nounwind {
+entry:
+	%0 = load i32** @dptr, align 8
+	%1 = add i64 %i, 65536
+	%2 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %1
+	%3 = load i32* %2, align 4
+	%4 = getelementptr i32* %0, i64 %1
+	store i32 %3, i32* %4, align 4
+	ret void
+; LINUX-64-STATIC: big05:
+; LINUX-64-STATIC: movl    dsrc+262144(,%rdi,4), %eax
+; LINUX-64-STATIC: movq    dptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 262144(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big05:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	dsrc+262144(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	dptr, %edx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: big05:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	dsrc+262144(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	dptr, %edx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: big05:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movq	(%rcx), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _big05:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_dsrc+262144(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	_dptr, %edx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _big05:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dsrc+262144(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _big05:
+; DARWIN-32-PIC: 	call	L63$pb
+; DARWIN-32-PIC-NEXT: L63$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	(_dsrc-L63$pb)+262144(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L63$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, 262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _big05:
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _big05:
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _big05:
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_dptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @big06(i64 %i) nounwind {
+entry:
+	%0 = add i64 %i, 65536
+	%1 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %0
+	%2 = load i32* %1, align 4
+	%3 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0
+	store i32 %2, i32* %3, align 4
+	ret void
+; LINUX-64-STATIC: big06:
+; LINUX-64-STATIC: movl    lsrc+262144(,%rdi,4), %eax
+; LINUX-64-STATIC: movl    %eax, ldst+262144(,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big06:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	lsrc+262144(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, ldst+262144(,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: big06:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	lsrc+262144(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, ldst+262144(,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: big06:
+; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	leaq	ldst(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _big06:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_lsrc+262144(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, _ldst+262144(,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _big06:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lsrc+262144(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, _ldst+262144(,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _big06:
+; DARWIN-32-PIC: 	call	L64$pb
+; DARWIN-32-PIC-NEXT: L64$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L64$pb)+262144(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	%edx, (_ldst-L64$pb)+262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _big06:
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	leaq	_ldst(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _big06:
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	_ldst(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _big06:
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	leaq	_ldst(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @big07(i64 %i) nounwind {
+entry:
+	%.sum = add i64 %i, 65536
+	%0 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %.sum
+	store i32* %0, i32** @lptr, align 8
+	ret void
+; LINUX-64-STATIC: big07:
+; LINUX-64-STATIC: leaq    ldst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: movq    %rax, lptr
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big07:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	ldst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	movl	%eax, lptr
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: big07:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	ldst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	movl	%eax, lptr
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: big07:
+; LINUX-64-PIC: 	leaq	ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	movq	%rax, lptr(%rip)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _big07:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_ldst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	%eax, _lptr
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _big07:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ldst+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%eax, _lptr
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _big07:
+; DARWIN-32-PIC: 	call	L65$pb
+; DARWIN-32-PIC-NEXT: L65$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L65$pb)+262144(%eax,%ecx,4), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	%ecx, _lptr-L65$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _big07:
+; DARWIN-64-STATIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _big07:
+; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _big07:
+; DARWIN-64-PIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	movq	%rax, _lptr(%rip)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @big08(i64 %i) nounwind {
+entry:
+	%0 = load i32** @lptr, align 8
+	%1 = add i64 %i, 65536
+	%2 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %1
+	%3 = load i32* %2, align 4
+	%4 = getelementptr i32* %0, i64 %1
+	store i32 %3, i32* %4, align 4
+	ret void
+; LINUX-64-STATIC: big08:
+; LINUX-64-STATIC: movl    lsrc+262144(,%rdi,4), %eax
+; LINUX-64-STATIC: movq    lptr(%rip), %rcx
+; LINUX-64-STATIC: movl    %eax, 262144(%rcx,%rdi,4)
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: big08:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	lsrc+262144(,%eax,4), %ecx
+; LINUX-32-STATIC-NEXT: 	movl	lptr, %edx
+; LINUX-32-STATIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: big08:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	lsrc+262144(,%eax,4), %ecx
+; LINUX-32-PIC-NEXT: 	movl	lptr, %edx
+; LINUX-32-PIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: big08:
+; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; LINUX-64-PIC-NEXT: 	movq	lptr(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _big08:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_lsrc+262144(,%eax,4), %ecx
+; DARWIN-32-STATIC-NEXT: 	movl	_lptr, %edx
+; DARWIN-32-STATIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _big08:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lsrc+262144(,%eax,4), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, %edx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	%ecx, 262144(%edx,%eax,4)
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _big08:
+; DARWIN-32-PIC: 	call	L66$pb
+; DARWIN-32-PIC-NEXT: L66$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	(_lsrc-L66$pb)+262144(%eax,%ecx,4), %edx
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L66$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	%edx, 262144(%eax,%ecx,4)
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _big08:
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-STATIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _big08:
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _big08:
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movl	262144(%rax,%rdi,4), %eax
+; DARWIN-64-PIC-NEXT: 	movq	_lptr(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	%eax, 262144(%rcx,%rdi,4)
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bar00() nounwind {
+entry:
+	ret i8* bitcast ([131072 x i32]* @src to i8*)
+; LINUX-64-STATIC: bar00:
+; LINUX-64-STATIC: movl    $src, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar00:
+; LINUX-32-STATIC: 	movl	$src, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bar00:
+; LINUX-32-PIC: 	movl	$src, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bar00:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bar00:
+; DARWIN-32-STATIC: 	movl	$_src, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bar00:
+; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bar00:
+; DARWIN-32-PIC: 	call	L67$pb
+; DARWIN-32-PIC-NEXT: L67$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L67$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bar00:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bar00:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bar00:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bxr00() nounwind {
+entry:
+	ret i8* bitcast ([32 x i32]* @xsrc to i8*)
+; LINUX-64-STATIC: bxr00:
+; LINUX-64-STATIC: movl    $xsrc, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bxr00:
+; LINUX-32-STATIC: 	movl	$xsrc, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bxr00:
+; LINUX-32-PIC: 	movl	$xsrc, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bxr00:
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bxr00:
+; DARWIN-32-STATIC: 	movl	$_xsrc, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bxr00:
+; DARWIN-32-DYNAMIC: 	movl	L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bxr00:
+; DARWIN-32-PIC: 	call	L68$pb
+; DARWIN-32-PIC-NEXT: L68$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L68$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bxr00:
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bxr00:
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bxr00:
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bar01() nounwind {
+entry:
+	ret i8* bitcast ([131072 x i32]* @dst to i8*)
+; LINUX-64-STATIC: bar01:
+; LINUX-64-STATIC: movl    $dst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar01:
+; LINUX-32-STATIC: 	movl	$dst, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bar01:
+; LINUX-32-PIC: 	movl	$dst, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bar01:
+; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bar01:
+; DARWIN-32-STATIC: 	movl	$_dst, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bar01:
+; DARWIN-32-DYNAMIC: 	movl	L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bar01:
+; DARWIN-32-PIC: 	call	L69$pb
+; DARWIN-32-PIC-NEXT: L69$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L69$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bar01:
+; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bar01:
+; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bar01:
+; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bxr01() nounwind {
+entry:
+	ret i8* bitcast ([32 x i32]* @xdst to i8*)
+; LINUX-64-STATIC: bxr01:
+; LINUX-64-STATIC: movl    $xdst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bxr01:
+; LINUX-32-STATIC: 	movl	$xdst, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bxr01:
+; LINUX-32-PIC: 	movl	$xdst, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bxr01:
+; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bxr01:
+; DARWIN-32-STATIC: 	movl	$_xdst, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bxr01:
+; DARWIN-32-DYNAMIC: 	movl	L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bxr01:
+; DARWIN-32-PIC: 	call	L70$pb
+; DARWIN-32-PIC-NEXT: L70$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L70$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bxr01:
+; DARWIN-64-STATIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bxr01:
+; DARWIN-64-DYNAMIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bxr01:
+; DARWIN-64-PIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bar02() nounwind {
+entry:
+	ret i8* bitcast (i32** @ptr to i8*)
+; LINUX-64-STATIC: bar02:
+; LINUX-64-STATIC: movl    $ptr, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar02:
+; LINUX-32-STATIC: 	movl	$ptr, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bar02:
+; LINUX-32-PIC: 	movl	$ptr, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bar02:
+; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bar02:
+; DARWIN-32-STATIC: 	movl	$_ptr, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bar02:
+; DARWIN-32-DYNAMIC: 	movl	L_ptr$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bar02:
+; DARWIN-32-PIC: 	call	L71$pb
+; DARWIN-32-PIC-NEXT: L71$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L71$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bar02:
+; DARWIN-64-STATIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bar02:
+; DARWIN-64-DYNAMIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bar02:
+; DARWIN-64-PIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bar03() nounwind {
+entry:
+	ret i8* bitcast ([131072 x i32]* @dsrc to i8*)
+; LINUX-64-STATIC: bar03:
+; LINUX-64-STATIC: movl    $dsrc, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar03:
+; LINUX-32-STATIC: 	movl	$dsrc, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bar03:
+; LINUX-32-PIC: 	movl	$dsrc, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bar03:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bar03:
+; DARWIN-32-STATIC: 	movl	$_dsrc, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bar03:
+; DARWIN-32-DYNAMIC: 	movl	$_dsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bar03:
+; DARWIN-32-PIC: 	call	L72$pb
+; DARWIN-32-PIC-NEXT: L72$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_dsrc-L72$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bar03:
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bar03:
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bar03:
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bar04() nounwind {
+entry:
+	ret i8* bitcast ([131072 x i32]* @ddst to i8*)
+; LINUX-64-STATIC: bar04:
+; LINUX-64-STATIC: movl    $ddst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar04:
+; LINUX-32-STATIC: 	movl	$ddst, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bar04:
+; LINUX-32-PIC: 	movl	$ddst, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bar04:
+; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bar04:
+; DARWIN-32-STATIC: 	movl	$_ddst, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bar04:
+; DARWIN-32-DYNAMIC: 	movl	$_ddst, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bar04:
+; DARWIN-32-PIC: 	call	L73$pb
+; DARWIN-32-PIC-NEXT: L73$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_ddst-L73$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bar04:
+; DARWIN-64-STATIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bar04:
+; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bar04:
+; DARWIN-64-PIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bar05() nounwind {
+entry:
+	ret i8* bitcast (i32** @dptr to i8*)
+; LINUX-64-STATIC: bar05:
+; LINUX-64-STATIC: movl    $dptr, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar05:
+; LINUX-32-STATIC: 	movl	$dptr, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bar05:
+; LINUX-32-PIC: 	movl	$dptr, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bar05:
+; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bar05:
+; DARWIN-32-STATIC: 	movl	$_dptr, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bar05:
+; DARWIN-32-DYNAMIC: 	movl	$_dptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bar05:
+; DARWIN-32-PIC: 	call	L74$pb
+; DARWIN-32-PIC-NEXT: L74$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_dptr-L74$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bar05:
+; DARWIN-64-STATIC: 	leaq	_dptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bar05:
+; DARWIN-64-DYNAMIC: 	leaq	_dptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bar05:
+; DARWIN-64-PIC: 	leaq	_dptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bar06() nounwind {
+entry:
+	ret i8* bitcast ([131072 x i32]* @lsrc to i8*)
+; LINUX-64-STATIC: bar06:
+; LINUX-64-STATIC: movl    $lsrc, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar06:
+; LINUX-32-STATIC: 	movl	$lsrc, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bar06:
+; LINUX-32-PIC: 	movl	$lsrc, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bar06:
+; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bar06:
+; DARWIN-32-STATIC: 	movl	$_lsrc, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bar06:
+; DARWIN-32-DYNAMIC: 	movl	$_lsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bar06:
+; DARWIN-32-PIC: 	call	L75$pb
+; DARWIN-32-PIC-NEXT: L75$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_lsrc-L75$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bar06:
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bar06:
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bar06:
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bar07() nounwind {
+entry:
+	ret i8* bitcast ([131072 x i32]* @ldst to i8*)
+; LINUX-64-STATIC: bar07:
+; LINUX-64-STATIC: movl    $ldst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar07:
+; LINUX-32-STATIC: 	movl	$ldst, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bar07:
+; LINUX-32-PIC: 	movl	$ldst, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bar07:
+; LINUX-64-PIC: 	leaq	ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bar07:
+; DARWIN-32-STATIC: 	movl	$_ldst, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bar07:
+; DARWIN-32-DYNAMIC: 	movl	$_ldst, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bar07:
+; DARWIN-32-PIC: 	call	L76$pb
+; DARWIN-32-PIC-NEXT: L76$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_ldst-L76$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bar07:
+; DARWIN-64-STATIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bar07:
+; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bar07:
+; DARWIN-64-PIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bar08() nounwind {
+entry:
+	ret i8* bitcast (i32** @lptr to i8*)
+; LINUX-64-STATIC: bar08:
+; LINUX-64-STATIC: movl    $lptr, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bar08:
+; LINUX-32-STATIC: 	movl	$lptr, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bar08:
+; LINUX-32-PIC: 	movl	$lptr, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bar08:
+; LINUX-64-PIC: 	leaq	lptr(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bar08:
+; DARWIN-32-STATIC: 	movl	$_lptr, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bar08:
+; DARWIN-32-DYNAMIC: 	movl	$_lptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bar08:
+; DARWIN-32-PIC: 	call	L77$pb
+; DARWIN-32-PIC-NEXT: L77$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_lptr-L77$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bar08:
+; DARWIN-64-STATIC: 	leaq	_lptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bar08:
+; DARWIN-64-DYNAMIC: 	leaq	_lptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bar08:
+; DARWIN-64-PIC: 	leaq	_lptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @har00() nounwind {
+entry:
+	ret i8* bitcast ([131072 x i32]* @src to i8*)
+; LINUX-64-STATIC: har00:
+; LINUX-64-STATIC: movl    $src, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har00:
+; LINUX-32-STATIC: 	movl	$src, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: har00:
+; LINUX-32-PIC: 	movl	$src, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: har00:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _har00:
+; DARWIN-32-STATIC: 	movl	$_src, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _har00:
+; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _har00:
+; DARWIN-32-PIC: 	call	L78$pb
+; DARWIN-32-PIC-NEXT: L78$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L78$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _har00:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _har00:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _har00:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @hxr00() nounwind {
+entry:
+	ret i8* bitcast ([32 x i32]* @xsrc to i8*)
+; LINUX-64-STATIC: hxr00:
+; LINUX-64-STATIC: movl    $xsrc, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: hxr00:
+; LINUX-32-STATIC: 	movl	$xsrc, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: hxr00:
+; LINUX-32-PIC: 	movl	$xsrc, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: hxr00:
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _hxr00:
+; DARWIN-32-STATIC: 	movl	$_xsrc, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _hxr00:
+; DARWIN-32-DYNAMIC: 	movl	L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _hxr00:
+; DARWIN-32-PIC: 	call	L79$pb
+; DARWIN-32-PIC-NEXT: L79$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L79$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _hxr00:
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _hxr00:
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _hxr00:
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @har01() nounwind {
+entry:
+	ret i8* bitcast ([131072 x i32]* @dst to i8*)
+; LINUX-64-STATIC: har01:
+; LINUX-64-STATIC: movl    $dst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har01:
+; LINUX-32-STATIC: 	movl	$dst, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: har01:
+; LINUX-32-PIC: 	movl	$dst, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: har01:
+; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _har01:
+; DARWIN-32-STATIC: 	movl	$_dst, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _har01:
+; DARWIN-32-DYNAMIC: 	movl	L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _har01:
+; DARWIN-32-PIC: 	call	L80$pb
+; DARWIN-32-PIC-NEXT: L80$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L80$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _har01:
+; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _har01:
+; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _har01:
+; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @hxr01() nounwind {
+entry:
+	ret i8* bitcast ([32 x i32]* @xdst to i8*)
+; LINUX-64-STATIC: hxr01:
+; LINUX-64-STATIC: movl    $xdst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: hxr01:
+; LINUX-32-STATIC: 	movl	$xdst, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: hxr01:
+; LINUX-32-PIC: 	movl	$xdst, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: hxr01:
+; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _hxr01:
+; DARWIN-32-STATIC: 	movl	$_xdst, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _hxr01:
+; DARWIN-32-DYNAMIC: 	movl	L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _hxr01:
+; DARWIN-32-PIC: 	call	L81$pb
+; DARWIN-32-PIC-NEXT: L81$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L81$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _hxr01:
+; DARWIN-64-STATIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _hxr01:
+; DARWIN-64-DYNAMIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _hxr01:
+; DARWIN-64-PIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @har02() nounwind {
+entry:
+	%0 = load i32** @ptr, align 8
+	%1 = bitcast i32* %0 to i8*
+	ret i8* %1
+; LINUX-64-STATIC: har02:
+; LINUX-64-STATIC: movq    ptr(%rip), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har02:
+; LINUX-32-STATIC: 	movl	ptr, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: har02:
+; LINUX-32-PIC: 	movl	ptr, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: har02:
+; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	(%rax), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _har02:
+; DARWIN-32-STATIC: 	movl	_ptr, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _har02:
+; DARWIN-32-DYNAMIC: 	movl	L_ptr$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _har02:
+; DARWIN-32-PIC: 	call	L82$pb
+; DARWIN-32-PIC-NEXT: L82$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L82$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _har02:
+; DARWIN-64-STATIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _har02:
+; DARWIN-64-DYNAMIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _har02:
+; DARWIN-64-PIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @har03() nounwind {
+entry:
+	ret i8* bitcast ([131072 x i32]* @dsrc to i8*)
+; LINUX-64-STATIC: har03:
+; LINUX-64-STATIC: movl    $dsrc, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har03:
+; LINUX-32-STATIC: 	movl	$dsrc, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: har03:
+; LINUX-32-PIC: 	movl	$dsrc, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: har03:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _har03:
+; DARWIN-32-STATIC: 	movl	$_dsrc, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _har03:
+; DARWIN-32-DYNAMIC: 	movl	$_dsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _har03:
+; DARWIN-32-PIC: 	call	L83$pb
+; DARWIN-32-PIC-NEXT: L83$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_dsrc-L83$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _har03:
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _har03:
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _har03:
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @har04() nounwind {
+entry:
+	ret i8* bitcast ([131072 x i32]* @ddst to i8*)
+; LINUX-64-STATIC: har04:
+; LINUX-64-STATIC: movl    $ddst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har04:
+; LINUX-32-STATIC: 	movl	$ddst, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: har04:
+; LINUX-32-PIC: 	movl	$ddst, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: har04:
+; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _har04:
+; DARWIN-32-STATIC: 	movl	$_ddst, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _har04:
+; DARWIN-32-DYNAMIC: 	movl	$_ddst, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _har04:
+; DARWIN-32-PIC: 	call	L84$pb
+; DARWIN-32-PIC-NEXT: L84$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_ddst-L84$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _har04:
+; DARWIN-64-STATIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _har04:
+; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _har04:
+; DARWIN-64-PIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @har05() nounwind {
+entry:
+	%0 = load i32** @dptr, align 8
+	%1 = bitcast i32* %0 to i8*
+	ret i8* %1
+; LINUX-64-STATIC: har05:
+; LINUX-64-STATIC: movq    dptr(%rip), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har05:
+; LINUX-32-STATIC: 	movl	dptr, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: har05:
+; LINUX-32-PIC: 	movl	dptr, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: har05:
+; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	(%rax), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _har05:
+; DARWIN-32-STATIC: 	movl	_dptr, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _har05:
+; DARWIN-32-DYNAMIC: 	movl	_dptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _har05:
+; DARWIN-32-PIC: 	call	L85$pb
+; DARWIN-32-PIC-NEXT: L85$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L85$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _har05:
+; DARWIN-64-STATIC: 	movq	_dptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _har05:
+; DARWIN-64-DYNAMIC: 	movq	_dptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _har05:
+; DARWIN-64-PIC: 	movq	_dptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @har06() nounwind {
+entry:
+	ret i8* bitcast ([131072 x i32]* @lsrc to i8*)
+; LINUX-64-STATIC: har06:
+; LINUX-64-STATIC: movl    $lsrc, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har06:
+; LINUX-32-STATIC: 	movl	$lsrc, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: har06:
+; LINUX-32-PIC: 	movl	$lsrc, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: har06:
+; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _har06:
+; DARWIN-32-STATIC: 	movl	$_lsrc, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _har06:
+; DARWIN-32-DYNAMIC: 	movl	$_lsrc, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _har06:
+; DARWIN-32-PIC: 	call	L86$pb
+; DARWIN-32-PIC-NEXT: L86$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_lsrc-L86$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _har06:
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _har06:
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _har06:
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @har07() nounwind {
+entry:
+	ret i8* bitcast ([131072 x i32]* @ldst to i8*)
+; LINUX-64-STATIC: har07:
+; LINUX-64-STATIC: movl    $ldst, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har07:
+; LINUX-32-STATIC: 	movl	$ldst, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: har07:
+; LINUX-32-PIC: 	movl	$ldst, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: har07:
+; LINUX-64-PIC: 	leaq	ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _har07:
+; DARWIN-32-STATIC: 	movl	$_ldst, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _har07:
+; DARWIN-32-DYNAMIC: 	movl	$_ldst, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _har07:
+; DARWIN-32-PIC: 	call	L87$pb
+; DARWIN-32-PIC-NEXT: L87$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_ldst-L87$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _har07:
+; DARWIN-64-STATIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _har07:
+; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _har07:
+; DARWIN-64-PIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @har08() nounwind {
+entry:
+	%0 = load i32** @lptr, align 8
+	%1 = bitcast i32* %0 to i8*
+	ret i8* %1
+; LINUX-64-STATIC: har08:
+; LINUX-64-STATIC: movq    lptr(%rip), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: har08:
+; LINUX-32-STATIC: 	movl	lptr, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: har08:
+; LINUX-32-PIC: 	movl	lptr, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: har08:
+; LINUX-64-PIC: 	movq	lptr(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _har08:
+; DARWIN-32-STATIC: 	movl	_lptr, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _har08:
+; DARWIN-32-DYNAMIC: 	movl	_lptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _har08:
+; DARWIN-32-PIC: 	call	L88$pb
+; DARWIN-32-PIC-NEXT: L88$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L88$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _har08:
+; DARWIN-64-STATIC: 	movq	_lptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _har08:
+; DARWIN-64-DYNAMIC: 	movq	_lptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _har08:
+; DARWIN-64-PIC: 	movq	_lptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bat00() nounwind {
+entry:
+	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @src, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bat00:
+; LINUX-64-STATIC: movl    $src+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat00:
+; LINUX-32-STATIC: 	movl	$src+64, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bat00:
+; LINUX-32-PIC: 	movl	$src+64, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bat00:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	addq	$64, %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bat00:
+; DARWIN-32-STATIC: 	movl	$_src+64, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bat00:
+; DARWIN-32-DYNAMIC: 	movl	L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bat00:
+; DARWIN-32-PIC: 	call	L89$pb
+; DARWIN-32-PIC-NEXT: L89$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L89$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bat00:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bat00:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bat00:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bxt00() nounwind {
+entry:
+	ret i8* bitcast (i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bxt00:
+; LINUX-64-STATIC: movl    $xsrc+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bxt00:
+; LINUX-32-STATIC: 	movl	$xsrc+64, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bxt00:
+; LINUX-32-PIC: 	movl	$xsrc+64, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bxt00:
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	addq	$64, %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bxt00:
+; DARWIN-32-STATIC: 	movl	$_xsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bxt00:
+; DARWIN-32-DYNAMIC: 	movl	L_xsrc$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bxt00:
+; DARWIN-32-PIC: 	call	L90$pb
+; DARWIN-32-PIC-NEXT: L90$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L90$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bxt00:
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bxt00:
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bxt00:
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bat01() nounwind {
+entry:
+	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bat01:
+; LINUX-64-STATIC: movl    $dst+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat01:
+; LINUX-32-STATIC: 	movl	$dst+64, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bat01:
+; LINUX-32-PIC: 	movl	$dst+64, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bat01:
+; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	addq	$64, %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bat01:
+; DARWIN-32-STATIC: 	movl	$_dst+64, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bat01:
+; DARWIN-32-DYNAMIC: 	movl	L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bat01:
+; DARWIN-32-PIC: 	call	L91$pb
+; DARWIN-32-PIC-NEXT: L91$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L91$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bat01:
+; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bat01:
+; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bat01:
+; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bxt01() nounwind {
+entry:
+	ret i8* bitcast (i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bxt01:
+; LINUX-64-STATIC: movl    $xdst+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bxt01:
+; LINUX-32-STATIC: 	movl	$xdst+64, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bxt01:
+; LINUX-32-PIC: 	movl	$xdst+64, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bxt01:
+; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	addq	$64, %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bxt01:
+; DARWIN-32-STATIC: 	movl	$_xdst+64, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bxt01:
+; DARWIN-32-DYNAMIC: 	movl	L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bxt01:
+; DARWIN-32-PIC: 	call	L92$pb
+; DARWIN-32-PIC-NEXT: L92$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L92$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bxt01:
+; DARWIN-64-STATIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bxt01:
+; DARWIN-64-DYNAMIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bxt01:
+; DARWIN-64-PIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bat02() nounwind {
+entry:
+	%0 = load i32** @ptr, align 8
+	%1 = getelementptr i32* %0, i64 16
+	%2 = bitcast i32* %1 to i8*
+	ret i8* %2
+; LINUX-64-STATIC: bat02:
+; LINUX-64-STATIC: movq    ptr(%rip), %rax
+; LINUX-64-STATIC: addq    $64, %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat02:
+; LINUX-32-STATIC: 	movl	ptr, %eax
+; LINUX-32-STATIC-NEXT: 	addl	$64, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bat02:
+; LINUX-32-PIC: 	movl	ptr, %eax
+; LINUX-32-PIC-NEXT: 	addl	$64, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bat02:
+; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	(%rax), %rax
+; LINUX-64-PIC-NEXT: 	addq	$64, %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bat02:
+; DARWIN-32-STATIC: 	movl	_ptr, %eax
+; DARWIN-32-STATIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bat02:
+; DARWIN-32-DYNAMIC: 	movl	L_ptr$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bat02:
+; DARWIN-32-PIC: 	call	L93$pb
+; DARWIN-32-PIC-NEXT: L93$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L93$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bat02:
+; DARWIN-64-STATIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-STATIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bat02:
+; DARWIN-64-DYNAMIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bat02:
+; DARWIN-64-PIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-PIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bat03() nounwind {
+entry:
+	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bat03:
+; LINUX-64-STATIC: movl    $dsrc+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat03:
+; LINUX-32-STATIC: 	movl	$dsrc+64, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bat03:
+; LINUX-32-PIC: 	movl	$dsrc+64, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bat03:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	addq	$64, %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bat03:
+; DARWIN-32-STATIC: 	movl	$_dsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bat03:
+; DARWIN-32-DYNAMIC: 	movl	$_dsrc+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bat03:
+; DARWIN-32-PIC: 	call	L94$pb
+; DARWIN-32-PIC-NEXT: L94$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	(_dsrc-L94$pb)+64(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bat03:
+; DARWIN-64-STATIC: 	leaq	_dsrc+64(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bat03:
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc+64(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bat03:
+; DARWIN-64-PIC: 	leaq	_dsrc+64(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bat04() nounwind {
+entry:
+	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bat04:
+; LINUX-64-STATIC: movl    $ddst+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat04:
+; LINUX-32-STATIC: 	movl	$ddst+64, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bat04:
+; LINUX-32-PIC: 	movl	$ddst+64, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bat04:
+; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	addq	$64, %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bat04:
+; DARWIN-32-STATIC: 	movl	$_ddst+64, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bat04:
+; DARWIN-32-DYNAMIC: 	movl	$_ddst+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bat04:
+; DARWIN-32-PIC: 	call	L95$pb
+; DARWIN-32-PIC-NEXT: L95$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L95$pb)+64(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bat04:
+; DARWIN-64-STATIC: 	leaq	_ddst+64(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bat04:
+; DARWIN-64-DYNAMIC: 	leaq	_ddst+64(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bat04:
+; DARWIN-64-PIC: 	leaq	_ddst+64(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bat05() nounwind {
+entry:
+	%0 = load i32** @dptr, align 8
+	%1 = getelementptr i32* %0, i64 16
+	%2 = bitcast i32* %1 to i8*
+	ret i8* %2
+; LINUX-64-STATIC: bat05:
+; LINUX-64-STATIC: movq    dptr(%rip), %rax
+; LINUX-64-STATIC: addq    $64, %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat05:
+; LINUX-32-STATIC: 	movl	dptr, %eax
+; LINUX-32-STATIC-NEXT: 	addl	$64, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bat05:
+; LINUX-32-PIC: 	movl	dptr, %eax
+; LINUX-32-PIC-NEXT: 	addl	$64, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bat05:
+; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	(%rax), %rax
+; LINUX-64-PIC-NEXT: 	addq	$64, %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bat05:
+; DARWIN-32-STATIC: 	movl	_dptr, %eax
+; DARWIN-32-STATIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bat05:
+; DARWIN-32-DYNAMIC: 	movl	_dptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bat05:
+; DARWIN-32-PIC: 	call	L96$pb
+; DARWIN-32-PIC-NEXT: L96$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L96$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bat05:
+; DARWIN-64-STATIC: 	movq	_dptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bat05:
+; DARWIN-64-DYNAMIC: 	movq	_dptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bat05:
+; DARWIN-64-PIC: 	movq	_dptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bat06() nounwind {
+entry:
+	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bat06:
+; LINUX-64-STATIC: movl    $lsrc+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat06:
+; LINUX-32-STATIC: 	movl	$lsrc+64, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bat06:
+; LINUX-32-PIC: 	movl	$lsrc+64, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bat06:
+; LINUX-64-PIC: 	leaq	lsrc+64(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bat06:
+; DARWIN-32-STATIC: 	movl	$_lsrc+64, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bat06:
+; DARWIN-32-DYNAMIC: 	movl	$_lsrc+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bat06:
+; DARWIN-32-PIC: 	call	L97$pb
+; DARWIN-32-PIC-NEXT: L97$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	(_lsrc-L97$pb)+64(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bat06:
+; DARWIN-64-STATIC: 	leaq	_lsrc+64(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bat06:
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc+64(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bat06:
+; DARWIN-64-PIC: 	leaq	_lsrc+64(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bat07() nounwind {
+entry:
+	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 16) to i8*)
+; LINUX-64-STATIC: bat07:
+; LINUX-64-STATIC: movl    $ldst+64, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat07:
+; LINUX-32-STATIC: 	movl	$ldst+64, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bat07:
+; LINUX-32-PIC: 	movl	$ldst+64, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bat07:
+; LINUX-64-PIC: 	leaq	ldst+64(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bat07:
+; DARWIN-32-STATIC: 	movl	$_ldst+64, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bat07:
+; DARWIN-32-DYNAMIC: 	movl	$_ldst+64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bat07:
+; DARWIN-32-PIC: 	call	L98$pb
+; DARWIN-32-PIC-NEXT: L98$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L98$pb)+64(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bat07:
+; DARWIN-64-STATIC: 	leaq	_ldst+64(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bat07:
+; DARWIN-64-DYNAMIC: 	leaq	_ldst+64(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bat07:
+; DARWIN-64-PIC: 	leaq	_ldst+64(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bat08() nounwind {
+entry:
+	%0 = load i32** @lptr, align 8
+	%1 = getelementptr i32* %0, i64 16
+	%2 = bitcast i32* %1 to i8*
+	ret i8* %2
+; LINUX-64-STATIC: bat08:
+; LINUX-64-STATIC: movq    lptr(%rip), %rax
+; LINUX-64-STATIC: addq    $64, %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bat08:
+; LINUX-32-STATIC: 	movl	lptr, %eax
+; LINUX-32-STATIC-NEXT: 	addl	$64, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bat08:
+; LINUX-32-PIC: 	movl	lptr, %eax
+; LINUX-32-PIC-NEXT: 	addl	$64, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bat08:
+; LINUX-64-PIC: 	movq	lptr(%rip), %rax
+; LINUX-64-PIC-NEXT: 	addq	$64, %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bat08:
+; DARWIN-32-STATIC: 	movl	_lptr, %eax
+; DARWIN-32-STATIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bat08:
+; DARWIN-32-DYNAMIC: 	movl	_lptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bat08:
+; DARWIN-32-PIC: 	call	L99$pb
+; DARWIN-32-PIC-NEXT: L99$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L99$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	addl	$64, %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bat08:
+; DARWIN-64-STATIC: 	movq	_lptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bat08:
+; DARWIN-64-DYNAMIC: 	movq	_lptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bat08:
+; DARWIN-64-PIC: 	movq	_lptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	addq	$64, %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bam00() nounwind {
+entry:
+	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @src, i32 0, i64 65536) to i8*)
+; LINUX-64-STATIC: bam00:
+; LINUX-64-STATIC: movl    $src+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam00:
+; LINUX-32-STATIC: 	movl	$src+262144, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bam00:
+; LINUX-32-PIC: 	movl	$src+262144, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bam00:
+; LINUX-64-PIC: 	movl	$262144, %eax
+; LINUX-64-PIC-NEXT: 	addq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bam00:
+; DARWIN-32-STATIC: 	movl	$_src+262144, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bam00:
+; DARWIN-32-DYNAMIC: 	movl	$262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	L_src$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bam00:
+; DARWIN-32-PIC: 	call	L100$pb
+; DARWIN-32-PIC-NEXT: L100$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%ecx
+; DARWIN-32-PIC-NEXT: 	movl	$262144, %eax
+; DARWIN-32-PIC-NEXT: 	addl	L_src$non_lazy_ptr-L100$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bam00:
+; DARWIN-64-STATIC: 	movl	$262144, %eax
+; DARWIN-64-STATIC-NEXT: 	addq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bam00:
+; DARWIN-64-DYNAMIC: 	movl	$262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bam00:
+; DARWIN-64-PIC: 	movl	$262144, %eax
+; DARWIN-64-PIC-NEXT: 	addq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bam01() nounwind {
+entry:
+	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 65536) to i8*)
+; LINUX-64-STATIC: bam01:
+; LINUX-64-STATIC: movl    $dst+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam01:
+; LINUX-32-STATIC: 	movl	$dst+262144, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bam01:
+; LINUX-32-PIC: 	movl	$dst+262144, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bam01:
+; LINUX-64-PIC: 	movl	$262144, %eax
+; LINUX-64-PIC-NEXT: 	addq	dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bam01:
+; DARWIN-32-STATIC: 	movl	$_dst+262144, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bam01:
+; DARWIN-32-DYNAMIC: 	movl	$262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	L_dst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bam01:
+; DARWIN-32-PIC: 	call	L101$pb
+; DARWIN-32-PIC-NEXT: L101$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%ecx
+; DARWIN-32-PIC-NEXT: 	movl	$262144, %eax
+; DARWIN-32-PIC-NEXT: 	addl	L_dst$non_lazy_ptr-L101$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bam01:
+; DARWIN-64-STATIC: 	movl	$262144, %eax
+; DARWIN-64-STATIC-NEXT: 	addq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bam01:
+; DARWIN-64-DYNAMIC: 	movl	$262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bam01:
+; DARWIN-64-PIC: 	movl	$262144, %eax
+; DARWIN-64-PIC-NEXT: 	addq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bxm01() nounwind {
+entry:
+	ret i8* bitcast (i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 65536) to i8*)
+; LINUX-64-STATIC: bxm01:
+; LINUX-64-STATIC: movl    $xdst+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bxm01:
+; LINUX-32-STATIC: 	movl	$xdst+262144, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bxm01:
+; LINUX-32-PIC: 	movl	$xdst+262144, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bxm01:
+; LINUX-64-PIC: 	movl	$262144, %eax
+; LINUX-64-PIC-NEXT: 	addq	xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bxm01:
+; DARWIN-32-STATIC: 	movl	$_xdst+262144, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bxm01:
+; DARWIN-32-DYNAMIC: 	movl	$262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	L_xdst$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bxm01:
+; DARWIN-32-PIC: 	call	L102$pb
+; DARWIN-32-PIC-NEXT: L102$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%ecx
+; DARWIN-32-PIC-NEXT: 	movl	$262144, %eax
+; DARWIN-32-PIC-NEXT: 	addl	L_xdst$non_lazy_ptr-L102$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bxm01:
+; DARWIN-64-STATIC: 	movl	$262144, %eax
+; DARWIN-64-STATIC-NEXT: 	addq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bxm01:
+; DARWIN-64-DYNAMIC: 	movl	$262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bxm01:
+; DARWIN-64-PIC: 	movl	$262144, %eax
+; DARWIN-64-PIC-NEXT: 	addq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bam02() nounwind {
+entry:
+	%0 = load i32** @ptr, align 8
+	%1 = getelementptr i32* %0, i64 65536
+	%2 = bitcast i32* %1 to i8*
+	ret i8* %2
+; LINUX-64-STATIC: bam02:
+; LINUX-64-STATIC: movl    $262144, %eax
+; LINUX-64-STATIC: addq    ptr(%rip), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam02:
+; LINUX-32-STATIC: 	movl	$262144, %eax
+; LINUX-32-STATIC-NEXT: 	addl	ptr, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bam02:
+; LINUX-32-PIC: 	movl	$262144, %eax
+; LINUX-32-PIC-NEXT: 	addl	ptr, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bam02:
+; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	$262144, %eax
+; LINUX-64-PIC-NEXT: 	addq	(%rcx), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bam02:
+; DARWIN-32-STATIC: 	movl	$262144, %eax
+; DARWIN-32-STATIC-NEXT: 	addl	_ptr, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bam02:
+; DARWIN-32-DYNAMIC: 	movl	L_ptr$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	movl	$262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	(%ecx), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bam02:
+; DARWIN-32-PIC: 	call	L103$pb
+; DARWIN-32-PIC-NEXT: L103$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L103$pb(%eax), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	$262144, %eax
+; DARWIN-32-PIC-NEXT: 	addl	(%ecx), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bam02:
+; DARWIN-64-STATIC: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-STATIC-NEXT: 	movl	$262144, %eax
+; DARWIN-64-STATIC-NEXT: 	addq	(%rcx), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bam02:
+; DARWIN-64-DYNAMIC: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-DYNAMIC-NEXT: 	movl	$262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	(%rcx), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bam02:
+; DARWIN-64-PIC: 	movq	_ptr@GOTPCREL(%rip), %rcx
+; DARWIN-64-PIC-NEXT: 	movl	$262144, %eax
+; DARWIN-64-PIC-NEXT: 	addq	(%rcx), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bam03() nounwind {	; returns &dsrc[65536] as i8* (constant-foldable address)
+entry:
+	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 65536) to i8*)	; 65536 i32 elements = 262144-byte offset
+; LINUX-64-STATIC: bam03:
+; LINUX-64-STATIC: movl    $dsrc+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam03:
+; LINUX-32-STATIC: 	movl	$dsrc+262144, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bam03:
+; LINUX-32-PIC: 	movl	$dsrc+262144, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bam03:
+; LINUX-64-PIC: 	movl	$262144, %eax
+; LINUX-64-PIC-NEXT: 	addq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bam03:
+; DARWIN-32-STATIC: 	movl	$_dsrc+262144, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bam03:
+; DARWIN-32-DYNAMIC: 	movl	$_dsrc+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bam03:
+; DARWIN-32-PIC: 	call	L104$pb
+; DARWIN-32-PIC-NEXT: L104$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	(_dsrc-L104$pb)+262144(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bam03:
+; DARWIN-64-STATIC: 	leaq	_dsrc+262144(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bam03:
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc+262144(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bam03:
+; DARWIN-64-PIC: 	leaq	_dsrc+262144(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bam04() nounwind {	; returns &ddst[65536] as i8* (constant-foldable address)
+entry:
+	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 65536) to i8*)	; 65536 i32 elements = 262144-byte offset
+; LINUX-64-STATIC: bam04:
+; LINUX-64-STATIC: movl    $ddst+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam04:
+; LINUX-32-STATIC: 	movl	$ddst+262144, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bam04:
+; LINUX-32-PIC: 	movl	$ddst+262144, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bam04:
+; LINUX-64-PIC: 	movl	$262144, %eax
+; LINUX-64-PIC-NEXT: 	addq	ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bam04:
+; DARWIN-32-STATIC: 	movl	$_ddst+262144, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bam04:
+; DARWIN-32-DYNAMIC: 	movl	$_ddst+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bam04:
+; DARWIN-32-PIC: 	call	L105$pb
+; DARWIN-32-PIC-NEXT: L105$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L105$pb)+262144(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bam04:
+; DARWIN-64-STATIC: 	leaq	_ddst+262144(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bam04:
+; DARWIN-64-DYNAMIC: 	leaq	_ddst+262144(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bam04:
+; DARWIN-64-PIC: 	leaq	_ddst+262144(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bam05() nounwind {	; returns (loaded @dptr) + 65536 i32s as i8*
+entry:
+	%0 = load i32** @dptr, align 8	; load the global pointer value
+	%1 = getelementptr i32* %0, i64 65536	; advance by 65536 i32s = 262144 bytes
+	%2 = bitcast i32* %1 to i8*
+	ret i8* %2
+; LINUX-64-STATIC: bam05:
+; LINUX-64-STATIC: movl    $262144, %eax
+; LINUX-64-STATIC: addq    dptr(%rip), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam05:
+; LINUX-32-STATIC: 	movl	$262144, %eax
+; LINUX-32-STATIC-NEXT: 	addl	dptr, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bam05:
+; LINUX-32-PIC: 	movl	$262144, %eax
+; LINUX-32-PIC-NEXT: 	addl	dptr, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bam05:
+; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), %rcx
+; LINUX-64-PIC-NEXT: 	movl	$262144, %eax
+; LINUX-64-PIC-NEXT: 	addq	(%rcx), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bam05:
+; DARWIN-32-STATIC: 	movl	$262144, %eax
+; DARWIN-32-STATIC-NEXT: 	addl	_dptr, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bam05:
+; DARWIN-32-DYNAMIC: 	movl	$262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	_dptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bam05:
+; DARWIN-32-PIC: 	call	L106$pb
+; DARWIN-32-PIC-NEXT: L106$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%ecx
+; DARWIN-32-PIC-NEXT: 	movl	$262144, %eax
+; DARWIN-32-PIC-NEXT: 	addl	_dptr-L106$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bam05:
+; DARWIN-64-STATIC: 	movl	$262144, %eax
+; DARWIN-64-STATIC-NEXT: 	addq	_dptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bam05:
+; DARWIN-64-DYNAMIC: 	movl	$262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	_dptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bam05:
+; DARWIN-64-PIC: 	movl	$262144, %eax
+; DARWIN-64-PIC-NEXT: 	addq	_dptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bam06() nounwind {	; returns &lsrc[65536] as i8* (constant-foldable address)
+entry:
+	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 65536) to i8*)	; 65536 i32 elements = 262144-byte offset
+; LINUX-64-STATIC: bam06:
+; LINUX-64-STATIC: movl    $lsrc+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam06:
+; LINUX-32-STATIC: 	movl	$lsrc+262144, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bam06:
+; LINUX-32-PIC: 	movl	$lsrc+262144, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bam06:
+; LINUX-64-PIC: 	leaq	lsrc+262144(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bam06:
+; DARWIN-32-STATIC: 	movl	$_lsrc+262144, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bam06:
+; DARWIN-32-DYNAMIC: 	movl	$_lsrc+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bam06:
+; DARWIN-32-PIC: 	call	L107$pb
+; DARWIN-32-PIC-NEXT: L107$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	(_lsrc-L107$pb)+262144(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bam06:
+; DARWIN-64-STATIC: 	leaq	_lsrc+262144(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bam06:
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc+262144(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bam06:
+; DARWIN-64-PIC: 	leaq	_lsrc+262144(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bam07() nounwind {	; returns &ldst[65536] as i8* (constant-foldable address)
+entry:
+	ret i8* bitcast (i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 65536) to i8*)	; 65536 i32 elements = 262144-byte offset
+; LINUX-64-STATIC: bam07:
+; LINUX-64-STATIC: movl    $ldst+262144, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam07:
+; LINUX-32-STATIC: 	movl	$ldst+262144, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bam07:
+; LINUX-32-PIC: 	movl	$ldst+262144, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bam07:
+; LINUX-64-PIC: 	leaq	ldst+262144(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bam07:
+; DARWIN-32-STATIC: 	movl	$_ldst+262144, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bam07:
+; DARWIN-32-DYNAMIC: 	movl	$_ldst+262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bam07:
+; DARWIN-32-PIC: 	call	L108$pb
+; DARWIN-32-PIC-NEXT: L108$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L108$pb)+262144(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bam07:
+; DARWIN-64-STATIC: 	leaq	_ldst+262144(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bam07:
+; DARWIN-64-DYNAMIC: 	leaq	_ldst+262144(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bam07:
+; DARWIN-64-PIC: 	leaq	_ldst+262144(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @bam08() nounwind {	; returns (loaded @lptr) + 65536 i32s as i8*
+entry:
+	%0 = load i32** @lptr, align 8	; load the global pointer value
+	%1 = getelementptr i32* %0, i64 65536	; advance by 65536 i32s = 262144 bytes
+	%2 = bitcast i32* %1 to i8*
+	ret i8* %2
+; LINUX-64-STATIC: bam08:
+; LINUX-64-STATIC: movl    $262144, %eax
+; LINUX-64-STATIC: addq    lptr(%rip), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: bam08:
+; LINUX-32-STATIC: 	movl	$262144, %eax
+; LINUX-32-STATIC-NEXT: 	addl	lptr, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: bam08:
+; LINUX-32-PIC: 	movl	$262144, %eax
+; LINUX-32-PIC-NEXT: 	addl	lptr, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: bam08:
+; LINUX-64-PIC: 	movl	$262144, %eax
+; LINUX-64-PIC-NEXT: 	addq	lptr(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _bam08:
+; DARWIN-32-STATIC: 	movl	$262144, %eax
+; DARWIN-32-STATIC-NEXT: 	addl	_lptr, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _bam08:
+; DARWIN-32-DYNAMIC: 	movl	$262144, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	addl	_lptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _bam08:
+; DARWIN-32-PIC: 	call	L109$pb
+; DARWIN-32-PIC-NEXT: L109$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%ecx
+; DARWIN-32-PIC-NEXT: 	movl	$262144, %eax
+; DARWIN-32-PIC-NEXT: 	addl	_lptr-L109$pb(%ecx), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _bam08:
+; DARWIN-64-STATIC: 	movl	$262144, %eax
+; DARWIN-64-STATIC-NEXT: 	addq	_lptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _bam08:
+; DARWIN-64-DYNAMIC: 	movl	$262144, %eax
+; DARWIN-64-DYNAMIC-NEXT: 	addq	_lptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _bam08:
+; DARWIN-64-PIC: 	movl	$262144, %eax
+; DARWIN-64-PIC-NEXT: 	addq	_lptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @cat00(i64 %i) nounwind {	; returns &src[i+16] as i8*
+entry:
+	%0 = add i64 %i, 16	; index i+16
+	%1 = getelementptr [131072 x i32]* @src, i64 0, i64 %0	; byte address src + 4*i + 64
+	%2 = bitcast i32* %1 to i8*
+	ret i8* %2
+; LINUX-64-STATIC: cat00:
+; LINUX-64-STATIC: leaq    src+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat00:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	src+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cat00:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	src+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cat00:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cat00:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_src+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cat00:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cat00:
+; DARWIN-32-PIC: 	call	L110$pb
+; DARWIN-32-PIC-NEXT: L110$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L110$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	leal	64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cat00:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cat00:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cat00:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @cxt00(i64 %i) nounwind {	; returns &xsrc[i+16] as i8*
+entry:
+	%0 = add i64 %i, 16	; index i+16
+	%1 = getelementptr [32 x i32]* @xsrc, i64 0, i64 %0	; byte address xsrc + 4*i + 64
+	%2 = bitcast i32* %1 to i8*
+	ret i8* %2
+; LINUX-64-STATIC: cxt00:
+; LINUX-64-STATIC: leaq    xsrc+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cxt00:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	xsrc+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cxt00:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	xsrc+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cxt00:
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cxt00:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_xsrc+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cxt00:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xsrc$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cxt00:
+; DARWIN-32-PIC: 	call	L111$pb
+; DARWIN-32-PIC-NEXT: L111$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L111$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	leal	64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cxt00:
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cxt00:
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cxt00:
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @cat01(i64 %i) nounwind {	; returns &dst[i+16] as i8*
+entry:
+	%0 = add i64 %i, 16	; index i+16
+	%1 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0	; byte address dst + 4*i + 64
+	%2 = bitcast i32* %1 to i8*
+	ret i8* %2
+; LINUX-64-STATIC: cat01:
+; LINUX-64-STATIC: leaq    dst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat01:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	dst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cat01:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	dst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cat01:
+; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cat01:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_dst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cat01:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cat01:
+; DARWIN-32-PIC: 	call	L112$pb
+; DARWIN-32-PIC-NEXT: L112$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L112$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	leal	64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cat01:
+; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cat01:
+; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cat01:
+; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @cxt01(i64 %i) nounwind {	; returns &xdst[i+16] as i8*
+entry:
+	%0 = add i64 %i, 16	; index i+16
+	%1 = getelementptr [32 x i32]* @xdst, i64 0, i64 %0	; byte address xdst + 4*i + 64
+	%2 = bitcast i32* %1 to i8*
+	ret i8* %2
+; LINUX-64-STATIC: cxt01:
+; LINUX-64-STATIC: leaq    xdst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cxt01:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	xdst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cxt01:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	xdst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cxt01:
+; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cxt01:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_xdst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cxt01:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cxt01:
+; DARWIN-32-PIC: 	call	L113$pb
+; DARWIN-32-PIC-NEXT: L113$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L113$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	leal	64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cxt01:
+; DARWIN-64-STATIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cxt01:
+; DARWIN-64-DYNAMIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cxt01:
+; DARWIN-64-PIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @cat02(i64 %i) nounwind {	; returns (loaded @ptr)[i+16] address as i8*
+entry:
+	%0 = load i32** @ptr, align 8	; load the global pointer value
+	%1 = add i64 %i, 16	; index i+16
+	%2 = getelementptr i32* %0, i64 %1	; byte address ptr + 4*i + 64
+	%3 = bitcast i32* %2 to i8*
+	ret i8* %3
+; LINUX-64-STATIC: cat02:
+; LINUX-64-STATIC: movq    ptr(%rip), %rax
+; LINUX-64-STATIC: leaq    64(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat02:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-STATIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cat02:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-PIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cat02:
+; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	(%rax), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cat02:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cat02:
+; DARWIN-32-DYNAMIC: 	movl	L_ptr$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	64(%eax,%ecx,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cat02:
+; DARWIN-32-PIC: 	call	L114$pb
+; DARWIN-32-PIC-NEXT: L114$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L114$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cat02:
+; DARWIN-64-STATIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cat02:
+; DARWIN-64-DYNAMIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cat02:
+; DARWIN-64-PIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @cat03(i64 %i) nounwind {	; returns &dsrc[i+16] as i8*
+entry:
+	%0 = add i64 %i, 16	; index i+16
+	%1 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %0	; byte address dsrc + 4*i + 64
+	%2 = bitcast i32* %1 to i8*
+	ret i8* %2
+; LINUX-64-STATIC: cat03:
+; LINUX-64-STATIC: leaq    dsrc+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat03:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	dsrc+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cat03:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	dsrc+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cat03:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cat03:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_dsrc+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cat03:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_dsrc+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cat03:
+; DARWIN-32-PIC: 	call	L115$pb
+; DARWIN-32-PIC-NEXT: L115$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	(_dsrc-L115$pb)+64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cat03:
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cat03:
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cat03:
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @cat04(i64 %i) nounwind {	; returns &ddst[i+16] as i8*
+entry:
+	%0 = add i64 %i, 16	; index i+16
+	%1 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0	; byte address ddst + 4*i + 64
+	%2 = bitcast i32* %1 to i8*
+	ret i8* %2
+; LINUX-64-STATIC: cat04:
+; LINUX-64-STATIC: leaq    ddst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat04:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	ddst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cat04:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	ddst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cat04:
+; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cat04:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_ddst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cat04:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ddst+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cat04:
+; DARWIN-32-PIC: 	call	L116$pb
+; DARWIN-32-PIC-NEXT: L116$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L116$pb)+64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cat04:
+; DARWIN-64-STATIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cat04:
+; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cat04:
+; DARWIN-64-PIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @cat05(i64 %i) nounwind {	; returns (loaded @dptr)[i+16] address as i8*
+entry:
+	%0 = load i32** @dptr, align 8	; load the global pointer value
+	%1 = add i64 %i, 16	; index i+16
+	%2 = getelementptr i32* %0, i64 %1	; byte address dptr + 4*i + 64
+	%3 = bitcast i32* %2 to i8*
+	ret i8* %3
+; LINUX-64-STATIC: cat05:
+; LINUX-64-STATIC: movq    dptr(%rip), %rax
+; LINUX-64-STATIC: leaq    64(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat05:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	dptr, %ecx
+; LINUX-32-STATIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cat05:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	dptr, %ecx
+; LINUX-32-PIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cat05:
+; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	(%rax), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cat05:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_dptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cat05:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cat05:
+; DARWIN-32-PIC: 	call	L117$pb
+; DARWIN-32-PIC-NEXT: L117$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L117$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	leal	64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cat05:
+; DARWIN-64-STATIC: 	movq	_dptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cat05:
+; DARWIN-64-DYNAMIC: 	movq	_dptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cat05:
+; DARWIN-64-PIC: 	movq	_dptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @cat06(i64 %i) nounwind {	; returns &lsrc[i+16] as i8*
+entry:
+	%0 = add i64 %i, 16	; index i+16
+	%1 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %0	; byte address lsrc + 4*i + 64
+	%2 = bitcast i32* %1 to i8*
+	ret i8* %2
+; LINUX-64-STATIC: cat06:
+; LINUX-64-STATIC: leaq    lsrc+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat06:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	lsrc+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cat06:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	lsrc+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cat06:
+; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cat06:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_lsrc+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cat06:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_lsrc+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cat06:
+; DARWIN-32-PIC: 	call	L118$pb
+; DARWIN-32-PIC-NEXT: L118$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	(_lsrc-L118$pb)+64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cat06:
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cat06:
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cat06:
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @cat07(i64 %i) nounwind {	; returns &ldst[i+16] as i8*
+entry:
+	%0 = add i64 %i, 16	; index i+16
+	%1 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0	; byte address ldst + 4*i + 64
+	%2 = bitcast i32* %1 to i8*
+	ret i8* %2
+; LINUX-64-STATIC: cat07:
+; LINUX-64-STATIC: leaq    ldst+64(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat07:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	ldst+64(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cat07:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	ldst+64(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cat07:
+; LINUX-64-PIC: 	leaq	ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cat07:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_ldst+64(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cat07:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ldst+64(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cat07:
+; DARWIN-32-PIC: 	call	L119$pb
+; DARWIN-32-PIC-NEXT: L119$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L119$pb)+64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cat07:
+; DARWIN-64-STATIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cat07:
+; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cat07:
+; DARWIN-64-PIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @cat08(i64 %i) nounwind {	; returns (loaded @lptr)[i+16] address as i8*
+entry:
+	%0 = load i32** @lptr, align 8	; load the global pointer value
+	%1 = add i64 %i, 16	; index i+16
+	%2 = getelementptr i32* %0, i64 %1	; byte address lptr + 4*i + 64
+	%3 = bitcast i32* %2 to i8*
+	ret i8* %3
+; LINUX-64-STATIC: cat08:
+; LINUX-64-STATIC: movq    lptr(%rip), %rax
+; LINUX-64-STATIC: leaq    64(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cat08:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	lptr, %ecx
+; LINUX-32-STATIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cat08:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	lptr, %ecx
+; LINUX-32-PIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cat08:
+; LINUX-64-PIC: 	movq	lptr(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cat08:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_lptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cat08:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	64(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cat08:
+; DARWIN-32-PIC: 	call	L120$pb
+; DARWIN-32-PIC-NEXT: L120$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L120$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	leal	64(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cat08:
+; DARWIN-64-STATIC: 	movq	_lptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cat08:
+; DARWIN-64-DYNAMIC: 	movq	_lptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cat08:
+; DARWIN-64-PIC: 	movq	_lptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	64(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @cam00(i64 %i) nounwind {	; returns &src[i+65536] as i8*
+entry:
+	%0 = add i64 %i, 65536	; index i+65536
+	%1 = getelementptr [131072 x i32]* @src, i64 0, i64 %0	; byte address src + 4*i + 262144
+	%2 = bitcast i32* %1 to i8*
+	ret i8* %2
+; LINUX-64-STATIC: cam00:
+; LINUX-64-STATIC: leaq    src+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam00:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	src+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cam00:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	src+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cam00:
+; LINUX-64-PIC: 	movq	src@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cam00:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_src+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cam00:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_src$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cam00:
+; DARWIN-32-PIC: 	call	L121$pb
+; DARWIN-32-PIC-NEXT: L121$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_src$non_lazy_ptr-L121$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	leal	262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cam00:
+; DARWIN-64-STATIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cam00:
+; DARWIN-64-DYNAMIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cam00:
+; DARWIN-64-PIC: 	movq	_src@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @cxm00(i64 %i) nounwind {	; returns &xsrc[i+65536] as i8*
+entry:
+	%0 = add i64 %i, 65536	; index i+65536
+	%1 = getelementptr [32 x i32]* @xsrc, i64 0, i64 %0	; byte address xsrc + 4*i + 262144
+	%2 = bitcast i32* %1 to i8*
+	ret i8* %2
+; LINUX-64-STATIC: cxm00:
+; LINUX-64-STATIC: leaq    xsrc+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cxm00:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	xsrc+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cxm00:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	xsrc+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cxm00:
+; LINUX-64-PIC: 	movq	xsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cxm00:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_xsrc+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cxm00:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xsrc$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cxm00:
+; DARWIN-32-PIC: 	call	L122$pb
+; DARWIN-32-PIC-NEXT: L122$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_xsrc$non_lazy_ptr-L122$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	leal	262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cxm00:
+; DARWIN-64-STATIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cxm00:
+; DARWIN-64-DYNAMIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cxm00:
+; DARWIN-64-PIC: 	movq	_xsrc@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @cam01(i64 %i) nounwind {		; return &dst[i+65536] as i8*
+entry:
+	%0 = add i64 %i, 65536			; +65536 i32 elements = +262144 bytes; expected to fold into the lea displacement
+	%1 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0
+	%2 = bitcast i32* %1 to i8*
+	ret i8* %2
+; LINUX-64-STATIC: cam01:
+; LINUX-64-STATIC: leaq    dst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam01:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	dst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cam01:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	dst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cam01:
+; LINUX-64-PIC: 	movq	dst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cam01:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_dst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cam01:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_dst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cam01:
+; DARWIN-32-PIC: 	call	L123$pb
+; DARWIN-32-PIC-NEXT: L123$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_dst$non_lazy_ptr-L123$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	leal	262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cam01:
+; DARWIN-64-STATIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cam01:
+; DARWIN-64-DYNAMIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cam01:
+; DARWIN-64-PIC: 	movq	_dst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @cxm01(i64 %i) nounwind {		; return &xdst[i+65536] as i8*
+entry:
+	%0 = add i64 %i, 65536			; +65536 i32 elements = +262144 bytes; expected to fold into the lea displacement
+	%1 = getelementptr [32 x i32]* @xdst, i64 0, i64 %0	; NOTE(review): index exceeds [32 x i32]; test only checks address arithmetic
+	%2 = bitcast i32* %1 to i8*
+	ret i8* %2
+; LINUX-64-STATIC: cxm01:
+; LINUX-64-STATIC: leaq    xdst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cxm01:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	xdst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cxm01:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	xdst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cxm01:
+; LINUX-64-PIC: 	movq	xdst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cxm01:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_xdst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cxm01:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_xdst$non_lazy_ptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cxm01:
+; DARWIN-32-PIC: 	call	L124$pb
+; DARWIN-32-PIC-NEXT: L124$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	L_xdst$non_lazy_ptr-L124$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	leal	262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cxm01:
+; DARWIN-64-STATIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cxm01:
+; DARWIN-64-DYNAMIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cxm01:
+; DARWIN-64-PIC: 	movq	_xdst@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @cam02(i64 %i) nounwind {		; return &ptr[i+65536]: pointer loaded from global @ptr, then indexed
+entry:
+	%0 = load i32** @ptr, align 8		; load the base pointer (directly, or via GOT/non-lazy ptr per code model)
+	%1 = add i64 %i, 65536			; +65536 i32 elements = +262144 bytes; expected to fold into the lea displacement
+	%2 = getelementptr i32* %0, i64 %1
+	%3 = bitcast i32* %2 to i8*
+	ret i8* %3
+; LINUX-64-STATIC: cam02:
+; LINUX-64-STATIC: movq    ptr(%rip), %rax
+; LINUX-64-STATIC: leaq    262144(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam02:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-STATIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cam02:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	ptr, %ecx
+; LINUX-32-PIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cam02:
+; LINUX-64-PIC: 	movq	ptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	(%rax), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cam02:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_ptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cam02:
+; DARWIN-32-DYNAMIC: 	movl	L_ptr$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	262144(%eax,%ecx,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cam02:
+; DARWIN-32-PIC: 	call	L125$pb
+; DARWIN-32-PIC-NEXT: L125$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_ptr$non_lazy_ptr-L125$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cam02:
+; DARWIN-64-STATIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cam02:
+; DARWIN-64-DYNAMIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cam02:
+; DARWIN-64-PIC: 	movq	_ptr@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	movq	(%rax), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @cam03(i64 %i) nounwind {		; return &dsrc[i+65536]; dsrc has hidden/default visibility allowing direct PC-relative access
+entry:
+	%0 = add i64 %i, 65536			; +65536 i32 elements = +262144 bytes; expected to fold into the lea displacement
+	%1 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %0
+	%2 = bitcast i32* %1 to i8*
+	ret i8* %2
+; LINUX-64-STATIC: cam03:
+; LINUX-64-STATIC: leaq    dsrc+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam03:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	dsrc+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cam03:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	dsrc+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cam03:
+; LINUX-64-PIC: 	movq	dsrc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cam03:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_dsrc+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cam03:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_dsrc+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cam03:
+; DARWIN-32-PIC: 	call	L126$pb
+; DARWIN-32-PIC-NEXT: L126$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	(_dsrc-L126$pb)+262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cam03:
+; DARWIN-64-STATIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cam03:
+; DARWIN-64-DYNAMIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cam03:
+; DARWIN-64-PIC: 	leaq	_dsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @cam04(i64 %i) nounwind {		; return &ddst[i+65536]; ddst accessed PC-relative without GOT indirection
+entry:
+	%0 = add i64 %i, 65536			; +65536 i32 elements = +262144 bytes; expected to fold into the lea displacement
+	%1 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0
+	%2 = bitcast i32* %1 to i8*
+	ret i8* %2
+; LINUX-64-STATIC: cam04:
+; LINUX-64-STATIC: leaq    ddst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam04:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	ddst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cam04:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	ddst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cam04:
+; LINUX-64-PIC: 	movq	ddst@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cam04:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_ddst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cam04:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ddst+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cam04:
+; DARWIN-32-PIC: 	call	L127$pb
+; DARWIN-32-PIC-NEXT: L127$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	(_ddst-L127$pb)+262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cam04:
+; DARWIN-64-STATIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cam04:
+; DARWIN-64-DYNAMIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cam04:
+; DARWIN-64-PIC: 	leaq	_ddst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @cam05(i64 %i) nounwind {		; return &dptr[i+65536]: pointer loaded from global @dptr, then indexed
+entry:
+	%0 = load i32** @dptr, align 8		; load the base pointer; dptr itself is addressed directly/PC-relative
+	%1 = add i64 %i, 65536			; +65536 i32 elements = +262144 bytes; expected to fold into the lea displacement
+	%2 = getelementptr i32* %0, i64 %1
+	%3 = bitcast i32* %2 to i8*
+	ret i8* %3
+; LINUX-64-STATIC: cam05:
+; LINUX-64-STATIC: movq    dptr(%rip), %rax
+; LINUX-64-STATIC: leaq    262144(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam05:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	dptr, %ecx
+; LINUX-32-STATIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cam05:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	dptr, %ecx
+; LINUX-32-PIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cam05:
+; LINUX-64-PIC: 	movq	dptr@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	movq	(%rax), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cam05:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_dptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cam05:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_dptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cam05:
+; DARWIN-32-PIC: 	call	L128$pb
+; DARWIN-32-PIC-NEXT: L128$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_dptr-L128$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	leal	262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cam05:
+; DARWIN-64-STATIC: 	movq	_dptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cam05:
+; DARWIN-64-DYNAMIC: 	movq	_dptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cam05:
+; DARWIN-64-PIC: 	movq	_dptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @cam06(i64 %i) nounwind {		; return &lsrc[i+65536]; lsrc is local, so even ELF PIC uses PC-relative lea, no GOT
+entry:
+	%0 = add i64 %i, 65536			; +65536 i32 elements = +262144 bytes; expected to fold into the lea displacement
+	%1 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %0
+	%2 = bitcast i32* %1 to i8*
+	ret i8* %2
+; LINUX-64-STATIC: cam06:
+; LINUX-64-STATIC: leaq    lsrc+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam06:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	lsrc+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cam06:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	lsrc+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cam06:
+; LINUX-64-PIC: 	leaq	lsrc(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cam06:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_lsrc+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cam06:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_lsrc+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cam06:
+; DARWIN-32-PIC: 	call	L129$pb
+; DARWIN-32-PIC-NEXT: L129$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	(_lsrc-L129$pb)+262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cam06:
+; DARWIN-64-STATIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cam06:
+; DARWIN-64-DYNAMIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cam06:
+; DARWIN-64-PIC: 	leaq	_lsrc(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @cam07(i64 %i) nounwind {		; return &ldst[i+65536]; ldst is local, so even ELF PIC uses PC-relative lea, no GOT
+entry:
+	%0 = add i64 %i, 65536			; +65536 i32 elements = +262144 bytes; expected to fold into the lea displacement
+	%1 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0
+	%2 = bitcast i32* %1 to i8*
+	ret i8* %2
+; LINUX-64-STATIC: cam07:
+; LINUX-64-STATIC: leaq    ldst+262144(,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam07:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	leal	ldst+262144(,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cam07:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	leal	ldst+262144(,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cam07:
+; LINUX-64-PIC: 	leaq	ldst(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cam07:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	leal	_ldst+262144(,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cam07:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	leal	_ldst+262144(,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cam07:
+; DARWIN-32-PIC: 	call	L130$pb
+; DARWIN-32-PIC-NEXT: L130$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	leal	(_ldst-L130$pb)+262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cam07:
+; DARWIN-64-STATIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cam07:
+; DARWIN-64-DYNAMIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cam07:
+; DARWIN-64-PIC: 	leaq	_ldst(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define i8* @cam08(i64 %i) nounwind {		; return &lptr[i+65536]: pointer loaded from local global @lptr, then indexed
+entry:
+	%0 = load i32** @lptr, align 8		; lptr is local: loaded PC-relative even under ELF 64-bit PIC (no GOT hop)
+	%1 = add i64 %i, 65536			; +65536 i32 elements = +262144 bytes; expected to fold into the lea displacement
+	%2 = getelementptr i32* %0, i64 %1
+	%3 = bitcast i32* %2 to i8*
+	ret i8* %3
+; LINUX-64-STATIC: cam08:
+; LINUX-64-STATIC: movq    lptr(%rip), %rax
+; LINUX-64-STATIC: leaq    262144(%rax,%rdi,4), %rax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: cam08:
+; LINUX-32-STATIC: 	movl	4(%esp), %eax
+; LINUX-32-STATIC-NEXT: 	movl	lptr, %ecx
+; LINUX-32-STATIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: cam08:
+; LINUX-32-PIC: 	movl	4(%esp), %eax
+; LINUX-32-PIC-NEXT: 	movl	lptr, %ecx
+; LINUX-32-PIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: cam08:
+; LINUX-64-PIC: 	movq	lptr(%rip), %rax
+; LINUX-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _cam08:
+; DARWIN-32-STATIC: 	movl	4(%esp), %eax
+; DARWIN-32-STATIC-NEXT: 	movl	_lptr, %ecx
+; DARWIN-32-STATIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _cam08:
+; DARWIN-32-DYNAMIC: 	movl	4(%esp), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	movl	_lptr, %ecx
+; DARWIN-32-DYNAMIC-NEXT: 	leal	262144(%ecx,%eax,4), %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _cam08:
+; DARWIN-32-PIC: 	call	L131$pb
+; DARWIN-32-PIC-NEXT: L131$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	4(%esp), %ecx
+; DARWIN-32-PIC-NEXT: 	movl	_lptr-L131$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	leal	262144(%eax,%ecx,4), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _cam08:
+; DARWIN-64-STATIC: 	movq	_lptr(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _cam08:
+; DARWIN-64-DYNAMIC: 	movq	_lptr(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _cam08:
+; DARWIN-64-PIC: 	movq	_lptr(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	leaq	262144(%rax,%rdi,4), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @lcallee() nounwind {		; seven calls to external @x: checks direct call vs @PLT vs Darwin $stub forms
+entry:
+	call void @x() nounwind
+	call void @x() nounwind
+	call void @x() nounwind
+	call void @x() nounwind
+	call void @x() nounwind
+	call void @x() nounwind
+	call void @x() nounwind
+	ret void
+; LINUX-64-STATIC: lcallee:
+; LINUX-64-STATIC: callq   x
+; LINUX-64-STATIC: callq   x
+; LINUX-64-STATIC: callq   x
+; LINUX-64-STATIC: callq   x
+; LINUX-64-STATIC: callq   x
+; LINUX-64-STATIC: callq   x
+; LINUX-64-STATIC: callq   x
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: lcallee:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	x
+; LINUX-32-STATIC-NEXT: 	call	x
+; LINUX-32-STATIC-NEXT: 	call	x
+; LINUX-32-STATIC-NEXT: 	call	x
+; LINUX-32-STATIC-NEXT: 	call	x
+; LINUX-32-STATIC-NEXT: 	call	x
+; LINUX-32-STATIC-NEXT: 	call	x
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: lcallee:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	x
+; LINUX-32-PIC-NEXT: 	call	x
+; LINUX-32-PIC-NEXT: 	call	x
+; LINUX-32-PIC-NEXT: 	call	x
+; LINUX-32-PIC-NEXT: 	call	x
+; LINUX-32-PIC-NEXT: 	call	x
+; LINUX-32-PIC-NEXT: 	call	x
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: lcallee:
+; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	callq	x@PLT
+; LINUX-64-PIC-NEXT: 	callq	x@PLT
+; LINUX-64-PIC-NEXT: 	callq	x@PLT
+; LINUX-64-PIC-NEXT: 	callq	x@PLT
+; LINUX-64-PIC-NEXT: 	callq	x@PLT
+; LINUX-64-PIC-NEXT: 	callq	x@PLT
+; LINUX-64-PIC-NEXT: 	callq	x@PLT
+; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _lcallee:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	_x
+; DARWIN-32-STATIC-NEXT: 	call	_x
+; DARWIN-32-STATIC-NEXT: 	call	_x
+; DARWIN-32-STATIC-NEXT: 	call	_x
+; DARWIN-32-STATIC-NEXT: 	call	_x
+; DARWIN-32-STATIC-NEXT: 	call	_x
+; DARWIN-32-STATIC-NEXT: 	call	_x
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _lcallee:
+; DARWIN-32-DYNAMIC: 	subl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _lcallee:
+; DARWIN-32-PIC: 	subl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	call	L_x$stub
+; DARWIN-32-PIC-NEXT: 	call	L_x$stub
+; DARWIN-32-PIC-NEXT: 	call	L_x$stub
+; DARWIN-32-PIC-NEXT: 	call	L_x$stub
+; DARWIN-32-PIC-NEXT: 	call	L_x$stub
+; DARWIN-32-PIC-NEXT: 	call	L_x$stub
+; DARWIN-32-PIC-NEXT: 	call	L_x$stub
+; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _lcallee:
+; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	callq	_x
+; DARWIN-64-STATIC-NEXT: 	callq	_x
+; DARWIN-64-STATIC-NEXT: 	callq	_x
+; DARWIN-64-STATIC-NEXT: 	callq	_x
+; DARWIN-64-STATIC-NEXT: 	callq	_x
+; DARWIN-64-STATIC-NEXT: 	callq	_x
+; DARWIN-64-STATIC-NEXT: 	callq	_x
+; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _lcallee:
+; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	callq	_x
+; DARWIN-64-DYNAMIC-NEXT: 	callq	_x
+; DARWIN-64-DYNAMIC-NEXT: 	callq	_x
+; DARWIN-64-DYNAMIC-NEXT: 	callq	_x
+; DARWIN-64-DYNAMIC-NEXT: 	callq	_x
+; DARWIN-64-DYNAMIC-NEXT: 	callq	_x
+; DARWIN-64-DYNAMIC-NEXT: 	callq	_x
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _lcallee:
+; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	callq	_x
+; DARWIN-64-PIC-NEXT: 	callq	_x
+; DARWIN-64-PIC-NEXT: 	callq	_x
+; DARWIN-64-PIC-NEXT: 	callq	_x
+; DARWIN-64-PIC-NEXT: 	callq	_x
+; DARWIN-64-PIC-NEXT: 	callq	_x
+; DARWIN-64-PIC-NEXT: 	callq	_x
+; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+declare void @x()
+
+define internal void @dcallee() nounwind {	; internal function calling external @y seven times: same call-form checks as lcallee
+entry:
+	call void @y() nounwind
+	call void @y() nounwind
+	call void @y() nounwind
+	call void @y() nounwind
+	call void @y() nounwind
+	call void @y() nounwind
+	call void @y() nounwind
+	ret void
+; LINUX-64-STATIC: dcallee:
+; LINUX-64-STATIC: callq   y
+; LINUX-64-STATIC: callq   y
+; LINUX-64-STATIC: callq   y
+; LINUX-64-STATIC: callq   y
+; LINUX-64-STATIC: callq   y
+; LINUX-64-STATIC: callq   y
+; LINUX-64-STATIC: callq   y
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: dcallee:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	y
+; LINUX-32-STATIC-NEXT: 	call	y
+; LINUX-32-STATIC-NEXT: 	call	y
+; LINUX-32-STATIC-NEXT: 	call	y
+; LINUX-32-STATIC-NEXT: 	call	y
+; LINUX-32-STATIC-NEXT: 	call	y
+; LINUX-32-STATIC-NEXT: 	call	y
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: dcallee:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	y
+; LINUX-32-PIC-NEXT: 	call	y
+; LINUX-32-PIC-NEXT: 	call	y
+; LINUX-32-PIC-NEXT: 	call	y
+; LINUX-32-PIC-NEXT: 	call	y
+; LINUX-32-PIC-NEXT: 	call	y
+; LINUX-32-PIC-NEXT: 	call	y
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: dcallee:
+; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	callq	y@PLT
+; LINUX-64-PIC-NEXT: 	callq	y@PLT
+; LINUX-64-PIC-NEXT: 	callq	y@PLT
+; LINUX-64-PIC-NEXT: 	callq	y@PLT
+; LINUX-64-PIC-NEXT: 	callq	y@PLT
+; LINUX-64-PIC-NEXT: 	callq	y@PLT
+; LINUX-64-PIC-NEXT: 	callq	y@PLT
+; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _dcallee:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	_y
+; DARWIN-32-STATIC-NEXT: 	call	_y
+; DARWIN-32-STATIC-NEXT: 	call	_y
+; DARWIN-32-STATIC-NEXT: 	call	_y
+; DARWIN-32-STATIC-NEXT: 	call	_y
+; DARWIN-32-STATIC-NEXT: 	call	_y
+; DARWIN-32-STATIC-NEXT: 	call	_y
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _dcallee:
+; DARWIN-32-DYNAMIC: 	subl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _dcallee:
+; DARWIN-32-PIC: 	subl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	call	L_y$stub
+; DARWIN-32-PIC-NEXT: 	call	L_y$stub
+; DARWIN-32-PIC-NEXT: 	call	L_y$stub
+; DARWIN-32-PIC-NEXT: 	call	L_y$stub
+; DARWIN-32-PIC-NEXT: 	call	L_y$stub
+; DARWIN-32-PIC-NEXT: 	call	L_y$stub
+; DARWIN-32-PIC-NEXT: 	call	L_y$stub
+; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _dcallee:
+; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	callq	_y
+; DARWIN-64-STATIC-NEXT: 	callq	_y
+; DARWIN-64-STATIC-NEXT: 	callq	_y
+; DARWIN-64-STATIC-NEXT: 	callq	_y
+; DARWIN-64-STATIC-NEXT: 	callq	_y
+; DARWIN-64-STATIC-NEXT: 	callq	_y
+; DARWIN-64-STATIC-NEXT: 	callq	_y
+; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _dcallee:
+; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	callq	_y
+; DARWIN-64-DYNAMIC-NEXT: 	callq	_y
+; DARWIN-64-DYNAMIC-NEXT: 	callq	_y
+; DARWIN-64-DYNAMIC-NEXT: 	callq	_y
+; DARWIN-64-DYNAMIC-NEXT: 	callq	_y
+; DARWIN-64-DYNAMIC-NEXT: 	callq	_y
+; DARWIN-64-DYNAMIC-NEXT: 	callq	_y
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _dcallee:
+; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	callq	_y
+; DARWIN-64-PIC-NEXT: 	callq	_y
+; DARWIN-64-PIC-NEXT: 	callq	_y
+; DARWIN-64-PIC-NEXT: 	callq	_y
+; DARWIN-64-PIC-NEXT: 	callq	_y
+; DARWIN-64-PIC-NEXT: 	callq	_y
+; DARWIN-64-PIC-NEXT: 	callq	_y
+; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+declare void @y()
+
+define void ()* @address() nounwind {		; take the address of external @callee: absolute imm vs GOT vs non-lazy ptr
+entry:
+	ret void ()* @callee
+; LINUX-64-STATIC: address:
+; LINUX-64-STATIC: movl    $callee, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: address:
+; LINUX-32-STATIC: 	movl	$callee, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: address:
+; LINUX-32-PIC: 	movl	$callee, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: address:
+; LINUX-64-PIC: 	movq	callee@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _address:
+; DARWIN-32-STATIC: 	movl	$_callee, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _address:
+; DARWIN-32-DYNAMIC: 	movl	L_callee$non_lazy_ptr, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _address:
+; DARWIN-32-PIC: 	call	L134$pb
+; DARWIN-32-PIC-NEXT: L134$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_callee$non_lazy_ptr-L134$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _address:
+; DARWIN-64-STATIC: 	movq	_callee@GOTPCREL(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _address:
+; DARWIN-64-DYNAMIC: 	movq	_callee@GOTPCREL(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _address:
+; DARWIN-64-PIC: 	movq	_callee@GOTPCREL(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+declare void @callee()
+
+define void ()* @laddress() nounwind {		; take the address of locally-defined @lcallee: PC-relative lea where possible
+entry:
+	ret void ()* @lcallee
+; LINUX-64-STATIC: laddress:
+; LINUX-64-STATIC: movl    $lcallee, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: laddress:
+; LINUX-32-STATIC: 	movl	$lcallee, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: laddress:
+; LINUX-32-PIC: 	movl	$lcallee, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: laddress:
+; LINUX-64-PIC: 	movq	lcallee@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _laddress:
+; DARWIN-32-STATIC: 	movl	$_lcallee, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _laddress:
+; DARWIN-32-DYNAMIC: 	movl	$_lcallee, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _laddress:
+; DARWIN-32-PIC: 	call	L135$pb
+; DARWIN-32-PIC-NEXT: L135$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_lcallee-L135$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _laddress:
+; DARWIN-64-STATIC: 	leaq	_lcallee(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _laddress:
+; DARWIN-64-DYNAMIC: 	leaq	_lcallee(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _laddress:
+; DARWIN-64-PIC: 	leaq	_lcallee(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void ()* @daddress() nounwind {		; take the address of internal @dcallee: never needs GOT/stub indirection
+entry:
+	ret void ()* @dcallee
+; LINUX-64-STATIC: daddress:
+; LINUX-64-STATIC: movl    $dcallee, %eax
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: daddress:
+; LINUX-32-STATIC: 	movl	$dcallee, %eax
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: daddress:
+; LINUX-32-PIC: 	movl	$dcallee, %eax
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: daddress:
+; LINUX-64-PIC: 	leaq	dcallee(%rip), %rax
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _daddress:
+; DARWIN-32-STATIC: 	movl	$_dcallee, %eax
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _daddress:
+; DARWIN-32-DYNAMIC: 	movl	$_dcallee, %eax
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _daddress:
+; DARWIN-32-PIC: 	call	L136$pb
+; DARWIN-32-PIC-NEXT: L136$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	leal	_dcallee-L136$pb(%eax), %eax
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _daddress:
+; DARWIN-64-STATIC: 	leaq	_dcallee(%rip), %rax
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _daddress:
+; DARWIN-64-DYNAMIC: 	leaq	_dcallee(%rip), %rax
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _daddress:
+; DARWIN-64-PIC: 	leaq	_dcallee(%rip), %rax
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @caller() nounwind {		; two calls to external @callee: direct vs @PLT vs Darwin $stub call forms
+entry:
+	call void @callee() nounwind
+	call void @callee() nounwind
+	ret void
+; LINUX-64-STATIC: caller:
+; LINUX-64-STATIC: callq   callee
+; LINUX-64-STATIC: callq   callee
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: caller:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	callee
+; LINUX-32-STATIC-NEXT: 	call	callee
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: caller:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	callee
+; LINUX-32-PIC-NEXT: 	call	callee
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: caller:
+; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	callq	callee@PLT
+; LINUX-64-PIC-NEXT: 	callq	callee@PLT
+; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _caller:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	_callee
+; DARWIN-32-STATIC-NEXT: 	call	_callee
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _caller:
+; DARWIN-32-DYNAMIC: 	subl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_callee$stub
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_callee$stub
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _caller:
+; DARWIN-32-PIC: 	subl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	call	L_callee$stub
+; DARWIN-32-PIC-NEXT: 	call	L_callee$stub
+; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _caller:
+; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	callq	_callee
+; DARWIN-64-STATIC-NEXT: 	callq	_callee
+; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _caller:
+; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	callq	_callee
+; DARWIN-64-DYNAMIC-NEXT: 	callq	_callee
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _caller:
+; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	callq	_callee
+; DARWIN-64-PIC-NEXT: 	callq	_callee
+; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @dcaller() nounwind {
+entry:
+	call void @dcallee() nounwind
+	call void @dcallee() nounwind
+	ret void
+; LINUX-64-STATIC: dcaller:
+; LINUX-64-STATIC: callq   dcallee
+; LINUX-64-STATIC: callq   dcallee
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: dcaller:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	dcallee
+; LINUX-32-STATIC-NEXT: 	call	dcallee
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: dcaller:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	dcallee
+; LINUX-32-PIC-NEXT: 	call	dcallee
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: dcaller:
+; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	callq	dcallee
+; LINUX-64-PIC-NEXT: 	callq	dcallee
+; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _dcaller:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	_dcallee
+; DARWIN-32-STATIC-NEXT: 	call	_dcallee
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _dcaller:
+; DARWIN-32-DYNAMIC: 	subl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	call	_dcallee
+; DARWIN-32-DYNAMIC-NEXT: 	call	_dcallee
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _dcaller:
+; DARWIN-32-PIC: 	subl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	call	_dcallee
+; DARWIN-32-PIC-NEXT: 	call	_dcallee
+; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _dcaller:
+; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	callq	_dcallee
+; DARWIN-64-STATIC-NEXT: 	callq	_dcallee
+; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _dcaller:
+; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	callq	_dcallee
+; DARWIN-64-DYNAMIC-NEXT: 	callq	_dcallee
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _dcaller:
+; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	callq	_dcallee
+; DARWIN-64-PIC-NEXT: 	callq	_dcallee
+; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @lcaller() nounwind {
+entry:
+	call void @lcallee() nounwind
+	call void @lcallee() nounwind
+	ret void
+; LINUX-64-STATIC: lcaller:
+; LINUX-64-STATIC: callq   lcallee
+; LINUX-64-STATIC: callq   lcallee
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: lcaller:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	lcallee
+; LINUX-32-STATIC-NEXT: 	call	lcallee
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: lcaller:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	lcallee
+; LINUX-32-PIC-NEXT: 	call	lcallee
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: lcaller:
+; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	callq	lcallee@PLT
+; LINUX-64-PIC-NEXT: 	callq	lcallee@PLT
+; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _lcaller:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	_lcallee
+; DARWIN-32-STATIC-NEXT: 	call	_lcallee
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _lcaller:
+; DARWIN-32-DYNAMIC: 	subl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	call	_lcallee
+; DARWIN-32-DYNAMIC-NEXT: 	call	_lcallee
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _lcaller:
+; DARWIN-32-PIC: 	subl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	call	_lcallee
+; DARWIN-32-PIC-NEXT: 	call	_lcallee
+; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _lcaller:
+; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	callq	_lcallee
+; DARWIN-64-STATIC-NEXT: 	callq	_lcallee
+; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _lcaller:
+; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	callq	_lcallee
+; DARWIN-64-DYNAMIC-NEXT: 	callq	_lcallee
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _lcaller:
+; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	callq	_lcallee
+; DARWIN-64-PIC-NEXT: 	callq	_lcallee
+; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @tailcaller() nounwind {
+entry:
+	call void @callee() nounwind
+	ret void
+; LINUX-64-STATIC: tailcaller:
+; LINUX-64-STATIC: callq   callee
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: tailcaller:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	callee
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: tailcaller:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	callee
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: tailcaller:
+; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	callq	callee@PLT
+; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _tailcaller:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	_callee
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _tailcaller:
+; DARWIN-32-DYNAMIC: 	subl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	call	L_callee$stub
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _tailcaller:
+; DARWIN-32-PIC: 	subl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	call	L_callee$stub
+; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _tailcaller:
+; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	callq	_callee
+; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _tailcaller:
+; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	callq	_callee
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _tailcaller:
+; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	callq	_callee
+; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @dtailcaller() nounwind {
+entry:
+	call void @dcallee() nounwind
+	ret void
+; LINUX-64-STATIC: dtailcaller:
+; LINUX-64-STATIC: callq   dcallee
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: dtailcaller:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	dcallee
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: dtailcaller:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	dcallee
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: dtailcaller:
+; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	callq	dcallee
+; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _dtailcaller:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	_dcallee
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _dtailcaller:
+; DARWIN-32-DYNAMIC: 	subl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	call	_dcallee
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _dtailcaller:
+; DARWIN-32-PIC: 	subl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	call	_dcallee
+; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _dtailcaller:
+; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	callq	_dcallee
+; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _dtailcaller:
+; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	callq	_dcallee
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _dtailcaller:
+; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	callq	_dcallee
+; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @ltailcaller() nounwind {
+entry:
+	call void @lcallee() nounwind
+	ret void
+; LINUX-64-STATIC: ltailcaller:
+; LINUX-64-STATIC: callq   lcallee
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ltailcaller:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	lcallee
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ltailcaller:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	lcallee
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ltailcaller:
+; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	callq	lcallee@PLT
+; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ltailcaller:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	_lcallee
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ltailcaller:
+; DARWIN-32-DYNAMIC: 	subl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	call	_lcallee
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ltailcaller:
+; DARWIN-32-PIC: 	subl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	call	_lcallee
+; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ltailcaller:
+; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	callq	_lcallee
+; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ltailcaller:
+; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	callq	_lcallee
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ltailcaller:
+; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	callq	_lcallee
+; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @icaller() nounwind {
+entry:
+	%0 = load void ()** @ifunc, align 8
+	call void %0() nounwind
+	%1 = load void ()** @ifunc, align 8
+	call void %1() nounwind
+	ret void
+; LINUX-64-STATIC: icaller:
+; LINUX-64-STATIC: callq   *ifunc
+; LINUX-64-STATIC: callq   *ifunc
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: icaller:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	*ifunc
+; LINUX-32-STATIC-NEXT: 	call	*ifunc
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: icaller:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	*ifunc
+; LINUX-32-PIC-NEXT: 	call	*ifunc
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: icaller:
+; LINUX-64-PIC: 	pushq	%rbx
+; LINUX-64-PIC-NEXT: 	movq	ifunc@GOTPCREL(%rip), %rbx
+; LINUX-64-PIC-NEXT: 	callq	*(%rbx)
+; LINUX-64-PIC-NEXT: 	callq	*(%rbx)
+; LINUX-64-PIC-NEXT: 	popq	%rbx
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _icaller:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	*_ifunc
+; DARWIN-32-STATIC-NEXT: 	call	*_ifunc
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _icaller:
+; DARWIN-32-DYNAMIC: 	pushl	%esi
+; DARWIN-32-DYNAMIC-NEXT: 	subl	$8, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ifunc$non_lazy_ptr, %esi
+; DARWIN-32-DYNAMIC-NEXT: 	call	*(%esi)
+; DARWIN-32-DYNAMIC-NEXT: 	call	*(%esi)
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$8, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	popl	%esi
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _icaller:
+; DARWIN-32-PIC: 	pushl	%esi
+; DARWIN-32-PIC-NEXT: 	subl	$8, %esp
+; DARWIN-32-PIC-NEXT: 	call	L143$pb
+; DARWIN-32-PIC-NEXT: L143$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_ifunc$non_lazy_ptr-L143$pb(%eax), %esi
+; DARWIN-32-PIC-NEXT: 	call	*(%esi)
+; DARWIN-32-PIC-NEXT: 	call	*(%esi)
+; DARWIN-32-PIC-NEXT: 	addl	$8, %esp
+; DARWIN-32-PIC-NEXT: 	popl	%esi
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _icaller:
+; DARWIN-64-STATIC: 	pushq	%rbx
+; DARWIN-64-STATIC-NEXT: 	movq	_ifunc@GOTPCREL(%rip), %rbx
+; DARWIN-64-STATIC-NEXT: 	callq	*(%rbx)
+; DARWIN-64-STATIC-NEXT: 	callq	*(%rbx)
+; DARWIN-64-STATIC-NEXT: 	popq	%rbx
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _icaller:
+; DARWIN-64-DYNAMIC: 	pushq	%rbx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ifunc@GOTPCREL(%rip), %rbx
+; DARWIN-64-DYNAMIC-NEXT: 	callq	*(%rbx)
+; DARWIN-64-DYNAMIC-NEXT: 	callq	*(%rbx)
+; DARWIN-64-DYNAMIC-NEXT: 	popq	%rbx
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _icaller:
+; DARWIN-64-PIC: 	pushq	%rbx
+; DARWIN-64-PIC-NEXT: 	movq	_ifunc@GOTPCREL(%rip), %rbx
+; DARWIN-64-PIC-NEXT: 	callq	*(%rbx)
+; DARWIN-64-PIC-NEXT: 	callq	*(%rbx)
+; DARWIN-64-PIC-NEXT: 	popq	%rbx
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @dicaller() nounwind {
+entry:
+	%0 = load void ()** @difunc, align 8
+	call void %0() nounwind
+	%1 = load void ()** @difunc, align 8
+	call void %1() nounwind
+	ret void
+; LINUX-64-STATIC: dicaller:
+; LINUX-64-STATIC: callq   *difunc
+; LINUX-64-STATIC: callq   *difunc
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: dicaller:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	*difunc
+; LINUX-32-STATIC-NEXT: 	call	*difunc
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: dicaller:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	*difunc
+; LINUX-32-PIC-NEXT: 	call	*difunc
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: dicaller:
+; LINUX-64-PIC: 	pushq	%rbx
+; LINUX-64-PIC-NEXT: 	movq	difunc@GOTPCREL(%rip), %rbx
+; LINUX-64-PIC-NEXT: 	callq	*(%rbx)
+; LINUX-64-PIC-NEXT: 	callq	*(%rbx)
+; LINUX-64-PIC-NEXT: 	popq	%rbx
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _dicaller:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	*_difunc
+; DARWIN-32-STATIC-NEXT: 	call	*_difunc
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _dicaller:
+; DARWIN-32-DYNAMIC: 	subl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	call	*_difunc
+; DARWIN-32-DYNAMIC-NEXT: 	call	*_difunc
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _dicaller:
+; DARWIN-32-PIC: 	pushl	%esi
+; DARWIN-32-PIC-NEXT: 	subl	$8, %esp
+; DARWIN-32-PIC-NEXT: 	call	L144$pb
+; DARWIN-32-PIC-NEXT: L144$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%esi
+; DARWIN-32-PIC-NEXT: 	call	*_difunc-L144$pb(%esi)
+; DARWIN-32-PIC-NEXT: 	call	*_difunc-L144$pb(%esi)
+; DARWIN-32-PIC-NEXT: 	addl	$8, %esp
+; DARWIN-32-PIC-NEXT: 	popl	%esi
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _dicaller:
+; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	callq	*_difunc(%rip)
+; DARWIN-64-STATIC-NEXT: 	callq	*_difunc(%rip)
+; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _dicaller:
+; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	callq	*_difunc(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	callq	*_difunc(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _dicaller:
+; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	callq	*_difunc(%rip)
+; DARWIN-64-PIC-NEXT: 	callq	*_difunc(%rip)
+; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @licaller() nounwind {
+entry:
+	%0 = load void ()** @lifunc, align 8
+	call void %0() nounwind
+	%1 = load void ()** @lifunc, align 8
+	call void %1() nounwind
+	ret void
+; LINUX-64-STATIC: licaller:
+; LINUX-64-STATIC: callq   *lifunc
+; LINUX-64-STATIC: callq   *lifunc
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: licaller:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	*lifunc
+; LINUX-32-STATIC-NEXT: 	call	*lifunc
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: licaller:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	*lifunc
+; LINUX-32-PIC-NEXT: 	call	*lifunc
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: licaller:
+; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	callq	*lifunc(%rip)
+; LINUX-64-PIC-NEXT: 	callq	*lifunc(%rip)
+; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _licaller:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	*_lifunc
+; DARWIN-32-STATIC-NEXT: 	call	*_lifunc
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _licaller:
+; DARWIN-32-DYNAMIC: 	subl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	call	*_lifunc
+; DARWIN-32-DYNAMIC-NEXT: 	call	*_lifunc
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _licaller:
+; DARWIN-32-PIC: 	pushl	%esi
+; DARWIN-32-PIC-NEXT: 	subl	$8, %esp
+; DARWIN-32-PIC-NEXT: 	call	L145$pb
+; DARWIN-32-PIC-NEXT: L145$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%esi
+; DARWIN-32-PIC-NEXT: 	call	*_lifunc-L145$pb(%esi)
+; DARWIN-32-PIC-NEXT: 	call	*_lifunc-L145$pb(%esi)
+; DARWIN-32-PIC-NEXT: 	addl	$8, %esp
+; DARWIN-32-PIC-NEXT: 	popl	%esi
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _licaller:
+; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	callq	*_lifunc(%rip)
+; DARWIN-64-STATIC-NEXT: 	callq	*_lifunc(%rip)
+; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _licaller:
+; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	callq	*_lifunc(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	callq	*_lifunc(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _licaller:
+; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	callq	*_lifunc(%rip)
+; DARWIN-64-PIC-NEXT: 	callq	*_lifunc(%rip)
+; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @itailcaller() nounwind {
+entry:
+	%0 = load void ()** @ifunc, align 8
+	call void %0() nounwind
+	%1 = load void ()** @ifunc, align 8
+	call void %1() nounwind
+	ret void
+; LINUX-64-STATIC: itailcaller:
+; LINUX-64-STATIC: callq   *ifunc
+; LINUX-64-STATIC: callq   *ifunc
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: itailcaller:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	*ifunc
+; LINUX-32-STATIC-NEXT: 	call	*ifunc
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: itailcaller:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	*ifunc
+; LINUX-32-PIC-NEXT: 	call	*ifunc
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: itailcaller:
+; LINUX-64-PIC: 	pushq	%rbx
+; LINUX-64-PIC-NEXT: 	movq	ifunc@GOTPCREL(%rip), %rbx
+; LINUX-64-PIC-NEXT: 	callq	*(%rbx)
+; LINUX-64-PIC-NEXT: 	callq	*(%rbx)
+; LINUX-64-PIC-NEXT: 	popq	%rbx
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _itailcaller:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	*_ifunc
+; DARWIN-32-STATIC-NEXT: 	call	*_ifunc
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _itailcaller:
+; DARWIN-32-DYNAMIC: 	pushl	%esi
+; DARWIN-32-DYNAMIC-NEXT: 	subl	$8, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	movl	L_ifunc$non_lazy_ptr, %esi
+; DARWIN-32-DYNAMIC-NEXT: 	call	*(%esi)
+; DARWIN-32-DYNAMIC-NEXT: 	call	*(%esi)
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$8, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	popl	%esi
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _itailcaller:
+; DARWIN-32-PIC: 	pushl	%esi
+; DARWIN-32-PIC-NEXT: 	subl	$8, %esp
+; DARWIN-32-PIC-NEXT: 	call	L146$pb
+; DARWIN-32-PIC-NEXT: L146$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	movl	L_ifunc$non_lazy_ptr-L146$pb(%eax), %esi
+; DARWIN-32-PIC-NEXT: 	call	*(%esi)
+; DARWIN-32-PIC-NEXT: 	call	*(%esi)
+; DARWIN-32-PIC-NEXT: 	addl	$8, %esp
+; DARWIN-32-PIC-NEXT: 	popl	%esi
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _itailcaller:
+; DARWIN-64-STATIC: 	pushq	%rbx
+; DARWIN-64-STATIC-NEXT: 	movq	_ifunc@GOTPCREL(%rip), %rbx
+; DARWIN-64-STATIC-NEXT: 	callq	*(%rbx)
+; DARWIN-64-STATIC-NEXT: 	callq	*(%rbx)
+; DARWIN-64-STATIC-NEXT: 	popq	%rbx
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _itailcaller:
+; DARWIN-64-DYNAMIC: 	pushq	%rbx
+; DARWIN-64-DYNAMIC-NEXT: 	movq	_ifunc@GOTPCREL(%rip), %rbx
+; DARWIN-64-DYNAMIC-NEXT: 	callq	*(%rbx)
+; DARWIN-64-DYNAMIC-NEXT: 	callq	*(%rbx)
+; DARWIN-64-DYNAMIC-NEXT: 	popq	%rbx
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _itailcaller:
+; DARWIN-64-PIC: 	pushq	%rbx
+; DARWIN-64-PIC-NEXT: 	movq	_ifunc@GOTPCREL(%rip), %rbx
+; DARWIN-64-PIC-NEXT: 	callq	*(%rbx)
+; DARWIN-64-PIC-NEXT: 	callq	*(%rbx)
+; DARWIN-64-PIC-NEXT: 	popq	%rbx
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @ditailcaller() nounwind {
+entry:
+	%0 = load void ()** @difunc, align 8
+	call void %0() nounwind
+	ret void
+; LINUX-64-STATIC: ditailcaller:
+; LINUX-64-STATIC: callq   *difunc
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: ditailcaller:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	*difunc
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: ditailcaller:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	*difunc
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: ditailcaller:
+; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	movq	difunc@GOTPCREL(%rip), %rax
+; LINUX-64-PIC-NEXT: 	callq	*(%rax)
+; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _ditailcaller:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	*_difunc
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _ditailcaller:
+; DARWIN-32-DYNAMIC: 	subl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	call	*_difunc
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _ditailcaller:
+; DARWIN-32-PIC: 	subl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	call	L147$pb
+; DARWIN-32-PIC-NEXT: L147$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	call	*_difunc-L147$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _ditailcaller:
+; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	callq	*_difunc(%rip)
+; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _ditailcaller:
+; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	callq	*_difunc(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _ditailcaller:
+; DARWIN-64-PIC: 	callq	*_difunc(%rip)
+; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	ret
+}
+
+define void @litailcaller() nounwind {
+entry:
+	%0 = load void ()** @lifunc, align 8
+	call void %0() nounwind
+	ret void
+; LINUX-64-STATIC: litailcaller:
+; LINUX-64-STATIC: callq   *lifunc
+; LINUX-64-STATIC: ret
+
+; LINUX-32-STATIC: litailcaller:
+; LINUX-32-STATIC: 	subl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	call	*lifunc
+; LINUX-32-STATIC-NEXT: 	addl	$4, %esp
+; LINUX-32-STATIC-NEXT: 	ret
+
+; LINUX-32-PIC: litailcaller:
+; LINUX-32-PIC: 	subl	$4, %esp
+; LINUX-32-PIC-NEXT: 	call	*lifunc
+; LINUX-32-PIC-NEXT: 	addl	$4, %esp
+; LINUX-32-PIC-NEXT: 	ret
+
+; LINUX-64-PIC: litailcaller:
+; LINUX-64-PIC: 	subq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	callq	*lifunc(%rip)
+; LINUX-64-PIC-NEXT: 	addq	$8, %rsp
+; LINUX-64-PIC-NEXT: 	ret
+
+; DARWIN-32-STATIC: _litailcaller:
+; DARWIN-32-STATIC: 	subl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	call	*_lifunc
+; DARWIN-32-STATIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-STATIC-NEXT: 	ret
+
+; DARWIN-32-DYNAMIC: _litailcaller:
+; DARWIN-32-DYNAMIC: 	subl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	call	*_lifunc
+; DARWIN-32-DYNAMIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-DYNAMIC-NEXT: 	ret
+
+; DARWIN-32-PIC: _litailcaller:
+; DARWIN-32-PIC: 	subl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	call	L148$pb
+; DARWIN-32-PIC-NEXT: L148$pb:
+; DARWIN-32-PIC-NEXT: 	popl	%eax
+; DARWIN-32-PIC-NEXT: 	call	*_lifunc-L148$pb(%eax)
+; DARWIN-32-PIC-NEXT: 	addl	$12, %esp
+; DARWIN-32-PIC-NEXT: 	ret
+
+; DARWIN-64-STATIC: _litailcaller:
+; DARWIN-64-STATIC: 	subq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	callq	*_lifunc(%rip)
+; DARWIN-64-STATIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-STATIC-NEXT: 	ret
+
+; DARWIN-64-DYNAMIC: _litailcaller:
+; DARWIN-64-DYNAMIC: 	subq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	callq	*_lifunc(%rip)
+; DARWIN-64-DYNAMIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-DYNAMIC-NEXT: 	ret
+
+; DARWIN-64-PIC: _litailcaller:
+; DARWIN-64-PIC: 	subq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	callq	*_lifunc(%rip)
+; DARWIN-64-PIC-NEXT: 	addq	$8, %rsp
+; DARWIN-64-PIC-NEXT: 	ret
+}
diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll
new file mode 100644
index 0000000..3991a68
--- /dev/null
+++ b/test/CodeGen/X86/add.ll
@@ -0,0 +1,94 @@
+; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64
+
+; The immediate can be encoded in a smaller way if the
+; instruction is a sub instead of an add.
+
+define i32 @test1(i32 inreg %a) nounwind {
+  %b = add i32 %a, 128
+  ret i32 %b
+; X32: subl	$-128, %eax
+; X64: subl $-128, 
+}
+define i64 @test2(i64 inreg %a) nounwind {
+  %b = add i64 %a, 2147483648
+  ret i64 %b
+; X32: addl	$-2147483648, %eax
+; X64: subq	$-2147483648,
+}
+define i64 @test3(i64 inreg %a) nounwind {
+  %b = add i64 %a, 128
+  ret i64 %b
+  
+; X32: addl $128, %eax
+; X64: subq	$-128,
+}
+
+define i1 @test4(i32 %v1, i32 %v2, i32* %X) nounwind {
+entry:
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %sum = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %normal
+
+normal:
+  store i32 0, i32* %X
+  br label %overflow
+
+overflow:
+  ret i1 false
+  
+; X32: test4:
+; X32: addl
+; X32-NEXT: jo
+
+; X64:        test4:
+; X64:          addl	%esi, %edi
+; X64-NEXT:	jo
+}
+
+define i1 @test5(i32 %v1, i32 %v2, i32* %X) nounwind {
+entry:
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
+  %sum = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %carry, label %normal
+
+normal:
+  store i32 0, i32* %X
+  br label %carry
+
+carry:
+  ret i1 false
+
+; X32: test5:
+; X32: addl
+; X32-NEXT: jb
+
+; X64:        test5:
+; X64:          addl	%esi, %edi
+; X64-NEXT:	jb
+}
+
+declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32)
+declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32)
+
+
+define i64 @test6(i64 %A, i32 %B) nounwind {
+        %tmp12 = zext i32 %B to i64             ; <i64> [#uses=1]
+        %tmp3 = shl i64 %tmp12, 32              ; <i64> [#uses=1]
+        %tmp5 = add i64 %tmp3, %A               ; <i64> [#uses=1]
+        ret i64 %tmp5
+
+; X32: test6:
+; X32:	    movl 12(%esp), %edx
+; X32-NEXT: addl 8(%esp), %edx
+; X32-NEXT: movl 4(%esp), %eax
+; X32-NEXT: ret
+        
+; X64: test6:
+; X64:	shlq	$32, %rsi
+; X64:	leaq	(%rsi,%rdi), %rax
+; X64:	ret
+}
+
diff --git a/test/CodeGen/X86/addr-label-difference.ll b/test/CodeGen/X86/addr-label-difference.ll
new file mode 100644
index 0000000..547d6b5
--- /dev/null
+++ b/test/CodeGen/X86/addr-label-difference.ll
@@ -0,0 +1,22 @@
+; RUN: llc %s -o - | grep {__TEXT,__const}
+; PR5929
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin10.0"
+
+; This array should go into the __TEXT,__const section, not into the
+; __DATA,__const section, because the elements don't need relocations.
+@test.array = internal constant [3 x i32] [i32 sub (i32 ptrtoint (i8* blockaddress(@test, %foo) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@test, %bar) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32)), i32 sub (i32 ptrtoint (i8* blockaddress(@test, %hack) to i32), i32 ptrtoint (i8* blockaddress(@test, %foo) to i32))] ; <[3 x i32]*> [#uses=1]
+
+define void @test(i32 %i) nounwind ssp {
+entry:
+  br label %foo
+
+foo:                                              ; preds = %indirectgoto, %indirectgoto, %indirectgoto, %indirectgoto, %indirectgoto
+  br label %bar
+
+bar:                                              ; preds = %foo, %indirectgoto
+  br label %hack
+
+hack:                                             ; preds = %bar, %indirectgoto
+  ret void
+}
diff --git a/test/CodeGen/X86/aliases.ll b/test/CodeGen/X86/aliases.ll
new file mode 100644
index 0000000..3020eb3
--- /dev/null
+++ b/test/CodeGen/X86/aliases.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -asm-verbose=false -o %t
+; RUN: grep { = } %t   | count 7
+; RUN: grep set %t   | count 16
+; RUN: grep globl %t | count 6
+; RUN: grep weak %t  | count 1
+; RUN: grep hidden %t | count 1
+; RUN: grep protected %t | count 1
+
+@bar = external global i32
+@foo1 = alias i32* @bar
+@foo2 = alias i32* @bar
+
+%FunTy = type i32()
+
+declare i32 @foo_f()
+@bar_f = alias weak %FunTy* @foo_f
+
+@bar_i = alias internal i32* @bar
+
+@A = alias bitcast (i32* @bar to i64*)
+
+@bar_h = hidden alias i32* @bar
+
+@bar_p = protected alias i32* @bar
+
+define i32 @test() {
+entry:
+   %tmp = load i32* @foo1
+   %tmp1 = load i32* @foo2
+   %tmp0 = load i32* @bar_i
+   %tmp2 = call i32 @foo_f()
+   %tmp3 = add i32 %tmp, %tmp2
+   %tmp4 = call %FunTy* @bar_f()
+   %tmp5 = add i32 %tmp3, %tmp4
+   %tmp6 = add i32 %tmp1, %tmp5
+   %tmp7 = add i32 %tmp6, %tmp0
+   ret i32 %tmp7
+}
diff --git a/test/CodeGen/X86/aligned-comm.ll b/test/CodeGen/X86/aligned-comm.ll
new file mode 100644
index 0000000..7715869
--- /dev/null
+++ b/test/CodeGen/X86/aligned-comm.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -mtriple=i386-apple-darwin10 | grep {array,16512,7}
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | grep {array,16512,7}
+
+; Darwin 9+ should get alignment on common symbols.
+@array = common global [4128 x i32] zeroinitializer, align 128
diff --git a/test/CodeGen/X86/all-ones-vector.ll b/test/CodeGen/X86/all-ones-vector.ll
new file mode 100644
index 0000000..10fecad
--- /dev/null
+++ b/test/CodeGen/X86/all-ones-vector.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 -mattr=sse2 | grep pcmpeqd | count 4
+
+define <4 x i32> @ioo() nounwind {
+        ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+}
+define <2 x i64> @loo() nounwind {
+        ret <2 x i64> <i64 -1, i64 -1>
+}
+define <2 x double> @doo() nounwind {
+        ret <2 x double> <double 0xffffffffffffffff, double 0xffffffffffffffff>
+}
+define <4 x float> @foo() nounwind {
+        ret <4 x float> <float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000, float 0xffffffffe0000000>
+}
diff --git a/test/CodeGen/X86/alloca-align-rounding.ll b/test/CodeGen/X86/alloca-align-rounding.ll
new file mode 100644
index 0000000..f45e9b8
--- /dev/null
+++ b/test/CodeGen/X86/alloca-align-rounding.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin | grep and | count 1
+; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux | grep and | count 1
+
+declare void @bar(<2 x i64>* %n)
+
+define void @foo(i32 %h) {
+  %p = alloca <2 x i64>, i32 %h
+  call void @bar(<2 x i64>* %p)
+  ret void
+}
+
+define void @foo2(i32 %h) {
+  %p = alloca <2 x i64>, i32 %h, align 32
+  call void @bar(<2 x i64>* %p)
+  ret void
+}
diff --git a/test/CodeGen/X86/and-or-fold.ll b/test/CodeGen/X86/and-or-fold.ll
new file mode 100644
index 0000000..7733b8a
--- /dev/null
+++ b/test/CodeGen/X86/and-or-fold.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 | grep and | count 1
+
+; The dag combiner should fold together (x&127)|(y&16711680) -> (x|y)&c1
+; in this case.
+
+define i32 @test6(i32 %x, i16 %y) {
+        %tmp1 = zext i16 %y to i32              ; <i32> [#uses=1]
+        %tmp2 = and i32 %tmp1, 127              ; <i32> [#uses=1]
+        %tmp4 = shl i32 %x, 16          ; <i32> [#uses=1]
+        %tmp5 = and i32 %tmp4, 16711680         ; <i32> [#uses=1]
+        %tmp6 = or i32 %tmp2, %tmp5             ; <i32> [#uses=1]
+        ret i32 %tmp6
+}
+
diff --git a/test/CodeGen/X86/and-su.ll b/test/CodeGen/X86/and-su.ll
new file mode 100644
index 0000000..38db88a
--- /dev/null
+++ b/test/CodeGen/X86/and-su.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+; Don't duplicate the load.
+
+define fastcc i32 @foo(i32* %p) nounwind {
+; CHECK: foo:
+; CHECK: andl $10, %eax
+; CHECK: je
+	%t0 = load i32* %p
+	%t2 = and i32 %t0, 10
+	%t3 = icmp ne i32 %t2, 0
+	br i1 %t3, label %bb63, label %bb76
+bb63:
+	ret i32 %t2
+bb76:
+	ret i32 0
+}
+
+define fastcc double @bar(i32 %hash, double %x, double %y) nounwind {
+entry:
+; CHECK: bar:
+  %0 = and i32 %hash, 15
+  %1 = icmp ult i32 %0, 8
+  br i1 %1, label %bb11, label %bb10
+
+bb10:
+; CHECK: bb10
+; CHECK: testb $1
+  %2 = and i32 %hash, 1
+  %3 = icmp eq i32 %2, 0
+  br i1 %3, label %bb13, label %bb11
+
+bb11:
+  %4 = fsub double -0.000000e+00, %x
+  br label %bb13
+
+bb13:
+; CHECK: bb13
+; CHECK: testb $2
+  %iftmp.9.0 = phi double [ %4, %bb11 ], [ %x, %bb10 ]
+  %5 = and i32 %hash, 2
+  %6 = icmp eq i32 %5, 0
+  br i1 %6, label %bb16, label %bb14
+
+bb14:
+  %7 = fsub double -0.000000e+00, %y
+  br label %bb16
+
+bb16:
+  %iftmp.10.0 = phi double [ %7, %bb14 ], [ %y, %bb13 ]
+  %8 = fadd double %iftmp.9.0, %iftmp.10.0
+  ret double %8
+}
diff --git a/test/CodeGen/X86/anyext.ll b/test/CodeGen/X86/anyext.ll
new file mode 100644
index 0000000..106fe83
--- /dev/null
+++ b/test/CodeGen/X86/anyext.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=x86-64 | grep movzbl | count 2
+
+; Use movzbl to avoid partial-register updates.
+
+define i32 @foo(i32 %p, i8 zeroext %x) nounwind {
+  %q = trunc i32 %p to i8
+  %r = udiv i8 %q, %x
+  %s = zext i8 %r to i32
+  %t = and i32 %s, 1
+  ret i32 %t
+}
+define i32 @bar(i32 %p, i16 zeroext %x) nounwind {
+  %q = trunc i32 %p to i16
+  %r = udiv i16 %q, %x
+  %s = zext i16 %r to i32
+  %t = and i32 %s, 1
+  ret i32 %t
+}
diff --git a/test/CodeGen/X86/arg-cast.ll b/test/CodeGen/X86/arg-cast.ll
new file mode 100644
index 0000000..c111514
--- /dev/null
+++ b/test/CodeGen/X86/arg-cast.ll
@@ -0,0 +1,18 @@
+; This should compile to movl $2147483647, %eax + andl only.
+; RUN: llc < %s | grep andl
+; RUN: llc < %s | not grep movsd
+; RUN: llc < %s | grep esp | not grep add
+; rdar://5736574
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin8"
+
+define i32 @foo(double %x) nounwind  {
+entry:
+	%x15 = bitcast double %x to i64		; <i64> [#uses=1]
+	%tmp713 = lshr i64 %x15, 32		; <i64> [#uses=1]
+	%tmp714 = trunc i64 %tmp713 to i32		; <i32> [#uses=1]
+	%tmp8 = and i32 %tmp714, 2147483647		; <i32> [#uses=1]
+	ret i32 %tmp8
+}
+
diff --git a/test/CodeGen/X86/asm-block-labels.ll b/test/CodeGen/X86/asm-block-labels.ll
new file mode 100644
index 0000000..a43d430
--- /dev/null
+++ b/test/CodeGen/X86/asm-block-labels.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -std-compile-opts | llc
+; ModuleID = 'block12.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin8"
+
+define void @bar() {
+entry:
+	br label %"LASM$foo"
+
+"LASM$foo":		; preds = %entry
+	call void asm sideeffect ".file \22block12.c\22", "~{dirflag},~{fpsr},~{flags}"( )
+	call void asm sideeffect ".line 1", "~{dirflag},~{fpsr},~{flags}"( )
+	call void asm sideeffect "int $$1", "~{dirflag},~{fpsr},~{flags},~{memory}"( )
+	call void asm sideeffect ".file \22block12.c\22", "~{dirflag},~{fpsr},~{flags}"( )
+	call void asm sideeffect ".line 2", "~{dirflag},~{fpsr},~{flags}"( )
+	call void asm sideeffect "brl ${0:l}", "X,~{dirflag},~{fpsr},~{flags},~{memory}"( label %"LASM$foo" )
+	br label %return
+
+return:		; preds = %"LASM$foo"
+	ret void
+}
+
+define void @baz() {
+entry:
+	call void asm sideeffect ".file \22block12.c\22", "~{dirflag},~{fpsr},~{flags}"( )
+	call void asm sideeffect ".line 3", "~{dirflag},~{fpsr},~{flags}"( )
+	call void asm sideeffect "brl ${0:l}", "X,~{dirflag},~{fpsr},~{flags},~{memory}"( label %"LASM$foo" )
+	call void asm sideeffect ".file \22block12.c\22", "~{dirflag},~{fpsr},~{flags}"( )
+	call void asm sideeffect ".line 4", "~{dirflag},~{fpsr},~{flags}"( )
+	call void asm sideeffect "int $$1", "~{dirflag},~{fpsr},~{flags},~{memory}"( )
+	br label %"LASM$foo"
+
+"LASM$foo":		; preds = %entry
+	call void asm sideeffect ".file \22block12.c\22", "~{dirflag},~{fpsr},~{flags}"( )
+	call void asm sideeffect ".line 5", "~{dirflag},~{fpsr},~{flags}"( )
+	call void asm sideeffect "int $$1", "~{dirflag},~{fpsr},~{flags},~{memory}"( )
+	br label %return
+
+return:		; preds = %"LASM$foo"
+	ret void
+}
diff --git a/test/CodeGen/X86/asm-global-imm.ll b/test/CodeGen/X86/asm-global-imm.ll
new file mode 100644
index 0000000..96da224
--- /dev/null
+++ b/test/CodeGen/X86/asm-global-imm.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=x86 -relocation-model=static | \
+; RUN:   grep {test1 \$_GV}
+; RUN: llc < %s -march=x86 -relocation-model=static | \
+; RUN:   grep {test2 _GV}
+; PR882
+
+target datalayout = "e-p:32:32"
+target triple = "i686-apple-darwin9.0.0d2"
+@GV = weak global i32 0		; <i32*> [#uses=2]
+@str = external global [12 x i8]		; <[12 x i8]*> [#uses=1]
+
+define void @foo() {
+entry:
+	tail call void asm sideeffect "test1 $0", "i,~{dirflag},~{fpsr},~{flags}"( i32* @GV )
+	tail call void asm sideeffect "test2 ${0:c}", "i,~{dirflag},~{fpsr},~{flags}"( i32* @GV )
+	ret void
+}
+
+define void @unknown_bootoption() {
+entry:
+	call void asm sideeffect "ud2\0A\09.word ${0:c}\0A\09.long ${1:c}\0A", "i,i,~{dirflag},~{fpsr},~{flags}"( i32 235, i8* getelementptr ([12 x i8]* @str, i32 0, i64 0) )
+	ret void
+}
diff --git a/test/CodeGen/X86/asm-indirect-mem.ll b/test/CodeGen/X86/asm-indirect-mem.ll
new file mode 100644
index 0000000..c57aa99
--- /dev/null
+++ b/test/CodeGen/X86/asm-indirect-mem.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s 
+; PR2267
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+
+define void @atomic_store_rel_int(i32* %p, i32 %v) nounwind  {
+entry:
+	%asmtmp = tail call i32 asm sideeffect "xchgl $1,$0", "=*m,=r,*m,1,~{dirflag},~{fpsr},~{flags}"( i32* %p, i32* %p, i32 %v ) nounwind 		; <i32> [#uses=0]
+	ret void
+}
+
diff --git a/test/CodeGen/X86/asm-modifier-P.ll b/test/CodeGen/X86/asm-modifier-P.ll
new file mode 100644
index 0000000..6139da8
--- /dev/null
+++ b/test/CodeGen/X86/asm-modifier-P.ll
@@ -0,0 +1,79 @@
+; RUN: llc < %s -march=x86 -mtriple=i686-unknown-linux-gnu -relocation-model=pic    | FileCheck %s -check-prefix=CHECK-PIC-32
+; RUN: llc < %s -march=x86 -mtriple=i686-unknown-linux-gnu -relocation-model=static | FileCheck %s -check-prefix=CHECK-STATIC-32
+; RUN: llc < %s -march=x86-64 -relocation-model=static | FileCheck %s -check-prefix=CHECK-STATIC-64
+; RUN: llc < %s -march=x86-64 -relocation-model=pic    | FileCheck %s -check-prefix=CHECK-PIC-64
+; PR3379
+; XFAIL: *
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+@G = external global i32              ; <i32*> [#uses=1]
+
+declare void @bar(...)
+
+; extern int G;
+; void test1() {
+;  asm("frob %0 x" : : "m"(G));
+;  asm("frob %P0 x" : : "m"(G));
+;}
+
+define void @test1() nounwind {
+entry:
+; P suffix removes (rip) in -static 64-bit mode.
+
+; CHECK-PIC-64: test1:
+; CHECK-PIC-64: movq	G@GOTPCREL(%rip), %rax
+; CHECK-PIC-64: frob (%rax) x
+; CHECK-PIC-64: frob (%rax) x
+
+; CHECK-STATIC-64: test1:
+; CHECK-STATIC-64: frob G(%rip) x
+; CHECK-STATIC-64: frob G x
+
+; CHECK-PIC-32: test1:
+; CHECK-PIC-32: frob G x
+; CHECK-PIC-32: frob G x
+
+; CHECK-STATIC-32: test1:
+; CHECK-STATIC-32: frob G x
+; CHECK-STATIC-32: frob G x
+
+        call void asm "frob $0 x", "*m"(i32* @G) nounwind
+        call void asm "frob ${0:P} x", "*m"(i32* @G) nounwind
+        ret void
+}
+
+define void @test3() nounwind {
+entry:
+; CHECK-STATIC-64: test3:
+; CHECK-STATIC-64: call bar
+; CHECK-STATIC-64: call test3
+; CHECK-STATIC-64: call $bar
+; CHECK-STATIC-64: call $test3
+
+; CHECK-STATIC-32: test3:
+; CHECK-STATIC-32: call bar
+; CHECK-STATIC-32: call test3
+; CHECK-STATIC-32: call $bar
+; CHECK-STATIC-32: call $test3
+
+; CHECK-PIC-64: test3:
+; CHECK-PIC-64: call bar@PLT
+; CHECK-PIC-64: call test3@PLT
+; CHECK-PIC-64: call $bar
+; CHECK-PIC-64: call $test3
+
+; CHECK-PIC-32: test3:
+; CHECK-PIC-32: call bar@PLT
+; CHECK-PIC-32: call test3@PLT
+; CHECK-PIC-32: call $bar
+; CHECK-PIC-32: call $test3
+
+
+; asm(" blah %P0" : : "X"(bar));
+  tail call void asm sideeffect "call ${0:P}", "X"(void (...)* @bar) nounwind
+  tail call void asm sideeffect "call ${0:P}", "X"(void (...)* bitcast (void ()* @test3 to void (...)*)) nounwind
+  tail call void asm sideeffect "call $0", "X"(void (...)* @bar) nounwind
+  tail call void asm sideeffect "call $0", "X"(void (...)* bitcast (void ()* @test3 to void (...)*)) nounwind
+  ret void
+}
diff --git a/test/CodeGen/X86/asm-modifier.ll b/test/CodeGen/X86/asm-modifier.ll
new file mode 100644
index 0000000..44f972e
--- /dev/null
+++ b/test/CodeGen/X86/asm-modifier.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s | FileCheck %s
+; ModuleID = 'asm.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.6"
+
+define i32 @test1() nounwind {
+entry:
+; CHECK: test1:
+; CHECK: movw	%gs:6, %ax
+  %asmtmp.i = tail call i16 asm "movw\09%gs:${1:a}, ${0:w}", "=r,ir,~{dirflag},~{fpsr},~{flags}"(i32 6) nounwind ; <i16> [#uses=1]
+  %0 = zext i16 %asmtmp.i to i32                  ; <i32> [#uses=1]
+  ret i32 %0
+}
+
+define zeroext i16 @test2(i32 %address) nounwind {
+entry:
+; CHECK: test2:
+; CHECK: movw	%gs:(%eax), %ax
+  %asmtmp = tail call i16 asm "movw\09%gs:${1:a}, ${0:w}", "=r,ir,~{dirflag},~{fpsr},~{flags}"(i32 %address) nounwind ; <i16> [#uses=1]
+  ret i16 %asmtmp
+}
+
+@n = global i32 42                                ; <i32*> [#uses=3]
+@y = common global i32 0                          ; <i32*> [#uses=3]
+
+define void @test3() nounwind {
+entry:
+; CHECK: test3:
+; CHECK: movl _n, %eax
+  call void asm sideeffect "movl ${0:a}, %eax", "ir,~{dirflag},~{fpsr},~{flags},~{eax}"(i32* @n) nounwind
+  ret void
+}
+
+define void @test4() nounwind {
+entry:
+; CHECK: test4:
+; CHECK: movl	L_y$non_lazy_ptr, %ecx
+; CHECK: movl (%ecx), %eax
+  call void asm sideeffect "movl ${0:a}, %eax", "ir,~{dirflag},~{fpsr},~{flags},~{eax}"(i32* @y) nounwind
+  ret void
+}
diff --git a/test/CodeGen/X86/atomic_add.ll b/test/CodeGen/X86/atomic_add.ll
new file mode 100644
index 0000000..d00f8e8
--- /dev/null
+++ b/test/CodeGen/X86/atomic_add.ll
@@ -0,0 +1,217 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; rdar://7103704
+
+define void @sub1(i32* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: sub1:
+; CHECK: subl
+	%0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 %v)		; <i32> [#uses=0]
+	ret void
+}
+
+define void @inc4(i64* nocapture %p) nounwind ssp {
+entry:
+; CHECK: inc4:
+; CHECK: incq
+	%0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1)		; <i64> [#uses=0]
+	ret void
+}
+
+declare i64 @llvm.atomic.load.add.i64.p0i64(i64* nocapture, i64) nounwind
+
+define void @add8(i64* nocapture %p) nounwind ssp {
+entry:
+; CHECK: add8:
+; CHECK: addq $2
+	%0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 2)		; <i64> [#uses=0]
+	ret void
+}
+
+define void @add4(i64* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: add4:
+; CHECK: addq
+	%0 = sext i32 %v to i64		; <i64> [#uses=1]
+	%1 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 %0)		; <i64> [#uses=0]
+	ret void
+}
+
+define void @inc3(i8* nocapture %p) nounwind ssp {
+entry:
+; CHECK: inc3:
+; CHECK: incb
+	%0 = tail call i8 @llvm.atomic.load.add.i8.p0i8(i8* %p, i8 1)		; <i8> [#uses=0]
+	ret void
+}
+
+declare i8 @llvm.atomic.load.add.i8.p0i8(i8* nocapture, i8) nounwind
+
+define void @add7(i8* nocapture %p) nounwind ssp {
+entry:
+; CHECK: add7:
+; CHECK: addb $2
+	%0 = tail call i8 @llvm.atomic.load.add.i8.p0i8(i8* %p, i8 2)		; <i8> [#uses=0]
+	ret void
+}
+
+define void @add3(i8* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: add3:
+; CHECK: addb
+	%0 = trunc i32 %v to i8		; <i8> [#uses=1]
+	%1 = tail call i8 @llvm.atomic.load.add.i8.p0i8(i8* %p, i8 %0)		; <i8> [#uses=0]
+	ret void
+}
+
+define void @inc2(i16* nocapture %p) nounwind ssp {
+entry:
+; CHECK: inc2:
+; CHECK: incw
+	%0 = tail call i16 @llvm.atomic.load.add.i16.p0i16(i16* %p, i16 1)		; <i16> [#uses=0]
+	ret void
+}
+
+declare i16 @llvm.atomic.load.add.i16.p0i16(i16* nocapture, i16) nounwind
+
+define void @add6(i16* nocapture %p) nounwind ssp {
+entry:
+; CHECK: add6:
+; CHECK: addw $2
+	%0 = tail call i16 @llvm.atomic.load.add.i16.p0i16(i16* %p, i16 2)		; <i16> [#uses=0]
+	ret void
+}
+
+define void @add2(i16* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: add2:
+; CHECK: addw
+	%0 = trunc i32 %v to i16		; <i16> [#uses=1]
+	%1 = tail call i16 @llvm.atomic.load.add.i16.p0i16(i16* %p, i16 %0)		; <i16> [#uses=0]
+	ret void
+}
+
+define void @inc1(i32* nocapture %p) nounwind ssp {
+entry:
+; CHECK: inc1:
+; CHECK: incl
+	%0 = tail call i32 @llvm.atomic.load.add.i32.p0i32(i32* %p, i32 1)		; <i32> [#uses=0]
+	ret void
+}
+
+declare i32 @llvm.atomic.load.add.i32.p0i32(i32* nocapture, i32) nounwind
+
+define void @add5(i32* nocapture %p) nounwind ssp {
+entry:
+; CHECK: add5:
+; CHECK: addl $2
+	%0 = tail call i32 @llvm.atomic.load.add.i32.p0i32(i32* %p, i32 2)		; <i32> [#uses=0]
+	ret void
+}
+
+define void @add1(i32* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: add1:
+; CHECK: addl
+	%0 = tail call i32 @llvm.atomic.load.add.i32.p0i32(i32* %p, i32 %v)		; <i32> [#uses=0]
+	ret void
+}
+
+define void @dec4(i64* nocapture %p) nounwind ssp {
+entry:
+; CHECK: dec4:
+; CHECK: decq
+	%0 = tail call i64 @llvm.atomic.load.sub.i64.p0i64(i64* %p, i64 1)		; <i64> [#uses=0]
+	ret void
+}
+
+declare i64 @llvm.atomic.load.sub.i64.p0i64(i64* nocapture, i64) nounwind
+
+define void @sub8(i64* nocapture %p) nounwind ssp {
+entry:
+; CHECK: sub8:
+; CHECK: subq $2
+	%0 = tail call i64 @llvm.atomic.load.sub.i64.p0i64(i64* %p, i64 2)		; <i64> [#uses=0]
+	ret void
+}
+
+define void @sub4(i64* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: sub4:
+; CHECK: subq
+	%0 = sext i32 %v to i64		; <i64> [#uses=1]
+	%1 = tail call i64 @llvm.atomic.load.sub.i64.p0i64(i64* %p, i64 %0)		; <i64> [#uses=0]
+	ret void
+}
+
+define void @dec3(i8* nocapture %p) nounwind ssp {
+entry:
+; CHECK: dec3:
+; CHECK: decb
+	%0 = tail call i8 @llvm.atomic.load.sub.i8.p0i8(i8* %p, i8 1)		; <i8> [#uses=0]
+	ret void
+}
+
+declare i8 @llvm.atomic.load.sub.i8.p0i8(i8* nocapture, i8) nounwind
+
+define void @sub7(i8* nocapture %p) nounwind ssp {
+entry:
+; CHECK: sub7:
+; CHECK: subb $2
+	%0 = tail call i8 @llvm.atomic.load.sub.i8.p0i8(i8* %p, i8 2)		; <i8> [#uses=0]
+	ret void
+}
+
+define void @sub3(i8* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: sub3:
+; CHECK: subb
+	%0 = trunc i32 %v to i8		; <i8> [#uses=1]
+	%1 = tail call i8 @llvm.atomic.load.sub.i8.p0i8(i8* %p, i8 %0)		; <i8> [#uses=0]
+	ret void
+}
+
+define void @dec2(i16* nocapture %p) nounwind ssp {
+entry:
+; CHECK: dec2:
+; CHECK: decw
+	%0 = tail call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %p, i16 1)		; <i16> [#uses=0]
+	ret void
+}
+
+declare i16 @llvm.atomic.load.sub.i16.p0i16(i16* nocapture, i16) nounwind
+
+define void @sub6(i16* nocapture %p) nounwind ssp {
+entry:
+; CHECK: sub6:
+; CHECK: subw $2
+	%0 = tail call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %p, i16 2)		; <i16> [#uses=0]
+	ret void
+}
+
+define void @sub2(i16* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: sub2:
+; CHECK: subw
+	%0 = trunc i32 %v to i16		; <i16> [#uses=1]
+	%1 = tail call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %p, i16 %0)		; <i16> [#uses=0]
+	ret void
+}
+
+define void @dec1(i32* nocapture %p) nounwind ssp {
+entry:
+; CHECK: dec1:
+; CHECK: decl
+	%0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 1)		; <i32> [#uses=0]
+	ret void
+}
+
+declare i32 @llvm.atomic.load.sub.i32.p0i32(i32* nocapture, i32) nounwind
+
+define void @sub5(i32* nocapture %p) nounwind ssp {
+entry:
+; CHECK: sub5:
+; CHECK: subl $2
+	%0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 2)		; <i32> [#uses=0]
+	ret void
+}
diff --git a/test/CodeGen/X86/atomic_op.ll b/test/CodeGen/X86/atomic_op.ll
new file mode 100644
index 0000000..3ef1887
--- /dev/null
+++ b/test/CodeGen/X86/atomic_op.ll
@@ -0,0 +1,94 @@
+; RUN: llc < %s -march=x86 -o %t1
+; RUN: grep "lock" %t1 | count 17
+; RUN: grep "xaddl" %t1 | count 4 
+; RUN: grep "cmpxchgl"  %t1 | count 13 
+; RUN: grep "xchgl" %t1 | count 14
+; RUN: grep "cmova" %t1 | count 2
+; RUN: grep "cmovb" %t1 | count 2
+; RUN: grep "cmovg" %t1 | count 2
+; RUN: grep "cmovl" %t1 | count 2
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+define void @main(i32 %argc, i8** %argv) {
+entry:
+	%argc.addr = alloca i32		; <i32*> [#uses=1]
+	%argv.addr = alloca i8**		; <i8***> [#uses=1]
+	%val1 = alloca i32		; <i32*> [#uses=2]
+	%val2 = alloca i32		; <i32*> [#uses=15]
+	%andt = alloca i32		; <i32*> [#uses=2]
+	%ort = alloca i32		; <i32*> [#uses=2]
+	%xort = alloca i32		; <i32*> [#uses=2]
+	%old = alloca i32		; <i32*> [#uses=18]
+	%temp = alloca i32		; <i32*> [#uses=2]
+	store i32 %argc, i32* %argc.addr
+	store i8** %argv, i8*** %argv.addr
+	store i32 0, i32* %val1
+	store i32 31, i32* %val2
+	store i32 3855, i32* %andt
+	store i32 3855, i32* %ort
+	store i32 3855, i32* %xort
+	store i32 4, i32* %temp
+	%tmp = load i32* %temp		; <i32> [#uses=1]
+	call i32 @llvm.atomic.load.add.i32.p0i32( i32* %val1, i32 %tmp )		; <i32>:0 [#uses=1]
+	store i32 %0, i32* %old
+	call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %val2, i32 30 )		; <i32>:1 [#uses=1]
+	store i32 %1, i32* %old
+	call i32 @llvm.atomic.load.add.i32.p0i32( i32* %val2, i32 1 )		; <i32>:2 [#uses=1]
+	store i32 %2, i32* %old
+	call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %val2, i32 1 )		; <i32>:3 [#uses=1]
+	store i32 %3, i32* %old
+	call i32 @llvm.atomic.load.and.i32.p0i32( i32* %andt, i32 4080 )		; <i32>:4 [#uses=1]
+	store i32 %4, i32* %old
+	call i32 @llvm.atomic.load.or.i32.p0i32( i32* %ort, i32 4080 )		; <i32>:5 [#uses=1]
+	store i32 %5, i32* %old
+	call i32 @llvm.atomic.load.xor.i32.p0i32( i32* %xort, i32 4080 )		; <i32>:6 [#uses=1]
+	store i32 %6, i32* %old
+	call i32 @llvm.atomic.load.min.i32.p0i32( i32* %val2, i32 16 )		; <i32>:7 [#uses=1]
+	store i32 %7, i32* %old
+	%neg = sub i32 0, 1		; <i32> [#uses=1]
+	call i32 @llvm.atomic.load.min.i32.p0i32( i32* %val2, i32 %neg )		; <i32>:8 [#uses=1]
+	store i32 %8, i32* %old
+	call i32 @llvm.atomic.load.max.i32.p0i32( i32* %val2, i32 1 )		; <i32>:9 [#uses=1]
+	store i32 %9, i32* %old
+	call i32 @llvm.atomic.load.max.i32.p0i32( i32* %val2, i32 0 )		; <i32>:10 [#uses=1]
+	store i32 %10, i32* %old
+	call i32 @llvm.atomic.load.umax.i32.p0i32( i32* %val2, i32 65535 )		; <i32>:11 [#uses=1]
+	store i32 %11, i32* %old
+	call i32 @llvm.atomic.load.umax.i32.p0i32( i32* %val2, i32 10 )		; <i32>:12 [#uses=1]
+	store i32 %12, i32* %old
+	call i32 @llvm.atomic.load.umin.i32.p0i32( i32* %val2, i32 1 )		; <i32>:13 [#uses=1]
+	store i32 %13, i32* %old
+	call i32 @llvm.atomic.load.umin.i32.p0i32( i32* %val2, i32 10 )		; <i32>:14 [#uses=1]
+	store i32 %14, i32* %old
+	call i32 @llvm.atomic.swap.i32.p0i32( i32* %val2, i32 1976 )		; <i32>:15 [#uses=1]
+	store i32 %15, i32* %old
+	%neg1 = sub i32 0, 10		; <i32> [#uses=1]
+	call i32 @llvm.atomic.cmp.swap.i32.p0i32( i32* %val2, i32 %neg1, i32 1 )		; <i32>:16 [#uses=1]
+	store i32 %16, i32* %old
+	call i32 @llvm.atomic.cmp.swap.i32.p0i32( i32* %val2, i32 1976, i32 1 )		; <i32>:17 [#uses=1]
+	store i32 %17, i32* %old
+	ret void
+}
+
+declare i32 @llvm.atomic.load.add.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.load.sub.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.load.and.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.load.or.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.load.xor.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.load.min.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.load.max.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.load.umax.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.load.umin.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.swap.i32.p0i32(i32*, i32) nounwind 
+
+declare i32 @llvm.atomic.cmp.swap.i32.p0i32(i32*, i32, i32) nounwind 
diff --git a/test/CodeGen/X86/attribute-sections.ll b/test/CodeGen/X86/attribute-sections.ll
new file mode 100644
index 0000000..3035334
--- /dev/null
+++ b/test/CodeGen/X86/attribute-sections.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s -check-prefix=LINUX
+
+declare i32 @foo()
+@G0 = global i32 ()* @foo, section ".init_array"
+
+; LINUX:  .section  .init_array,"aw"
+; LINUX:  .globl G0
+
+@G1 = global i32 ()* @foo, section ".fini_array"
+
+; LINUX:  .section  .fini_array,"aw"
+; LINUX:  .globl G1
+
+@G2 = global i32 ()* @foo, section ".preinit_array"
+
+; LINUX:  .section .preinit_array,"aw"
+; LINUX:  .globl G2
+
diff --git a/test/CodeGen/X86/avoid-lea-scale2.ll b/test/CodeGen/X86/avoid-lea-scale2.ll
new file mode 100644
index 0000000..8003de2
--- /dev/null
+++ b/test/CodeGen/X86/avoid-lea-scale2.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86-64 | grep {leal.*-2(\[%\]rdi,\[%\]rdi)}
+
+define i32 @foo(i32 %x) nounwind readnone {
+  %t0 = shl i32 %x, 1
+  %t1 = add i32 %t0, -2
+  ret i32 %t1
+}
+
diff --git a/test/CodeGen/X86/avoid-loop-align-2.ll b/test/CodeGen/X86/avoid-loop-align-2.ll
new file mode 100644
index 0000000..fc9d1f0
--- /dev/null
+++ b/test/CodeGen/X86/avoid-loop-align-2.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s -march=x86 | grep align | count 4
+
+; TODO: Is it a good idea to align inner loops? It's hard to know without
+; knowing what their trip counts are, or other dynamic information. For
+; now, CodeGen aligns all loops.
+
+@x = external global i32*		; <i32**> [#uses=1]
+
+define i32 @t(i32 %a, i32 %b) nounwind readonly ssp {
+entry:
+	%0 = icmp eq i32 %a, 0		; <i1> [#uses=1]
+	br i1 %0, label %bb5, label %bb.nph12
+
+bb.nph12:		; preds = %entry
+	%1 = icmp eq i32 %b, 0		; <i1> [#uses=1]
+	%2 = load i32** @x, align 8		; <i32*> [#uses=1]
+	br i1 %1, label %bb2.preheader, label %bb2.preheader.us
+
+bb2.preheader.us:		; preds = %bb2.bb3_crit_edge.us, %bb.nph12
+	%indvar18 = phi i32 [ 0, %bb.nph12 ], [ %indvar.next19, %bb2.bb3_crit_edge.us ]		; <i32> [#uses=2]
+	%sum.111.us = phi i32 [ 0, %bb.nph12 ], [ %4, %bb2.bb3_crit_edge.us ]		; <i32> [#uses=0]
+	%tmp16 = mul i32 %indvar18, %a		; <i32> [#uses=1]
+	br label %bb1.us
+
+bb1.us:		; preds = %bb1.us, %bb2.preheader.us
+	%indvar = phi i32 [ 0, %bb2.preheader.us ], [ %indvar.next, %bb1.us ]		; <i32> [#uses=2]
+	%tmp17 = add i32 %indvar, %tmp16		; <i32> [#uses=1]
+	%tmp. = zext i32 %tmp17 to i64		; <i64> [#uses=1]
+	%3 = getelementptr i32* %2, i64 %tmp.		; <i32*> [#uses=1]
+	%4 = load i32* %3, align 4		; <i32> [#uses=2]
+	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next, %b		; <i1> [#uses=1]
+	br i1 %exitcond, label %bb2.bb3_crit_edge.us, label %bb1.us
+
+bb2.bb3_crit_edge.us:		; preds = %bb1.us
+	%indvar.next19 = add i32 %indvar18, 1		; <i32> [#uses=2]
+	%exitcond22 = icmp eq i32 %indvar.next19, %a		; <i1> [#uses=1]
+	br i1 %exitcond22, label %bb5, label %bb2.preheader.us
+
+bb2.preheader:		; preds = %bb2.preheader, %bb.nph12
+	%indvar24 = phi i32 [ %indvar.next25, %bb2.preheader ], [ 0, %bb.nph12 ]		; <i32> [#uses=1]
+	%indvar.next25 = add i32 %indvar24, 1		; <i32> [#uses=2]
+	%exitcond28 = icmp eq i32 %indvar.next25, %a		; <i1> [#uses=1]
+	br i1 %exitcond28, label %bb5, label %bb2.preheader
+
+bb5:		; preds = %bb2.preheader, %bb2.bb3_crit_edge.us, %entry
+	%sum.1.lcssa = phi i32 [ 0, %entry ], [ 0, %bb2.preheader ], [ %4, %bb2.bb3_crit_edge.us ]		; <i32> [#uses=1]
+	ret i32 %sum.1.lcssa
+}
diff --git a/test/CodeGen/X86/avoid-loop-align.ll b/test/CodeGen/X86/avoid-loop-align.ll
new file mode 100644
index 0000000..d4c5c67
--- /dev/null
+++ b/test/CodeGen/X86/avoid-loop-align.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+
+; CodeGen should align the top of the loop, which differs from the loop
+; header in this case.
+
+; CHECK: jmp LBB1_2
+; CHECK: .align
+; CHECK: LBB1_1:
+
+@A = common global [100 x i32] zeroinitializer, align 32		; <[100 x i32]*> [#uses=1]
+
+define i8* @test(i8* %Q, i32* %L) nounwind {
+entry:
+	%tmp = tail call i32 (...)* @foo() nounwind		; <i32> [#uses=2]
+	%tmp1 = inttoptr i32 %tmp to i8*		; <i8*> [#uses=1]
+	br label %bb1
+
+bb:		; preds = %bb1, %bb1
+	%indvar.next = add i32 %P.0.rec, 1		; <i32> [#uses=1]
+	br label %bb1
+
+bb1:		; preds = %bb, %entry
+	%P.0.rec = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]		; <i32> [#uses=2]
+	%P.0 = getelementptr i8* %tmp1, i32 %P.0.rec		; <i8*> [#uses=3]
+	%tmp2 = load i8* %P.0, align 1		; <i8> [#uses=1]
+	switch i8 %tmp2, label %bb4 [
+		i8 12, label %bb
+		i8 42, label %bb
+	]
+
+bb4:		; preds = %bb1
+	%tmp3 = ptrtoint i8* %P.0 to i32		; <i32> [#uses=1]
+	%tmp4 = sub i32 %tmp3, %tmp		; <i32> [#uses=1]
+	%tmp5 = getelementptr [100 x i32]* @A, i32 0, i32 %tmp4		; <i32*> [#uses=1]
+	store i32 4, i32* %tmp5, align 4
+	ret i8* %P.0
+}
+
+declare i32 @foo(...)
diff --git a/test/CodeGen/X86/bigstructret.ll b/test/CodeGen/X86/bigstructret.ll
new file mode 100644
index 0000000..633995d
--- /dev/null
+++ b/test/CodeGen/X86/bigstructret.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86 -o %t
+; RUN: grep "movl	.24601, 12(%ecx)" %t
+; RUN: grep "movl	.48, 8(%ecx)" %t
+; RUN: grep "movl	.24, 4(%ecx)" %t
+; RUN: grep "movl	.12, (%ecx)" %t
+
+%0 = type { i32, i32, i32, i32 }
+
+define internal fastcc %0 @ReturnBigStruct() nounwind readnone {
+entry:
+  %0 = insertvalue %0 zeroinitializer, i32 12, 0
+  %1 = insertvalue %0 %0, i32 24, 1
+  %2 = insertvalue %0 %1, i32 48, 2
+  %3 = insertvalue %0 %2, i32 24601, 3
+  ret %0 %3
+}
+
diff --git a/test/CodeGen/X86/bigstructret2.ll b/test/CodeGen/X86/bigstructret2.ll
new file mode 100644
index 0000000..46e0fd2
--- /dev/null
+++ b/test/CodeGen/X86/bigstructret2.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -o %t
+
+%0 = type { i64, i64 }
+
+declare fastcc %0 @ReturnBigStruct() nounwind readnone
+
+define void @test(%0* %p) {
+  %1 = call fastcc %0 @ReturnBigStruct()
+  store %0 %1, %0* %p
+  ret void
+}
+
diff --git a/test/CodeGen/X86/bitcast-int-to-vector.ll b/test/CodeGen/X86/bitcast-int-to-vector.ll
new file mode 100644
index 0000000..4c25979
--- /dev/null
+++ b/test/CodeGen/X86/bitcast-int-to-vector.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86
+
+define i1 @foo(i64 %a)
+{
+  %t = bitcast i64 %a to <2 x float>
+  %r = extractelement <2 x float> %t, i32 0
+  %s = extractelement <2 x float> %t, i32 1
+  %b = fcmp uno float %r, %s
+  ret i1 %b
+}
diff --git a/test/CodeGen/X86/bitcast.ll b/test/CodeGen/X86/bitcast.ll
new file mode 100644
index 0000000..c34c675
--- /dev/null
+++ b/test/CodeGen/X86/bitcast.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -march=x86-64
+; PR1033
+
+define i64 @test1(double %t) {
+        %u = bitcast double %t to i64           ; <i64> [#uses=1]
+        ret i64 %u
+}
+
+define double @test2(i64 %t) {
+        %u = bitcast i64 %t to double           ; <double> [#uses=1]
+        ret double %u
+}
+
+define i32 @test3(float %t) {
+        %u = bitcast float %t to i32            ; <i32> [#uses=1]
+        ret i32 %u
+}
+
+define float @test4(i32 %t) {
+        %u = bitcast i32 %t to float            ; <float> [#uses=1]
+        ret float %u
+}
+
diff --git a/test/CodeGen/X86/bitcast2.ll b/test/CodeGen/X86/bitcast2.ll
new file mode 100644
index 0000000..48922b5
--- /dev/null
+++ b/test/CodeGen/X86/bitcast2.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86-64 | grep movd | count 2
+; RUN: llc < %s -march=x86-64 | not grep rsp
+
+define i64 @test1(double %A) {
+   %B = bitcast double %A to i64
+   ret i64 %B
+}
+
+define double @test2(i64 %A) {
+   %B = bitcast i64 %A to double
+   ret double %B
+}
+
diff --git a/test/CodeGen/X86/br-fold.ll b/test/CodeGen/X86/br-fold.ll
new file mode 100644
index 0000000..8af3bd1
--- /dev/null
+++ b/test/CodeGen/X86/br-fold.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+; CHECK: orq
+; CHECK-NEXT: jne
+
+@_ZN11xercesc_2_513SchemaSymbols21fgURI_SCHEMAFORSCHEMAE = external constant [33 x i16], align 32 ; <[33 x i16]*> [#uses=1]
+@_ZN11xercesc_2_56XMLUni16fgNotationStringE = external constant [9 x i16], align 16 ; <[9 x i16]*> [#uses=1]
+
+define fastcc void @foo() {
+entry:
+  br i1 icmp eq (i64 or (i64 ptrtoint ([33 x i16]* @_ZN11xercesc_2_513SchemaSymbols21fgURI_SCHEMAFORSCHEMAE to i64),
+                         i64 ptrtoint ([9 x i16]* @_ZN11xercesc_2_56XMLUni16fgNotationStringE to i64)), i64 0),
+     label %bb8.i329, label %bb4.i.i318.preheader
+
+bb4.i.i318.preheader:                             ; preds = %bb6
+  unreachable
+
+bb8.i329:                                         ; preds = %bb6
+  unreachable
+}
diff --git a/test/CodeGen/X86/brcond.ll b/test/CodeGen/X86/brcond.ll
new file mode 100644
index 0000000..130483a
--- /dev/null
+++ b/test/CodeGen/X86/brcond.ll
@@ -0,0 +1,69 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s
+; rdar://7475489
+
+define i32 @test1(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: test1:
+; CHECK: xorb
+; CHECK-NOT: andb
+; CHECK-NOT: shrb
+; CHECK: testb $64
+  %0 = and i32 %a, 16384
+  %1 = icmp ne i32 %0, 0
+  %2 = and i32 %b, 16384
+  %3 = icmp ne i32 %2, 0
+  %4 = xor i1 %1, %3
+  br i1 %4, label %bb1, label %bb
+
+bb:                                               ; preds = %entry
+  %5 = tail call i32 (...)* @foo() nounwind       ; <i32> [#uses=1]
+  ret i32 %5
+
+bb1:                                              ; preds = %entry
+  %6 = tail call i32 (...)* @bar() nounwind       ; <i32> [#uses=1]
+  ret i32 %6
+}
+
+declare i32 @foo(...)
+
+declare i32 @bar(...)
+
+
+
+; PR3351 - (P == 0) & (Q == 0) -> (P|Q) == 0
+define i32 @test2(i32* %P, i32* %Q) nounwind ssp {
+entry:
+  %a = icmp eq i32* %P, null                    ; <i1> [#uses=1]
+  %b = icmp eq i32* %Q, null                    ; <i1> [#uses=1]
+  %c = and i1 %a, %b
+  br i1 %c, label %bb1, label %return
+
+bb1:                                              ; preds = %entry
+  ret i32 4
+
+return:                                           ; preds = %entry
+  ret i32 192
+; CHECK: test2:
+; CHECK:	movl	4(%esp), %eax
+; CHECK-NEXT:	orl	8(%esp), %eax
+; CHECK-NEXT:	jne	LBB2_2
+}
+
+; PR3351 - (P != 0) | (Q != 0) -> (P|Q) != 0
+define i32 @test3(i32* %P, i32* %Q) nounwind ssp {
+entry:
+  %a = icmp ne i32* %P, null                    ; <i1> [#uses=1]
+  %b = icmp ne i32* %Q, null                    ; <i1> [#uses=1]
+  %c = or i1 %a, %b
+  br i1 %c, label %bb1, label %return
+
+bb1:                                              ; preds = %entry
+  ret i32 4
+
+return:                                           ; preds = %entry
+  ret i32 192
+; CHECK: test3:
+; CHECK:	movl	4(%esp), %eax
+; CHECK-NEXT:	orl	8(%esp), %eax
+; CHECK-NEXT:	je	LBB3_2
+}
diff --git a/test/CodeGen/X86/break-anti-dependencies.ll b/test/CodeGen/X86/break-anti-dependencies.ll
new file mode 100644
index 0000000..972b3cd
--- /dev/null
+++ b/test/CodeGen/X86/break-anti-dependencies.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -march=x86-64 -post-RA-scheduler -break-anti-dependencies=none > %t
+; RUN:   grep {%xmm0} %t | count 14
+; RUN:   not grep {%xmm1} %t
+; RUN: llc < %s -march=x86-64 -post-RA-scheduler -break-anti-dependencies=critical > %t
+; RUN:   grep {%xmm0} %t | count 7
+; RUN:   grep {%xmm1} %t | count 7
+
+define void @goo(double* %r, double* %p, double* %q) nounwind {
+entry:
+	%0 = load double* %p, align 8
+	%1 = fadd double %0, 1.100000e+00
+	%2 = fmul double %1, 1.200000e+00
+	%3 = fadd double %2, 1.300000e+00
+	%4 = fmul double %3, 1.400000e+00
+	%5 = fadd double %4, 1.500000e+00
+	%6 = fptosi double %5 to i32
+	%7 = load double* %r, align 8
+	%8 = fadd double %7, 7.100000e+00
+	%9 = fmul double %8, 7.200000e+00
+	%10 = fadd double %9, 7.300000e+00
+	%11 = fmul double %10, 7.400000e+00
+	%12 = fadd double %11, 7.500000e+00
+	%13 = fptosi double %12 to i32
+	%14 = icmp slt i32 %6, %13
+	br i1 %14, label %bb, label %return
+
+bb:
+	store double 9.300000e+00, double* %q, align 8
+	ret void
+
+return:
+	ret void
+}
diff --git a/test/CodeGen/X86/break-sse-dep.ll b/test/CodeGen/X86/break-sse-dep.ll
new file mode 100644
index 0000000..acc0647
--- /dev/null
+++ b/test/CodeGen/X86/break-sse-dep.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 | FileCheck %s
+
+define double @t1(float* nocapture %x) nounwind readonly ssp {
+entry:
+; CHECK: t1:
+; CHECK: movss (%rdi), %xmm0
+; CHECK: cvtss2sd %xmm0, %xmm0
+
+  %0 = load float* %x, align 4
+  %1 = fpext float %0 to double
+  ret double %1
+}
+
+define float @t2(double* nocapture %x) nounwind readonly ssp optsize {
+entry:
+; CHECK: t2:
+; CHECK: cvtsd2ss (%rdi), %xmm0
+  %0 = load double* %x, align 8
+  %1 = fptrunc double %0 to float
+  ret float %1
+}
diff --git a/test/CodeGen/X86/bss_pagealigned.ll b/test/CodeGen/X86/bss_pagealigned.ll
new file mode 100644
index 0000000..da95aca
--- /dev/null
+++ b/test/CodeGen/X86/bss_pagealigned.ll
@@ -0,0 +1,21 @@
+; RUN: llc --code-model=kernel -march=x86-64 <%s -asm-verbose=0 | FileCheck %s
+; PR4933
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+%struct.kmem_cache_order_objects = type { i64 }
+declare i8* @memset(i8*, i32, i64)
+define void @unxlate_dev_mem_ptr(i64 %phis, i8* %addr) nounwind {
+  %pte.addr.i = alloca %struct.kmem_cache_order_objects*
+  %call8 = call i8* @memset(i8* bitcast ([512 x %struct.kmem_cache_order_objects]* @bm_pte to i8*), i32 0, i64 4096)
+; CHECK: movq    $bm_pte, %rdi
+; CHECK-NEXT: xorl    %esi, %esi
+; CHECK-NEXT: movl    $4096, %edx
+; CHECK-NEXT: callq   memset
+  ret void
+}
+@bm_pte = internal global [512 x %struct.kmem_cache_order_objects] zeroinitializer, section ".bss.page_aligned", align 4096
+; CHECK: .section        .bss.page_aligned,"aw",@nobits
+; CHECK-NEXT: .align  4096
+; CHECK-NEXT: bm_pte:
+; CHECK-NEXT: .zero   4096
+; CHECK-NEXT: .size   bm_pte, 4096
diff --git a/test/CodeGen/X86/bswap-inline-asm.ll b/test/CodeGen/X86/bswap-inline-asm.ll
new file mode 100644
index 0000000..5bf58fa
--- /dev/null
+++ b/test/CodeGen/X86/bswap-inline-asm.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86-64 > %t
+; RUN: not grep APP %t
+; RUN: grep bswapq %t | count 2
+; RUN: grep bswapl %t | count 1
+
+define i64 @foo(i64 %x) nounwind {
+	%asmtmp = tail call i64 asm "bswap $0", "=r,0,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind
+	ret i64 %asmtmp
+}
+define i64 @bar(i64 %x) nounwind {
+	%asmtmp = tail call i64 asm "bswapq ${0:q}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind
+	ret i64 %asmtmp
+}
+define i32 @pen(i32 %x) nounwind {
+	%asmtmp = tail call i32 asm "bswapl ${0:q}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 %x) nounwind
+	ret i32 %asmtmp
+}
diff --git a/test/CodeGen/X86/bswap.ll b/test/CodeGen/X86/bswap.ll
new file mode 100644
index 0000000..0a72c1c
--- /dev/null
+++ b/test/CodeGen/X86/bswap.ll
@@ -0,0 +1,27 @@
+; bswap should be constant folded when it is passed a constant argument
+
+; RUN: llc < %s -march=x86 | \
+; RUN:   grep bswapl | count 3
+; RUN: llc < %s -march=x86 | grep rolw | count 1
+
+declare i16 @llvm.bswap.i16(i16)
+
+declare i32 @llvm.bswap.i32(i32)
+
+declare i64 @llvm.bswap.i64(i64)
+
+define i16 @W(i16 %A) {
+        %Z = call i16 @llvm.bswap.i16( i16 %A )         ; <i16> [#uses=1]
+        ret i16 %Z
+}
+
+define i32 @X(i32 %A) {
+        %Z = call i32 @llvm.bswap.i32( i32 %A )         ; <i32> [#uses=1]
+        ret i32 %Z
+}
+
+define i64 @Y(i64 %A) {
+        %Z = call i64 @llvm.bswap.i64( i64 %A )         ; <i64> [#uses=1]
+        ret i64 %Z
+}
+
diff --git a/test/CodeGen/X86/bt.ll b/test/CodeGen/X86/bt.ll
new file mode 100644
index 0000000..ec447e5
--- /dev/null
+++ b/test/CodeGen/X86/bt.ll
@@ -0,0 +1,442 @@
+; RUN: llc < %s -march=x86 | grep btl | count 28
+; RUN: llc < %s -march=x86 -mcpu=pentium4 | grep btl | not grep esp
+; RUN: llc < %s -march=x86 -mcpu=penryn   | grep btl | not grep esp
+; PR3253
+
+; The register+memory form of the BT instruction should be usable on
+; pentium4, however it is currently disabled due to the register+memory
+; form having different semantics than the register+register form.
+
+; Test these patterns:
+;    (X & (1 << N))  != 0  -->  BT(X, N).
+;    ((X >>u N) & 1) != 0  -->  BT(X, N).
+; as well as several variations:
+;    - The second form can use an arithmetic shift.
+;    - Either form can use == instead of !=.
+;    - Either form can compare with an operand of the &
+;      instead of with 0.
+;    - The comparison can be commuted (only cases where neither
+;      operand is constant are included).
+;    - The and can be commuted.
+
+define void @test2(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
+	%tmp4 = icmp eq i32 %tmp3, 0		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @test2b(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 1, %tmp29
+	%tmp4 = icmp eq i32 %tmp3, 0		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @atest2(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
+	%tmp4 = icmp eq i32 %tmp3, 0		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @atest2b(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 1, %tmp29
+	%tmp4 = icmp eq i32 %tmp3, 0		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @test3(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
+	%tmp4 = icmp eq i32 %tmp3, 0		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @test3b(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %x, %tmp29
+	%tmp4 = icmp eq i32 %tmp3, 0		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @testne2(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
+	%tmp4 = icmp ne i32 %tmp3, 0		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @testne2b(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 1, %tmp29
+	%tmp4 = icmp ne i32 %tmp3, 0		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @atestne2(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
+	%tmp4 = icmp ne i32 %tmp3, 0		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @atestne2b(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 1, %tmp29
+	%tmp4 = icmp ne i32 %tmp3, 0		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @testne3(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
+	%tmp4 = icmp ne i32 %tmp3, 0		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @testne3b(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %x, %tmp29
+	%tmp4 = icmp ne i32 %tmp3, 0		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @query2(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
+	%tmp4 = icmp eq i32 %tmp3, 1		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @query2b(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 1, %tmp29
+	%tmp4 = icmp eq i32 %tmp3, 1		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @aquery2(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
+	%tmp4 = icmp eq i32 %tmp3, 1		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @aquery2b(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 1, %tmp29
+	%tmp4 = icmp eq i32 %tmp3, 1		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @query3(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
+	%tmp4 = icmp eq i32 %tmp3, %tmp29		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @query3b(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %x, %tmp29
+	%tmp4 = icmp eq i32 %tmp3, %tmp29		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @query3x(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
+	%tmp4 = icmp eq i32 %tmp29, %tmp3		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @query3bx(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %x, %tmp29
+	%tmp4 = icmp eq i32 %tmp29, %tmp3		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @queryne2(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
+	%tmp4 = icmp ne i32 %tmp3, 1		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @queryne2b(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 1, %tmp29
+	%tmp4 = icmp ne i32 %tmp3, 1		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @aqueryne2(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
+	%tmp4 = icmp ne i32 %tmp3, 1		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @aqueryne2b(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 1, %tmp29
+	%tmp4 = icmp ne i32 %tmp3, 1		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @queryne3(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
+	%tmp4 = icmp ne i32 %tmp3, %tmp29		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @queryne3b(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %x, %tmp29
+	%tmp4 = icmp ne i32 %tmp3, %tmp29		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @queryne3x(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
+	%tmp4 = icmp ne i32 %tmp29, %tmp3		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @queryne3bx(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %x, %tmp29
+	%tmp4 = icmp ne i32 %tmp29, %tmp3		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+declare void @foo()
diff --git a/test/CodeGen/X86/byval.ll b/test/CodeGen/X86/byval.ll
new file mode 100644
index 0000000..af36e1b
--- /dev/null
+++ b/test/CodeGen/X86/byval.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86-64 | grep {movq	8(%rsp), %rax}
+; RUN: llc < %s -march=x86 > %t
+; RUN: grep {movl	8(%esp), %edx} %t
+; RUN: grep {movl	4(%esp), %eax} %t
+
+%struct.s = type { i64, i64, i64 }
+
+define i64 @f(%struct.s* byval %a) {
+entry:
+	%tmp2 = getelementptr %struct.s* %a, i32 0, i32 0
+	%tmp3 = load i64* %tmp2, align 8
+	ret i64 %tmp3
+}
diff --git a/test/CodeGen/X86/byval2.ll b/test/CodeGen/X86/byval2.ll
new file mode 100644
index 0000000..71129f5
--- /dev/null
+++ b/test/CodeGen/X86/byval2.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
+; RUN: llc < %s -march=x86    | grep rep.movsl | count 2
+
+%struct.s = type { i64, i64, i64, i64, i64, i64, i64, i64,
+                   i64, i64, i64, i64, i64, i64, i64, i64,
+                   i64 }
+
+define void @g(i64 %a, i64 %b, i64 %c) {
+entry:
+	%d = alloca %struct.s, align 16
+	%tmp = getelementptr %struct.s* %d, i32 0, i32 0
+	store i64 %a, i64* %tmp, align 16
+	%tmp2 = getelementptr %struct.s* %d, i32 0, i32 1
+	store i64 %b, i64* %tmp2, align 16
+	%tmp4 = getelementptr %struct.s* %d, i32 0, i32 2
+	store i64 %c, i64* %tmp4, align 16
+	call void @f( %struct.s* %d byval)
+	call void @f( %struct.s* %d byval)
+	ret void
+}
+
+declare void @f(%struct.s* byval)
diff --git a/test/CodeGen/X86/byval3.ll b/test/CodeGen/X86/byval3.ll
new file mode 100644
index 0000000..504e0be
--- /dev/null
+++ b/test/CodeGen/X86/byval3.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
+; RUN: llc < %s -march=x86 | grep rep.movsl | count 2
+
+%struct.s = type { i32, i32, i32, i32, i32, i32, i32, i32,
+                   i32, i32, i32, i32, i32, i32, i32, i32,
+                   i32, i32, i32, i32, i32, i32, i32, i32,
+                   i32, i32, i32, i32, i32, i32, i32, i32,
+                   i32 }
+
+define void @g(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6) nounwind {
+entry:
+        %d = alloca %struct.s, align 16
+        %tmp = getelementptr %struct.s* %d, i32 0, i32 0
+        store i32 %a1, i32* %tmp, align 16
+        %tmp2 = getelementptr %struct.s* %d, i32 0, i32 1
+        store i32 %a2, i32* %tmp2, align 16
+        %tmp4 = getelementptr %struct.s* %d, i32 0, i32 2
+        store i32 %a3, i32* %tmp4, align 16
+        %tmp6 = getelementptr %struct.s* %d, i32 0, i32 3
+        store i32 %a4, i32* %tmp6, align 16
+        %tmp8 = getelementptr %struct.s* %d, i32 0, i32 4
+        store i32 %a5, i32* %tmp8, align 16
+        %tmp10 = getelementptr %struct.s* %d, i32 0, i32 5
+        store i32 %a6, i32* %tmp10, align 16
+        call void @f( %struct.s* %d byval)
+        call void @f( %struct.s* %d byval)
+        ret void
+}
+
+declare void @f(%struct.s* byval)
diff --git a/test/CodeGen/X86/byval4.ll b/test/CodeGen/X86/byval4.ll
new file mode 100644
index 0000000..4db9d65
--- /dev/null
+++ b/test/CodeGen/X86/byval4.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
+; RUN: llc < %s -march=x86 | grep rep.movsl	 | count 2
+
+%struct.s = type { i16, i16, i16, i16, i16, i16, i16, i16,
+                   i16, i16, i16, i16, i16, i16, i16, i16,
+                   i16, i16, i16, i16, i16, i16, i16, i16,
+                   i16, i16, i16, i16, i16, i16, i16, i16,
+                   i16, i16, i16, i16, i16, i16, i16, i16,
+                   i16, i16, i16, i16, i16, i16, i16, i16,
+                   i16, i16, i16, i16, i16, i16, i16, i16,
+                   i16, i16, i16, i16, i16, i16, i16, i16,
+                   i16 }
+
+
+define void @g(i16 signext  %a1, i16 signext  %a2, i16 signext  %a3,
+	 i16 signext  %a4, i16 signext  %a5, i16 signext  %a6) nounwind {
+entry:
+        %a = alloca %struct.s, align 16
+        %tmp = getelementptr %struct.s* %a, i32 0, i32 0
+        store i16 %a1, i16* %tmp, align 16
+        %tmp2 = getelementptr %struct.s* %a, i32 0, i32 1
+        store i16 %a2, i16* %tmp2, align 16
+        %tmp4 = getelementptr %struct.s* %a, i32 0, i32 2
+        store i16 %a3, i16* %tmp4, align 16
+        %tmp6 = getelementptr %struct.s* %a, i32 0, i32 3
+        store i16 %a4, i16* %tmp6, align 16
+        %tmp8 = getelementptr %struct.s* %a, i32 0, i32 4
+        store i16 %a5, i16* %tmp8, align 16
+        %tmp10 = getelementptr %struct.s* %a, i32 0, i32 5
+        store i16 %a6, i16* %tmp10, align 16
+        call void @f( %struct.s* %a byval )
+        call void @f( %struct.s* %a byval )
+        ret void
+}
+
+declare void @f(%struct.s* byval)
diff --git a/test/CodeGen/X86/byval5.ll b/test/CodeGen/X86/byval5.ll
new file mode 100644
index 0000000..69c115b
--- /dev/null
+++ b/test/CodeGen/X86/byval5.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -march=x86-64 | grep rep.movsq | count 2
+; RUN: llc < %s -march=x86 | grep rep.movsl	 | count 2
+
+%struct.s = type { i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8, i8, i8, i8, i8, i8, i8, i8,
+                   i8 }
+
+
+define void @g(i8 signext  %a1, i8 signext  %a2, i8 signext  %a3,
+	 i8 signext  %a4, i8 signext  %a5, i8 signext  %a6) {
+entry:
+        %a = alloca %struct.s
+        %tmp = getelementptr %struct.s* %a, i32 0, i32 0
+        store i8 %a1, i8* %tmp, align 8
+        %tmp2 = getelementptr %struct.s* %a, i32 0, i32 1
+        store i8 %a2, i8* %tmp2, align 8
+        %tmp4 = getelementptr %struct.s* %a, i32 0, i32 2
+        store i8 %a3, i8* %tmp4, align 8
+        %tmp6 = getelementptr %struct.s* %a, i32 0, i32 3
+        store i8 %a4, i8* %tmp6, align 8
+        %tmp8 = getelementptr %struct.s* %a, i32 0, i32 4
+        store i8 %a5, i8* %tmp8, align 8
+        %tmp10 = getelementptr %struct.s* %a, i32 0, i32 5
+        store i8 %a6, i8* %tmp10, align 8
+        call void @f( %struct.s* %a byval )
+        call void @f( %struct.s* %a byval )
+        ret void
+}
+
+declare void @f(%struct.s* byval)
diff --git a/test/CodeGen/X86/byval6.ll b/test/CodeGen/X86/byval6.ll
new file mode 100644
index 0000000..b060369
--- /dev/null
+++ b/test/CodeGen/X86/byval6.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=x86 | grep add | not grep 16
+
+	%struct.W = type { x86_fp80, x86_fp80 }
+@B = global %struct.W { x86_fp80 0xK4001A000000000000000, x86_fp80 0xK4001C000000000000000 }, align 32
+@.cpx = internal constant %struct.W { x86_fp80 0xK4001E000000000000000, x86_fp80 0xK40028000000000000000 }
+
+define i32 @main() nounwind  {
+entry:
+	tail call void (i32, ...)* @bar( i32 3, %struct.W* byval  @.cpx ) nounwind 
+	tail call void (i32, ...)* @baz( i32 3, %struct.W* byval  @B ) nounwind 
+	ret i32 undef
+}
+
+declare void @bar(i32, ...)
+
+declare void @baz(i32, ...)
diff --git a/test/CodeGen/X86/byval7.ll b/test/CodeGen/X86/byval7.ll
new file mode 100644
index 0000000..0da93ba
--- /dev/null
+++ b/test/CodeGen/X86/byval7.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 -mcpu=yonah | egrep {add|lea} | grep 16
+
+	%struct.S = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>,
+                           <2 x i64> }
+
+define i32 @main() nounwind  {
+entry:
+	%s = alloca %struct.S		; <%struct.S*> [#uses=2]
+	%tmp15 = getelementptr %struct.S* %s, i32 0, i32 0		; <<2 x i64>*> [#uses=1]
+	store <2 x i64> < i64 8589934595, i64 1 >, <2 x i64>* %tmp15, align 16
+	call void @t( i32 1, %struct.S* byval  %s ) nounwind 
+	ret i32 0
+}
+
+declare void @t(i32, %struct.S* byval )
diff --git a/test/CodeGen/X86/call-imm.ll b/test/CodeGen/X86/call-imm.ll
new file mode 100644
index 0000000..87785bc
--- /dev/null
+++ b/test/CodeGen/X86/call-imm.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=static | grep {call.*12345678}
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic | not grep {call.*12345678}
+; RUN: llc < %s -mtriple=i386-pc-linux -relocation-model=dynamic-no-pic | grep {call.*12345678}
+
+; Call to immediate is not safe on x86-64 unless we *know* that the
+; call will be within 32-bits pcrel from the dest immediate.
+
+; RUN: llc < %s -march=x86-64 | grep {call.*\*%rax}
+
+; PR3666
+; PR3773
+; rdar://6904453
+
+define i32 @main() nounwind {
+entry:
+	%0 = call i32 inttoptr (i32 12345678 to i32 (i32)*)(i32 0) nounwind		; <i32> [#uses=1]
+	ret i32 %0
+}
diff --git a/test/CodeGen/X86/call-push.ll b/test/CodeGen/X86/call-push.ll
new file mode 100644
index 0000000..02cbccc
--- /dev/null
+++ b/test/CodeGen/X86/call-push.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim | FileCheck %s
+
+        %struct.decode_t = type { i8, i8, i8, i8, i16, i8, i8, %struct.range_t** }
+        %struct.range_t = type { float, float, i32, i32, i32, [0 x i8] }
+
+define i32 @decode_byte(%struct.decode_t* %decode) nounwind {
+; CHECK: decode_byte:
+; CHECK: pushl
+; CHECK: popl
+; CHECK: popl
+; CHECK: jmp
+entry:
+        %tmp2 = getelementptr %struct.decode_t* %decode, i32 0, i32 4           ; <i16*> [#uses=1]
+        %tmp23 = bitcast i16* %tmp2 to i32*             ; <i32*> [#uses=1]
+        %tmp4 = load i32* %tmp23                ; <i32> [#uses=1]
+        %tmp514 = lshr i32 %tmp4, 24            ; <i32> [#uses=1]
+        %tmp56 = trunc i32 %tmp514 to i8                ; <i8> [#uses=1]
+        %tmp7 = icmp eq i8 %tmp56, 0            ; <i1> [#uses=1]
+        br i1 %tmp7, label %UnifiedReturnBlock, label %cond_true
+
+cond_true:              ; preds = %entry
+        %tmp10 = tail call i32 @f( %struct.decode_t* %decode )          ; <i32> [#uses=1]
+        ret i32 %tmp10
+
+UnifiedReturnBlock:             ; preds = %entry
+        ret i32 0
+}
+
+declare i32 @f(%struct.decode_t*)
diff --git a/test/CodeGen/X86/change-compare-stride-0.ll b/test/CodeGen/X86/change-compare-stride-0.ll
new file mode 100644
index 0000000..d520a6f
--- /dev/null
+++ b/test/CodeGen/X86/change-compare-stride-0.ll
@@ -0,0 +1,77 @@
+; RUN: llc < %s -march=x86 > %t
+; RUN: grep {cmpl	\$-478,} %t
+; RUN: not grep inc %t
+; RUN: not grep {leal	1(} %t
+; RUN: not grep {leal	-1(} %t
+; RUN: grep dec %t | count 1
+
+define void @borf(i8* nocapture %in, i8* nocapture %out) nounwind {
+bb4.thread:
+	br label %bb2.outer
+
+bb2.outer:		; preds = %bb4, %bb4.thread
+	%indvar18 = phi i32 [ 0, %bb4.thread ], [ %indvar.next28, %bb4 ]		; <i32> [#uses=3]
+	%tmp34 = mul i32 %indvar18, 65535		; <i32> [#uses=1]
+	%i.0.reg2mem.0.ph = add i32 %tmp34, 639		; <i32> [#uses=1]
+	%0 = and i32 %i.0.reg2mem.0.ph, 65535		; <i32> [#uses=1]
+	%1 = mul i32 %0, 480		; <i32> [#uses=1]
+	%tmp20 = mul i32 %indvar18, -478		; <i32> [#uses=1]
+	br label %bb2
+
+bb2:		; preds = %bb2, %bb2.outer
+	%indvar = phi i32 [ 0, %bb2.outer ], [ %indvar.next, %bb2 ]		; <i32> [#uses=3]
+	%ctg2 = getelementptr i8* %out, i32 %tmp20		; <i8*> [#uses=1]
+	%tmp21 = ptrtoint i8* %ctg2 to i32		; <i32> [#uses=1]
+	%tmp23 = sub i32 %tmp21, %indvar		; <i32> [#uses=1]
+	%out_addr.0.reg2mem.0 = inttoptr i32 %tmp23 to i8*		; <i8*> [#uses=1]
+	%tmp25 = mul i32 %indvar, 65535		; <i32> [#uses=1]
+	%j.0.reg2mem.0 = add i32 %tmp25, 479		; <i32> [#uses=1]
+	%2 = and i32 %j.0.reg2mem.0, 65535		; <i32> [#uses=1]
+	%3 = add i32 %1, %2		; <i32> [#uses=9]
+	%4 = add i32 %3, -481		; <i32> [#uses=1]
+	%5 = getelementptr i8* %in, i32 %4		; <i8*> [#uses=1]
+	%6 = load i8* %5, align 1		; <i8> [#uses=1]
+	%7 = add i32 %3, -480		; <i32> [#uses=1]
+	%8 = getelementptr i8* %in, i32 %7		; <i8*> [#uses=1]
+	%9 = load i8* %8, align 1		; <i8> [#uses=1]
+	%10 = add i32 %3, -479		; <i32> [#uses=1]
+	%11 = getelementptr i8* %in, i32 %10		; <i8*> [#uses=1]
+	%12 = load i8* %11, align 1		; <i8> [#uses=1]
+	%13 = add i32 %3, -1		; <i32> [#uses=1]
+	%14 = getelementptr i8* %in, i32 %13		; <i8*> [#uses=1]
+	%15 = load i8* %14, align 1		; <i8> [#uses=1]
+	%16 = getelementptr i8* %in, i32 %3		; <i8*> [#uses=1]
+	%17 = load i8* %16, align 1		; <i8> [#uses=1]
+	%18 = add i32 %3, 1		; <i32> [#uses=1]
+	%19 = getelementptr i8* %in, i32 %18		; <i8*> [#uses=1]
+	%20 = load i8* %19, align 1		; <i8> [#uses=1]
+	%21 = add i32 %3, 481		; <i32> [#uses=1]
+	%22 = getelementptr i8* %in, i32 %21		; <i8*> [#uses=1]
+	%23 = load i8* %22, align 1		; <i8> [#uses=1]
+	%24 = add i32 %3, 480		; <i32> [#uses=1]
+	%25 = getelementptr i8* %in, i32 %24		; <i8*> [#uses=1]
+	%26 = load i8* %25, align 1		; <i8> [#uses=1]
+	%27 = add i32 %3, 479		; <i32> [#uses=1]
+	%28 = getelementptr i8* %in, i32 %27		; <i8*> [#uses=1]
+	%29 = load i8* %28, align 1		; <i8> [#uses=1]
+	%30 = add i8 %9, %6		; <i8> [#uses=1]
+	%31 = add i8 %30, %12		; <i8> [#uses=1]
+	%32 = add i8 %31, %15		; <i8> [#uses=1]
+	%33 = add i8 %32, %17		; <i8> [#uses=1]
+	%34 = add i8 %33, %20		; <i8> [#uses=1]
+	%35 = add i8 %34, %23		; <i8> [#uses=1]
+	%36 = add i8 %35, %26		; <i8> [#uses=1]
+	%37 = add i8 %36, %29		; <i8> [#uses=1]
+	store i8 %37, i8* %out_addr.0.reg2mem.0, align 1
+	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next, 478		; <i1> [#uses=1]
+	br i1 %exitcond, label %bb4, label %bb2
+
+bb4:		; preds = %bb2
+	%indvar.next28 = add i32 %indvar18, 1		; <i32> [#uses=2]
+	%exitcond29 = icmp eq i32 %indvar.next28, 638		; <i1> [#uses=1]
+	br i1 %exitcond29, label %return, label %bb2.outer
+
+return:		; preds = %bb4
+	ret void
+}
diff --git a/test/CodeGen/X86/change-compare-stride-1.ll b/test/CodeGen/X86/change-compare-stride-1.ll
new file mode 100644
index 0000000..a9ddbdb
--- /dev/null
+++ b/test/CodeGen/X86/change-compare-stride-1.ll
@@ -0,0 +1,86 @@
+; RUN: llc < %s -march=x86-64 > %t
+; RUN: grep {cmpq	\$-478,} %t
+; RUN: not grep inc %t
+; RUN: not grep {leal	1(} %t
+; RUN: not grep {leal	-1(} %t
+; RUN: grep dec %t | count 1
+
+define void @borf(i8* nocapture %in, i8* nocapture %out) nounwind {
+bb4.thread:
+	br label %bb2.outer
+
+bb2.outer:		; preds = %bb4, %bb4.thread
+	%indvar19 = phi i64 [ 0, %bb4.thread ], [ %indvar.next29, %bb4 ]		; <i64> [#uses=3]
+	%indvar31 = trunc i64 %indvar19 to i16		; <i16> [#uses=1]
+	%i.0.reg2mem.0.ph = sub i16 639, %indvar31		; <i16> [#uses=1]
+	%0 = zext i16 %i.0.reg2mem.0.ph to i32		; <i32> [#uses=1]
+	%1 = mul i32 %0, 480		; <i32> [#uses=1]
+	%tmp21 = mul i64 %indvar19, -478		; <i64> [#uses=1]
+	br label %bb2
+
+bb2:		; preds = %bb2, %bb2.outer
+	%indvar = phi i64 [ 0, %bb2.outer ], [ %indvar.next, %bb2 ]		; <i64> [#uses=3]
+	%indvar16 = trunc i64 %indvar to i16		; <i16> [#uses=1]
+	%ctg2 = getelementptr i8* %out, i64 %tmp21		; <i8*> [#uses=1]
+	%tmp22 = ptrtoint i8* %ctg2 to i64		; <i64> [#uses=1]
+	%tmp24 = sub i64 %tmp22, %indvar		; <i64> [#uses=1]
+	%out_addr.0.reg2mem.0 = inttoptr i64 %tmp24 to i8*		; <i8*> [#uses=1]
+	%j.0.reg2mem.0 = sub i16 479, %indvar16		; <i16> [#uses=1]
+	%2 = zext i16 %j.0.reg2mem.0 to i32		; <i32> [#uses=1]
+	%3 = add i32 %1, %2		; <i32> [#uses=9]
+	%4 = add i32 %3, -481		; <i32> [#uses=1]
+	%5 = zext i32 %4 to i64		; <i64> [#uses=1]
+	%6 = getelementptr i8* %in, i64 %5		; <i8*> [#uses=1]
+	%7 = load i8* %6, align 1		; <i8> [#uses=1]
+	%8 = add i32 %3, -480		; <i32> [#uses=1]
+	%9 = zext i32 %8 to i64		; <i64> [#uses=1]
+	%10 = getelementptr i8* %in, i64 %9		; <i8*> [#uses=1]
+	%11 = load i8* %10, align 1		; <i8> [#uses=1]
+	%12 = add i32 %3, -479		; <i32> [#uses=1]
+	%13 = zext i32 %12 to i64		; <i64> [#uses=1]
+	%14 = getelementptr i8* %in, i64 %13		; <i8*> [#uses=1]
+	%15 = load i8* %14, align 1		; <i8> [#uses=1]
+	%16 = add i32 %3, -1		; <i32> [#uses=1]
+	%17 = zext i32 %16 to i64		; <i64> [#uses=1]
+	%18 = getelementptr i8* %in, i64 %17		; <i8*> [#uses=1]
+	%19 = load i8* %18, align 1		; <i8> [#uses=1]
+	%20 = zext i32 %3 to i64		; <i64> [#uses=1]
+	%21 = getelementptr i8* %in, i64 %20		; <i8*> [#uses=1]
+	%22 = load i8* %21, align 1		; <i8> [#uses=1]
+	%23 = add i32 %3, 1		; <i32> [#uses=1]
+	%24 = zext i32 %23 to i64		; <i64> [#uses=1]
+	%25 = getelementptr i8* %in, i64 %24		; <i8*> [#uses=1]
+	%26 = load i8* %25, align 1		; <i8> [#uses=1]
+	%27 = add i32 %3, 481		; <i32> [#uses=1]
+	%28 = zext i32 %27 to i64		; <i64> [#uses=1]
+	%29 = getelementptr i8* %in, i64 %28		; <i8*> [#uses=1]
+	%30 = load i8* %29, align 1		; <i8> [#uses=1]
+	%31 = add i32 %3, 480		; <i32> [#uses=1]
+	%32 = zext i32 %31 to i64		; <i64> [#uses=1]
+	%33 = getelementptr i8* %in, i64 %32		; <i8*> [#uses=1]
+	%34 = load i8* %33, align 1		; <i8> [#uses=1]
+	%35 = add i32 %3, 479		; <i32> [#uses=1]
+	%36 = zext i32 %35 to i64		; <i64> [#uses=1]
+	%37 = getelementptr i8* %in, i64 %36		; <i8*> [#uses=1]
+	%38 = load i8* %37, align 1		; <i8> [#uses=1]
+	%39 = add i8 %11, %7		; <i8> [#uses=1]
+	%40 = add i8 %39, %15		; <i8> [#uses=1]
+	%41 = add i8 %40, %19		; <i8> [#uses=1]
+	%42 = add i8 %41, %22		; <i8> [#uses=1]
+	%43 = add i8 %42, %26		; <i8> [#uses=1]
+	%44 = add i8 %43, %30		; <i8> [#uses=1]
+	%45 = add i8 %44, %34		; <i8> [#uses=1]
+	%46 = add i8 %45, %38		; <i8> [#uses=1]
+	store i8 %46, i8* %out_addr.0.reg2mem.0, align 1
+	%indvar.next = add i64 %indvar, 1		; <i64> [#uses=2]
+	%exitcond = icmp eq i64 %indvar.next, 478		; <i1> [#uses=1]
+	br i1 %exitcond, label %bb4, label %bb2
+
+bb4:		; preds = %bb2
+	%indvar.next29 = add i64 %indvar19, 1		; <i64> [#uses=2]
+	%exitcond30 = icmp eq i64 %indvar.next29, 638		; <i1> [#uses=1]
+	br i1 %exitcond30, label %return, label %bb2.outer
+
+return:		; preds = %bb4
+	ret void
+}
diff --git a/test/CodeGen/X86/clz.ll b/test/CodeGen/X86/clz.ll
new file mode 100644
index 0000000..3f27187
--- /dev/null
+++ b/test/CodeGen/X86/clz.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=x86 | grep bsr | count 2
+; RUN: llc < %s -march=x86 | grep bsf
+; RUN: llc < %s -march=x86 | grep cmov | count 3
+
+define i32 @t1(i32 %x) nounwind  {
+	%tmp = tail call i32 @llvm.ctlz.i32( i32 %x )
+	ret i32 %tmp
+}
+
+declare i32 @llvm.ctlz.i32(i32) nounwind readnone 
+
+define i32 @t2(i32 %x) nounwind  {
+	%tmp = tail call i32 @llvm.cttz.i32( i32 %x )
+	ret i32 %tmp
+}
+
+declare i32 @llvm.cttz.i32(i32) nounwind readnone 
+
+define i16 @t3(i16 %x, i16 %y) nounwind  {
+entry:
+        %tmp1 = add i16 %x, %y
+	%tmp2 = tail call i16 @llvm.ctlz.i16( i16 %tmp1 )		; <i16> [#uses=1]
+	ret i16 %tmp2
+}
+
+declare i16 @llvm.ctlz.i16(i16) nounwind readnone 
diff --git a/test/CodeGen/X86/cmov.ll b/test/CodeGen/X86/cmov.ll
new file mode 100644
index 0000000..39d9d1e
--- /dev/null
+++ b/test/CodeGen/X86/cmov.ll
@@ -0,0 +1,157 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define i32 @test1(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone {
+entry:
+; CHECK: test1:
+; CHECK: btl
+; CHECK-NEXT: movl	$12, %eax
+; CHECK-NEXT: cmovael	(%rcx), %eax
+; CHECK-NEXT: ret
+
+	%0 = lshr i32 %x, %n		; <i32> [#uses=1]
+	%1 = and i32 %0, 1		; <i32> [#uses=1]
+	%toBool = icmp eq i32 %1, 0		; <i1> [#uses=1]
+        %v = load i32* %vp
+	%.0 = select i1 %toBool, i32 %v, i32 12		; <i32> [#uses=1]
+	ret i32 %.0
+}
+define i32 @test2(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone {
+entry:
+; CHECK: test2:
+; CHECK: btl
+; CHECK-NEXT: movl	$12, %eax
+; CHECK-NEXT: cmovbl	(%rcx), %eax
+; CHECK-NEXT: ret
+
+	%0 = lshr i32 %x, %n		; <i32> [#uses=1]
+	%1 = and i32 %0, 1		; <i32> [#uses=1]
+	%toBool = icmp eq i32 %1, 0		; <i1> [#uses=1]
+        %v = load i32* %vp
+	%.0 = select i1 %toBool, i32 12, i32 %v		; <i32> [#uses=1]
+	ret i32 %.0
+}
+
+
+; x86's 32-bit cmov doesn't clobber the high 32 bits of the destination
+; if the condition is false. An explicit zero-extend (movl) is needed
+; after the cmov.
+
+declare void @bar(i64) nounwind
+
+define void @test3(i64 %a, i64 %b, i1 %p) nounwind {
+; CHECK: test3:
+; CHECK:      cmovnel %edi, %esi
+; CHECK-NEXT: movl    %esi, %edi
+
+  %c = trunc i64 %a to i32
+  %d = trunc i64 %b to i32
+  %e = select i1 %p, i32 %c, i32 %d
+  %f = zext i32 %e to i64
+  call void @bar(i64 %f)
+  ret void
+}
+
+
+
+; CodeGen shouldn't try to do a setne after an expanded 8-bit conditional
+; move without recomputing EFLAGS, because the expansion of the conditional
+; move with control flow may clobber EFLAGS (e.g., with xor, to set the
+; register to zero).
+
+; The test is a little awkward; the important part is that there's a test before the
+; setne.
+; PR4814
+
+
+@g_3 = external global i8                         ; <i8*> [#uses=1]
+@g_96 = external global i8                        ; <i8*> [#uses=2]
+@g_100 = external global i8                       ; <i8*> [#uses=2]
+@_2E_str = external constant [15 x i8], align 1   ; <[15 x i8]*> [#uses=1]
+
+define i32 @test4() nounwind {
+entry:
+  %0 = load i8* @g_3, align 1                     ; <i8> [#uses=2]
+  %1 = sext i8 %0 to i32                          ; <i32> [#uses=1]
+  %.lobit.i = lshr i8 %0, 7                       ; <i8> [#uses=1]
+  %tmp.i = zext i8 %.lobit.i to i32               ; <i32> [#uses=1]
+  %tmp.not.i = xor i32 %tmp.i, 1                  ; <i32> [#uses=1]
+  %iftmp.17.0.i.i = ashr i32 %1, %tmp.not.i       ; <i32> [#uses=1]
+  %retval56.i.i = trunc i32 %iftmp.17.0.i.i to i8 ; <i8> [#uses=1]
+  %2 = icmp eq i8 %retval56.i.i, 0                ; <i1> [#uses=2]
+  %g_96.promoted.i = load i8* @g_96               ; <i8> [#uses=3]
+  %3 = icmp eq i8 %g_96.promoted.i, 0             ; <i1> [#uses=2]
+  br i1 %3, label %func_4.exit.i, label %bb.i.i.i
+
+bb.i.i.i:                                         ; preds = %entry
+  %4 = volatile load i8* @g_100, align 1          ; <i8> [#uses=0]
+  br label %func_4.exit.i
+
+; CHECK: test4:
+; CHECK: g_100
+; CHECK: testb
+; CHECK: testb %al, %al
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: testb
+
+func_4.exit.i:                                    ; preds = %bb.i.i.i, %entry
+  %.not.i = xor i1 %2, true                       ; <i1> [#uses=1]
+  %brmerge.i = or i1 %3, %.not.i                  ; <i1> [#uses=1]
+  %.mux.i = select i1 %2, i8 %g_96.promoted.i, i8 0 ; <i8> [#uses=1]
+  br i1 %brmerge.i, label %func_1.exit, label %bb.i.i
+
+bb.i.i:                                           ; preds = %func_4.exit.i
+  %5 = volatile load i8* @g_100, align 1          ; <i8> [#uses=0]
+  br label %func_1.exit
+
+func_1.exit:                                      ; preds = %bb.i.i, %func_4.exit.i
+  %g_96.tmp.0.i = phi i8 [ %g_96.promoted.i, %bb.i.i ], [ %.mux.i, %func_4.exit.i ] ; <i8> [#uses=2]
+  store i8 %g_96.tmp.0.i, i8* @g_96
+  %6 = zext i8 %g_96.tmp.0.i to i32               ; <i32> [#uses=1]
+  %7 = tail call i32 (i8*, ...)* @printf(i8* noalias getelementptr ([15 x i8]* @_2E_str, i64 0, i64 0), i32 %6) nounwind ; <i32> [#uses=0]
+  ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+
+; Should compile to setcc | -2.
+; rdar://6668608
+define i32 @test5(i32* nocapture %P) nounwind readonly {
+entry:
+; CHECK: test5:
+; CHECK: 	setg	%al
+; CHECK:	movzbl	%al, %eax
+; CHECK:	orl	$-2, %eax
+; CHECK:	ret
+
+	%0 = load i32* %P, align 4		; <i32> [#uses=1]
+	%1 = icmp sgt i32 %0, 41		; <i1> [#uses=1]
+	%iftmp.0.0 = select i1 %1, i32 -1, i32 -2		; <i32> [#uses=1]
+	ret i32 %iftmp.0.0
+}
+
+define i32 @test6(i32* nocapture %P) nounwind readonly {
+entry:
+; CHECK: test6:
+; CHECK: 	setl	%al
+; CHECK:	movzbl	%al, %eax
+; CHECK:	leal	4(%rax,%rax,8), %eax
+; CHECK:        ret
+	%0 = load i32* %P, align 4		; <i32> [#uses=1]
+	%1 = icmp sgt i32 %0, 41		; <i1> [#uses=1]
+	%iftmp.0.0 = select i1 %1, i32 4, i32 13		; <i32> [#uses=1]
+	ret i32 %iftmp.0.0
+}
+
+
+; Don't try to use a 16-bit conditional move to do an 8-bit select,
+; because it isn't worth it. Just use a branch instead.
+define i8 @test7(i1 inreg %c, i8 inreg %a, i8 inreg %b) nounwind {
+; CHECK: test7:
+; CHECK:     testb	$1, %dil
+; CHECK-NEXT:     jne	LBB
+
+  %d = select i1 %c, i8 %a, i8 %b
+  ret i8 %d
+}
diff --git a/test/CodeGen/X86/cmp-test.ll b/test/CodeGen/X86/cmp-test.ll
new file mode 100644
index 0000000..898c09b
--- /dev/null
+++ b/test/CodeGen/X86/cmp-test.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=x86 | grep cmp | count 1
+; RUN: llc < %s -march=x86 | grep test | count 1
+
+define i32 @f1(i32 %X, i32* %y) {
+	%tmp = load i32* %y		; <i32> [#uses=1]
+	%tmp.upgrd.1 = icmp eq i32 %tmp, 0		; <i1> [#uses=1]
+	br i1 %tmp.upgrd.1, label %ReturnBlock, label %cond_true
+
+cond_true:		; preds = %0
+	ret i32 1
+
+ReturnBlock:		; preds = %0
+	ret i32 0
+}
+
+define i32 @f2(i32 %X, i32* %y) {
+	%tmp = load i32* %y		; <i32> [#uses=1]
+	%tmp1 = shl i32 %tmp, 3		; <i32> [#uses=1]
+	%tmp1.upgrd.2 = icmp eq i32 %tmp1, 0		; <i1> [#uses=1]
+	br i1 %tmp1.upgrd.2, label %ReturnBlock, label %cond_true
+
+cond_true:		; preds = %0
+	ret i32 1
+
+ReturnBlock:		; preds = %0
+	ret i32 0
+}
diff --git a/test/CodeGen/X86/cmp0.ll b/test/CodeGen/X86/cmp0.ll
new file mode 100644
index 0000000..48784488
--- /dev/null
+++ b/test/CodeGen/X86/cmp0.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+define i64 @test0(i64 %x) nounwind {
+  %t = icmp eq i64 %x, 0
+  %r = zext i1 %t to i64
+  ret i64 %r
+; CHECK: test0:
+; CHECK: 	testq	%rdi, %rdi
+; CHECK: 	sete	%al
+; CHECK: 	movzbl	%al, %eax
+; CHECK: 	ret
+}
+
+define i64 @test1(i64 %x) nounwind {
+  %t = icmp slt i64 %x, 1
+  %r = zext i1 %t to i64
+  ret i64 %r
+; CHECK: test1:
+; CHECK: 	testq	%rdi, %rdi
+; CHECK: 	setle	%al
+; CHECK: 	movzbl	%al, %eax
+; CHECK: 	ret
+}
+
diff --git a/test/CodeGen/X86/cmp2.ll b/test/CodeGen/X86/cmp2.ll
new file mode 100644
index 0000000..9a8e00c
--- /dev/null
+++ b/test/CodeGen/X86/cmp2.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep ucomisd | grep CPI | count 2
+
+define i32 @test(double %A) nounwind  {
+ entry:
+ %tmp2 = fcmp ogt double %A, 1.500000e+02; <i1> [#uses=1]
+ %tmp5 = fcmp ult double %A, 7.500000e+01; <i1> [#uses=1]
+ %bothcond = or i1 %tmp2, %tmp5; <i1> [#uses=1]
+ br i1 %bothcond, label %bb8, label %bb12
+
+ bb8:; preds = %entry
+ %tmp9 = tail call i32 (...)* @foo( ) nounwind ; <i32> [#uses=1]
+ ret i32 %tmp9
+
+ bb12:; preds = %entry
+ ret i32 32
+}
+
+declare i32 @foo(...)
diff --git a/test/CodeGen/X86/coalesce-esp.ll b/test/CodeGen/X86/coalesce-esp.ll
new file mode 100644
index 0000000..0fe4e56
--- /dev/null
+++ b/test/CodeGen/X86/coalesce-esp.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s | grep {movl	%esp, %eax}
+; PR4572
+
+; Don't coalesce with %esp if it would end up putting %esp in
+; the index position of an address, because that can't be
+; encoded on x86. It would actually be slightly better to
+; swap the address operands though, since there's no scale.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-mingw32"
+	%"struct.std::valarray<unsigned int>" = type { i32, i32* }
+
+define void @_ZSt17__gslice_to_indexjRKSt8valarrayIjES2_RS0_(i32 %__o, %"struct.std::valarray<unsigned int>"* nocapture %__l, %"struct.std::valarray<unsigned int>"* nocapture %__s, %"struct.std::valarray<unsigned int>"* nocapture %__i) nounwind {
+entry:
+	%0 = alloca i32, i32 undef, align 4		; <i32*> [#uses=1]
+	br i1 undef, label %return, label %bb4
+
+bb4:		; preds = %bb7.backedge, %entry
+	%indvar = phi i32 [ %indvar.next, %bb7.backedge ], [ 0, %entry ]		; <i32> [#uses=2]
+	%scevgep24.sum = sub i32 undef, %indvar		; <i32> [#uses=2]
+	%scevgep25 = getelementptr i32* %0, i32 %scevgep24.sum		; <i32*> [#uses=1]
+	%scevgep27 = getelementptr i32* undef, i32 %scevgep24.sum		; <i32*> [#uses=1]
+	%1 = load i32* %scevgep27, align 4		; <i32> [#uses=0]
+	br i1 undef, label %bb7.backedge, label %bb5
+
+bb5:		; preds = %bb4
+	store i32 0, i32* %scevgep25, align 4
+	br label %bb7.backedge
+
+bb7.backedge:		; preds = %bb5, %bb4
+	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=1]
+	br label %bb4
+
+return:		; preds = %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/coalescer-commute1.ll b/test/CodeGen/X86/coalescer-commute1.ll
new file mode 100644
index 0000000..8aa0bfd
--- /dev/null
+++ b/test/CodeGen/X86/coalescer-commute1.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps
+; PR1877
+
+@NNTOT = weak global i32 0		; <i32*> [#uses=1]
+@G = weak global float 0.000000e+00		; <float*> [#uses=1]
+
+define void @runcont(i32* %source) nounwind  {
+entry:
+	%tmp10 = load i32* @NNTOT, align 4		; <i32> [#uses=1]
+	br label %bb
+
+bb:		; preds = %bb, %entry
+	%neuron.0 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]		; <i32> [#uses=2]
+	%thesum.0 = phi float [ 0.000000e+00, %entry ], [ %tmp6, %bb ]		; <float> [#uses=1]
+	%tmp2 = getelementptr i32* %source, i32 %neuron.0		; <i32*> [#uses=1]
+	%tmp3 = load i32* %tmp2, align 4		; <i32> [#uses=1]
+	%tmp34 = sitofp i32 %tmp3 to float		; <float> [#uses=1]
+	%tmp6 = fadd float %tmp34, %thesum.0		; <float> [#uses=2]
+	%indvar.next = add i32 %neuron.0, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next, %tmp10		; <i1> [#uses=1]
+	br i1 %exitcond, label %bb13, label %bb
+
+bb13:		; preds = %bb
+	volatile store float %tmp6, float* @G, align 4
+	ret void
+}
diff --git a/test/CodeGen/X86/coalescer-commute2.ll b/test/CodeGen/X86/coalescer-commute2.ll
new file mode 100644
index 0000000..5d10bba
--- /dev/null
+++ b/test/CodeGen/X86/coalescer-commute2.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=x86-64 | grep paddw | count 2
+; RUN: llc < %s -march=x86-64 | not grep mov
+
+; The 2-addr pass should ensure that identical code is produced for these functions
+; no extra copy should be generated.
+
+define <2 x i64> @test1(<2 x i64> %x, <2 x i64> %y) nounwind  {
+entry:
+	%tmp6 = bitcast <2 x i64> %y to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp8 = bitcast <2 x i64> %x to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp9 = add <8 x i16> %tmp8, %tmp6		; <<8 x i16>> [#uses=1]
+	%tmp10 = bitcast <8 x i16> %tmp9 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %tmp10
+}
+
+define <2 x i64> @test2(<2 x i64> %x, <2 x i64> %y) nounwind  {
+entry:
+	%tmp6 = bitcast <2 x i64> %x to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp8 = bitcast <2 x i64> %y to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp9 = add <8 x i16> %tmp8, %tmp6		; <<8 x i16>> [#uses=1]
+	%tmp10 = bitcast <8 x i16> %tmp9 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %tmp10
+}
+
+
+; The coalescer should commute the add to avoid a copy.
+define <4 x float> @test3(<4 x float> %V) {
+entry:
+        %tmp8 = shufflevector <4 x float> %V, <4 x float> undef,
+                                        <4 x i32> < i32 3, i32 2, i32 1, i32 0 >
+        %add = fadd <4 x float> %tmp8, %V
+        ret <4 x float> %add
+}
+
diff --git a/test/CodeGen/X86/coalescer-commute3.ll b/test/CodeGen/X86/coalescer-commute3.ll
new file mode 100644
index 0000000..e5bd448
--- /dev/null
+++ b/test/CodeGen/X86/coalescer-commute3.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | count 6
+
+	%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
+
+define  i32 @perimeter(%struct.quad_struct* %tree, i32 %size) nounwind  {
+entry:
+	switch i32 %size, label %UnifiedReturnBlock [
+		 i32 2, label %bb
+		 i32 0, label %bb50
+	]
+
+bb:		; preds = %entry
+	%tmp31 = tail call  i32 @perimeter( %struct.quad_struct* null, i32 0 ) nounwind 		; <i32> [#uses=1]
+	%tmp40 = tail call  i32 @perimeter( %struct.quad_struct* null, i32 0 ) nounwind 		; <i32> [#uses=1]
+	%tmp33 = add i32 0, %tmp31		; <i32> [#uses=1]
+	%tmp42 = add i32 %tmp33, %tmp40		; <i32> [#uses=1]
+	ret i32 %tmp42
+
+bb50:		; preds = %entry
+	ret i32 0
+
+UnifiedReturnBlock:		; preds = %entry
+	ret i32 0
+}
diff --git a/test/CodeGen/X86/coalescer-commute4.ll b/test/CodeGen/X86/coalescer-commute4.ll
new file mode 100644
index 0000000..02a9781
--- /dev/null
+++ b/test/CodeGen/X86/coalescer-commute4.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps
+; PR1501
+
+define float @foo(i32* %x, float* %y, i32 %c) nounwind  {
+entry:
+	%tmp2132 = icmp eq i32 %c, 0		; <i1> [#uses=2]
+	br i1 %tmp2132, label %bb23, label %bb.preheader
+
+bb.preheader:		; preds = %entry
+	%umax = select i1 %tmp2132, i32 1, i32 %c		; <i32> [#uses=1]
+	br label %bb
+
+bb:		; preds = %bb, %bb.preheader
+	%i.0.reg2mem.0 = phi i32 [ 0, %bb.preheader ], [ %indvar.next, %bb ]		; <i32> [#uses=3]
+	%res.0.reg2mem.0 = phi float [ 0.000000e+00, %bb.preheader ], [ %tmp14, %bb ]		; <float> [#uses=1]
+	%tmp3 = getelementptr i32* %x, i32 %i.0.reg2mem.0		; <i32*> [#uses=1]
+	%tmp4 = load i32* %tmp3, align 4		; <i32> [#uses=1]
+	%tmp45 = sitofp i32 %tmp4 to float		; <float> [#uses=1]
+	%tmp8 = getelementptr float* %y, i32 %i.0.reg2mem.0		; <float*> [#uses=1]
+	%tmp9 = load float* %tmp8, align 4		; <float> [#uses=1]
+	%tmp11 = fmul float %tmp9, %tmp45		; <float> [#uses=1]
+	%tmp14 = fadd float %tmp11, %res.0.reg2mem.0		; <float> [#uses=2]
+	%indvar.next = add i32 %i.0.reg2mem.0, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next, %umax		; <i1> [#uses=1]
+	br i1 %exitcond, label %bb23, label %bb
+
+bb23:		; preds = %bb, %entry
+	%res.0.reg2mem.1 = phi float [ 0.000000e+00, %entry ], [ %tmp14, %bb ]		; <float> [#uses=1]
+	ret float %res.0.reg2mem.1
+}
diff --git a/test/CodeGen/X86/coalescer-commute5.ll b/test/CodeGen/X86/coalescer-commute5.ll
new file mode 100644
index 0000000..510d115
--- /dev/null
+++ b/test/CodeGen/X86/coalescer-commute5.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps
+
+define i32 @t() {
+entry:
+	br i1 true, label %bb1664, label %bb1656
+bb1656:		; preds = %entry
+	ret i32 0
+bb1664:		; preds = %entry
+	%tmp4297 = bitcast <16 x i8> zeroinitializer to <2 x i64>		; <<2 x i64>> [#uses=2]
+	%tmp4351 = call <16 x i8> @llvm.x86.sse2.pcmpeq.b( <16 x i8> zeroinitializer, <16 x i8> zeroinitializer ) nounwind readnone 		; <<16 x i8>> [#uses=0]
+	br i1 false, label %bb5310, label %bb4743
+bb4743:		; preds = %bb1664
+	%tmp4360.not28 = or <2 x i64> zeroinitializer, %tmp4297		; <<2 x i64>> [#uses=1]
+	br label %bb5310
+bb5310:		; preds = %bb4743, %bb1664
+	%tmp4360.not28.pn = phi <2 x i64> [ %tmp4360.not28, %bb4743 ], [ %tmp4297, %bb1664 ]		; <<2 x i64>> [#uses=1]
+	%tmp4415.not.pn = or <2 x i64> zeroinitializer, %tmp4360.not28.pn		; <<2 x i64>> [#uses=0]
+	ret i32 0
+}
+
+declare <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8>, <16 x i8>) nounwind readnone 
diff --git a/test/CodeGen/X86/coalescer-cross.ll b/test/CodeGen/X86/coalescer-cross.ll
new file mode 100644
index 0000000..7d6f399
--- /dev/null
+++ b/test/CodeGen/X86/coalescer-cross.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10 | not grep movaps
+; rdar://6509240
+
+	type { %struct.TValue }		; type %0
+	type { %struct.L_Umaxalign, i32, %struct.Node* }		; type %1
+	%struct.CallInfo = type { %struct.TValue*, %struct.TValue*, %struct.TValue*, i32*, i32, i32 }
+	%struct.GCObject = type { %struct.lua_State }
+	%struct.L_Umaxalign = type { double }
+	%struct.Mbuffer = type { i8*, i32, i32 }
+	%struct.Node = type { %struct.TValue, %struct.TKey }
+	%struct.TKey = type { %1 }
+	%struct.TString = type { %struct.anon }
+	%struct.TValue = type { %struct.L_Umaxalign, i32 }
+	%struct.Table = type { %struct.GCObject*, i8, i8, i8, i8, %struct.Table*, %struct.TValue*, %struct.Node*, %struct.Node*, %struct.GCObject*, i32 }
+	%struct.UpVal = type { %struct.GCObject*, i8, i8, %struct.TValue*, %0 }
+	%struct.anon = type { %struct.GCObject*, i8, i8, i8, i32, i32 }
+	%struct.global_State = type { %struct.stringtable, i8* (i8*, i8*, i32, i32)*, i8*, i8, i8, i32, %struct.GCObject*, %struct.GCObject**, %struct.GCObject*, %struct.GCObject*, %struct.GCObject*, %struct.GCObject*, %struct.Mbuffer, i32, i32, i32, i32, i32, i32, i32 (%struct.lua_State*)*, %struct.TValue, %struct.lua_State*, %struct.UpVal, [9 x %struct.Table*], [17 x %struct.TString*] }
+	%struct.lua_Debug = type { i32, i8*, i8*, i8*, i8*, i32, i32, i32, i32, [60 x i8], i32 }
+	%struct.lua_State = type { %struct.GCObject*, i8, i8, i8, %struct.TValue*, %struct.TValue*, %struct.global_State*, %struct.CallInfo*, i32*, %struct.TValue*, %struct.TValue*, %struct.CallInfo*, %struct.CallInfo*, i32, i32, i16, i16, i8, i8, i32, i32, void (%struct.lua_State*, %struct.lua_Debug*)*, %struct.TValue, %struct.TValue, %struct.GCObject*, %struct.GCObject*, %struct.lua_longjmp*, i32 }
+	%struct.lua_longjmp = type { %struct.lua_longjmp*, [18 x i32], i32 }
+	%struct.stringtable = type { %struct.GCObject**, i32, i32 }
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (%struct.lua_State*)* @os_clock to i8*)], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define i32 @os_clock(%struct.lua_State* nocapture %L) nounwind ssp {
+entry:
+	%0 = tail call i32 @"\01_clock$UNIX2003"() nounwind		; <i32> [#uses=1]
+	%1 = uitofp i32 %0 to double		; <double> [#uses=1]
+	%2 = fdiv double %1, 1.000000e+06		; <double> [#uses=1]
+	%3 = getelementptr %struct.lua_State* %L, i32 0, i32 4		; <%struct.TValue**> [#uses=3]
+	%4 = load %struct.TValue** %3, align 4		; <%struct.TValue*> [#uses=2]
+	%5 = getelementptr %struct.TValue* %4, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	store double %2, double* %5, align 4
+	%6 = getelementptr %struct.TValue* %4, i32 0, i32 1		; <i32*> [#uses=1]
+	store i32 3, i32* %6, align 4
+	%7 = load %struct.TValue** %3, align 4		; <%struct.TValue*> [#uses=1]
+	%8 = getelementptr %struct.TValue* %7, i32 1		; <%struct.TValue*> [#uses=1]
+	store %struct.TValue* %8, %struct.TValue** %3, align 4
+	ret i32 1
+}
+
+declare i32 @"\01_clock$UNIX2003"()
diff --git a/test/CodeGen/X86/coalescer-remat.ll b/test/CodeGen/X86/coalescer-remat.ll
new file mode 100644
index 0000000..4db520f
--- /dev/null
+++ b/test/CodeGen/X86/coalescer-remat.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep xor | count 3
+
+@val = internal global i64 0		; <i64*> [#uses=1]
+@"\01LC" = internal constant [7 x i8] c"0x%lx\0A\00"		; <[7 x i8]*> [#uses=1]
+
+define i32 @main() nounwind {
+entry:
+	%0 = tail call i64 @llvm.atomic.cmp.swap.i64.p0i64(i64* @val, i64 0, i64 1)		; <i64> [#uses=1]
+	%1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([7 x i8]* @"\01LC", i32 0, i64 0), i64 %0) nounwind		; <i32> [#uses=0]
+	ret i32 0
+}
+
+declare i64 @llvm.atomic.cmp.swap.i64.p0i64(i64*, i64, i64) nounwind
+
+declare i32 @printf(i8*, ...) nounwind
diff --git a/test/CodeGen/X86/code_placement.ll b/test/CodeGen/X86/code_placement.ll
new file mode 100644
index 0000000..9747183
--- /dev/null
+++ b/test/CodeGen/X86/code_placement.ll
@@ -0,0 +1,136 @@
+; RUN: llc -march=x86 < %s | FileCheck %s
+
+@Te0 = external global [256 x i32]		; <[256 x i32]*> [#uses=5]
+@Te1 = external global [256 x i32]		; <[256 x i32]*> [#uses=4]
+@Te3 = external global [256 x i32]		; <[256 x i32]*> [#uses=2]
+
+define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r) nounwind ssp {
+entry:
+	%0 = load i32* %rk, align 4		; <i32> [#uses=1]
+	%1 = getelementptr i32* %rk, i64 1		; <i32*> [#uses=1]
+	%2 = load i32* %1, align 4		; <i32> [#uses=1]
+	%tmp15 = add i32 %r, -1		; <i32> [#uses=1]
+	%tmp.16 = zext i32 %tmp15 to i64		; <i64> [#uses=2]
+	br label %bb
+; CHECK: jmp
+; CHECK-NEXT: align
+
+bb:		; preds = %bb1, %entry
+	%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %bb1 ]		; <i64> [#uses=3]
+	%s1.0 = phi i32 [ %2, %entry ], [ %56, %bb1 ]		; <i32> [#uses=2]
+	%s0.0 = phi i32 [ %0, %entry ], [ %43, %bb1 ]		; <i32> [#uses=2]
+	%tmp18 = shl i64 %indvar, 4		; <i64> [#uses=4]
+	%rk26 = bitcast i32* %rk to i8*		; <i8*> [#uses=6]
+	%3 = lshr i32 %s0.0, 24		; <i32> [#uses=1]
+	%4 = zext i32 %3 to i64		; <i64> [#uses=1]
+	%5 = getelementptr [256 x i32]* @Te0, i64 0, i64 %4		; <i32*> [#uses=1]
+	%6 = load i32* %5, align 4		; <i32> [#uses=1]
+	%7 = lshr i32 %s1.0, 16		; <i32> [#uses=1]
+	%8 = and i32 %7, 255		; <i32> [#uses=1]
+	%9 = zext i32 %8 to i64		; <i64> [#uses=1]
+	%10 = getelementptr [256 x i32]* @Te1, i64 0, i64 %9		; <i32*> [#uses=1]
+	%11 = load i32* %10, align 4		; <i32> [#uses=1]
+	%ctg2.sum2728 = or i64 %tmp18, 8		; <i64> [#uses=1]
+	%12 = getelementptr i8* %rk26, i64 %ctg2.sum2728		; <i8*> [#uses=1]
+	%13 = bitcast i8* %12 to i32*		; <i32*> [#uses=1]
+	%14 = load i32* %13, align 4		; <i32> [#uses=1]
+	%15 = xor i32 %11, %6		; <i32> [#uses=1]
+	%16 = xor i32 %15, %14		; <i32> [#uses=3]
+	%17 = lshr i32 %s1.0, 24		; <i32> [#uses=1]
+	%18 = zext i32 %17 to i64		; <i64> [#uses=1]
+	%19 = getelementptr [256 x i32]* @Te0, i64 0, i64 %18		; <i32*> [#uses=1]
+	%20 = load i32* %19, align 4		; <i32> [#uses=1]
+	%21 = and i32 %s0.0, 255		; <i32> [#uses=1]
+	%22 = zext i32 %21 to i64		; <i64> [#uses=1]
+	%23 = getelementptr [256 x i32]* @Te3, i64 0, i64 %22		; <i32*> [#uses=1]
+	%24 = load i32* %23, align 4		; <i32> [#uses=1]
+	%ctg2.sum2930 = or i64 %tmp18, 12		; <i64> [#uses=1]
+	%25 = getelementptr i8* %rk26, i64 %ctg2.sum2930		; <i8*> [#uses=1]
+	%26 = bitcast i8* %25 to i32*		; <i32*> [#uses=1]
+	%27 = load i32* %26, align 4		; <i32> [#uses=1]
+	%28 = xor i32 %24, %20		; <i32> [#uses=1]
+	%29 = xor i32 %28, %27		; <i32> [#uses=4]
+	%30 = lshr i32 %16, 24		; <i32> [#uses=1]
+	%31 = zext i32 %30 to i64		; <i64> [#uses=1]
+	%32 = getelementptr [256 x i32]* @Te0, i64 0, i64 %31		; <i32*> [#uses=1]
+	%33 = load i32* %32, align 4		; <i32> [#uses=2]
+	%exitcond = icmp eq i64 %indvar, %tmp.16		; <i1> [#uses=1]
+	br i1 %exitcond, label %bb2, label %bb1
+
+bb1:		; preds = %bb
+	%ctg2.sum31 = add i64 %tmp18, 16		; <i64> [#uses=1]
+	%34 = getelementptr i8* %rk26, i64 %ctg2.sum31		; <i8*> [#uses=1]
+	%35 = bitcast i8* %34 to i32*		; <i32*> [#uses=1]
+	%36 = lshr i32 %29, 16		; <i32> [#uses=1]
+	%37 = and i32 %36, 255		; <i32> [#uses=1]
+	%38 = zext i32 %37 to i64		; <i64> [#uses=1]
+	%39 = getelementptr [256 x i32]* @Te1, i64 0, i64 %38		; <i32*> [#uses=1]
+	%40 = load i32* %39, align 4		; <i32> [#uses=1]
+	%41 = load i32* %35, align 4		; <i32> [#uses=1]
+	%42 = xor i32 %40, %33		; <i32> [#uses=1]
+	%43 = xor i32 %42, %41		; <i32> [#uses=1]
+	%44 = lshr i32 %29, 24		; <i32> [#uses=1]
+	%45 = zext i32 %44 to i64		; <i64> [#uses=1]
+	%46 = getelementptr [256 x i32]* @Te0, i64 0, i64 %45		; <i32*> [#uses=1]
+	%47 = load i32* %46, align 4		; <i32> [#uses=1]
+	%48 = and i32 %16, 255		; <i32> [#uses=1]
+	%49 = zext i32 %48 to i64		; <i64> [#uses=1]
+	%50 = getelementptr [256 x i32]* @Te3, i64 0, i64 %49		; <i32*> [#uses=1]
+	%51 = load i32* %50, align 4		; <i32> [#uses=1]
+	%ctg2.sum32 = add i64 %tmp18, 20		; <i64> [#uses=1]
+	%52 = getelementptr i8* %rk26, i64 %ctg2.sum32		; <i8*> [#uses=1]
+	%53 = bitcast i8* %52 to i32*		; <i32*> [#uses=1]
+	%54 = load i32* %53, align 4		; <i32> [#uses=1]
+	%55 = xor i32 %51, %47		; <i32> [#uses=1]
+	%56 = xor i32 %55, %54		; <i32> [#uses=1]
+	%indvar.next = add i64 %indvar, 1		; <i64> [#uses=1]
+	br label %bb
+
+bb2:		; preds = %bb
+	%tmp10 = shl i64 %tmp.16, 4		; <i64> [#uses=2]
+	%ctg2.sum = add i64 %tmp10, 16		; <i64> [#uses=1]
+	%tmp1213 = getelementptr i8* %rk26, i64 %ctg2.sum		; <i8*> [#uses=1]
+	%57 = bitcast i8* %tmp1213 to i32*		; <i32*> [#uses=1]
+	%58 = and i32 %33, -16777216		; <i32> [#uses=1]
+	%59 = lshr i32 %29, 16		; <i32> [#uses=1]
+	%60 = and i32 %59, 255		; <i32> [#uses=1]
+	%61 = zext i32 %60 to i64		; <i64> [#uses=1]
+	%62 = getelementptr [256 x i32]* @Te1, i64 0, i64 %61		; <i32*> [#uses=1]
+	%63 = load i32* %62, align 4		; <i32> [#uses=1]
+	%64 = and i32 %63, 16711680		; <i32> [#uses=1]
+	%65 = or i32 %64, %58		; <i32> [#uses=1]
+	%66 = load i32* %57, align 4		; <i32> [#uses=1]
+	%67 = xor i32 %65, %66		; <i32> [#uses=2]
+	%68 = lshr i32 %29, 8		; <i32> [#uses=1]
+	%69 = zext i32 %68 to i64		; <i64> [#uses=1]
+	%70 = getelementptr [256 x i32]* @Te0, i64 0, i64 %69		; <i32*> [#uses=1]
+	%71 = load i32* %70, align 4		; <i32> [#uses=1]
+	%72 = and i32 %71, -16777216		; <i32> [#uses=1]
+	%73 = and i32 %16, 255		; <i32> [#uses=1]
+	%74 = zext i32 %73 to i64		; <i64> [#uses=1]
+	%75 = getelementptr [256 x i32]* @Te1, i64 0, i64 %74		; <i32*> [#uses=1]
+	%76 = load i32* %75, align 4		; <i32> [#uses=1]
+	%77 = and i32 %76, 16711680		; <i32> [#uses=1]
+	%78 = or i32 %77, %72		; <i32> [#uses=1]
+	%ctg2.sum25 = add i64 %tmp10, 20		; <i64> [#uses=1]
+	%79 = getelementptr i8* %rk26, i64 %ctg2.sum25		; <i8*> [#uses=1]
+	%80 = bitcast i8* %79 to i32*		; <i32*> [#uses=1]
+	%81 = load i32* %80, align 4		; <i32> [#uses=1]
+	%82 = xor i32 %78, %81		; <i32> [#uses=2]
+	%83 = lshr i32 %67, 24		; <i32> [#uses=1]
+	%84 = trunc i32 %83 to i8		; <i8> [#uses=1]
+	store i8 %84, i8* %out, align 1
+	%85 = lshr i32 %67, 16		; <i32> [#uses=1]
+	%86 = trunc i32 %85 to i8		; <i8> [#uses=1]
+	%87 = getelementptr i8* %out, i64 1		; <i8*> [#uses=1]
+	store i8 %86, i8* %87, align 1
+	%88 = getelementptr i8* %out, i64 4		; <i8*> [#uses=1]
+	%89 = lshr i32 %82, 24		; <i32> [#uses=1]
+	%90 = trunc i32 %89 to i8		; <i8> [#uses=1]
+	store i8 %90, i8* %88, align 1
+	%91 = lshr i32 %82, 16		; <i32> [#uses=1]
+	%92 = trunc i32 %91 to i8		; <i8> [#uses=1]
+	%93 = getelementptr i8* %out, i64 5		; <i8*> [#uses=1]
+	store i8 %92, i8* %93, align 1
+	ret void
+}
diff --git a/test/CodeGen/X86/codegen-dce.ll b/test/CodeGen/X86/codegen-dce.ll
new file mode 100644
index 0000000..d83efaf
--- /dev/null
+++ b/test/CodeGen/X86/codegen-dce.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -march=x86 -stats |& grep {codegen-dce} | grep {Number of dead instructions deleted}
+
+	%struct.anon = type { [3 x double], double, %struct.node*, [64 x %struct.bnode*], [64 x %struct.bnode*] }
+	%struct.bnode = type { i16, double, [3 x double], i32, i32, [3 x double], [3 x double], [3 x double], double, %struct.bnode*, %struct.bnode* }
+	%struct.node = type { i16, double, [3 x double], i32, i32 }
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
+entry:
+	%0 = malloc %struct.anon		; <%struct.anon*> [#uses=2]
+	%1 = getelementptr %struct.anon* %0, i32 0, i32 2		; <%struct.node**> [#uses=1]
+	br label %bb14.i
+
+bb14.i:		; preds = %bb14.i, %entry
+	%i8.0.reg2mem.0.i = phi i32 [ 0, %entry ], [ %2, %bb14.i ]		; <i32> [#uses=1]
+	%2 = add i32 %i8.0.reg2mem.0.i, 1		; <i32> [#uses=2]
+	%exitcond74.i = icmp eq i32 %2, 32		; <i1> [#uses=1]
+	br i1 %exitcond74.i, label %bb32.i, label %bb14.i
+
+bb32.i:		; preds = %bb32.i, %bb14.i
+	%tmp.0.reg2mem.0.i = phi i32 [ %indvar.next63.i, %bb32.i ], [ 0, %bb14.i ]		; <i32> [#uses=1]
+	%indvar.next63.i = add i32 %tmp.0.reg2mem.0.i, 1		; <i32> [#uses=2]
+	%exitcond64.i = icmp eq i32 %indvar.next63.i, 64		; <i1> [#uses=1]
+	br i1 %exitcond64.i, label %bb47.loopexit.i, label %bb32.i
+
+bb.i.i:		; preds = %bb47.loopexit.i
+	unreachable
+
+stepsystem.exit.i:		; preds = %bb47.loopexit.i
+	store %struct.node* null, %struct.node** %1, align 4
+	br label %bb.i6.i
+
+bb.i6.i:		; preds = %bb.i6.i, %stepsystem.exit.i
+	br i1 false, label %bb107.i.i, label %bb.i6.i
+
+bb107.i.i:		; preds = %bb107.i.i, %bb.i6.i
+	%q_addr.0.i.i.in = phi %struct.bnode** [ null, %bb107.i.i ], [ %3, %bb.i6.i ]		; <%struct.bnode**> [#uses=0]
+	br label %bb107.i.i
+
+bb47.loopexit.i:		; preds = %bb32.i
+	%3 = getelementptr %struct.anon* %0, i32 0, i32 4, i32 0		; <%struct.bnode**> [#uses=1]
+	%4 = icmp eq %struct.node* null, null		; <i1> [#uses=1]
+	br i1 %4, label %stepsystem.exit.i, label %bb.i.i
+}
diff --git a/test/CodeGen/X86/codegen-prepare-cast.ll b/test/CodeGen/X86/codegen-prepare-cast.ll
new file mode 100644
index 0000000..2a8ead8
--- /dev/null
+++ b/test/CodeGen/X86/codegen-prepare-cast.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86-64
+; PR4297
+
+target datalayout =
+"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+        %"byte[]" = type { i64, i8* }
+        %"char[][]" = type { i64, %"byte[]"* }
+@.str = external constant [7 x i8]              ; <[7 x i8]*> [#uses=1]
+
+define fastcc i32 @_Dmain(%"char[][]" %unnamed) {
+entry:
+        %tmp = getelementptr [7 x i8]* @.str, i32 0, i32 0              ; <i8*> [#uses=1]
+        br i1 undef, label %foreachbody, label %foreachend
+
+foreachbody:            ; preds = %entry
+        %tmp4 = getelementptr i8* %tmp, i32 undef               ; <i8*> [#uses=1]
+        %tmp5 = load i8* %tmp4          ; <i8> [#uses=0]
+        unreachable
+
+foreachend:             ; preds = %entry
+        ret i32 0
+}
+
diff --git a/test/CodeGen/X86/codegen-prepare-extload.ll b/test/CodeGen/X86/codegen-prepare-extload.ll
new file mode 100644
index 0000000..9f57d53
--- /dev/null
+++ b/test/CodeGen/X86/codegen-prepare-extload.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; rdar://7304838
+
+; CodeGenPrepare should move the zext into the block with the load
+; so that SelectionDAG can select it with the load.
+
+; CHECK: movzbl (%rdi), %eax
+
+define void @foo(i8* %p, i32* %q) {
+entry:
+  %t = load i8* %p
+  %a = icmp slt i8 %t, 20
+  br i1 %a, label %true, label %false
+true:
+  %s = zext i8 %t to i32
+  store i32 %s, i32* %q
+  ret void
+false:
+  ret void
+}
diff --git a/test/CodeGen/X86/codemodel.ll b/test/CodeGen/X86/codemodel.ll
new file mode 100644
index 0000000..b6ca1ce
--- /dev/null
+++ b/test/CodeGen/X86/codemodel.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -code-model=small  | FileCheck -check-prefix CHECK-SMALL %s
+; RUN: llc < %s -code-model=kernel | FileCheck -check-prefix CHECK-KERNEL %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+@data = external global [0 x i32]		; <[0 x i32]*> [#uses=5]
+
+define i32 @foo() nounwind readonly {
+entry:
+; CHECK-SMALL:  foo:
+; CHECK-SMALL:   movl data(%rip), %eax
+; CHECK-KERNEL: foo:
+; CHECK-KERNEL:  movl data, %eax
+	%0 = load i32* getelementptr ([0 x i32]* @data, i64 0, i64 0), align 4		; <i32> [#uses=1]
+	ret i32 %0
+}
+
+define i32 @foo2() nounwind readonly {
+entry:
+; CHECK-SMALL:  foo2:
+; CHECK-SMALL:   movl data+40(%rip), %eax
+; CHECK-KERNEL: foo2:
+; CHECK-KERNEL:  movl data+40, %eax
+	%0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 10), align 4		; <i32> [#uses=1]
+	ret i32 %0
+}
+
+define i32 @foo3() nounwind readonly {
+entry:
+; CHECK-SMALL:  foo3:
+; CHECK-SMALL:   movl data-40(%rip), %eax
+; CHECK-KERNEL: foo3:
+; CHECK-KERNEL:  movq $-40, %rax
+	%0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 -10), align 4		; <i32> [#uses=1]
+	ret i32 %0
+}
+
+define i32 @foo4() nounwind readonly {
+entry:
+; FIXME: We really can use movabsl here!
+; CHECK-SMALL:  foo4:
+; CHECK-SMALL:   movl $16777216, %eax
+; CHECK-SMALL:   movl data(%rax), %eax
+; CHECK-KERNEL: foo4:
+; CHECK-KERNEL:  movl data+16777216, %eax
+	%0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 4194304), align 4		; <i32> [#uses=1]
+	ret i32 %0
+}
+
+define i32 @foo1() nounwind readonly {
+entry:
+; CHECK-SMALL:  foo1:
+; CHECK-SMALL:   movl data+16777212(%rip), %eax
+; CHECK-KERNEL: foo1:
+; CHECK-KERNEL:  movl data+16777212, %eax
+        %0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 4194303), align 4            ; <i32> [#uses=1]
+        ret i32 %0
+}
+define i32 @foo5() nounwind readonly {
+entry:
+; CHECK-SMALL:  foo5:
+; CHECK-SMALL:   movl data-16777216(%rip), %eax
+; CHECK-KERNEL: foo5:
+; CHECK-KERNEL:  movq $-16777216, %rax
+	%0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 -4194304), align 4		; <i32> [#uses=1]
+	ret i32 %0
+}
diff --git a/test/CodeGen/X86/combine-lds.ll b/test/CodeGen/X86/combine-lds.ll
new file mode 100644
index 0000000..b49d081
--- /dev/null
+++ b/test/CodeGen/X86/combine-lds.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep fldl | count 1
+
+define double @doload64(i64 %x) nounwind  {
+	%tmp717 = bitcast i64 %x to double
+	ret double %tmp717
+}
diff --git a/test/CodeGen/X86/combiner-aa-0.ll b/test/CodeGen/X86/combiner-aa-0.ll
new file mode 100644
index 0000000..a61ef7a
--- /dev/null
+++ b/test/CodeGen/X86/combiner-aa-0.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86-64 -combiner-global-alias-analysis -combiner-alias-analysis
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+	%struct.Hash_Key = type { [4 x i32], i32 }
+@g_flipV_hashkey = external global %struct.Hash_Key, align 16		; <%struct.Hash_Key*> [#uses=1]
+
+define void @foo() nounwind {
+	%t0 = load i32* undef, align 16		; <i32> [#uses=1]
+	%t1 = load i32* null, align 4		; <i32> [#uses=1]
+	%t2 = srem i32 %t0, 32		; <i32> [#uses=1]
+	%t3 = shl i32 1, %t2		; <i32> [#uses=1]
+	%t4 = xor i32 %t3, %t1		; <i32> [#uses=1]
+	store i32 %t4, i32* null, align 4
+	%t5 = getelementptr %struct.Hash_Key* @g_flipV_hashkey, i64 0, i32 0, i64 0		; <i32*> [#uses=2]
+	%t6 = load i32* %t5, align 4		; <i32> [#uses=1]
+	%t7 = shl i32 1, undef		; <i32> [#uses=1]
+	%t8 = xor i32 %t7, %t6		; <i32> [#uses=1]
+	store i32 %t8, i32* %t5, align 4
+	unreachable
+}
diff --git a/test/CodeGen/X86/combiner-aa-1.ll b/test/CodeGen/X86/combiner-aa-1.ll
new file mode 100644
index 0000000..58a7129
--- /dev/null
+++ b/test/CodeGen/X86/combiner-aa-1.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s --combiner-alias-analysis --combiner-global-alias-analysis
+; PR4880
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+
+%struct.alst_node = type { %struct.node }
+%struct.arg_node = type { %struct.node, i8*, %struct.alst_node* }
+%struct.arglst_node = type { %struct.alst_node, %struct.arg_node*, %struct.arglst_node* }
+%struct.lam_node = type { %struct.alst_node, %struct.arg_node*, %struct.alst_node* }
+%struct.node = type { i32 (...)**, %struct.node* }
+
+define i32 @._ZN8lam_node18resolve_name_clashEP8arg_nodeP9alst_node._ZNK8lam_nodeeqERK8exp_node._ZN11arglst_nodeD0Ev(%struct.lam_node* %this.this, %struct.arg_node* %outer_arg, %struct.alst_node* %env.cmp, %struct.arglst_node* %this, i32 %functionID) {
+comb_entry:
+  %.SV59 = alloca %struct.node*                   ; <%struct.node**> [#uses=1]
+  %0 = load i32 (...)*** null, align 4            ; <i32 (...)**> [#uses=1]
+  %1 = getelementptr inbounds i32 (...)** %0, i32 3 ; <i32 (...)**> [#uses=1]
+  %2 = load i32 (...)** %1, align 4               ; <i32 (...)*> [#uses=1]
+  store %struct.node* undef, %struct.node** %.SV59
+  %3 = bitcast i32 (...)* %2 to i32 (%struct.node*)* ; <i32 (%struct.node*)*> [#uses=1]
+  %4 = tail call i32 %3(%struct.node* undef)      ; <i32> [#uses=0]
+  unreachable
+}
diff --git a/test/CodeGen/X86/commute-intrinsic.ll b/test/CodeGen/X86/commute-intrinsic.ll
new file mode 100644
index 0000000..d810cb1
--- /dev/null
+++ b/test/CodeGen/X86/commute-intrinsic.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -relocation-model=static | not grep movaps
+
+@a = external global <2 x i64>		; <<2 x i64>*> [#uses=1]
+
+define <2 x i64> @madd(<2 x i64> %b) nounwind  {
+entry:
+	%tmp2 = load <2 x i64>* @a, align 16		; <<2 x i64>> [#uses=1]
+	%tmp6 = bitcast <2 x i64> %b to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp9 = bitcast <2 x i64> %tmp2 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp11 = tail call <4 x i32> @llvm.x86.sse2.pmadd.wd( <8 x i16> %tmp9, <8 x i16> %tmp6 ) nounwind readnone 		; <<4 x i32>> [#uses=1]
+	%tmp14 = bitcast <4 x i32> %tmp11 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %tmp14
+}
+
+declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone 
diff --git a/test/CodeGen/X86/commute-two-addr.ll b/test/CodeGen/X86/commute-two-addr.ll
new file mode 100644
index 0000000..56ea26b
--- /dev/null
+++ b/test/CodeGen/X86/commute-two-addr.ll
@@ -0,0 +1,25 @@
+; The register allocator can commute two-address instructions to avoid
+; insertion of register-register copies.
+
+; Make sure there are only 3 mov's for each testcase
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
+; RUN:   grep {\\\<mov\\\>} | count 6
+
+
+target triple = "i686-pc-linux-gnu"
+@G = external global i32                ; <i32*> [#uses=2]
+
+declare void @ext(i32)
+
+define i32 @add_test(i32 %X, i32 %Y) {
+        %Z = add i32 %X, %Y             ; <i32> [#uses=1]
+        store i32 %Z, i32* @G
+        ret i32 %X
+}
+
+define i32 @xor_test(i32 %X, i32 %Y) {
+        %Z = xor i32 %X, %Y             ; <i32> [#uses=1]
+        store i32 %Z, i32* @G
+        ret i32 %X
+}
+
diff --git a/test/CodeGen/X86/compare-add.ll b/test/CodeGen/X86/compare-add.ll
new file mode 100644
index 0000000..358ee59
--- /dev/null
+++ b/test/CodeGen/X86/compare-add.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 | not grep add
+
+define i1 @X(i32 %X) {
+        %Y = add i32 %X, 14             ; <i32> [#uses=1]
+        %Z = icmp ne i32 %Y, 12345              ; <i1> [#uses=1]
+        ret i1 %Z
+}
+
diff --git a/test/CodeGen/X86/compare-inf.ll b/test/CodeGen/X86/compare-inf.ll
new file mode 100644
index 0000000..2be90c9
--- /dev/null
+++ b/test/CodeGen/X86/compare-inf.ll
@@ -0,0 +1,76 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; Convert oeq and une to ole/oge/ule/uge when comparing with infinity
+; and negative infinity, because those are more efficient on x86.
+
+; CHECK: oeq_inff:
+; CHECK: ucomiss
+; CHECK: jae
+define float @oeq_inff(float %x, float %y) nounwind readonly {
+  %t0 = fcmp oeq float %x, 0x7FF0000000000000
+  %t1 = select i1 %t0, float 1.0, float %y
+  ret float %t1
+}
+
+; CHECK: oeq_inf:
+; CHECK: ucomisd
+; CHECK: jae
+define double @oeq_inf(double %x, double %y) nounwind readonly {
+  %t0 = fcmp oeq double %x, 0x7FF0000000000000
+  %t1 = select i1 %t0, double 1.0, double %y
+  ret double %t1
+}
+
+; CHECK: une_inff:
+; CHECK: ucomiss
+; CHECK: jb
+define float @une_inff(float %x, float %y) nounwind readonly {
+  %t0 = fcmp une float %x, 0x7FF0000000000000
+  %t1 = select i1 %t0, float 1.0, float %y
+  ret float %t1
+}
+
+; CHECK: une_inf:
+; CHECK: ucomisd
+; CHECK: jb
+define double @une_inf(double %x, double %y) nounwind readonly {
+  %t0 = fcmp une double %x, 0x7FF0000000000000
+  %t1 = select i1 %t0, double 1.0, double %y
+  ret double %t1
+}
+
+; CHECK: oeq_neg_inff:
+; CHECK: ucomiss
+; CHECK: jae
+define float @oeq_neg_inff(float %x, float %y) nounwind readonly {
+  %t0 = fcmp oeq float %x, 0xFFF0000000000000
+  %t1 = select i1 %t0, float 1.0, float %y
+  ret float %t1
+}
+
+; CHECK: oeq_neg_inf:
+; CHECK: ucomisd
+; CHECK: jae
+define double @oeq_neg_inf(double %x, double %y) nounwind readonly {
+  %t0 = fcmp oeq double %x, 0xFFF0000000000000
+  %t1 = select i1 %t0, double 1.0, double %y
+  ret double %t1
+}
+
+; CHECK: une_neg_inff:
+; CHECK: ucomiss
+; CHECK: jb
+define float @une_neg_inff(float %x, float %y) nounwind readonly {
+  %t0 = fcmp une float %x, 0xFFF0000000000000
+  %t1 = select i1 %t0, float 1.0, float %y
+  ret float %t1
+}
+
+; CHECK: une_neg_inf:
+; CHECK: ucomisd
+; CHECK: jb
+define double @une_neg_inf(double %x, double %y) nounwind readonly {
+  %t0 = fcmp une double %x, 0xFFF0000000000000
+  %t1 = select i1 %t0, double 1.0, double %y
+  ret double %t1
+}
diff --git a/test/CodeGen/X86/compare_folding.ll b/test/CodeGen/X86/compare_folding.ll
new file mode 100644
index 0000000..84c152d
--- /dev/null
+++ b/test/CodeGen/X86/compare_folding.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86 -mcpu=yonah | \
+; RUN:   grep movsd | count 1
+; RUN: llc < %s -march=x86 -mcpu=yonah | \
+; RUN:   grep ucomisd
+declare i1 @llvm.isunordered.f64(double, double)
+
+define i1 @test1(double %X, double %Y) {
+        %COM = fcmp uno double %X, %Y           ; <i1> [#uses=1]
+        ret i1 %COM
+}
+
diff --git a/test/CodeGen/X86/compiler_used.ll b/test/CodeGen/X86/compiler_used.ll
new file mode 100644
index 0000000..be8de5e
--- /dev/null
+++ b/test/CodeGen/X86/compiler_used.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | grep no_dead_strip | count 1
+; We should have a .no_dead_strip directive for Z but not for X/Y.
+
+@X = internal global i8 4
+@Y = internal global i32 123
+@Z = internal global i8 4
+
+@llvm.used = appending global [1 x i8*] [ i8* @Z ], section "llvm.metadata"
+@llvm.compiler_used = appending global [2 x i8*] [ i8* @X, i8* bitcast (i32* @Y to i8*)], section "llvm.metadata"
diff --git a/test/CodeGen/X86/complex-fca.ll b/test/CodeGen/X86/complex-fca.ll
new file mode 100644
index 0000000..7e7acaa
--- /dev/null
+++ b/test/CodeGen/X86/complex-fca.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 | grep mov | count 2
+
+define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 } %z) nounwind {
+entry:
+	%z8 = extractvalue { x86_fp80, x86_fp80 } %z, 0
+	%z9 = extractvalue { x86_fp80, x86_fp80 } %z, 1
+	%0 = fsub x86_fp80 0xK80000000000000000000, %z9
+	%insert = insertvalue { x86_fp80, x86_fp80 } undef, x86_fp80 %0, 0
+	%insert7 = insertvalue { x86_fp80, x86_fp80 } %insert, x86_fp80 %z8, 1
+	call void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 } %insert7) nounwind
+	ret void
+}
+
+declare void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret, { x86_fp80, x86_fp80 }) nounwind
diff --git a/test/CodeGen/X86/const-select.ll b/test/CodeGen/X86/const-select.ll
new file mode 100644
index 0000000..ca8cc14
--- /dev/null
+++ b/test/CodeGen/X86/const-select.ll
@@ -0,0 +1,22 @@
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin7"
+
+; RUN: llc < %s | grep {LCPI1_0(,%eax,4)}
+define float @f(i32 %x) nounwind readnone {
+entry:
+	%0 = icmp eq i32 %x, 0		; <i1> [#uses=1]
+	%iftmp.0.0 = select i1 %0, float 4.200000e+01, float 2.300000e+01		; <float> [#uses=1]
+	ret float %iftmp.0.0
+}
+
+; RUN: llc < %s | grep {movsbl.*(%e.x,%e.x,4), %eax}
+define signext i8 @test(i8* nocapture %P, double %F) nounwind readonly {
+entry:
+	%0 = fcmp olt double %F, 4.200000e+01		; <i1> [#uses=1]
+	%iftmp.0.0 = select i1 %0, i32 4, i32 0		; <i32> [#uses=1]
+	%1 = getelementptr i8* %P, i32 %iftmp.0.0		; <i8*> [#uses=1]
+	%2 = load i8* %1, align 1		; <i8> [#uses=1]
+	ret i8 %2
+}
+
diff --git a/test/CodeGen/X86/constant-pool-remat-0.ll b/test/CodeGen/X86/constant-pool-remat-0.ll
new file mode 100644
index 0000000..05388f9
--- /dev/null
+++ b/test/CodeGen/X86/constant-pool-remat-0.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86-64 | grep LCPI | count 3
+; RUN: llc < %s -march=x86-64 -stats  -info-output-file - | grep asm-printer | grep 6
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep LCPI | count 3
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats  -info-output-file - | grep asm-printer | grep 12
+
+declare float @qux(float %y)
+
+define float @array(float %a) nounwind {
+  %n = fmul float %a, 9.0
+  %m = call float @qux(float %n)
+  %o = fmul float %m, 9.0
+  ret float %o
+}
diff --git a/test/CodeGen/X86/constant-pool-sharing.ll b/test/CodeGen/X86/constant-pool-sharing.ll
new file mode 100644
index 0000000..c3e97ad
--- /dev/null
+++ b/test/CodeGen/X86/constant-pool-sharing.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; llc should share constant pool entries between this integer vector
+; and this floating-point vector since they have the same encoding.
+
+; CHECK:  LCPI1_0(%rip), %xmm0
+; CHECK:  movaps        %xmm0, (%rdi)
+; CHECK:  movaps        %xmm0, (%rsi)
+
+define void @foo(<4 x i32>* %p, <4 x float>* %q, i1 %t) nounwind {
+entry:
+  br label %loop
+loop:
+  store <4 x i32><i32 1073741824, i32 1073741824, i32 1073741824, i32 1073741824>, <4 x i32>* %p
+  store <4 x float><float 2.0, float 2.0, float 2.0, float 2.0>, <4 x float>* %q
+  br i1 %t, label %loop, label %ret
+ret:
+  ret void
+}
diff --git a/test/CodeGen/X86/constpool.ll b/test/CodeGen/X86/constpool.ll
new file mode 100644
index 0000000..2aac486
--- /dev/null
+++ b/test/CodeGen/X86/constpool.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s 
+; RUN: llc < %s -fast-isel
+; RUN: llc < %s -march=x86-64
+; RUN: llc < %s -fast-isel -march=x86-64
+; PR4466
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.7"
+
+define i32 @main() nounwind {
+entry:
+	%0 = fcmp oeq float undef, 0x7FF0000000000000		; <i1> [#uses=1]
+	%1 = zext i1 %0 to i32		; <i32> [#uses=1]
+	store i32 %1, i32* undef, align 4
+	ret i32 undef
+}
diff --git a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
new file mode 100644
index 0000000..8e38fe3
--- /dev/null
+++ b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86-64 -o %t -stats -info-output-file - | \
+; RUN:   grep {asm-printer} | grep {Number of machine instrs printed} | grep 10
+; RUN: grep {leal	1(\%rsi),} %t
+
+define fastcc zeroext i8 @fullGtU(i32 %i1, i32 %i2, i8* %ptr) nounwind optsize {
+entry:
+  %0 = add i32 %i2, 1           ; <i32> [#uses=1]
+  %1 = sext i32 %0 to i64               ; <i64> [#uses=1]
+  %2 = getelementptr i8* %ptr, i64 %1           ; <i8*> [#uses=1]
+  %3 = load i8* %2, align 1             ; <i8> [#uses=1]
+  %4 = icmp eq i8 0, %3         ; <i1> [#uses=1]
+  br i1 %4, label %bb3, label %bb34
+
+bb3:            ; preds = %entry
+  %5 = add i32 %i2, 4           ; <i32> [#uses=0]
+  %6 = trunc i32 %5 to i8
+  ret i8 %6
+
+bb34:           ; preds = %entry
+  ret i8 0
+}
+
diff --git a/test/CodeGen/X86/copysign-zero.ll b/test/CodeGen/X86/copysign-zero.ll
new file mode 100644
index 0000000..47522d8
--- /dev/null
+++ b/test/CodeGen/X86/copysign-zero.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s | not grep orpd
+; RUN: llc < %s | grep andpd | count 1
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin8"
+
+define double @test(double %X) nounwind  {
+entry:
+	%tmp2 = tail call double @copysign( double 0.000000e+00, double %X ) nounwind readnone 		; <double> [#uses=1]
+	ret double %tmp2
+}
+
+declare double @copysign(double, double) nounwind readnone 
+
diff --git a/test/CodeGen/X86/critical-edge-split.ll b/test/CodeGen/X86/critical-edge-split.ll
new file mode 100644
index 0000000..4fe554d
--- /dev/null
+++ b/test/CodeGen/X86/critical-edge-split.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -tailcallopt=false -stats -info-output-file - | grep asm-printer | grep 31
+
+	%CC = type { %Register }
+	%II = type { %"struct.XX::II::$_74" }
+	%JITFunction = type %YYValue* (%CC*, %YYValue**)
+	%YYValue = type { i32 (...)** }
+	%Register = type { %"struct.XX::ByteCodeFeatures" }
+	%"struct.XX::ByteCodeFeatures" = type { i32 }
+	%"struct.XX::II::$_74" = type { i8* }
+@llvm.used = appending global [1 x i8*] [ i8* bitcast (%JITFunction* @loop to i8*) ], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define %YYValue* @loop(%CC*, %YYValue**) nounwind {
+; <label>:2
+	%3 = getelementptr %CC* %0, i32 -9		; <%CC*> [#uses=1]
+	%4 = bitcast %CC* %3 to %YYValue**		; <%YYValue**> [#uses=2]
+	%5 = load %YYValue** %4		; <%YYValue*> [#uses=3]
+	%unique_1.i = ptrtoint %YYValue* %5 to i1		; <i1> [#uses=1]
+	br i1 %unique_1.i, label %loop, label %11
+
+loop:		; preds = %6, %2
+	%.1 = phi %YYValue* [ inttoptr (i32 1 to %YYValue*), %2 ], [ %intAddValue, %6 ]		; <%YYValue*> [#uses=3]
+	%immediateCmp = icmp slt %YYValue* %.1, %5		; <i1> [#uses=1]
+	br i1 %immediateCmp, label %6, label %8
+
+; <label>:6		; preds = %loop
+	%lhsInt = ptrtoint %YYValue* %.1 to i32		; <i32> [#uses=1]
+	%7 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %lhsInt, i32 2)		; <{ i32, i1 }> [#uses=2]
+	%intAdd = extractvalue { i32, i1 } %7, 0		; <i32> [#uses=1]
+	%intAddValue = inttoptr i32 %intAdd to %YYValue*		; <%YYValue*> [#uses=1]
+	%intAddOverflow = extractvalue { i32, i1 } %7, 1		; <i1> [#uses=1]
+	br i1 %intAddOverflow, label %.loopexit, label %loop
+
+; <label>:8		; preds = %loop
+	ret %YYValue* inttoptr (i32 10 to %YYValue*)
+
+.loopexit:		; preds = %6
+	%9 = bitcast %CC* %0 to %YYValue**		; <%YYValue**> [#uses=1]
+	store %YYValue* %.1, %YYValue** %9
+	store %YYValue* %5, %YYValue** %4
+	%10 = call fastcc %YYValue* @foobar(%II* inttoptr (i32 3431104 to %II*), %CC* %0, %YYValue** %1)		; <%YYValue*> [#uses=1]
+	ret %YYValue* %10
+
+; <label>:11		; preds = %2
+	%12 = call fastcc %YYValue* @foobar(%II* inttoptr (i32 3431080 to %II*), %CC* %0, %YYValue** %1)		; <%YYValue*> [#uses=1]
+	ret %YYValue* %12
+}
+
+declare fastcc %YYValue* @foobar(%II*, %CC*, %YYValue**) nounwind
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind
diff --git a/test/CodeGen/X86/cstring.ll b/test/CodeGen/X86/cstring.ll
new file mode 100644
index 0000000..5b5a766
--- /dev/null
+++ b/test/CodeGen/X86/cstring.ll
@@ -0,0 +1,4 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin | not grep comm
+; rdar://6479858
+
+@str1 = internal constant [1 x i8] zeroinitializer
diff --git a/test/CodeGen/X86/dag-rauw-cse.ll b/test/CodeGen/X86/dag-rauw-cse.ll
new file mode 100644
index 0000000..edcfeb7
--- /dev/null
+++ b/test/CodeGen/X86/dag-rauw-cse.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86 | grep {orl	\$1}
+; PR3018
+
+define i32 @test(i32 %A) nounwind {
+  %B = or i32 %A, 1
+  %C = or i32 %B, 1
+  %D = and i32 %C, 7057
+  ret i32 %D
+}
diff --git a/test/CodeGen/X86/dagcombine-buildvector.ll b/test/CodeGen/X86/dagcombine-buildvector.ll
new file mode 100644
index 0000000..c0ee2ac
--- /dev/null
+++ b/test/CodeGen/X86/dagcombine-buildvector.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86 -mcpu=penryn -disable-mmx -o %t
+; RUN: grep unpcklpd %t | count 1
+; RUN: grep movapd %t | count 1
+; RUN: grep movaps %t | count 1
+
+; Shows a dag combine bug that will generate an illegal build vector
+; with v2i64 build_vector i32, i32.
+
+define void @test(<2 x double>* %dst, <4 x double> %src) nounwind {
+entry:
+        %tmp7.i = shufflevector <4 x double> %src, <4 x double> undef, <2 x i32> < i32 0, i32 2 >
+        store <2 x double> %tmp7.i, <2 x double>* %dst
+        ret void
+}
+
+define void @test2(<4 x i16>* %src, <4 x i32>* %dest) nounwind {
+entry:
+        %tmp1 = load <4 x i16>* %src
+        %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+        %0 = tail call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp3)
+        store <4 x i32> %0, <4 x i32>* %dest
+        ret void
+}
+
+declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
diff --git a/test/CodeGen/X86/dagcombine-cse.ll b/test/CodeGen/X86/dagcombine-cse.ll
new file mode 100644
index 0000000..c3c7990
--- /dev/null
+++ b/test/CodeGen/X86/dagcombine-cse.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -stats |& grep asm-printer | grep 14
+
+define i32 @t(i8* %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) nounwind  {
+entry:
+	%tmp7 = mul i32 %idxY, %ref_frame_stride		; <i32> [#uses=2]
+	%tmp9 = add i32 %tmp7, %idxX		; <i32> [#uses=1]
+	%tmp11 = getelementptr i8* %ref_frame_ptr, i32 %tmp9		; <i8*> [#uses=1]
+	%tmp1112 = bitcast i8* %tmp11 to i32*		; <i32*> [#uses=1]
+	%tmp13 = load i32* %tmp1112, align 4		; <i32> [#uses=1]
+	%tmp18 = add i32 %idxX, 4		; <i32> [#uses=1]
+	%tmp20.sum = add i32 %tmp18, %tmp7		; <i32> [#uses=1]
+	%tmp21 = getelementptr i8* %ref_frame_ptr, i32 %tmp20.sum		; <i8*> [#uses=1]
+	%tmp2122 = bitcast i8* %tmp21 to i16*		; <i16*> [#uses=1]
+	%tmp23 = load i16* %tmp2122, align 2		; <i16> [#uses=1]
+	%tmp2425 = zext i16 %tmp23 to i64		; <i64> [#uses=1]
+	%tmp26 = shl i64 %tmp2425, 32		; <i64> [#uses=1]
+	%tmp2728 = zext i32 %tmp13 to i64		; <i64> [#uses=1]
+	%tmp29 = or i64 %tmp26, %tmp2728		; <i64> [#uses=1]
+	%tmp3454 = bitcast i64 %tmp29 to double		; <double> [#uses=1]
+	%tmp35 = insertelement <2 x double> undef, double %tmp3454, i32 0		; <<2 x double>> [#uses=1]
+	%tmp36 = insertelement <2 x double> %tmp35, double 0.000000e+00, i32 1		; <<2 x double>> [#uses=1]
+	%tmp42 = bitcast <2 x double> %tmp36 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp43 = shufflevector <8 x i16> %tmp42, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7 >		; <<8 x i16>> [#uses=1]
+	%tmp47 = bitcast <8 x i16> %tmp43 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp48 = extractelement <4 x i32> %tmp47, i32 0		; <i32> [#uses=1]
+	ret i32 %tmp48
+}
diff --git a/test/CodeGen/X86/darwin-bzero.ll b/test/CodeGen/X86/darwin-bzero.ll
new file mode 100644
index 0000000..a9573cf
--- /dev/null
+++ b/test/CodeGen/X86/darwin-bzero.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10 | grep __bzero
+
+declare void @llvm.memset.i32(i8*, i8, i32, i32)
+
+define void @foo(i8* %p, i32 %len) {
+  call void @llvm.memset.i32(i8* %p, i8 0, i32 %len, i32 1)
+  ret void
+}
diff --git a/test/CodeGen/X86/darwin-no-dead-strip.ll b/test/CodeGen/X86/darwin-no-dead-strip.ll
new file mode 100644
index 0000000..452d1f8
--- /dev/null
+++ b/test/CodeGen/X86/darwin-no-dead-strip.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s | grep no_dead_strip
+
+target datalayout = "e-p:32:32"
+target triple = "i686-apple-darwin8.7.2"
+@x = weak global i32 0          ; <i32*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [ i8* bitcast (i32* @x to i8*) ]                ; <[1 x i8*]*> [#uses=0]
+
diff --git a/test/CodeGen/X86/darwin-quote.ll b/test/CodeGen/X86/darwin-quote.ll
new file mode 100644
index 0000000..8fddc11
--- /dev/null
+++ b/test/CodeGen/X86/darwin-quote.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin  | FileCheck %s
+
+
+define internal i64 @baz() nounwind {
+  %tmp = load i64* @"+x"
+  ret i64 %tmp
+; CHECK: _baz:
+; CHECK:    movl "L_+x$non_lazy_ptr", %ecx
+}
+
+
+@"+x" = external global i64
+
+; CHECK: "L_+x$non_lazy_ptr":
+; CHECK:	.indirect_symbol "_+x"
diff --git a/test/CodeGen/X86/darwin-stub.ll b/test/CodeGen/X86/darwin-stub.ll
new file mode 100644
index 0000000..b4d2e1a
--- /dev/null
+++ b/test/CodeGen/X86/darwin-stub.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin  |     grep stub
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | not grep stub
+
+@"\01LC" = internal constant [13 x i8] c"Hello World!\00"		; <[13 x i8]*> [#uses=1]
+
+define i32 @main() nounwind {
+entry:
+	%0 = tail call i32 @puts(i8* getelementptr ([13 x i8]* @"\01LC", i32 0, i32 0)) nounwind		; <i32> [#uses=0]
+	ret i32 0
+}
+
+declare i32 @puts(i8*)
diff --git a/test/CodeGen/X86/dg.exp b/test/CodeGen/X86/dg.exp
new file mode 100644
index 0000000..629a147
--- /dev/null
+++ b/test/CodeGen/X86/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target X86] } {
+  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+}
diff --git a/test/CodeGen/X86/discontiguous-loops.ll b/test/CodeGen/X86/discontiguous-loops.ll
new file mode 100644
index 0000000..479c450
--- /dev/null
+++ b/test/CodeGen/X86/discontiguous-loops.ll
@@ -0,0 +1,72 @@
+; RUN: llc -verify-loop-info -verify-dom-info -march=x86-64 < %s
+; PR5243
+
+@.str96 = external constant [37 x i8], align 8    ; <[37 x i8]*> [#uses=1]
+
+define void @foo() nounwind {
+bb:
+  br label %ybb1
+
+ybb1:                                              ; preds = %yybb13, %xbb6, %bb
+  switch i32 undef, label %bb18 [
+    i32 150, label %ybb2
+    i32 151, label %bb17
+    i32 152, label %bb19
+    i32 157, label %ybb8
+  ]
+
+ybb2:                                              ; preds = %ybb1
+  %tmp = icmp eq i8** undef, null                 ; <i1> [#uses=1]
+  br i1 %tmp, label %bb3, label %xbb6
+
+bb3:                                              ; preds = %ybb2
+  unreachable
+
+xbb4:                                              ; preds = %xbb6
+  store i32 0, i32* undef, align 8
+  br i1 undef, label %xbb6, label %bb5
+
+bb5:                                              ; preds = %xbb4
+  call fastcc void @decl_mode_check_failed() nounwind
+  unreachable
+
+xbb6:                                              ; preds = %xbb4, %ybb2
+  %tmp7 = icmp slt i32 undef, 0                   ; <i1> [#uses=1]
+  br i1 %tmp7, label %xbb4, label %ybb1
+
+ybb8:                                              ; preds = %ybb1
+  %tmp9 = icmp eq i8** undef, null                ; <i1> [#uses=1]
+  br i1 %tmp9, label %bb10, label %ybb12
+
+bb10:                                             ; preds = %ybb8
+  %tmp11 = load i8** undef, align 8               ; <i8*> [#uses=1]
+  call void (i8*, ...)* @fatal(i8* getelementptr inbounds ([37 x i8]* @.str96, i64 0, i64 0), i8* %tmp11) nounwind
+  unreachable
+
+ybb12:                                             ; preds = %ybb8
+  br i1 undef, label %bb15, label %ybb13
+
+ybb13:                                             ; preds = %ybb12
+  %tmp14 = icmp sgt i32 undef, 0                  ; <i1> [#uses=1]
+  br i1 %tmp14, label %bb16, label %ybb1
+
+bb15:                                             ; preds = %ybb12
+  call void (i8*, ...)* @fatal(i8* getelementptr inbounds ([37 x i8]* @.str96, i64 0, i64 0), i8* undef) nounwind
+  unreachable
+
+bb16:                                             ; preds = %ybb13
+  unreachable
+
+bb17:                                             ; preds = %ybb1
+  unreachable
+
+bb18:                                             ; preds = %ybb1
+  unreachable
+
+bb19:                                             ; preds = %ybb1
+  unreachable
+}
+
+declare void @fatal(i8*, ...)
+
+declare fastcc void @decl_mode_check_failed() nounwind
diff --git a/test/CodeGen/X86/div_const.ll b/test/CodeGen/X86/div_const.ll
new file mode 100644
index 0000000..f0ada41
--- /dev/null
+++ b/test/CodeGen/X86/div_const.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=x86 | grep 365384439
+
+define i32 @f9188_mul365384439_shift27(i32 %A) {
+        %tmp1 = udiv i32 %A, 1577682821         ; <i32> [#uses=1]
+        ret i32 %tmp1
+}
+
diff --git a/test/CodeGen/X86/divrem.ll b/test/CodeGen/X86/divrem.ll
new file mode 100644
index 0000000..e86b52f
--- /dev/null
+++ b/test/CodeGen/X86/divrem.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -march=x86-64 | grep div | count 8
+
+define void @si64(i64 %x, i64 %y, i64* %p, i64* %q) {
+	%r = sdiv i64 %x, %y
+	%t = srem i64 %x, %y
+	store i64 %r, i64* %p
+	store i64 %t, i64* %q
+	ret void
+}
+define void @si32(i32 %x, i32 %y, i32* %p, i32* %q) {
+	%r = sdiv i32 %x, %y
+	%t = srem i32 %x, %y
+	store i32 %r, i32* %p
+	store i32 %t, i32* %q
+	ret void
+}
+define void @si16(i16 %x, i16 %y, i16* %p, i16* %q) {
+	%r = sdiv i16 %x, %y
+	%t = srem i16 %x, %y
+	store i16 %r, i16* %p
+	store i16 %t, i16* %q
+	ret void
+}
+define void @si8(i8 %x, i8 %y, i8* %p, i8* %q) {
+	%r = sdiv i8 %x, %y
+	%t = srem i8 %x, %y
+	store i8 %r, i8* %p
+	store i8 %t, i8* %q
+	ret void
+}
+define void @ui64(i64 %x, i64 %y, i64* %p, i64* %q) {
+	%r = udiv i64 %x, %y
+	%t = urem i64 %x, %y
+	store i64 %r, i64* %p
+	store i64 %t, i64* %q
+	ret void
+}
+define void @ui32(i32 %x, i32 %y, i32* %p, i32* %q) {
+	%r = udiv i32 %x, %y
+	%t = urem i32 %x, %y
+	store i32 %r, i32* %p
+	store i32 %t, i32* %q
+	ret void
+}
+define void @ui16(i16 %x, i16 %y, i16* %p, i16* %q) {
+	%r = udiv i16 %x, %y
+	%t = urem i16 %x, %y
+	store i16 %r, i16* %p
+	store i16 %t, i16* %q
+	ret void
+}
+define void @ui8(i8 %x, i8 %y, i8* %p, i8* %q) {
+	%r = udiv i8 %x, %y
+	%t = urem i8 %x, %y
+	store i8 %r, i8* %p
+	store i8 %t, i8* %q
+	ret void
+}
diff --git a/test/CodeGen/X86/dll-linkage.ll b/test/CodeGen/X86/dll-linkage.ll
new file mode 100644
index 0000000..c634c7e
--- /dev/null
+++ b/test/CodeGen/X86/dll-linkage.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=i386-pc-mingw32 | FileCheck %s
+
+declare dllimport void @foo()
+
+define void @bar() nounwind {
+; CHECK: call	*__imp__foo
+  call void @foo()
+  ret void
+}
diff --git a/test/CodeGen/X86/dollar-name.ll b/test/CodeGen/X86/dollar-name.ll
new file mode 100644
index 0000000..3b26319
--- /dev/null
+++ b/test/CodeGen/X86/dollar-name.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux | FileCheck %s
+; PR1339
+
+@"$bar" = global i32 zeroinitializer
+@"$qux" = external global i32
+
+define i32 @"$foo"() nounwind {
+; CHECK: movl	($bar),
+; CHECK: addl	($qux),
+; CHECK: call	($hen)
+  %m = load i32* @"$bar"
+  %n = load i32* @"$qux"
+  %t = add i32 %m, %n
+  %u = call i32 @"$hen"(i32 %t)
+  ret i32 %u
+}
+
+declare i32 @"$hen"(i32 %a)
diff --git a/test/CodeGen/X86/dyn-stackalloc.ll b/test/CodeGen/X86/dyn-stackalloc.ll
new file mode 100644
index 0000000..1df0920
--- /dev/null
+++ b/test/CodeGen/X86/dyn-stackalloc.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=x86 | not egrep {\\\$4294967289|-7}
+; RUN: llc < %s -march=x86 | egrep {\\\$4294967280|-16}
+; RUN: llc < %s -march=x86-64 | grep {\\-16}
+
+define void @t() nounwind {
+A:
+	br label %entry
+
+entry:
+	%m1 = alloca i32, align 4
+	%m2 = alloca [7 x i8], align 16
+	call void @s( i32* %m1, [7 x i8]* %m2 )
+	ret void
+}
+
+declare void @s(i32*, [7 x i8]*)
diff --git a/test/CodeGen/X86/empty-struct-return-type.ll b/test/CodeGen/X86/empty-struct-return-type.ll
new file mode 100644
index 0000000..34cd5d9
--- /dev/null
+++ b/test/CodeGen/X86/empty-struct-return-type.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86-64 | grep call
+; PR4688
+
+; Return types can be empty structs, which can be awkward.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @_ZN15QtSharedPointer22internalSafetyCheckAddEPVKv(i8* %ptr) {
+entry:
+	%0 = call { } @_ZNK5QHashIPv15QHashDummyValueE5valueERKS0_(i8** undef)		; <{ }> [#uses=0]
+        ret void
+}
+
+declare hidden { } @_ZNK5QHashIPv15QHashDummyValueE5valueERKS0_(i8** nocapture) nounwind
diff --git a/test/CodeGen/X86/epilogue.ll b/test/CodeGen/X86/epilogue.ll
new file mode 100644
index 0000000..52dcb61
--- /dev/null
+++ b/test/CodeGen/X86/epilogue.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86 | not grep lea
+; RUN: llc < %s -march=x86 | grep {movl	%ebp}
+
+declare void @bar(<2 x i64>* %n)
+
+define void @foo(i64 %h) {
+  %k = trunc i64 %h to i32
+  %p = alloca <2 x i64>, i32 %k
+  call void @bar(<2 x i64>* %p)
+  ret void
+}
diff --git a/test/CodeGen/X86/extend.ll b/test/CodeGen/X86/extend.ll
new file mode 100644
index 0000000..9553b1b
--- /dev/null
+++ b/test/CodeGen/X86/extend.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | grep movzx | count 1
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | grep movsx | count 1
+
+@G1 = internal global i8 0              ; <i8*> [#uses=1]
+@G2 = internal global i8 0              ; <i8*> [#uses=1]
+
+define i16 @test1() {
+        %tmp.0 = load i8* @G1           ; <i8> [#uses=1]
+        %tmp.3 = zext i8 %tmp.0 to i16          ; <i16> [#uses=1]
+        ret i16 %tmp.3
+}
+
+define i16 @test2() {
+        %tmp.0 = load i8* @G2           ; <i8> [#uses=1]
+        %tmp.3 = sext i8 %tmp.0 to i16          ; <i16> [#uses=1]
+        ret i16 %tmp.3
+}
+
diff --git a/test/CodeGen/X86/extern_weak.ll b/test/CodeGen/X86/extern_weak.ll
new file mode 100644
index 0000000..01e32aa
--- /dev/null
+++ b/test/CodeGen/X86/extern_weak.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=i686-apple-darwin | grep weak_reference | count 2
+
+@Y = global i32 (i8*)* @X               ; <i32 (i8*)**> [#uses=0]
+
+declare extern_weak i32 @X(i8*)
+
+define void @bar() {
+        tail call void (...)* @foo( )
+        ret void
+}
+
+declare extern_weak void @foo(...)
+
diff --git a/test/CodeGen/X86/extmul128.ll b/test/CodeGen/X86/extmul128.ll
new file mode 100644
index 0000000..9b59829
--- /dev/null
+++ b/test/CodeGen/X86/extmul128.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86-64 | grep mul | count 2
+
+define i128 @i64_sext_i128(i64 %a, i64 %b) {
+  %aa = sext i64 %a to i128
+  %bb = sext i64 %b to i128
+  %cc = mul i128 %aa, %bb
+  ret i128 %cc
+}
+define i128 @i64_zext_i128(i64 %a, i64 %b) {
+  %aa = zext i64 %a to i128
+  %bb = zext i64 %b to i128
+  %cc = mul i128 %aa, %bb
+  ret i128 %cc
+}
diff --git a/test/CodeGen/X86/extmul64.ll b/test/CodeGen/X86/extmul64.ll
new file mode 100644
index 0000000..9e20ded
--- /dev/null
+++ b/test/CodeGen/X86/extmul64.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 | grep mul | count 2
+
+define i64 @i32_sext_i64(i32 %a, i32 %b) {
+  %aa = sext i32 %a to i64
+  %bb = sext i32 %b to i64
+  %cc = mul i64 %aa, %bb
+  ret i64 %cc
+}
+define i64 @i32_zext_i64(i32 %a, i32 %b) {
+  %aa = zext i32 %a to i64
+  %bb = zext i32 %b to i64
+  %cc = mul i64 %aa, %bb
+  ret i64 %cc
+}
diff --git a/test/CodeGen/X86/extract-combine.ll b/test/CodeGen/X86/extract-combine.ll
new file mode 100644
index 0000000..2040e87
--- /dev/null
+++ b/test/CodeGen/X86/extract-combine.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86-64 -mcpu=core2 -o %t
+; RUN: not grep unpcklps %t
+
+define i32 @foo() nounwind {
+entry:
+	%tmp74.i25762 = shufflevector <16 x float> zeroinitializer, <16 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>		; <<16 x float>> [#uses=1]
+	%tmp518 = shufflevector <16 x float> %tmp74.i25762, <16 x float> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>		; <<4 x float>> [#uses=1]
+	%movss.i25611 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp518, <4 x i32> <i32 4, i32 1, i32 2, i32 3>		; <<4 x float>> [#uses=1]
+	%conv3.i25615 = shufflevector <4 x float> %movss.i25611, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>		; <<4 x float>> [#uses=1]
+	%sub.i25620 = fsub <4 x float> %conv3.i25615, zeroinitializer		; <<4 x float>> [#uses=1]
+	%mul.i25621 = fmul <4 x float> zeroinitializer, %sub.i25620		; <<4 x float>> [#uses=1]
+	%add.i25622 = fadd <4 x float> zeroinitializer, %mul.i25621		; <<4 x float>> [#uses=1]
+	store <4 x float> %add.i25622, <4 x float>* null
+	unreachable
+}
diff --git a/test/CodeGen/X86/extract-extract.ll b/test/CodeGen/X86/extract-extract.ll
new file mode 100644
index 0000000..ad79ab9
--- /dev/null
+++ b/test/CodeGen/X86/extract-extract.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86 >/dev/null
+; PR4699
+
+; Handle this extractvalue-of-extractvalue case without getting in
+; trouble with CSE in DAGCombine.
+
+        %cc = type { %crd }
+        %cr = type { i32 }
+        %crd = type { i64, %cr* }
+        %pp = type { %cc }
+
+define fastcc void @foo(%pp* nocapture byval %p_arg) {
+entry:
+        %tmp2 = getelementptr %pp* %p_arg, i64 0, i32 0         ; <%cc*> [#uses=
+        %tmp3 = load %cc* %tmp2         ; <%cc> [#uses=1]
+        %tmp34 = extractvalue %cc %tmp3, 0              ; <%crd> [#uses=1]
+        %tmp345 = extractvalue %crd %tmp34, 0           ; <i64> [#uses=1]
+        %.ptr.i = load %cr** undef              ; <%cr*> [#uses=0]
+        %tmp15.i = shl i64 %tmp345, 3           ; <i64> [#uses=0]
+        store %cr* undef, %cr** undef
+        ret void
+}
+
+
diff --git a/test/CodeGen/X86/extractelement-from-arg.ll b/test/CodeGen/X86/extractelement-from-arg.ll
new file mode 100644
index 0000000..4ea37f0
--- /dev/null
+++ b/test/CodeGen/X86/extractelement-from-arg.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse2
+
+define void @test(float* %R, <4 x float> %X) nounwind {
+	%tmp = extractelement <4 x float> %X, i32 3
+	store float %tmp, float* %R
+	ret void
+}
diff --git a/test/CodeGen/X86/extractelement-load.ll b/test/CodeGen/X86/extractelement-load.ll
new file mode 100644
index 0000000..ee57d9b
--- /dev/null
+++ b/test/CodeGen/X86/extractelement-load.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=yonah | not grep movd
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 -mcpu=core2 | not grep movd
+
+define i32 @t(<2 x i64>* %val) nounwind  {
+	%tmp2 = load <2 x i64>* %val, align 16		; <<2 x i64>> [#uses=1]
+	%tmp3 = bitcast <2 x i64> %tmp2 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp4 = extractelement <4 x i32> %tmp3, i32 2		; <i32> [#uses=1]
+	ret i32 %tmp4
+}
diff --git a/test/CodeGen/X86/extractelement-shuffle.ll b/test/CodeGen/X86/extractelement-shuffle.ll
new file mode 100644
index 0000000..d1ba9a8
--- /dev/null
+++ b/test/CodeGen/X86/extractelement-shuffle.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s
+
+; Examples that exhibits a bug in DAGCombine.  The case is triggered by the
+; following program.  The bug is DAGCombine assumes that the bit convert
+; preserves the number of elements so the optimization code tries to read
+; through the 3rd mask element, which doesn't exist.
+define i32 @update(<2 x i64> %val1, <2 x i64> %val2) nounwind readnone {
+entry:
+	%shuf = shufflevector <2 x i64> %val1, <2 x i64> %val2, <2 x i32> <i32 0, i32 3>
+	%bit  = bitcast <2 x i64> %shuf to <4 x i32>
+	%res =  extractelement <4 x i32> %bit, i32 3
+	ret i32 %res
+}
diff --git a/test/CodeGen/X86/extractps.ll b/test/CodeGen/X86/extractps.ll
new file mode 100644
index 0000000..14778f0
--- /dev/null
+++ b/test/CodeGen/X86/extractps.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=x86 -mcpu=penryn > %t
+; RUN: not grep movd %t
+; RUN: grep {movss	%xmm} %t | count 1
+; RUN: grep {extractps	\\\$1, %xmm0, } %t | count 1
+; PR2647
+
+external global float, align 16         ; <float*>:0 [#uses=2]
+
+define internal void @""() nounwind {
+        load float* @0, align 16                ; <float>:1 [#uses=1]
+        insertelement <4 x float> undef, float %1, i32 0                ; <<4 x float>>:2 [#uses=1]
+        call <4 x float> @llvm.x86.sse.rsqrt.ss( <4 x float> %2 )              ; <<4 x float>>:3 [#uses=1]
+        extractelement <4 x float> %3, i32 0            ; <float>:4 [#uses=1]
+        store float %4, float* @0, align 16
+        ret void
+}
+define internal void @""() nounwind {
+        load float* @0, align 16                ; <float>:1 [#uses=1]
+        insertelement <4 x float> undef, float %1, i32 1                ; <<4 x float>>:2 [#uses=1]
+        call <4 x float> @llvm.x86.sse.rsqrt.ss( <4 x float> %2 )              ; <<4 x float>>:3 [#uses=1]
+        extractelement <4 x float> %3, i32 1            ; <float>:4 [#uses=1]
+        store float %4, float* @0, align 16
+        ret void
+}
+
+declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
+
diff --git a/test/CodeGen/X86/fabs.ll b/test/CodeGen/X86/fabs.ll
new file mode 100644
index 0000000..54947c3
--- /dev/null
+++ b/test/CodeGen/X86/fabs.ll
@@ -0,0 +1,28 @@
+; Make sure this testcase codegens to the fabs instruction, not a call to fabsf
+; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3,-sse | grep fabs\$ | \
+; RUN:   count 2
+; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | \
+; RUN:   grep fabs\$ | count 3
+
+declare float @fabsf(float)
+
+declare x86_fp80 @fabsl(x86_fp80)
+
+define float @test1(float %X) {
+        %Y = call float @fabsf(float %X)
+        ret float %Y
+}
+
+define double @test2(double %X) {
+        %Y = fcmp oge double %X, -0.0
+        %Z = fsub double -0.0, %X
+        %Q = select i1 %Y, double %X, double %Z
+        ret double %Q
+}
+
+define x86_fp80 @test3(x86_fp80 %X) {
+        %Y = call x86_fp80 @fabsl(x86_fp80 %X)
+        ret x86_fp80 %Y
+}
+
+
diff --git a/test/CodeGen/X86/fast-cc-callee-pops.ll b/test/CodeGen/X86/fast-cc-callee-pops.ll
new file mode 100644
index 0000000..5e88ed7
--- /dev/null
+++ b/test/CodeGen/X86/fast-cc-callee-pops.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel -mcpu=yonah | grep {ret	20}
+
+; Check that a fastcc function pops its stack variables before returning.
+
+define x86_fastcallcc void @func(i64 %X, i64 %Y, float %G, double %Z) nounwind {
+        ret void
+}
diff --git a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
new file mode 100644
index 0000000..e151821
--- /dev/null
+++ b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
+; RUN:   grep {add	ESP, 8}
+
+target triple = "i686-pc-linux-gnu"
+
+declare x86_fastcallcc void @func(i32*, i64)
+
+define x86_fastcallcc void @caller(i32, i64) {
+        %X = alloca i32         ; <i32*> [#uses=1]
+        call x86_fastcallcc void @func( i32* %X, i64 0 )
+        ret void
+}
+
diff --git a/test/CodeGen/X86/fast-cc-pass-in-regs.ll b/test/CodeGen/X86/fast-cc-pass-in-regs.ll
new file mode 100644
index 0000000..fe96c0c
--- /dev/null
+++ b/test/CodeGen/X86/fast-cc-pass-in-regs.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
+; RUN:   grep {mov	EDX, 1}
+; check that fastcc is passing stuff in regs.
+
+declare x86_fastcallcc i64 @callee(i64)
+
+define i64 @caller() {
+        %X = call x86_fastcallcc  i64 @callee( i64 4294967299 )          ; <i64> [#uses=1]
+        ret i64 %X
+}
+
+define x86_fastcallcc i64 @caller2(i64 %X) {
+        ret i64 %X
+}
+
diff --git a/test/CodeGen/X86/fast-isel-bail.ll b/test/CodeGen/X86/fast-isel-bail.ll
new file mode 100644
index 0000000..9072c5c
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-bail.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 -O0
+
+; This file is for regression tests for cases where FastISel needs
+; to gracefully bail out and let SelectionDAGISel take over.
+
+	type { i64, i8* }		; type %0
+
+declare void @bar(%0)
+
+define fastcc void @foo() nounwind {
+entry:
+	call void @bar(%0 zeroinitializer)
+	unreachable
+}
diff --git a/test/CodeGen/X86/fast-isel-bc.ll b/test/CodeGen/X86/fast-isel-bc.ll
new file mode 100644
index 0000000..f2696ce
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-bc.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -O0 -march=x86-64 -mattr=+mmx | FileCheck %s
+; PR4684
+
+target datalayout =
+"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin9.8"
+
+declare void @func2(<1 x i64>)
+
+define void @func1() nounwind {
+
+; This isn't spectacular, but it's MMX code at -O0...
+; CHECK: movl $2, %eax
+; CHECK: movd %rax, %mm0
+; CHECK: movd %mm0, %rdi
+
+        call void @func2(<1 x i64> <i64 2>)
+        ret void
+}
diff --git a/test/CodeGen/X86/fast-isel-call.ll b/test/CodeGen/X86/fast-isel-call.ll
new file mode 100644
index 0000000..5fcdbbb
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-call.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -fast-isel -march=x86 | grep and
+
+define i32 @t() nounwind {
+tak:
+	%tmp = call i1 @foo()
+	br i1 %tmp, label %BB1, label %BB2
+BB1:
+	ret i32 1
+BB2:
+	ret i32 0
+}
+
+declare i1 @foo() zeroext nounwind
diff --git a/test/CodeGen/X86/fast-isel-constpool.ll b/test/CodeGen/X86/fast-isel-constpool.ll
new file mode 100644
index 0000000..84d10f3
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-constpool.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -fast-isel | grep {LCPI1_0(%rip)}
+; Make sure fast isel uses rip-relative addressing when required.
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin9.0"
+
+define i32 @f0(double %x) nounwind {
+entry:
+	%retval = alloca i32		; <i32*> [#uses=2]
+	%x.addr = alloca double		; <double*> [#uses=2]
+	store double %x, double* %x.addr
+	%tmp = load double* %x.addr		; <double> [#uses=1]
+	%cmp = fcmp olt double %tmp, 8.500000e-01		; <i1> [#uses=1]
+	%conv = zext i1 %cmp to i32		; <i32> [#uses=1]
+	store i32 %conv, i32* %retval
+	%0 = load i32* %retval		; <i32> [#uses=1]
+	ret i32 %0
+}
diff --git a/test/CodeGen/X86/fast-isel-fneg.ll b/test/CodeGen/X86/fast-isel-fneg.ll
new file mode 100644
index 0000000..5ffd48b
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-fneg.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -fast-isel -fast-isel-abort -march=x86-64 | FileCheck %s
+; RUN: llc < %s -fast-isel -march=x86 -mattr=+sse2 | grep xor | count 2
+
+; CHECK: doo:
+; CHECK: xor
+define double @doo(double %x) nounwind {
+  %y = fsub double -0.0, %x
+  ret double %y
+}
+
+; CHECK: foo:
+; CHECK: xor
+define float @foo(float %x) nounwind {
+  %y = fsub float -0.0, %x
+  ret float %y
+}
diff --git a/test/CodeGen/X86/fast-isel-gep.ll b/test/CodeGen/X86/fast-isel-gep.ll
new file mode 100644
index 0000000..5b8acec
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-gep.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -march=x86-64 -O0 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -march=x86 -O0 | FileCheck %s --check-prefix=X32
+
+; GEP indices are interpreted as signed integers, so they
+; should be sign-extended to 64 bits on 64-bit targets.
+; PR3181
+define i32 @test1(i32 %t3, i32* %t1) nounwind {
+       %t9 = getelementptr i32* %t1, i32 %t3           ; <i32*> [#uses=1]
+       %t15 = load i32* %t9            ; <i32> [#uses=1]
+       ret i32 %t15
+; X32: test1:
+; X32:  	movl	(%ecx,%eax,4), %eax
+; X32:  	ret
+
+; X64: test1:
+; X64:  	movslq	%edi, %rax
+; X64:  	movl	(%rsi,%rax,4), %eax
+; X64:  	ret
+
+}
+define i32 @test2(i64 %t3, i32* %t1) nounwind {
+       %t9 = getelementptr i32* %t1, i64 %t3           ; <i32*> [#uses=1]
+       %t15 = load i32* %t9            ; <i32> [#uses=1]
+       ret i32 %t15
+; X32: test2:
+; X32:  	movl	(%eax,%ecx,4), %eax
+; X32:  	ret
+
+; X64: test2:
+; X64:  	movl	(%rsi,%rdi,4), %eax
+; X64:  	ret
+}
+
+
+
+; PR4984
+define i8 @test3(i8* %start) nounwind {
+entry:
+  %A = getelementptr i8* %start, i64 -2               ; <i8*> [#uses=1]
+  %B = load i8* %A, align 1                       ; <i8> [#uses=1]
+  ret i8 %B
+  
+  
+; X32: test3:
+; X32:  	movl	4(%esp), %eax
+; X32:  	movb	-2(%eax), %al
+; X32:  	ret
+
+; X64: test3:
+; X64:  	movb	-2(%rdi), %al
+; X64:  	ret
+
+}
diff --git a/test/CodeGen/X86/fast-isel-gv.ll b/test/CodeGen/X86/fast-isel-gv.ll
new file mode 100644
index 0000000..34f8b38
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-gv.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -fast-isel | grep {_kill@GOTPCREL(%rip)}
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin10.0"
+@f = global i8 (...)* @kill		; <i8 (...)**> [#uses=1]
+
+declare signext i8 @kill(...)
+
+define i32 @main() nounwind ssp {
+entry:
+	%retval = alloca i32		; <i32*> [#uses=2]
+	%0 = alloca i32		; <i32*> [#uses=2]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	%1 = load i8 (...)** @f, align 8		; <i8 (...)*> [#uses=1]
+	%2 = icmp ne i8 (...)* %1, @kill		; <i1> [#uses=1]
+	%3 = zext i1 %2 to i32		; <i32> [#uses=1]
+	store i32 %3, i32* %0, align 4
+	%4 = load i32* %0, align 4		; <i32> [#uses=1]
+	store i32 %4, i32* %retval, align 4
+	br label %return
+
+return:		; preds = %entry
+	%retval1 = load i32* %retval		; <i32> [#uses=1]
+	ret i32 %retval1
+}
diff --git a/test/CodeGen/X86/fast-isel-i1.ll b/test/CodeGen/X86/fast-isel-i1.ll
new file mode 100644
index 0000000..d066578
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-i1.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86 -fast-isel | grep {andb	\$1, %}
+
+declare i64 @bar(i64)
+
+define i32 @foo(i64 %x) nounwind {
+	%y = add i64 %x, -3		; <i64> [#uses=1]
+	%t = call i64 @bar(i64 %y)		; <i64> [#uses=1]
+	%s = mul i64 %t, 77		; <i64> [#uses=1]
+	%z = trunc i64 %s to i1		; <i1> [#uses=1]
+	br label %next
+
+next:		; preds = %0
+	%u = zext i1 %z to i32		; <i32> [#uses=1]
+	%v = add i32 %u, 1999		; <i32> [#uses=1]
+	br label %exit
+
+exit:		; preds = %next
+	ret i32 %v
+}
diff --git a/test/CodeGen/X86/fast-isel-mem.ll b/test/CodeGen/X86/fast-isel-mem.ll
new file mode 100644
index 0000000..35ec1e7
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-mem.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin | \
+; RUN:   grep lazy_ptr, | count 2
+; RUN: llc < %s -fast-isel -march=x86 -relocation-model=static | \
+; RUN:   grep lea
+
+@src = external global i32
+
+define i32 @loadgv() nounwind {
+entry:
+	%0 = load i32* @src, align 4
+	%1 = load i32* @src, align 4
+        %2 = add i32 %0, %1
+        store i32 %2, i32* @src
+	ret i32 %2
+}
+
+%stuff = type { i32 (...)** }
+@LotsStuff = external constant [4 x i32 (...)*]
+
+define void @t(%stuff* %this) nounwind {
+entry:
+	store i32 (...)** getelementptr ([4 x i32 (...)*]* @LotsStuff, i32 0, i32 2), i32 (...)*** null, align 4
+	ret void
+}
diff --git a/test/CodeGen/X86/fast-isel-phys.ll b/test/CodeGen/X86/fast-isel-phys.ll
new file mode 100644
index 0000000..158ef55
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-phys.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -fast-isel -fast-isel-abort -march=x86
+
+define i8 @t2(i8 %a, i8 %c) nounwind {
+       %tmp = shl i8 %a, %c
+       ret i8 %tmp
+}
+
+define i8 @t1(i8 %a) nounwind {
+       %tmp = mul i8 %a, 17
+       ret i8 %tmp
+}
diff --git a/test/CodeGen/X86/fast-isel-shift-imm.ll b/test/CodeGen/X86/fast-isel-shift-imm.ll
new file mode 100644
index 0000000..35f7a72
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-shift-imm.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=x86 -O0 | grep {sarl	\$80, %eax}
+; PR3242
+
+define i32 @foo(i32 %x) nounwind {
+  %y = ashr i32 %x, 50000
+  ret i32 %y
+}
diff --git a/test/CodeGen/X86/fast-isel-tailcall.ll b/test/CodeGen/X86/fast-isel-tailcall.ll
new file mode 100644
index 0000000..c3e527c
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-tailcall.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -fast-isel -tailcallopt -march=x86 | not grep add
+; PR4154
+
+; On x86, -tailcallopt changes the ABI so the caller shouldn't readjust
+; the stack pointer after the call in this code.
+
+define i32 @stub(i8* %t0) nounwind {
+entry:
+        %t1 = load i32* inttoptr (i32 139708680 to i32*)         ; <i32> [#uses=1]
+        %t2 = bitcast i8* %t0 to i32 (i32)*               ; <i32 (i32)*> [#uses=1]
+        %t3 = call fastcc i32 %t2(i32 %t1)         ; <i32> [#uses=1]
+        ret i32 %t3
+}
diff --git a/test/CodeGen/X86/fast-isel-tls.ll b/test/CodeGen/X86/fast-isel-tls.ll
new file mode 100644
index 0000000..a5e6642
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-tls.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86 -relocation-model=pic -mtriple=i686-unknown-linux-gnu -fast-isel | grep __tls_get_addr
+; PR3654
+
+@v = thread_local global i32 0
+define i32 @f() nounwind {
+entry:
+          %t = load i32* @v
+          %s = add i32 %t, 1
+          ret i32 %s
+}
diff --git a/test/CodeGen/X86/fast-isel-trunc.ll b/test/CodeGen/X86/fast-isel-trunc.ll
new file mode 100644
index 0000000..69b26c5
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-trunc.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -fast-isel -fast-isel-abort
+; RUN: llc < %s -march=x86-64 -fast-isel -fast-isel-abort
+
+define i8 @t1(i32 %x) signext nounwind  {
+	%tmp1 = trunc i32 %x to i8
+	ret i8 %tmp1
+}
+
+define i8 @t2(i16 signext %x) signext nounwind  {
+	%tmp1 = trunc i16 %x to i8
+	ret i8 %tmp1
+}
diff --git a/test/CodeGen/X86/fast-isel.ll b/test/CodeGen/X86/fast-isel.ll
new file mode 100644
index 0000000..84b3fd7
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel.ll
@@ -0,0 +1,75 @@
+; RUN: llc < %s -fast-isel -fast-isel-abort -march=x86 -mattr=sse2
+
+; This tests very minimal fast-isel functionality.
+
+define i32* @foo(i32* %p, i32* %q, i32** %z) nounwind {
+entry:
+  %r = load i32* %p
+  %s = load i32* %q
+  %y = load i32** %z
+  br label %fast
+
+fast:
+  %t0 = add i32 %r, %s
+  %t1 = mul i32 %t0, %s
+  %t2 = sub i32 %t1, %s
+  %t3 = and i32 %t2, %s
+  %t4 = xor i32 %t3, 3
+  %t5 = xor i32 %t4, %s
+  %t6 = add i32 %t5, 2
+  %t7 = getelementptr i32* %y, i32 1
+  %t8 = getelementptr i32* %t7, i32 %t6
+  br label %exit
+
+exit:
+  ret i32* %t8
+}
+
+define double @bar(double* %p, double* %q) nounwind {
+entry:
+  %r = load double* %p
+  %s = load double* %q
+  br label %fast
+
+fast:
+  %t0 = fadd double %r, %s
+  %t1 = fmul double %t0, %s
+  %t2 = fsub double %t1, %s
+  %t3 = fadd double %t2, 707.0
+  br label %exit
+
+exit:
+  ret double %t3
+}
+
+define i32 @cast() nounwind {
+entry:
+	%tmp2 = bitcast i32 0 to i32
+	ret i32 %tmp2
+}
+
+define i1 @ptrtoint_i1(i8* %p) nounwind {
+  %t = ptrtoint i8* %p to i1
+  ret i1 %t
+}
+define i8* @inttoptr_i1(i1 %p) nounwind {
+  %t = inttoptr i1 %p to i8*
+  ret i8* %t
+}
+define i32 @ptrtoint_i32(i8* %p) nounwind {
+  %t = ptrtoint i8* %p to i32
+  ret i32 %t
+}
+define i8* @inttoptr_i32(i32 %p) nounwind {
+  %t = inttoptr i32 %p to i8*
+  ret i8* %t
+}
+
+define void @store_i1(i1* %p, i1 %t) nounwind {
+  store i1 %t, i1* %p
+  ret void
+}
+define i1 @load_i1(i1* %p) nounwind {
+  %t = load i1* %p
+  ret i1 %t
+}
diff --git a/test/CodeGen/X86/fastcall-correct-mangling.ll b/test/CodeGen/X86/fastcall-correct-mangling.ll
new file mode 100644
index 0000000..2b48f5f
--- /dev/null
+++ b/test/CodeGen/X86/fastcall-correct-mangling.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=i386-unknown-mingw32 | \
+; RUN:   grep {@12}
+
+; Check that a fastcall function gets correct mangling
+
+define x86_fastcallcc void @func(i64 %X, i8 %Y, i8 %G, i16 %Z) {
+        ret void
+}
+
diff --git a/test/CodeGen/X86/fastcc-2.ll b/test/CodeGen/X86/fastcc-2.ll
new file mode 100644
index 0000000..d044a2a
--- /dev/null
+++ b/test/CodeGen/X86/fastcc-2.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | grep movsd
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | count 1
+
+define i32 @foo() nounwind {
+entry:
+	tail call fastcc void @bar( double 1.000000e+00 ) nounwind
+	ret i32 0
+}
+
+declare fastcc void @bar(double)
diff --git a/test/CodeGen/X86/fastcc-byval.ll b/test/CodeGen/X86/fastcc-byval.ll
new file mode 100644
index 0000000..52b3e57
--- /dev/null
+++ b/test/CodeGen/X86/fastcc-byval.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -tailcallopt=false | grep {movl\[\[:space:\]\]*8(%esp), %eax} | count 2
+; PR3122
+; rdar://6400815
+
+; byval requires a copy, even with fastcc.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.5"
+	%struct.MVT = type { i32 }
+
+define fastcc i32 @bar() nounwind {
+	%V = alloca %struct.MVT
+	%a = getelementptr %struct.MVT* %V, i32 0, i32 0
+	store i32 1, i32* %a
+	call fastcc void @foo(%struct.MVT* byval %V) nounwind
+	%t = load i32* %a
+	ret i32 %t
+}
+
+declare fastcc void @foo(%struct.MVT* byval)
diff --git a/test/CodeGen/X86/fastcc-sret.ll b/test/CodeGen/X86/fastcc-sret.ll
new file mode 100644
index 0000000..d457418
--- /dev/null
+++ b/test/CodeGen/X86/fastcc-sret.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=x86 -tailcallopt=false | grep ret | not grep 4
+
+	%struct.foo = type { [4 x i32] }
+
+define fastcc void @bar(%struct.foo* noalias sret %agg.result) nounwind  {
+entry:
+	%tmp1 = getelementptr %struct.foo* %agg.result, i32 0, i32 0
+	%tmp3 = getelementptr [4 x i32]* %tmp1, i32 0, i32 0
+	store i32 1, i32* %tmp3, align 8
+        ret void
+}
+
+@dst = external global i32
+
+define void @foo() nounwind {
+	%memtmp = alloca %struct.foo, align 4
+        call fastcc void @bar( %struct.foo* sret %memtmp ) nounwind
+        %tmp4 = getelementptr %struct.foo* %memtmp, i32 0, i32 0
+	%tmp5 = getelementptr [4 x i32]* %tmp4, i32 0, i32 0
+        %tmp6 = load i32* %tmp5
+        store i32 %tmp6, i32* @dst
+        ret void
+}
diff --git a/test/CodeGen/X86/fastcc.ll b/test/CodeGen/X86/fastcc.ll
new file mode 100644
index 0000000..705ab7b
--- /dev/null
+++ b/test/CodeGen/X86/fastcc.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 -post-RA-scheduler=false | FileCheck %s
+; CHECK: movsd %xmm0, 8(%esp)
+; CHECK: xorl %ecx, %ecx
+
+@d = external global double		; <double*> [#uses=1]
+@c = external global double		; <double*> [#uses=1]
+@b = external global double		; <double*> [#uses=1]
+@a = external global double		; <double*> [#uses=1]
+
+define i32 @foo() nounwind {
+entry:
+	%0 = load double* @d, align 8		; <double> [#uses=1]
+	%1 = load double* @c, align 8		; <double> [#uses=1]
+	%2 = load double* @b, align 8		; <double> [#uses=1]
+	%3 = load double* @a, align 8		; <double> [#uses=1]
+	tail call fastcc void @bar( i32 0, i32 1, i32 2, double 1.000000e+00, double %3, double %2, double %1, double %0 ) nounwind
+	ret i32 0
+}
+
+declare fastcc void @bar(i32, i32, i32, double, double, double, double, double)
diff --git a/test/CodeGen/X86/fastcc3struct.ll b/test/CodeGen/X86/fastcc3struct.ll
new file mode 100644
index 0000000..84f8ef6
--- /dev/null
+++ b/test/CodeGen/X86/fastcc3struct.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 -o %t
+; RUN: grep "movl	.48, %ecx" %t
+; RUN: grep "movl	.24, %edx" %t
+; RUN: grep "movl	.12, %eax" %t
+
+%0 = type { i32, i32, i32 }
+
+define internal fastcc %0 @ReturnBigStruct() nounwind readnone {
+entry:
+  %0 = insertvalue %0 zeroinitializer, i32 12, 0
+  %1 = insertvalue %0 %0, i32 24, 1
+  %2 = insertvalue %0 %1, i32 48, 2
+  ret %0 %2
+}
+
diff --git a/test/CodeGen/X86/field-extract-use-trunc.ll b/test/CodeGen/X86/field-extract-use-trunc.ll
new file mode 100644
index 0000000..6020530
--- /dev/null
+++ b/test/CodeGen/X86/field-extract-use-trunc.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -march=x86 | grep sar | count 1
+; RUN: llc < %s -march=x86-64 | not grep sar
+
+define i32 @test(i32 %f12) {
+	%tmp7.25 = lshr i32 %f12, 16		
+	%tmp7.26 = trunc i32 %tmp7.25 to i8
+	%tmp78.2 = sext i8 %tmp7.26 to i32
+	ret i32 %tmp78.2
+}
+
+define i32 @test2(i32 %f12) {
+	%f11 = shl i32 %f12, 8
+	%tmp7.25 = ashr i32 %f11, 24
+	ret i32 %tmp7.25
+}
+
+define i32 @test3(i32 %f12) {
+	%f11 = shl i32 %f12, 13
+	%tmp7.25 = ashr i32 %f11, 24
+	ret i32 %tmp7.25
+}
+
+define i64 @test4(i64 %f12) {
+	%f11 = shl i64 %f12, 32
+	%tmp7.25 = ashr i64 %f11, 32
+	ret i64 %tmp7.25
+}
+
+define i16 @test5(i16 %f12) {
+	%f11 = shl i16 %f12, 2
+	%tmp7.25 = ashr i16 %f11, 8
+	ret i16 %tmp7.25
+}
+
+define i16 @test6(i16 %f12) {
+	%f11 = shl i16 %f12, 8
+	%tmp7.25 = ashr i16 %f11, 8
+	ret i16 %tmp7.25
+}
diff --git a/test/CodeGen/X86/fildll.ll b/test/CodeGen/X86/fildll.ll
new file mode 100644
index 0000000..c5a3765
--- /dev/null
+++ b/test/CodeGen/X86/fildll.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -x86-asm-syntax=att -mattr=-sse2 | grep fildll | count 2
+
+define fastcc double @sint64_to_fp(i64 %X) {
+        %R = sitofp i64 %X to double            ; <double> [#uses=1]
+        ret double %R
+}
+
+define fastcc double @uint64_to_fp(i64 %X) {
+        %R = uitofp i64 %X to double            ; <double> [#uses=1]
+        ret double %R
+}
+
diff --git a/test/CodeGen/X86/fmul-zero.ll b/test/CodeGen/X86/fmul-zero.ll
new file mode 100644
index 0000000..03bad65
--- /dev/null
+++ b/test/CodeGen/X86/fmul-zero.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86-64 -enable-unsafe-fp-math | not grep mulps
+; RUN: llc < %s -march=x86-64 | grep mulps
+
+define void @test14(<4 x float>*) nounwind {
+        load <4 x float>* %0, align 1
+        fmul <4 x float> %2, zeroinitializer
+        store <4 x float> %3, <4 x float>* %0, align 1
+        ret void
+}
diff --git a/test/CodeGen/X86/fold-add.ll b/test/CodeGen/X86/fold-add.ll
new file mode 100644
index 0000000..5e80ea5
--- /dev/null
+++ b/test/CodeGen/X86/fold-add.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86-64 | grep {cmpb	\$0, (%r.\*,%r.\*)}
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin9.6"
+@prev_length = internal global i32 0		; <i32*> [#uses=1]
+@window = internal global [65536 x i8] zeroinitializer, align 32		; <[65536 x i8]*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (i32)* @longest_match to i8*)]		; <[1 x i8*]*> [#uses=0]
+
+define fastcc i32 @longest_match(i32 %cur_match) nounwind {
+entry:
+	%0 = load i32* @prev_length, align 4		; <i32> [#uses=3]
+	%1 = zext i32 %cur_match to i64		; <i64> [#uses=1]
+	%2 = sext i32 %0 to i64		; <i64> [#uses=1]
+	%.sum3 = add i64 %1, %2		; <i64> [#uses=1]
+	%3 = getelementptr [65536 x i8]* @window, i64 0, i64 %.sum3		; <i8*> [#uses=1]
+	%4 = load i8* %3, align 1		; <i8> [#uses=1]
+	%5 = icmp eq i8 %4, 0		; <i1> [#uses=1]
+	br i1 %5, label %bb5, label %bb23
+
+bb5:		; preds = %entry
+	ret i32 %0
+
+bb23:		; preds = %entry
+	ret i32 %0
+}
diff --git a/test/CodeGen/X86/fold-and-shift.ll b/test/CodeGen/X86/fold-and-shift.ll
new file mode 100644
index 0000000..9f79f77
--- /dev/null
+++ b/test/CodeGen/X86/fold-and-shift.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=x86 | not grep and
+
+define i32 @t1(i8* %X, i32 %i) {
+entry:
+	%tmp2 = shl i32 %i, 2		; <i32> [#uses=1]
+	%tmp4 = and i32 %tmp2, 1020		; <i32> [#uses=1]
+	%tmp7 = getelementptr i8* %X, i32 %tmp4		; <i8*> [#uses=1]
+	%tmp78 = bitcast i8* %tmp7 to i32*		; <i32*> [#uses=1]
+	%tmp9 = load i32* %tmp78, align 4		; <i32> [#uses=1]
+	ret i32 %tmp9
+}
+
+define i32 @t2(i16* %X, i32 %i) {
+entry:
+	%tmp2 = shl i32 %i, 1		; <i32> [#uses=1]
+	%tmp4 = and i32 %tmp2, 131070		; <i32> [#uses=1]
+	%tmp7 = getelementptr i16* %X, i32 %tmp4		; <i16*> [#uses=1]
+	%tmp78 = bitcast i16* %tmp7 to i32*		; <i32*> [#uses=1]
+	%tmp9 = load i32* %tmp78, align 4		; <i32> [#uses=1]
+	ret i32 %tmp9
+}
diff --git a/test/CodeGen/X86/fold-call-2.ll b/test/CodeGen/X86/fold-call-2.ll
new file mode 100644
index 0000000..7a2b038
--- /dev/null
+++ b/test/CodeGen/X86/fold-call-2.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep mov | count 1
+
+@f = external global void ()*		; <void ()**> [#uses=1]
+
+define i32 @main() nounwind {
+entry:
+	load void ()** @f, align 8		; <void ()*>:0 [#uses=1]
+	tail call void %0( ) nounwind
+	ret i32 0
+}
diff --git a/test/CodeGen/X86/fold-call-3.ll b/test/CodeGen/X86/fold-call-3.ll
new file mode 100644
index 0000000..337a7ed
--- /dev/null
+++ b/test/CodeGen/X86/fold-call-3.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep call | grep 560
+; rdar://6522427
+
+	%"struct.clang::Action" = type { %"struct.clang::ActionBase" }
+	%"struct.clang::ActionBase" = type { i32 (...)** }
+	%"struct.clang::ActionBase::ActionResult<0u>" = type { i8*, i8 }
+@NumTrials = internal global i32 10000000		; <i32*> [#uses=2]
+@llvm.used = appending global [1 x i8*] [ i8* bitcast (void (i8*, %"struct.clang::Action"*)* @_Z25RawPointerPerformanceTestPvRN5clang6ActionE to i8*) ], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define void @_Z25RawPointerPerformanceTestPvRN5clang6ActionE(i8* %Val, %"struct.clang::Action"* %Actions) nounwind {
+entry:
+	%0 = alloca %"struct.clang::ActionBase::ActionResult<0u>", align 8		; <%"struct.clang::ActionBase::ActionResult<0u>"*> [#uses=3]
+	%1 = load i32* @NumTrials, align 4		; <i32> [#uses=1]
+	%2 = icmp eq i32 %1, 0		; <i1> [#uses=1]
+	br i1 %2, label %return, label %bb.nph
+
+bb.nph:		; preds = %entry
+	%3 = getelementptr %"struct.clang::Action"* %Actions, i64 0, i32 0, i32 0		; <i32 (...)***> [#uses=1]
+	%mrv_gep = bitcast %"struct.clang::ActionBase::ActionResult<0u>"* %0 to i64*		; <i64*> [#uses=1]
+	%mrv_gep1 = getelementptr %"struct.clang::ActionBase::ActionResult<0u>"* %0, i64 0, i32 1		; <i8*> [#uses=1]
+	%4 = bitcast i8* %mrv_gep1 to i64*		; <i64*> [#uses=1]
+	%5 = getelementptr %"struct.clang::ActionBase::ActionResult<0u>"* %0, i64 0, i32 0		; <i8**> [#uses=1]
+	br label %bb
+
+bb:		; preds = %bb, %bb.nph
+	%Trial.01 = phi i32 [ 0, %bb.nph ], [ %12, %bb ]		; <i32> [#uses=1]
+	%Val_addr.02 = phi i8* [ %Val, %bb.nph ], [ %11, %bb ]		; <i8*> [#uses=1]
+	%6 = load i32 (...)*** %3, align 8		; <i32 (...)**> [#uses=1]
+	%7 = getelementptr i32 (...)** %6, i64 70		; <i32 (...)**> [#uses=1]
+	%8 = load i32 (...)** %7, align 8		; <i32 (...)*> [#uses=1]
+	%9 = bitcast i32 (...)* %8 to { i64, i64 } (%"struct.clang::Action"*, i8*)*		; <{ i64, i64 } (%"struct.clang::Action"*, i8*)*> [#uses=1]
+	%10 = call { i64, i64 } %9(%"struct.clang::Action"* %Actions, i8* %Val_addr.02) nounwind		; <{ i64, i64 }> [#uses=2]
+	%mrv_gr = extractvalue { i64, i64 } %10, 0		; <i64> [#uses=1]
+	store i64 %mrv_gr, i64* %mrv_gep
+	%mrv_gr2 = extractvalue { i64, i64 } %10, 1		; <i64> [#uses=1]
+	store i64 %mrv_gr2, i64* %4
+	%11 = load i8** %5, align 8		; <i8*> [#uses=1]
+	%12 = add i32 %Trial.01, 1		; <i32> [#uses=2]
+	%13 = load i32* @NumTrials, align 4		; <i32> [#uses=1]
+	%14 = icmp ult i32 %12, %13		; <i1> [#uses=1]
+	br i1 %14, label %bb, label %return
+
+return:		; preds = %bb, %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/fold-call.ll b/test/CodeGen/X86/fold-call.ll
new file mode 100644
index 0000000..603e9ad
--- /dev/null
+++ b/test/CodeGen/X86/fold-call.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86 | not grep mov
+; RUN: llc < %s -march=x86-64 | not grep mov
+
+declare void @bar()
+
+define void @foo(i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, void()* %arg) nounwind {
+	call void @bar()
+	call void %arg()
+	ret void
+}
diff --git a/test/CodeGen/X86/fold-imm.ll b/test/CodeGen/X86/fold-imm.ll
new file mode 100644
index 0000000..f1fcbcf
--- /dev/null
+++ b/test/CodeGen/X86/fold-imm.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 | grep inc
+; RUN: llc < %s -march=x86 | grep add | grep 4
+
+define i32 @test(i32 %X) nounwind {
+entry:
+	%0 = add i32 %X, 1
+	ret i32 %0
+}
+
+define i32 @test2(i32 %X) nounwind {
+entry:
+	%0 = add i32 %X, 4
+	ret i32 %0
+}
diff --git a/test/CodeGen/X86/fold-load.ll b/test/CodeGen/X86/fold-load.ll
new file mode 100644
index 0000000..5525af2
--- /dev/null
+++ b/test/CodeGen/X86/fold-load.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+	%struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
+	%struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (...)*, void (...)*, i8*, i8 }
+@stmt_obstack = external global %struct.obstack		; <%struct.obstack*> [#uses=1]
+
+; This should just not crash.
+define void @test1() nounwind {
+entry:
+	br i1 true, label %cond_true, label %cond_next
+
+cond_true:		; preds = %entry
+	%new_size.0.i = select i1 false, i32 0, i32 0		; <i32> [#uses=1]
+	%tmp.i = load i32* bitcast (i8* getelementptr (%struct.obstack* @stmt_obstack, i32 0, i32 10) to i32*)		; <i32> [#uses=1]
+	%tmp.i.upgrd.1 = trunc i32 %tmp.i to i8		; <i8> [#uses=1]
+	%tmp21.i = and i8 %tmp.i.upgrd.1, 1		; <i8> [#uses=1]
+	%tmp22.i = icmp eq i8 %tmp21.i, 0		; <i1> [#uses=1]
+	br i1 %tmp22.i, label %cond_false30.i, label %cond_true23.i
+
+cond_true23.i:		; preds = %cond_true
+	ret void
+
+cond_false30.i:		; preds = %cond_true
+	%tmp35.i = tail call %struct._obstack_chunk* null( i32 %new_size.0.i )		; <%struct._obstack_chunk*> [#uses=0]
+	ret void
+
+cond_next:		; preds = %entry
+	ret void
+}
+
+
+
+define i32 @test2(i16* %P, i16* %Q) nounwind {
+  %A = load i16* %P, align 4                      ; <i16> [#uses=11]
+  %C = zext i16 %A to i32                         ; <i32> [#uses=1]
+  %D = and i32 %C, 255                            ; <i32> [#uses=1]
+  br label %L
+L:
+
+  store i16 %A, i16* %Q
+  ret i32 %D
+  
+; CHECK: test2:
+; CHECK: 	movl	4(%esp), %eax
+; CHECK-NEXT:	movzwl	(%eax), %ecx
+
+}
+
diff --git a/test/CodeGen/X86/fold-mul-lohi.ll b/test/CodeGen/X86/fold-mul-lohi.ll
new file mode 100644
index 0000000..0351eca
--- /dev/null
+++ b/test/CodeGen/X86/fold-mul-lohi.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -march=x86 | not grep lea
+; RUN: llc < %s -march=x86-64 | not grep lea
+
+@B = external global [1000 x i8], align 32
+@A = external global [1000 x i8], align 32
+@P = external global [1000 x i8], align 32
+
+define void @foo(i32 %m) nounwind {
+entry:
+	%tmp1 = icmp sgt i32 %m, 0
+	br i1 %tmp1, label %bb, label %return
+
+bb:
+	%i.019.0 = phi i32 [ %indvar.next, %bb ], [ 0, %entry ]
+	%tmp2 = getelementptr [1000 x i8]* @B, i32 0, i32 %i.019.0
+	%tmp3 = load i8* %tmp2, align 4
+	%tmp4 = mul i8 %tmp3, 2
+	%tmp5 = getelementptr [1000 x i8]* @A, i32 0, i32 %i.019.0
+	store i8 %tmp4, i8* %tmp5, align 4
+	%tmp8 = mul i32 %i.019.0, 9
+	%tmp10 = getelementptr [1000 x i8]* @P, i32 0, i32 %tmp8
+	store i8 17, i8* %tmp10, align 4
+	%indvar.next = add i32 %i.019.0, 1
+	%exitcond = icmp eq i32 %indvar.next, %m
+	br i1 %exitcond, label %return, label %bb
+
+return:
+	ret void
+}
+
diff --git a/test/CodeGen/X86/fold-pcmpeqd-0.ll b/test/CodeGen/X86/fold-pcmpeqd-0.ll
new file mode 100644
index 0000000..ef5202f
--- /dev/null
+++ b/test/CodeGen/X86/fold-pcmpeqd-0.ll
@@ -0,0 +1,105 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah  | not grep pcmpeqd
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah  | grep orps | grep CPI1_2  | count 2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1
+
+; This testcase shouldn't need to spill the -1 value,
+; so it should just use pcmpeqd to materialize an all-ones vector.
+; For i386, constant-pool loads of -1 are folded.
+
+	%struct.__ImageExecInfo = type <{ <4 x i32>, <4 x float>, <2 x i64>, i8*, i8*, i8*, i32, i32, i32, i32, i32 }>
+	%struct._cl_image_format_t = type <{ i32, i32, i32 }>
+	%struct._image2d_t = type <{ i8*, %struct._cl_image_format_t, i32, i32, i32, i32, i32, i32 }>
+
+define void @program_1(%struct._image2d_t* %dest, %struct._image2d_t* %t0, <4 x float> %p0, <4 x float> %p1, <4 x float> %p4, <4 x float> %p5, <4 x float> %p6) nounwind {
+entry:
+	%tmp3.i = load i32* null		; <i32> [#uses=1]
+	%cmp = icmp sgt i32 %tmp3.i, 200		; <i1> [#uses=1]
+	br i1 %cmp, label %forcond, label %ifthen
+
+ifthen:		; preds = %entry
+	ret void
+
+forcond:		; preds = %entry
+	%tmp3.i536 = load i32* null		; <i32> [#uses=1]
+	%cmp12 = icmp slt i32 0, %tmp3.i536		; <i1> [#uses=1]
+	br i1 %cmp12, label %forbody, label %afterfor
+
+forbody:		; preds = %forcond
+	%bitcast204.i313 = bitcast <4 x i32> zeroinitializer to <4 x float>		; <<4 x float>> [#uses=1]
+	%mul233 = fmul <4 x float> %bitcast204.i313, zeroinitializer		; <<4 x float>> [#uses=1]
+	%mul257 = fmul <4 x float> %mul233, zeroinitializer		; <<4 x float>> [#uses=1]
+	%mul275 = fmul <4 x float> %mul257, zeroinitializer		; <<4 x float>> [#uses=1]
+	%tmp51 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %mul275, <4 x float> zeroinitializer) nounwind		; <<4 x float>> [#uses=1]
+	%bitcast198.i182 = bitcast <4 x float> zeroinitializer to <4 x i32>		; <<4 x i32>> [#uses=0]
+	%bitcast204.i185 = bitcast <4 x i32> zeroinitializer to <4 x float>		; <<4 x float>> [#uses=1]
+	%tmp69 = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> zeroinitializer) nounwind		; <<4 x i32>> [#uses=1]
+	%tmp70 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp69) nounwind		; <<4 x float>> [#uses=1]
+	%sub140.i78 = fsub <4 x float> zeroinitializer, %tmp70		; <<4 x float>> [#uses=2]
+	%mul166.i86 = fmul <4 x float> zeroinitializer, %sub140.i78		; <<4 x float>> [#uses=1]
+	%add167.i87 = fadd <4 x float> %mul166.i86, < float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000 >		; <<4 x float>> [#uses=1]
+	%mul171.i88 = fmul <4 x float> %add167.i87, %sub140.i78		; <<4 x float>> [#uses=1]
+	%add172.i89 = fadd <4 x float> %mul171.i88, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 >		; <<4 x float>> [#uses=1]
+	%bitcast176.i90 = bitcast <4 x float> %add172.i89 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%andnps178.i92 = and <4 x i32> %bitcast176.i90, zeroinitializer		; <<4 x i32>> [#uses=1]
+	%bitcast179.i93 = bitcast <4 x i32> %andnps178.i92 to <4 x float>		; <<4 x float>> [#uses=1]
+	%mul186.i96 = fmul <4 x float> %bitcast179.i93, zeroinitializer		; <<4 x float>> [#uses=1]
+	%bitcast190.i98 = bitcast <4 x float> %mul186.i96 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%andnps192.i100 = and <4 x i32> %bitcast190.i98, zeroinitializer		; <<4 x i32>> [#uses=1]
+	%xorps.i102 = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
+	%orps203.i103 = or <4 x i32> %andnps192.i100, %xorps.i102		; <<4 x i32>> [#uses=1]
+	%bitcast204.i104 = bitcast <4 x i32> %orps203.i103 to <4 x float>		; <<4 x float>> [#uses=1]
+	%cmple.i = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> %tmp51, i8 2) nounwind		; <<4 x float>> [#uses=1]
+	%tmp80 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> zeroinitializer) nounwind		; <<4 x float>> [#uses=1]
+	%sub140.i = fsub <4 x float> zeroinitializer, %tmp80		; <<4 x float>> [#uses=1]
+	%bitcast148.i = bitcast <4 x float> zeroinitializer to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%andnps150.i = and <4 x i32> %bitcast148.i, < i32 -2139095041, i32 -2139095041, i32 -2139095041, i32 -2139095041 >		; <<4 x i32>> [#uses=0]
+	%mul171.i = fmul <4 x float> zeroinitializer, %sub140.i		; <<4 x float>> [#uses=1]
+	%add172.i = fadd <4 x float> %mul171.i, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 >		; <<4 x float>> [#uses=1]
+	%bitcast176.i = bitcast <4 x float> %add172.i to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%andnps178.i = and <4 x i32> %bitcast176.i, zeroinitializer		; <<4 x i32>> [#uses=1]
+	%bitcast179.i = bitcast <4 x i32> %andnps178.i to <4 x float>		; <<4 x float>> [#uses=1]
+	%mul186.i = fmul <4 x float> %bitcast179.i, zeroinitializer		; <<4 x float>> [#uses=1]
+	%bitcast189.i = bitcast <4 x float> zeroinitializer to <4 x i32>		; <<4 x i32>> [#uses=0]
+	%bitcast190.i = bitcast <4 x float> %mul186.i to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%andnps192.i = and <4 x i32> %bitcast190.i, zeroinitializer		; <<4 x i32>> [#uses=1]
+	%bitcast198.i = bitcast <4 x float> %cmple.i to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%xorps.i = xor <4 x i32> %bitcast198.i, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
+	%orps203.i = or <4 x i32> %andnps192.i, %xorps.i		; <<4 x i32>> [#uses=1]
+	%bitcast204.i = bitcast <4 x i32> %orps203.i to <4 x float>		; <<4 x float>> [#uses=1]
+	%mul307 = fmul <4 x float> %bitcast204.i185, zeroinitializer		; <<4 x float>> [#uses=1]
+	%mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer		; <<4 x float>> [#uses=2]
+	%mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer		; <<4 x float>> [#uses=1]
+	%tmp82 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul307, <4 x float> zeroinitializer) nounwind		; <<4 x float>> [#uses=1]
+	%bitcast11.i15 = bitcast <4 x float> %tmp82 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%andnps.i17 = and <4 x i32> %bitcast11.i15, zeroinitializer		; <<4 x i32>> [#uses=1]
+	%orps.i18 = or <4 x i32> %andnps.i17, zeroinitializer		; <<4 x i32>> [#uses=1]
+	%bitcast17.i19 = bitcast <4 x i32> %orps.i18 to <4 x float>		; <<4 x float>> [#uses=1]
+	%tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind		; <<4 x float>> [#uses=1]
+	%bitcast.i3 = bitcast <4 x float> %mul310 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%bitcast6.i4 = bitcast <4 x float> zeroinitializer to <4 x i32>		; <<4 x i32>> [#uses=2]
+	%andps.i5 = and <4 x i32> %bitcast.i3, %bitcast6.i4		; <<4 x i32>> [#uses=1]
+	%bitcast11.i6 = bitcast <4 x float> %tmp83 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%not.i7 = xor <4 x i32> %bitcast6.i4, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
+	%andnps.i8 = and <4 x i32> %bitcast11.i6, %not.i7		; <<4 x i32>> [#uses=1]
+	%orps.i9 = or <4 x i32> %andnps.i8, %andps.i5		; <<4 x i32>> [#uses=1]
+	%bitcast17.i10 = bitcast <4 x i32> %orps.i9 to <4 x float>		; <<4 x float>> [#uses=1]
+	%bitcast.i = bitcast <4 x float> %mul313 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%andps.i = and <4 x i32> %bitcast.i, zeroinitializer		; <<4 x i32>> [#uses=1]
+	%orps.i = or <4 x i32> zeroinitializer, %andps.i		; <<4 x i32>> [#uses=1]
+	%bitcast17.i = bitcast <4 x i32> %orps.i to <4 x float>		; <<4 x float>> [#uses=1]
+	call void null(<4 x float> %bitcast17.i19, <4 x float> %bitcast17.i10, <4 x float> %bitcast17.i, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind
+	unreachable
+
+afterfor:		; preds = %forcond
+	ret void
+}
+
+declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
+
+declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
+
+declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
+
+declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
+
+declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/X86/fold-pcmpeqd-1.ll b/test/CodeGen/X86/fold-pcmpeqd-1.ll
new file mode 100644
index 0000000..cc4198d
--- /dev/null
+++ b/test/CodeGen/X86/fold-pcmpeqd-1.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
+; RUN: grep pcmpeqd %t | count 1
+; RUN: grep xor %t | count 1
+; RUN: not grep LCP %t
+
+define <2 x double> @foo() nounwind {
+  ret <2 x double> bitcast (<2 x i64><i64 -1, i64 -1> to <2 x double>)
+}
+define <2 x double> @bar() nounwind {
+  ret <2 x double> bitcast (<2 x i64><i64 0, i64 0> to <2 x double>)
+}
diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll
new file mode 100644
index 0000000..49f8795
--- /dev/null
+++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -0,0 +1,83 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah  | not grep pcmpeqd
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1
+
+; This testcase should need to spill the -1 value on x86-32,
+; so it shouldn't use pcmpeqd to materialize an all-ones vector; it
+; should use a constant-pool load instead.
+
+	%struct.__ImageExecInfo = type <{ <4 x i32>, <4 x float>, <2 x i64>, i8*, i8*, i8*, i32, i32, i32, i32, i32 }>
+	%struct._cl_image_format_t = type <{ i32, i32, i32 }>
+	%struct._image2d_t = type <{ i8*, %struct._cl_image_format_t, i32, i32, i32, i32, i32, i32 }>
+
+define void @program_1(%struct._image2d_t* %dest, %struct._image2d_t* %t0, <4 x float> %p0, <4 x float> %p1, <4 x float> %p4, <4 x float> %p5, <4 x float> %p6) nounwind {
+entry:
+	%tmp3.i = load i32* null		; <i32> [#uses=1]
+	%cmp = icmp slt i32 0, %tmp3.i		; <i1> [#uses=1]
+	br i1 %cmp, label %forcond, label %ifthen
+
+ifthen:		; preds = %entry
+	ret void
+
+forcond:		; preds = %entry
+	%tmp3.i536 = load i32* null		; <i32> [#uses=1]
+	%cmp12 = icmp slt i32 0, %tmp3.i536		; <i1> [#uses=1]
+	br i1 %cmp12, label %forbody, label %afterfor
+
+forbody:		; preds = %forcond
+	%bitcast204.i104 = bitcast <4 x i32> zeroinitializer to <4 x float>		; <<4 x float>> [#uses=1]
+	%tmp78 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> < float 1.280000e+02, float 1.280000e+02, float 1.280000e+02, float 1.280000e+02 >, <4 x float> zeroinitializer) nounwind		; <<4 x float>> [#uses=2]
+	%tmp79 = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %tmp78) nounwind		; <<4 x i32>> [#uses=1]
+	%tmp80 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp79) nounwind		; <<4 x float>> [#uses=1]
+	%sub140.i = fsub <4 x float> %tmp78, %tmp80		; <<4 x float>> [#uses=2]
+	%mul166.i = fmul <4 x float> zeroinitializer, %sub140.i		; <<4 x float>> [#uses=1]
+	%add167.i = fadd <4 x float> %mul166.i, < float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000 >		; <<4 x float>> [#uses=1]
+	%mul171.i = fmul <4 x float> %add167.i, %sub140.i		; <<4 x float>> [#uses=1]
+	%add172.i = fadd <4 x float> %mul171.i, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 >		; <<4 x float>> [#uses=1]
+	%bitcast176.i = bitcast <4 x float> %add172.i to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%andnps178.i = and <4 x i32> %bitcast176.i, zeroinitializer		; <<4 x i32>> [#uses=1]
+	%bitcast179.i = bitcast <4 x i32> %andnps178.i to <4 x float>		; <<4 x float>> [#uses=1]
+	%mul186.i = fmul <4 x float> %bitcast179.i, zeroinitializer		; <<4 x float>> [#uses=1]
+	%bitcast190.i = bitcast <4 x float> %mul186.i to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%andnps192.i = and <4 x i32> %bitcast190.i, zeroinitializer		; <<4 x i32>> [#uses=1]
+	%xorps.i = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
+	%orps203.i = or <4 x i32> %andnps192.i, %xorps.i		; <<4 x i32>> [#uses=1]
+	%bitcast204.i = bitcast <4 x i32> %orps203.i to <4 x float>		; <<4 x float>> [#uses=1]
+	%mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer		; <<4 x float>> [#uses=2]
+	%mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer		; <<4 x float>> [#uses=1]
+	%cmpunord.i11 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i8 3) nounwind		; <<4 x float>> [#uses=1]
+	%bitcast6.i13 = bitcast <4 x float> %cmpunord.i11 to <4 x i32>		; <<4 x i32>> [#uses=2]
+	%andps.i14 = and <4 x i32> zeroinitializer, %bitcast6.i13		; <<4 x i32>> [#uses=1]
+	%not.i16 = xor <4 x i32> %bitcast6.i13, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
+	%andnps.i17 = and <4 x i32> zeroinitializer, %not.i16		; <<4 x i32>> [#uses=1]
+	%orps.i18 = or <4 x i32> %andnps.i17, %andps.i14		; <<4 x i32>> [#uses=1]
+	%bitcast17.i19 = bitcast <4 x i32> %orps.i18 to <4 x float>		; <<4 x float>> [#uses=1]
+	%tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind		; <<4 x float>> [#uses=1]
+	%bitcast.i3 = bitcast <4 x float> %mul310 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%andps.i5 = and <4 x i32> %bitcast.i3, zeroinitializer		; <<4 x i32>> [#uses=1]
+	%bitcast11.i6 = bitcast <4 x float> %tmp83 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%not.i7 = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
+	%andnps.i8 = and <4 x i32> %bitcast11.i6, %not.i7		; <<4 x i32>> [#uses=1]
+	%orps.i9 = or <4 x i32> %andnps.i8, %andps.i5		; <<4 x i32>> [#uses=1]
+	%bitcast17.i10 = bitcast <4 x i32> %orps.i9 to <4 x float>		; <<4 x float>> [#uses=1]
+	%tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind		; <<4 x float>> [#uses=1]
+	%bitcast6.i = bitcast <4 x float> zeroinitializer to <4 x i32>		; <<4 x i32>> [#uses=2]
+	%andps.i = and <4 x i32> zeroinitializer, %bitcast6.i		; <<4 x i32>> [#uses=1]
+	%bitcast11.i = bitcast <4 x float> %tmp84 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%not.i = xor <4 x i32> %bitcast6.i, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
+	%andnps.i = and <4 x i32> %bitcast11.i, %not.i		; <<4 x i32>> [#uses=1]
+	%orps.i = or <4 x i32> %andnps.i, %andps.i		; <<4 x i32>> [#uses=1]
+	%bitcast17.i = bitcast <4 x i32> %orps.i to <4 x float>		; <<4 x float>> [#uses=1]
+	call void null(<4 x float> %bitcast17.i19, <4 x float> %bitcast17.i10, <4 x float> %bitcast17.i, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind
+	unreachable
+
+afterfor:		; preds = %forcond
+	ret void
+}
+
+declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
+
+declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
+
+declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
+
+declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/X86/fold-sext-trunc.ll b/test/CodeGen/X86/fold-sext-trunc.ll
new file mode 100644
index 0000000..2605123
--- /dev/null
+++ b/test/CodeGen/X86/fold-sext-trunc.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86-64 | grep movslq | count 1
+; PR4050
+
+	type { i64 }		; type %0
+	%struct.S1 = type { i16, i32 }
+@g_10 = external global %struct.S1		; <%struct.S1*> [#uses=2]
+
+declare void @func_28(i64, i64)
+
+define void @int322(i32 %foo) nounwind {
+entry:
+	%val = load i64* getelementptr (%0* bitcast (%struct.S1* @g_10 to %0*), i32 0, i32 0)		; <i64> [#uses=1]
+	%0 = load i32* getelementptr (%struct.S1* @g_10, i32 0, i32 1), align 4		; <i32> [#uses=1]
+	%1 = sext i32 %0 to i64		; <i64> [#uses=1]
+	%tmp4.i = lshr i64 %val, 32		; <i64> [#uses=1]
+	%tmp5.i = trunc i64 %tmp4.i to i32		; <i32> [#uses=1]
+	%2 = sext i32 %tmp5.i to i64		; <i64> [#uses=1]
+	tail call void @func_28(i64 %2, i64 %1) nounwind
+	ret void
+}
diff --git a/test/CodeGen/X86/fp-immediate-shorten.ll b/test/CodeGen/X86/fp-immediate-shorten.ll
new file mode 100644
index 0000000..cafc61a
--- /dev/null
+++ b/test/CodeGen/X86/fp-immediate-shorten.ll
@@ -0,0 +1,9 @@
+;; Test that this FP immediate is stored in the constant pool as a float.
+
+; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3 | \
+; RUN:   grep {.long.1123418112}
+
+define double @D() {
+        ret double 1.230000e+02
+}
+
diff --git a/test/CodeGen/X86/fp-in-intregs.ll b/test/CodeGen/X86/fp-in-intregs.ll
new file mode 100644
index 0000000..08ea77d
--- /dev/null
+++ b/test/CodeGen/X86/fp-in-intregs.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=x86 -mcpu=yonah | not egrep {\(\(xor\|and\)ps\|movd\)}
+
+; These operations should be done in integer registers, eliminating constant
+; pool loads, movd's etc.
+
+define i32 @test1(float %x) nounwind  {
+entry:
+	%tmp2 = fsub float -0.000000e+00, %x		; <float> [#uses=1]
+	%tmp210 = bitcast float %tmp2 to i32		; <i32> [#uses=1]
+	ret i32 %tmp210
+}
+
+define i32 @test2(float %x) nounwind  {
+entry:
+	%tmp2 = tail call float @copysignf( float 1.000000e+00, float %x ) nounwind readnone 		; <float> [#uses=1]
+	%tmp210 = bitcast float %tmp2 to i32		; <i32> [#uses=1]
+	ret i32 %tmp210
+}
+
+declare float @copysignf(float, float) nounwind readnone 
+
diff --git a/test/CodeGen/X86/fp-stack-2results.ll b/test/CodeGen/X86/fp-stack-2results.ll
new file mode 100644
index 0000000..321e267
--- /dev/null
+++ b/test/CodeGen/X86/fp-stack-2results.ll
@@ -0,0 +1,60 @@
+; RUN: llc < %s -march=x86 | grep fldz
+; RUN: llc < %s -march=x86-64 | grep fld1
+
+; This is basically this code on x86-64:
+; _Complex long double test() { return 1.0; }
+define {x86_fp80, x86_fp80} @test() {
+  %A = fpext double 1.0 to x86_fp80
+  %B = fpext double 0.0 to x86_fp80
+  ret x86_fp80 %A, x86_fp80 %B
+}
+
+
+;_test2:
+;	fld1
+;	fld	%st(0)
+;	ret
+define {x86_fp80, x86_fp80} @test2() {
+  %A = fpext double 1.0 to x86_fp80
+  ret x86_fp80 %A, x86_fp80 %A
+}
+
+; Uses both values.
+define void @call1(x86_fp80 *%P1, x86_fp80 *%P2) {
+  %a = call {x86_fp80,x86_fp80} @test()
+  %b = getresult {x86_fp80,x86_fp80} %a, 0
+  store x86_fp80 %b, x86_fp80* %P1
+
+  %c = getresult {x86_fp80,x86_fp80} %a, 1
+  store x86_fp80 %c, x86_fp80* %P2
+  ret void 
+}
+
+; Uses both values, requires fxch
+define void @call2(x86_fp80 *%P1, x86_fp80 *%P2) {
+  %a = call {x86_fp80,x86_fp80} @test()
+  %b = getresult {x86_fp80,x86_fp80} %a, 1
+  store x86_fp80 %b, x86_fp80* %P1
+
+  %c = getresult {x86_fp80,x86_fp80} %a, 0
+  store x86_fp80 %c, x86_fp80* %P2
+  ret void
+}
+
+; Uses ST(0), ST(1) is dead but must be popped.
+define void @call3(x86_fp80 *%P1, x86_fp80 *%P2) {
+  %a = call {x86_fp80,x86_fp80} @test()
+  %b = getresult {x86_fp80,x86_fp80} %a, 0
+  store x86_fp80 %b, x86_fp80* %P1
+  ret void 
+}
+
+; Uses ST(1), ST(0) is dead and must be popped.
+define void @call4(x86_fp80 *%P1, x86_fp80 *%P2) {
+  %a = call {x86_fp80,x86_fp80} @test()
+
+  %c = getresult {x86_fp80,x86_fp80} %a, 1
+  store x86_fp80 %c, x86_fp80* %P2
+  ret void 
+}
+
diff --git a/test/CodeGen/X86/fp-stack-O0-crash.ll b/test/CodeGen/X86/fp-stack-O0-crash.ll
new file mode 100644
index 0000000..4768ea2
--- /dev/null
+++ b/test/CodeGen/X86/fp-stack-O0-crash.ll
@@ -0,0 +1,30 @@
+; RUN: llc %s -O0 -fast-isel -regalloc=local -o -
+; PR4767
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin10"
+
+define void @fn(x86_fp80 %x) nounwind ssp {
+entry:
+  %x.addr = alloca x86_fp80                       ; <x86_fp80*> [#uses=5]
+  store x86_fp80 %x, x86_fp80* %x.addr
+  br i1 false, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %entry
+  %tmp = load x86_fp80* %x.addr                   ; <x86_fp80> [#uses=1]
+  %tmp1 = load x86_fp80* %x.addr                  ; <x86_fp80> [#uses=1]
+  %cmp = fcmp oeq x86_fp80 %tmp, %tmp1            ; <i1> [#uses=1]
+  br i1 %cmp, label %if.then, label %if.end
+
+cond.false:                                       ; preds = %entry
+  %tmp2 = load x86_fp80* %x.addr                  ; <x86_fp80> [#uses=1]
+  %tmp3 = load x86_fp80* %x.addr                  ; <x86_fp80> [#uses=1]
+  %cmp4 = fcmp une x86_fp80 %tmp2, %tmp3          ; <i1> [#uses=1]
+  br i1 %cmp4, label %if.then, label %if.end
+
+if.then:                                          ; preds = %cond.false, %cond.true
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %cond.false, %cond.true
+  ret void
+}
diff --git a/test/CodeGen/X86/fp-stack-compare.ll b/test/CodeGen/X86/fp-stack-compare.ll
new file mode 100644
index 0000000..4bdf459
--- /dev/null
+++ b/test/CodeGen/X86/fp-stack-compare.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mcpu=i386 | \
+; RUN:   grep {fucomi.*st.\[12\]}
+; PR1012
+
+define float @foo(float* %col.2.0) {
+        %tmp = load float* %col.2.0             ; <float> [#uses=3]
+        %tmp16 = fcmp olt float %tmp, 0.000000e+00              ; <i1> [#uses=1]
+        %tmp20 = fsub float -0.000000e+00, %tmp          ; <float> [#uses=1]
+        %iftmp.2.0 = select i1 %tmp16, float %tmp20, float %tmp         ; <float> [#uses=1]
+        ret float %iftmp.2.0
+}
+
diff --git a/test/CodeGen/X86/fp-stack-direct-ret.ll b/test/CodeGen/X86/fp-stack-direct-ret.ll
new file mode 100644
index 0000000..5a28bb5
--- /dev/null
+++ b/test/CodeGen/X86/fp-stack-direct-ret.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86 | not grep fstp
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep movsd
+
+declare double @foo()
+
+define double @bar() {
+entry:
+	%tmp5 = tail call double @foo()
+	ret double %tmp5
+}
+
diff --git a/test/CodeGen/X86/fp-stack-ret-conv.ll b/test/CodeGen/X86/fp-stack-ret-conv.ll
new file mode 100644
index 0000000..f220b24
--- /dev/null
+++ b/test/CodeGen/X86/fp-stack-ret-conv.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mcpu=yonah | grep cvtss2sd
+; RUN: llc < %s -mcpu=yonah | grep fstps
+; RUN: llc < %s -mcpu=yonah | not grep cvtsd2ss
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-apple-darwin8"
+
+define void @test(double *%b) {
+entry:
+	%tmp13 = tail call double @foo()
+	%tmp1314 = fptrunc double %tmp13 to float		; <float> [#uses=1]
+	%tmp3940 = fpext float %tmp1314 to double		; <double> [#uses=1]
+	volatile store double %tmp3940, double* %b
+	ret void
+}
+
+declare double @foo()
diff --git a/test/CodeGen/X86/fp-stack-ret-store.ll b/test/CodeGen/X86/fp-stack-ret-store.ll
new file mode 100644
index 0000000..05dfc54
--- /dev/null
+++ b/test/CodeGen/X86/fp-stack-ret-store.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mcpu=yonah | not grep movss
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin8"
+
+; This should store directly into P from the FP stack.  It should not
+; go through a stack slot to get there.
+
+define void @bar(double* %P) {
+entry:
+	%tmp = tail call double (...)* @foo( )		; <double> [#uses=1]
+	store double %tmp, double* %P, align 8
+	ret void
+}
+
+declare double @foo(...)
+
+define void @bar2(float* %P) {
+entry:
+	%tmp = tail call double (...)* @foo2( )		; <double> [#uses=1]
+	%tmp1 = fptrunc double %tmp to float		; <float> [#uses=1]
+	store float %tmp1, float* %P, align 4
+	ret void
+}
+
+declare double @foo2(...)
+
diff --git a/test/CodeGen/X86/fp-stack-ret.ll b/test/CodeGen/X86/fp-stack-ret.ll
new file mode 100644
index 0000000..c83a0cb
--- /dev/null
+++ b/test/CodeGen/X86/fp-stack-ret.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=i686-apple-darwin8 -mcpu=yonah -march=x86 > %t
+; RUN: grep fldl %t | count 1
+; RUN: not grep xmm %t
+; RUN: grep {sub.*esp} %t | count 1
+
+; These testcases shouldn't require loading into an XMM register then storing 
+; to memory, then reloading into an FPStack reg.
+
+define double @test1(double *%P) {
+        %A = load double* %P
+        ret double %A
+}
+
+; fastcc should return a value 
+define fastcc double @test2(<2 x double> %A) {
+	%B = extractelement <2 x double> %A, i32 0
+	ret double %B
+}
+
+define fastcc double @test3(<4 x float> %A) {
+	%B = bitcast <4 x float> %A to <2 x double>
+	%C = call fastcc double @test2(<2 x double> %B)
+	ret double %C
+}
+	
diff --git a/test/CodeGen/X86/fp-stack-retcopy.ll b/test/CodeGen/X86/fp-stack-retcopy.ll
new file mode 100644
index 0000000..67dcb18
--- /dev/null
+++ b/test/CodeGen/X86/fp-stack-retcopy.ll
@@ -0,0 +1,12 @@
+; This should not copy the result of foo into an xmm register.
+; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i686-apple-darwin9 | not grep xmm
+; rdar://5689903
+
+declare double @foo()
+
+define double @carg({ double, double }* byval  %z) nounwind  {
+entry:
+	%tmp5 = tail call double @foo() nounwind 		; <double> [#uses=1]
+	ret double %tmp5
+}
+
diff --git a/test/CodeGen/X86/fp-stack-set-st1.ll b/test/CodeGen/X86/fp-stack-set-st1.ll
new file mode 100644
index 0000000..894897a
--- /dev/null
+++ b/test/CodeGen/X86/fp-stack-set-st1.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=x86 | grep fxch | count 2
+
+define i32 @main() nounwind {
+entry:
+	%asmtmp = tail call { double, double } asm sideeffect "fmul\09%st(1),%st\0A\09fst\09%st(1)\0A\09frndint\0A\09fxch  %st(1)\0A\09fsub\09%st(1),%st\0A\09f2xm1\0A\09", "={st},={st(1)},0,1,~{dirflag},~{fpsr},~{flags}"(double 0x4030FEFBD582097D, double 4.620000e+01) nounwind		; <{ double, double }> [#uses=0]
+	unreachable
+}
diff --git a/test/CodeGen/X86/fp2sint.ll b/test/CodeGen/X86/fp2sint.ll
new file mode 100644
index 0000000..1675444
--- /dev/null
+++ b/test/CodeGen/X86/fp2sint.ll
@@ -0,0 +1,18 @@
+;; LowerFP_TO_SINT should not create a stack object if it's not needed.
+
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep add
+
+define i32 @main(i32 %argc, i8** %argv) {
+cond_false.i.i.i:               ; preds = %bb.i5
+       %tmp35.i = load double* null, align 8           ; <double> [#uses=1]
+       %tmp3536.i = fptosi double %tmp35.i to i32              ; <i32> [#uses=1]
+       %tmp3536140.i = zext i32 %tmp3536.i to i64              ; <i64> [#uses=1]
+       %tmp39.i = load double* null, align 4           ; <double> [#uses=1]
+       %tmp3940.i = fptosi double %tmp39.i to i32              ; <i32> [#uses=1]
+       %tmp3940137.i = zext i32 %tmp3940.i to i64              ; <i64> [#uses=1]
+       %tmp3940137138.i = shl i64 %tmp3940137.i, 32            ; <i64> [#uses=1]
+       %tmp3940137138.ins.i = or i64 %tmp3940137138.i, %tmp3536140.i           ; <i64> [#uses=1]
+       %tmp95.i.i = trunc i64 %tmp3940137138.ins.i to i32              ; <i32> [#uses=1]
+       store i32 %tmp95.i.i, i32* null, align 4
+       ret i32 0
+}
diff --git a/test/CodeGen/X86/fp_constant_op.ll b/test/CodeGen/X86/fp_constant_op.ll
new file mode 100644
index 0000000..b3ec538
--- /dev/null
+++ b/test/CodeGen/X86/fp_constant_op.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel -mcpu=i486 | FileCheck %s
+; Test that the load of the constant is folded into the operation.
+
+
+define double @foo_add(double %P) {
+	%tmp.1 = fadd double %P, 1.230000e+02		; <double> [#uses=1]
+	ret double %tmp.1
+}
+; CHECK: foo_add:
+; CHECK: fadd DWORD PTR
+
+define double @foo_mul(double %P) {
+	%tmp.1 = fmul double %P, 1.230000e+02		; <double> [#uses=1]
+	ret double %tmp.1
+}
+; CHECK: foo_mul:
+; CHECK: fmul DWORD PTR
+
+define double @foo_sub(double %P) {
+	%tmp.1 = fsub double %P, 1.230000e+02		; <double> [#uses=1]
+	ret double %tmp.1
+}
+; CHECK: foo_sub:
+; CHECK: fadd DWORD PTR
+
+define double @foo_subr(double %P) {
+	%tmp.1 = fsub double 1.230000e+02, %P		; <double> [#uses=1]
+	ret double %tmp.1
+}
+; CHECK: foo_subr:
+; CHECK: fsub QWORD PTR
+
+define double @foo_div(double %P) {
+	%tmp.1 = fdiv double %P, 1.230000e+02		; <double> [#uses=1]
+	ret double %tmp.1
+}
+; CHECK: foo_div:
+; CHECK: fdiv DWORD PTR
+
+define double @foo_divr(double %P) {
+	%tmp.1 = fdiv double 1.230000e+02, %P		; <double> [#uses=1]
+	ret double %tmp.1
+}
+; CHECK: foo_divr:
+; CHECK: fdiv QWORD PTR
+
diff --git a/test/CodeGen/X86/fp_load_cast_fold.ll b/test/CodeGen/X86/fp_load_cast_fold.ll
new file mode 100644
index 0000000..a160ac6
--- /dev/null
+++ b/test/CodeGen/X86/fp_load_cast_fold.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86 | grep fild | not grep ESP
+
+define double @short(i16* %P) {
+        %V = load i16* %P               ; <i16> [#uses=1]
+        %V2 = sitofp i16 %V to double           ; <double> [#uses=1]
+        ret double %V2
+}
+
+define double @int(i32* %P) {
+        %V = load i32* %P               ; <i32> [#uses=1]
+        %V2 = sitofp i32 %V to double           ; <double> [#uses=1]
+        ret double %V2
+}
+
+define double @long(i64* %P) {
+        %V = load i64* %P               ; <i64> [#uses=1]
+        %V2 = sitofp i64 %V to double           ; <double> [#uses=1]
+        ret double %V2
+}
+
diff --git a/test/CodeGen/X86/fp_load_fold.ll b/test/CodeGen/X86/fp_load_fold.ll
new file mode 100644
index 0000000..0145069
--- /dev/null
+++ b/test/CodeGen/X86/fp_load_fold.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
+; RUN:   grep -i ST | not grep {fadd\\|fsub\\|fdiv\\|fmul}
+
+; Test that the load of the memory location is folded into the operation.
+
+define double @test_add(double %X, double* %P) {
+	%Y = load double* %P		; <double> [#uses=1]
+	%R = fadd double %X, %Y		; <double> [#uses=1]
+	ret double %R
+}
+
+define double @test_mul(double %X, double* %P) {
+	%Y = load double* %P		; <double> [#uses=1]
+	%R = fmul double %X, %Y		; <double> [#uses=1]
+	ret double %R
+}
+
+define double @test_sub(double %X, double* %P) {
+	%Y = load double* %P		; <double> [#uses=1]
+	%R = fsub double %X, %Y		; <double> [#uses=1]
+	ret double %R
+}
+
+define double @test_subr(double %X, double* %P) {
+	%Y = load double* %P		; <double> [#uses=1]
+	%R = fsub double %Y, %X		; <double> [#uses=1]
+	ret double %R
+}
+
+define double @test_div(double %X, double* %P) {
+	%Y = load double* %P		; <double> [#uses=1]
+	%R = fdiv double %X, %Y		; <double> [#uses=1]
+	ret double %R
+}
+
+define double @test_divr(double %X, double* %P) {
+	%Y = load double* %P		; <double> [#uses=1]
+	%R = fdiv double %Y, %X		; <double> [#uses=1]
+	ret double %R
+}
diff --git a/test/CodeGen/X86/fsxor-alignment.ll b/test/CodeGen/X86/fsxor-alignment.ll
new file mode 100644
index 0000000..6a8dbcf
--- /dev/null
+++ b/test/CodeGen/X86/fsxor-alignment.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -enable-unsafe-fp-math | \
+; RUN:  grep -v sp | grep xorps | count 2
+
+; Don't fold the incoming stack arguments into the xorps instructions used
+; to do floating-point negations, because the arguments aren't vectors
+; and aren't vector-aligned.
+
+define void @foo(float* %p, float* %q, float %s, float %y) {
+  %ss = fsub float -0.0, %s
+  %yy = fsub float -0.0, %y
+  store float %ss, float* %p
+  store float %yy, float* %q
+  ret void
+}
diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll
new file mode 100644
index 0000000..3bd58b6
--- /dev/null
+++ b/test/CodeGen/X86/full-lsr.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -march=x86 >%t
+
+; TODO: Enhance full lsr mode to get this:
+; RUNX: grep {addl	\\\$4,} %t | count 3
+; RUNX: not grep {,%} %t
+
+; For now, it should find this, which is still pretty good:
+; RUN: not grep {addl	\\\$4,} %t
+; RUN: grep {,%} %t | count 6
+
+define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind {
+entry:
+	%0 = icmp sgt i32 %N, 0		; <i1> [#uses=1]
+	br i1 %0, label %bb, label %return
+
+bb:		; preds = %bb, %entry
+	%i.03 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]		; <i32> [#uses=5]
+	%1 = getelementptr float* %A, i32 %i.03		; <float*> [#uses=1]
+	%2 = load float* %1, align 4		; <float> [#uses=1]
+	%3 = getelementptr float* %B, i32 %i.03		; <float*> [#uses=1]
+	%4 = load float* %3, align 4		; <float> [#uses=1]
+	%5 = fadd float %2, %4		; <float> [#uses=1]
+	%6 = getelementptr float* %C, i32 %i.03		; <float*> [#uses=1]
+	store float %5, float* %6, align 4
+	%7 = add i32 %i.03, 10		; <i32> [#uses=3]
+	%8 = getelementptr float* %A, i32 %7		; <float*> [#uses=1]
+	%9 = load float* %8, align 4		; <float> [#uses=1]
+	%10 = getelementptr float* %B, i32 %7		; <float*> [#uses=1]
+	%11 = load float* %10, align 4		; <float> [#uses=1]
+	%12 = fadd float %9, %11		; <float> [#uses=1]
+	%13 = getelementptr float* %C, i32 %7		; <float*> [#uses=1]
+	store float %12, float* %13, align 4
+	%indvar.next = add i32 %i.03, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next, %N		; <i1> [#uses=1]
+	br i1 %exitcond, label %return, label %bb
+
+return:		; preds = %bb, %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/ga-offset.ll b/test/CodeGen/X86/ga-offset.ll
new file mode 100644
index 0000000..9f6d3f7
--- /dev/null
+++ b/test/CodeGen/X86/ga-offset.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=x86 > %t
+; RUN: not grep lea %t
+; RUN: not grep add %t
+; RUN: grep mov %t | count 1
+; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static > %t
+; RUN: not grep lea %t
+; RUN: not grep add %t
+; RUN: grep mov %t | count 1
+
+; This store should fold to a single mov instruction.
+
+@ptr = global i32* null
+@dst = global [131072 x i32] zeroinitializer
+
+define void @foo() nounwind {
+  store i32* getelementptr ([131072 x i32]* @dst, i32 0, i32 16), i32** @ptr
+  ret void
+}
diff --git a/test/CodeGen/X86/global-sections-tls.ll b/test/CodeGen/X86/global-sections-tls.ll
new file mode 100644
index 0000000..2c23030
--- /dev/null
+++ b/test/CodeGen/X86/global-sections-tls.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s -check-prefix=LINUX
+
+; PR4639
+@G1 = internal thread_local global i32 0		; <i32*> [#uses=1]
+; LINUX: .section		.tbss,"awT",@nobits
+; LINUX: G1:
+
+
+define i32* @foo() nounwind readnone {
+entry:
+	ret i32* @G1
+}
+
+
diff --git a/test/CodeGen/X86/global-sections.ll b/test/CodeGen/X86/global-sections.ll
new file mode 100644
index 0000000..1a7b577
--- /dev/null
+++ b/test/CodeGen/X86/global-sections.ll
@@ -0,0 +1,137 @@
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mtriple=i386-apple-darwin9.7 | FileCheck %s -check-prefix=DARWIN
+
+
+; int G1;
+@G1 = common global i32 0
+
+; LINUX: .type   G1,@object
+; LINUX: .comm  G1,4,4
+
+; DARWIN: .comm	_G1,4,2
+
+
+
+
+; const int G2 __attribute__((weak)) = 42;
+@G2 = weak_odr constant i32 42	
+
+
+; TODO: linux drops this into .rodata, we drop it into ".gnu.linkonce.r.G2"
+
+; DARWIN: .section __TEXT,__const_coal,coalesced
+; DARWIN: _G2:
+; DARWIN:    .long 42
+
+
+; int * const G3 = &G1;
+@G3 = constant i32* @G1
+
+; DARWIN: .section        __DATA,__const
+; DARWIN: .globl _G3
+; DARWIN: _G3:
+; DARWIN:     .long _G1
+
+
+; _Complex long long const G4 = 34;
+@G4 = constant {i64,i64} { i64 34, i64 0 }
+
+; DARWIN: .section        __TEXT,__const
+; DARWIN: _G4:
+; DARWIN:     .long 34
+
+
+; int G5 = 47;
+@G5 = global i32 47
+
+; LINUX: .data
+; LINUX: .globl G5
+; LINUX: G5:
+; LINUX:    .long 47
+
+; DARWIN: .section        __DATA,__data
+; DARWIN: .globl _G5
+; DARWIN: _G5:
+; DARWIN:    .long 47
+
+
+; PR4584
+@"foo bar" = linkonce global i32 42
+
+; LINUX: .type	foo_20_bar,@object
+; LINUX:.section	.gnu.linkonce.d.foo_20_bar,"aw",@progbits
+; LINUX: .weak	foo_20_bar
+; LINUX: foo_20_bar:
+
+; DARWIN: .section		__DATA,__datacoal_nt,coalesced
+; DARWIN: .globl	"_foo bar"
+; DARWIN:	.weak_definition "_foo bar"
+; DARWIN: "_foo bar":
+
+; PR4650
+@G6 = weak_odr constant [1 x i8] c"\01"
+
+; LINUX:   .type	G6,@object
+; LINUX:   .section	.gnu.linkonce.r.G6,"a",@progbits
+; LINUX:   .weak	G6
+; LINUX: G6:
+; LINUX:   .byte	1
+; LINUX:   .size	G6, 1
+
+; DARWIN:  .section __TEXT,__const_coal,coalesced
+; DARWIN:  .globl _G6
+; DARWIN:  .weak_definition _G6
+; DARWIN:_G6:
+; DARWIN:  .byte 1
+
+
+@G7 = constant [10 x i8] c"abcdefghi\00"
+
+; DARWIN:	__TEXT,__cstring,cstring_literals
+; DARWIN:	.globl _G7
+; DARWIN: _G7:
+; DARWIN:	.asciz	"abcdefghi"
+
+; LINUX:	.section		.rodata.str1.1,"aMS",@progbits,1
+; LINUX:	.globl G7
+; LINUX: G7:
+; LINUX:	.asciz	"abcdefghi"
+
+
+@G8 = constant [4 x i16] [ i16 1, i16 2, i16 3, i16 0 ]
+
+; DARWIN:	.section	__TEXT,__ustring
+; DARWIN:	.globl _G8
+; DARWIN: _G8:
+
+; LINUX:	.section		.rodata.str2.2,"aMS",@progbits,2
+; LINUX:	.globl G8
+; LINUX:G8:
+
+@G9 = constant [4 x i32] [ i32 1, i32 2, i32 3, i32 0 ]
+
+; DARWIN:	.section        __TEXT,__const
+; DARWIN:	.globl _G9
+; DARWIN: _G9:
+
+; LINUX:	.section		.rodata.str4.4,"aMS",@progbits,4
+; LINUX:	.globl G9
+; LINUX:G9:
+
+
+@G10 = weak global [100 x i32] zeroinitializer, align 32 ; <[100 x i32]*> [#uses=0]
+
+
+; DARWIN: 	.section	__DATA,__datacoal_nt,coalesced
+; DARWIN: .globl _G10
+; DARWIN:	.weak_definition _G10
+; DARWIN:	.align	5
+; DARWIN: _G10:
+; DARWIN:	.space	400
+
+; LINUX:	.bss
+; LINUX:	.weak	G10
+; LINUX:	.align	32
+; LINUX: G10:
+; LINUX:	.zero	400
+
diff --git a/test/CodeGen/X86/h-register-addressing-32.ll b/test/CodeGen/X86/h-register-addressing-32.ll
new file mode 100644
index 0000000..76ffd66
--- /dev/null
+++ b/test/CodeGen/X86/h-register-addressing-32.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -march=x86 | grep {movzbl	%\[abcd\]h,} | count 7
+
+; Use h-register extract and zero-extend.
+
+define double @foo8(double* nocapture inreg %p, i32 inreg %x) nounwind readonly {
+  %t0 = lshr i32 %x, 8
+  %t1 = and i32 %t0, 255
+  %t2 = getelementptr double* %p, i32 %t1
+  %t3 = load double* %t2, align 8
+  ret double %t3
+}
+define float @foo4(float* nocapture inreg %p, i32 inreg %x) nounwind readonly {
+  %t0 = lshr i32 %x, 8
+  %t1 = and i32 %t0, 255
+  %t2 = getelementptr float* %p, i32 %t1
+  %t3 = load float* %t2, align 8
+  ret float %t3
+}
+define i16 @foo2(i16* nocapture inreg %p, i32 inreg %x) nounwind readonly {
+  %t0 = lshr i32 %x, 8
+  %t1 = and i32 %t0, 255
+  %t2 = getelementptr i16* %p, i32 %t1
+  %t3 = load i16* %t2, align 8
+  ret i16 %t3
+}
+define i8 @foo1(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
+  %t0 = lshr i32 %x, 8
+  %t1 = and i32 %t0, 255
+  %t2 = getelementptr i8* %p, i32 %t1
+  %t3 = load i8* %t2, align 8
+  ret i8 %t3
+}
+define i8 @bar8(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
+  %t0 = lshr i32 %x, 5
+  %t1 = and i32 %t0, 2040
+  %t2 = getelementptr i8* %p, i32 %t1
+  %t3 = load i8* %t2, align 8
+  ret i8 %t3
+}
+define i8 @bar4(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
+  %t0 = lshr i32 %x, 6
+  %t1 = and i32 %t0, 1020
+  %t2 = getelementptr i8* %p, i32 %t1
+  %t3 = load i8* %t2, align 8
+  ret i8 %t3
+}
+define i8 @bar2(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
+  %t0 = lshr i32 %x, 7
+  %t1 = and i32 %t0, 510
+  %t2 = getelementptr i8* %p, i32 %t1
+  %t3 = load i8* %t2, align 8
+  ret i8 %t3
+}
diff --git a/test/CodeGen/X86/h-register-addressing-64.ll b/test/CodeGen/X86/h-register-addressing-64.ll
new file mode 100644
index 0000000..98817f3
--- /dev/null
+++ b/test/CodeGen/X86/h-register-addressing-64.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -march=x86-64 | grep {movzbl	%\[abcd\]h,} | count 7
+
+; Use h-register extract and zero-extend.
+
+define double @foo8(double* nocapture inreg %p, i64 inreg %x) nounwind readonly {
+  %t0 = lshr i64 %x, 8
+  %t1 = and i64 %t0, 255
+  %t2 = getelementptr double* %p, i64 %t1
+  %t3 = load double* %t2, align 8
+  ret double %t3
+}
+define float @foo4(float* nocapture inreg %p, i64 inreg %x) nounwind readonly {
+  %t0 = lshr i64 %x, 8
+  %t1 = and i64 %t0, 255
+  %t2 = getelementptr float* %p, i64 %t1
+  %t3 = load float* %t2, align 8
+  ret float %t3
+}
+define i16 @foo2(i16* nocapture inreg %p, i64 inreg %x) nounwind readonly {
+  %t0 = lshr i64 %x, 8
+  %t1 = and i64 %t0, 255
+  %t2 = getelementptr i16* %p, i64 %t1
+  %t3 = load i16* %t2, align 8
+  ret i16 %t3
+}
+define i8 @foo1(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
+  %t0 = lshr i64 %x, 8
+  %t1 = and i64 %t0, 255
+  %t2 = getelementptr i8* %p, i64 %t1
+  %t3 = load i8* %t2, align 8
+  ret i8 %t3
+}
+define i8 @bar8(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
+  %t0 = lshr i64 %x, 5
+  %t1 = and i64 %t0, 2040
+  %t2 = getelementptr i8* %p, i64 %t1
+  %t3 = load i8* %t2, align 8
+  ret i8 %t3
+}
+define i8 @bar4(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
+  %t0 = lshr i64 %x, 6
+  %t1 = and i64 %t0, 1020
+  %t2 = getelementptr i8* %p, i64 %t1
+  %t3 = load i8* %t2, align 8
+  ret i8 %t3
+}
+define i8 @bar2(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
+  %t0 = lshr i64 %x, 7
+  %t1 = and i64 %t0, 510
+  %t2 = getelementptr i8* %p, i64 %t1
+  %t3 = load i8* %t2, align 8
+  ret i8 %t3
+}
diff --git a/test/CodeGen/X86/h-register-store.ll b/test/CodeGen/X86/h-register-store.ll
new file mode 100644
index 0000000..d30e6b3
--- /dev/null
+++ b/test/CodeGen/X86/h-register-store.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=x86-64 > %t
+; RUN: grep mov %t | count 6
+; RUN: grep {movb	%ah, (%rsi)} %t | count 3
+; RUN: llc < %s -march=x86 > %t
+; RUN: grep mov %t | count 3
+; RUN: grep {movb	%ah, (%e} %t | count 3
+
+; Use h-register extract and store.
+
+define void @foo16(i16 inreg %p, i8* inreg %z) nounwind {
+  %q = lshr i16 %p, 8
+  %t = trunc i16 %q to i8
+  store i8 %t, i8* %z
+  ret void
+}
+define void @foo32(i32 inreg %p, i8* inreg %z) nounwind {
+  %q = lshr i32 %p, 8
+  %t = trunc i32 %q to i8
+  store i8 %t, i8* %z
+  ret void
+}
+define void @foo64(i64 inreg %p, i8* inreg %z) nounwind {
+  %q = lshr i64 %p, 8
+  %t = trunc i64 %q to i8
+  store i8 %t, i8* %z
+  ret void
+}
diff --git a/test/CodeGen/X86/h-registers-0.ll b/test/CodeGen/X86/h-registers-0.ll
new file mode 100644
index 0000000..878fd93
--- /dev/null
+++ b/test/CodeGen/X86/h-registers-0.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -march=x86-64 | grep {movzbl	%\[abcd\]h,} | count 4
+; RUN: llc < %s -march=x86    > %t
+; RUN: grep {incb	%ah} %t | count 3
+; RUN: grep {movzbl	%ah,} %t | count 3
+
+; Use h registers. On x86-64, codegen doesn't support general allocation
+; of h registers yet, due to x86 encoding complications.
+
+define void @bar64(i64 inreg %x, i8* inreg %p) nounwind {
+  %t0 = lshr i64 %x, 8
+  %t1 = trunc i64 %t0 to i8
+  %t2 = add i8 %t1, 1
+  store i8 %t2, i8* %p
+  ret void
+}
+
+define void @bar32(i32 inreg %x, i8* inreg %p) nounwind {
+  %t0 = lshr i32 %x, 8
+  %t1 = trunc i32 %t0 to i8
+  %t2 = add i8 %t1, 1
+  store i8 %t2, i8* %p
+  ret void
+}
+
+define void @bar16(i16 inreg %x, i8* inreg %p) nounwind {
+  %t0 = lshr i16 %x, 8
+  %t1 = trunc i16 %t0 to i8
+  %t2 = add i8 %t1, 1
+  store i8 %t2, i8* %p
+  ret void
+}
+
+define i64 @qux64(i64 inreg %x) nounwind {
+  %t0 = lshr i64 %x, 8
+  %t1 = and i64 %t0, 255
+  ret i64 %t1
+}
+
+define i32 @qux32(i32 inreg %x) nounwind {
+  %t0 = lshr i32 %x, 8
+  %t1 = and i32 %t0, 255
+  ret i32 %t1
+}
+
+define i16 @qux16(i16 inreg %x) nounwind {
+  %t0 = lshr i16 %x, 8
+  ret i16 %t0
+}
diff --git a/test/CodeGen/X86/h-registers-1.ll b/test/CodeGen/X86/h-registers-1.ll
new file mode 100644
index 0000000..e97ebab
--- /dev/null
+++ b/test/CodeGen/X86/h-registers-1.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -march=x86-64 > %t
+; RUN: grep {movzbl	%\[abcd\]h,} %t | count 8
+; RUN: grep {%\[abcd\]h} %t | not grep {%r\[\[:digit:\]\]*d}
+
+; LLVM creates virtual registers for values live across blocks
+; based on the type of the value. Make sure that the extracts
+; here use the GR64_NOREX register class for their result,
+; instead of plain GR64.
+
+define i64 @foo(i64 %a, i64 %b, i64 %c, i64 %d,
+                i64 %e, i64 %f, i64 %g, i64 %h) {
+  %sa = lshr i64 %a, 8
+  %A = and i64 %sa, 255
+  %sb = lshr i64 %b, 8
+  %B = and i64 %sb, 255
+  %sc = lshr i64 %c, 8
+  %C = and i64 %sc, 255
+  %sd = lshr i64 %d, 8
+  %D = and i64 %sd, 255
+  %se = lshr i64 %e, 8
+  %E = and i64 %se, 255
+  %sf = lshr i64 %f, 8
+  %F = and i64 %sf, 255
+  %sg = lshr i64 %g, 8
+  %G = and i64 %sg, 255
+  %sh = lshr i64 %h, 8
+  %H = and i64 %sh, 255
+  br label %next
+
+next:
+  %u = add i64 %A, %B
+  %v = add i64 %C, %D
+  %w = add i64 %E, %F
+  %x = add i64 %G, %H
+  %y = add i64 %u, %v
+  %z = add i64 %w, %x
+  %t = add i64 %y, %z
+  ret i64 %t
+}
diff --git a/test/CodeGen/X86/h-registers-2.ll b/test/CodeGen/X86/h-registers-2.ll
new file mode 100644
index 0000000..16e13f8
--- /dev/null
+++ b/test/CodeGen/X86/h-registers-2.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 > %t
+; RUN: grep {movzbl	%\[abcd\]h,} %t | count 1
+; RUN: grep {shll	\$3,} %t | count 1
+
+; Use an h register, but don't omit the explicit shift for
+; non-address use(s).
+
+define i32 @foo(i8* %x, i32 %y) nounwind {
+	%t0 = lshr i32 %y, 8		; <i32> [#uses=1]
+	%t1 = and i32 %t0, 255		; <i32> [#uses=2]
+        %t2 = shl i32 %t1, 3
+	%t3 = getelementptr i8* %x, i32 %t2		; <i8*> [#uses=1]
+	store i8 77, i8* %t3, align 4
+	ret i32 %t2
+}
diff --git a/test/CodeGen/X86/h-registers-3.ll b/test/CodeGen/X86/h-registers-3.ll
new file mode 100644
index 0000000..8a0b07b
--- /dev/null
+++ b/test/CodeGen/X86/h-registers-3.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86    | grep mov | count 1
+; RUN: llc < %s -march=x86-64 | grep mov | count 1
+
+define zeroext i8 @foo() nounwind ssp {
+entry:
+  %0 = tail call zeroext i16 (...)* @bar() nounwind
+  %1 = lshr i16 %0, 8
+  %2 = trunc i16 %1 to i8
+  ret i8 %2
+}
+
+declare zeroext i16 @bar(...)
diff --git a/test/CodeGen/X86/hidden-vis-2.ll b/test/CodeGen/X86/hidden-vis-2.ll
new file mode 100644
index 0000000..74554d1
--- /dev/null
+++ b/test/CodeGen/X86/hidden-vis-2.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin9   | grep mov | count 1
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 | not grep GOT
+
+@x = weak hidden global i32 0		; <i32*> [#uses=1]
+
+define i32 @t() nounwind readonly {
+entry:
+	%0 = load i32* @x, align 4		; <i32> [#uses=1]
+	ret i32 %0
+}
diff --git a/test/CodeGen/X86/hidden-vis-3.ll b/test/CodeGen/X86/hidden-vis-3.ll
new file mode 100644
index 0000000..4be881e
--- /dev/null
+++ b/test/CodeGen/X86/hidden-vis-3.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin9   | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 | FileCheck %s -check-prefix=X64
+
+@x = external hidden global i32		; <i32*> [#uses=1]
+@y = extern_weak hidden global i32	; <i32*> [#uses=1]
+
+define i32 @t() nounwind readonly {
+entry:
+; X32: _t:
+; X32: movl _y, %eax
+
+; X64: _t:
+; X64: movl _y(%rip), %eax
+
+	%0 = load i32* @x, align 4		; <i32> [#uses=1]
+	%1 = load i32* @y, align 4		; <i32> [#uses=1]
+	%2 = add i32 %1, %0		; <i32> [#uses=1]
+	ret i32 %2
+}
diff --git a/test/CodeGen/X86/hidden-vis-4.ll b/test/CodeGen/X86/hidden-vis-4.ll
new file mode 100644
index 0000000..a8aede5
--- /dev/null
+++ b/test/CodeGen/X86/hidden-vis-4.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | FileCheck %s
+
+@x = common hidden global i32 0		; <i32*> [#uses=1]
+
+define i32 @t() nounwind readonly {
+entry:
+; CHECK: t:
+; CHECK: movl _x, %eax
+; CHECK: .comm _x,4
+	%0 = load i32* @x, align 4		; <i32> [#uses=1]
+	ret i32 %0
+}
diff --git a/test/CodeGen/X86/hidden-vis-5.ll b/test/CodeGen/X86/hidden-vis-5.ll
new file mode 100644
index 0000000..88fae37
--- /dev/null
+++ b/test/CodeGen/X86/hidden-vis-5.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin9 -relocation-model=pic -disable-fp-elim -unwind-tables | FileCheck %s
+; <rdar://problem/7383328>
+
+@.str = private constant [12 x i8] c"hello world\00", align 1 ; <[12 x i8]*> [#uses=1]
+
+define hidden void @func() nounwind ssp {
+entry:
+  %0 = call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @.str, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
+  br label %return
+
+return:                                           ; preds = %entry
+  ret void
+}
+
+declare i32 @puts(i8*)
+
+define hidden i32 @main() nounwind ssp {
+entry:
+  %retval = alloca i32                            ; <i32*> [#uses=1]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  call void @func() nounwind
+  br label %return
+
+return:                                           ; preds = %entry
+  %retval1 = load i32* %retval                    ; <i32> [#uses=1]
+  ret i32 %retval1
+}
+
+; CHECK: .private_extern _func.eh
+; CHECK: .private_extern _main.eh
diff --git a/test/CodeGen/X86/hidden-vis.ll b/test/CodeGen/X86/hidden-vis.ll
new file mode 100644
index 0000000..a948bdf
--- /dev/null
+++ b/test/CodeGen/X86/hidden-vis.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mtriple=i686-apple-darwin8 | FileCheck %s -check-prefix=DARWIN
+
+@a = hidden global i32 0
+@b = external global i32
+
+define weak hidden void @t1() nounwind {
+; LINUX: .hidden t1
+; LINUX: t1:
+
+; DARWIN: .private_extern _t1
+; DARWIN: t1:
+  ret void
+}
+
+define weak void @t2() nounwind {
+; LINUX: t2:
+; LINUX: .hidden a
+
+; DARWIN: t2:
+; DARWIN: .private_extern _a
+  ret void
+}
+
diff --git a/test/CodeGen/X86/i128-and-beyond.ll b/test/CodeGen/X86/i128-and-beyond.ll
new file mode 100644
index 0000000..b741681
--- /dev/null
+++ b/test/CodeGen/X86/i128-and-beyond.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep -- -1 | count 14
+
+; These static initializers are too big to hand off to assemblers
+; as monolithic blobs.
+
+@x = global i128 -1
+@y = global i256 -1
+@z = global i512 -1
diff --git a/test/CodeGen/X86/i128-immediate.ll b/test/CodeGen/X86/i128-immediate.ll
new file mode 100644
index 0000000..c47569e
--- /dev/null
+++ b/test/CodeGen/X86/i128-immediate.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=x86-64 | grep movq | count 2
+
+define i128 @__addvti3() {
+          ret i128 -1
+}
diff --git a/test/CodeGen/X86/i128-mul.ll b/test/CodeGen/X86/i128-mul.ll
new file mode 100644
index 0000000..e9d30d6
--- /dev/null
+++ b/test/CodeGen/X86/i128-mul.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86-64
+; PR1198
+
+define i64 @foo(i64 %x, i64 %y) {
+        %tmp0 = zext i64 %x to i128
+        %tmp1 = zext i64 %y to i128
+        %tmp2 = mul i128 %tmp0, %tmp1
+        %tmp7 = zext i32 64 to i128
+        %tmp3 = lshr i128 %tmp2, %tmp7
+        %tmp4 = trunc i128 %tmp3 to i64
+        ret i64 %tmp4
+}
diff --git a/test/CodeGen/X86/i128-ret.ll b/test/CodeGen/X86/i128-ret.ll
new file mode 100644
index 0000000..277f428
--- /dev/null
+++ b/test/CodeGen/X86/i128-ret.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86-64 | grep {movq	8(%rdi), %rdx}
+; RUN: llc < %s -march=x86-64 | grep {movq	(%rdi), %rax}
+
+define i128 @test(i128 *%P) {
+        %A = load i128* %P
+        ret i128 %A
+}
+
diff --git a/test/CodeGen/X86/i256-add.ll b/test/CodeGen/X86/i256-add.ll
new file mode 100644
index 0000000..5a7a7a7
--- /dev/null
+++ b/test/CodeGen/X86/i256-add.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=x86 > %t
+; RUN: grep adcl %t | count 7
+; RUN: grep sbbl %t | count 7
+
+define void @add(i256* %p, i256* %q) nounwind {
+  %a = load i256* %p
+  %b = load i256* %q
+  %c = add i256 %a, %b
+  store i256 %c, i256* %p
+  ret void
+}
+define void @sub(i256* %p, i256* %q) nounwind {
+  %a = load i256* %p
+  %b = load i256* %q
+  %c = sub i256 %a, %b
+  store i256 %c, i256* %p
+  ret void
+}
diff --git a/test/CodeGen/X86/i2k.ll b/test/CodeGen/X86/i2k.ll
new file mode 100644
index 0000000..6116c2e
--- /dev/null
+++ b/test/CodeGen/X86/i2k.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86
+
+define void @foo(i2011* %x, i2011* %y, i2011* %p) nounwind {
+  %a = load i2011* %x
+  %b = load i2011* %y
+  %c = add i2011 %a, %b
+  store i2011 %c, i2011* %p
+  ret void
+}
diff --git a/test/CodeGen/X86/i64-mem-copy.ll b/test/CodeGen/X86/i64-mem-copy.ll
new file mode 100644
index 0000000..847e209
--- /dev/null
+++ b/test/CodeGen/X86/i64-mem-copy.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86-64           | grep {movq.*(%rsi), %rax}
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {movsd.*(%eax),}
+
+; Uses movsd to load / store i64 values if sse2 is available.
+
+; rdar://6659858
+
+define void @foo(i64* %x, i64* %y) nounwind  {
+entry:
+	%tmp1 = load i64* %y, align 8		; <i64> [#uses=1]
+	store i64 %tmp1, i64* %x, align 8
+	ret void
+}
diff --git a/test/CodeGen/X86/iabs.ll b/test/CodeGen/X86/iabs.ll
new file mode 100644
index 0000000..6a79ee8
--- /dev/null
+++ b/test/CodeGen/X86/iabs.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86-64 -stats  |& \
+; RUN:   grep {6 .*Number of machine instrs printed}
+
+;; Integer absolute value, should produce something at least as good as:
+;;       movl %edi, %eax
+;;       sarl $31, %eax
+;;       addl %eax, %edi
+;;       xorl %eax, %edi
+;;       movl %edi, %eax
+;;       ret
+define i32 @test(i32 %a) nounwind {
+        %tmp1neg = sub i32 0, %a
+        %b = icmp sgt i32 %a, -1
+        %abs = select i1 %b, i32 %a, i32 %tmp1neg
+        ret i32 %abs
+}
+
diff --git a/test/CodeGen/X86/illegal-insert.ll b/test/CodeGen/X86/illegal-insert.ll
new file mode 100644
index 0000000..dbf1b14
--- /dev/null
+++ b/test/CodeGen/X86/illegal-insert.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=x86-64
+
+define <4 x double> @foo0(<4 x double> %t) {
+  %r = insertelement <4 x double> %t, double 2.3, i32 0
+  ret <4 x double> %r
+}
+define <4 x double> @foo1(<4 x double> %t) {
+  %r = insertelement <4 x double> %t, double 2.3, i32 1
+  ret <4 x double> %r
+}
+define <4 x double> @foo2(<4 x double> %t) {
+  %r = insertelement <4 x double> %t, double 2.3, i32 2
+  ret <4 x double> %r
+}
+define <4 x double> @foo3(<4 x double> %t) {
+  %r = insertelement <4 x double> %t, double 2.3, i32 3
+  ret <4 x double> %r
+}
diff --git a/test/CodeGen/X86/illegal-vector-args-return.ll b/test/CodeGen/X86/illegal-vector-args-return.ll
new file mode 100644
index 0000000..cecf77a
--- /dev/null
+++ b/test/CodeGen/X86/illegal-vector-args-return.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {mulpd	%xmm3, %xmm1}
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {mulpd	%xmm2, %xmm0}
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {addps	%xmm3, %xmm1}
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {addps	%xmm2, %xmm0}
+
+define <4 x double> @foo(<4 x double> %x, <4 x double> %z) {
+  %y = fmul <4 x double> %x, %z
+  ret <4 x double> %y
+}
+
+define <8 x float> @bar(<8 x float> %x, <8 x float> %z) {
+  %y = fadd <8 x float> %x, %z
+  ret <8 x float> %y
+}
diff --git a/test/CodeGen/X86/imp-def-copies.ll b/test/CodeGen/X86/imp-def-copies.ll
new file mode 100644
index 0000000..9117840
--- /dev/null
+++ b/test/CodeGen/X86/imp-def-copies.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=x86 | not grep mov
+
+	%struct.active_line = type { %struct.gs_fixed_point, %struct.gs_fixed_point, i32, i32, i32, %struct.line_segment*, i32, i16, i16, %struct.active_line*, %struct.active_line* }
+	%struct.gs_fixed_point = type { i32, i32 }
+	%struct.line_list = type { %struct.active_line*, i32, i16, %struct.active_line*, %struct.active_line*, %struct.active_line*, %struct.active_line, i32 }
+	%struct.line_segment = type { %struct.line_segment*, %struct.line_segment*, i32, %struct.gs_fixed_point }
+	%struct.subpath = type { %struct.line_segment*, %struct.line_segment*, i32, %struct.gs_fixed_point, %struct.line_segment*, i32, i32, i8 }
+
+define fastcc void @add_y_list(%struct.subpath* %ppath.0.4.val, i16 signext  %tag, %struct.line_list* %ll, i32 %pbox.0.0.1.val, i32 %pbox.0.1.0.val, i32 %pbox.0.1.1.val) nounwind  {
+entry:
+	br i1 false, label %return, label %bb
+bb:		; preds = %bb280, %entry
+	%psub.1.reg2mem.0 = phi %struct.subpath* [ %psub.0.reg2mem.0, %bb280 ], [ undef, %entry ]		; <%struct.subpath*> [#uses=1]
+	%plast.1.reg2mem.0 = phi %struct.line_segment* [ %plast.0.reg2mem.0, %bb280 ], [ undef, %entry ]		; <%struct.line_segment*> [#uses=1]
+	%prev_dir.0.reg2mem.0 = phi i32 [ %dir.0.reg2mem.0, %bb280 ], [ undef, %entry ]		; <i32> [#uses=1]
+	br i1 false, label %bb280, label %bb109
+bb109:		; preds = %bb
+	%tmp113 = icmp sgt i32 0, %prev_dir.0.reg2mem.0		; <i1> [#uses=1]
+	br i1 %tmp113, label %bb116, label %bb280
+bb116:		; preds = %bb109
+	ret void
+bb280:		; preds = %bb109, %bb
+	%psub.0.reg2mem.0 = phi %struct.subpath* [ null, %bb ], [ %psub.1.reg2mem.0, %bb109 ]		; <%struct.subpath*> [#uses=1]
+	%plast.0.reg2mem.0 = phi %struct.line_segment* [ null, %bb ], [ %plast.1.reg2mem.0, %bb109 ]		; <%struct.line_segment*> [#uses=1]
+	%dir.0.reg2mem.0 = phi i32 [ 0, %bb ], [ 0, %bb109 ]		; <i32> [#uses=1]
+	br i1 false, label %return, label %bb
+return:		; preds = %bb280, %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/imul-lea-2.ll b/test/CodeGen/X86/imul-lea-2.ll
new file mode 100644
index 0000000..1cb54b3
--- /dev/null
+++ b/test/CodeGen/X86/imul-lea-2.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86-64 | grep lea | count 3
+; RUN: llc < %s -march=x86-64 | grep shl | count 1
+; RUN: llc < %s -march=x86-64 | not grep imul
+
+define i64 @t1(i64 %a) nounwind readnone {
+entry:
+	%0 = mul i64 %a, 81		; <i64> [#uses=1]
+	ret i64 %0
+}
+
+define i64 @t2(i64 %a) nounwind readnone {
+entry:
+	%0 = mul i64 %a, 40		; <i64> [#uses=1]
+	ret i64 %0
+}
diff --git a/test/CodeGen/X86/imul-lea.ll b/test/CodeGen/X86/imul-lea.ll
new file mode 100644
index 0000000..4e8e2af
--- /dev/null
+++ b/test/CodeGen/X86/imul-lea.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86 | grep lea
+
+declare i32 @foo()
+
+define i32 @test() {
+        %tmp.0 = tail call i32 @foo( )          ; <i32> [#uses=1]
+        %tmp.1 = mul i32 %tmp.0, 9              ; <i32> [#uses=1]
+        ret i32 %tmp.1
+}
+
diff --git a/test/CodeGen/X86/inline-asm-2addr.ll b/test/CodeGen/X86/inline-asm-2addr.ll
new file mode 100644
index 0000000..4a2c7fc
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-2addr.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86-64 | not grep movq
+
+define i64 @t(i64 %a, i64 %b) nounwind ssp {
+entry:
+	%asmtmp = tail call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i64 %a) nounwind		; <i64> [#uses=1]
+	%asmtmp1 = tail call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i64 %b) nounwind		; <i64> [#uses=1]
+	%0 = add i64 %asmtmp1, %asmtmp		; <i64> [#uses=1]
+	ret i64 %0
+}
diff --git a/test/CodeGen/X86/inline-asm-R-constraint.ll b/test/CodeGen/X86/inline-asm-R-constraint.ll
new file mode 100644
index 0000000..66c27ac
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-R-constraint.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+; 7282062
+; ModuleID = '<stdin>'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin10.0"
+
+define void @udiv8(i8* %quotient, i16 zeroext %a, i8 zeroext %b, i8 zeroext %c, i8* %remainder) nounwind ssp {
+entry:
+; CHECK: udiv8:
+; CHECK-NOT: movb %ah, (%r8)
+  %a_addr = alloca i16, align 2                   ; <i16*> [#uses=2]
+  %b_addr = alloca i8, align 1                    ; <i8*> [#uses=2]
+  store i16 %a, i16* %a_addr
+  store i8 %b, i8* %b_addr
+  call void asm "\09\09movw\09$2, %ax\09\09\0A\09\09divb\09$3\09\09\09\0A\09\09movb\09%al, $0\09\0A\09\09movb %ah, ($4)", "=*m,=*m,*m,*m,R,~{dirflag},~{fpsr},~{flags},~{ax}"(i8* %quotient, i8* %remainder, i16* %a_addr, i8* %b_addr, i8* %remainder) nounwind
+  ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/inline-asm-flag-clobber.ll b/test/CodeGen/X86/inline-asm-flag-clobber.ll
new file mode 100644
index 0000000..51ea843
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-flag-clobber.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+; PR3701
+
+define i64 @t(i64* %arg) nounwind {
+	br i1 true, label %1, label %5
+
+; <label>:1		; preds = %0
+	%2 = icmp eq i64* null, %arg		; <i1> [#uses=1]
+	%3 = tail call i64* asm sideeffect "movl %fs:0,$0", "=r,~{dirflag},~{fpsr},~{flags}"() nounwind		; <%struct.thread*> [#uses=0]
+; CHECK: test
+; CHECK-NEXT: j
+	br i1 %2, label %4, label %5
+
+; <label>:4		; preds = %1
+	ret i64 1
+
+; <label>:5		; preds = %1
+	ret i64 0
+}
diff --git a/test/CodeGen/X86/inline-asm-fpstack.ll b/test/CodeGen/X86/inline-asm-fpstack.ll
new file mode 100644
index 0000000..09b0929
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-fpstack.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -march=x86
+
+define x86_fp80 @test1() {
+        %tmp85 = call x86_fp80 asm sideeffect "fld0", "={st(0)}"()
+        ret x86_fp80 %tmp85
+}
+
+define double @test2() {
+        %tmp85 = call double asm sideeffect "fld0", "={st(0)}"()
+        ret double %tmp85
+}
+
+define void @test3(x86_fp80 %X) {
+        call void asm sideeffect "frob ", "{st(0)},~{dirflag},~{fpsr},~{flags}"( x86_fp80 %X)
+        ret void
+}
+
+define void @test4(double %X) {
+        call void asm sideeffect "frob ", "{st(0)},~{dirflag},~{fpsr},~{flags}"( double %X)
+        ret void
+}
+
+define void @test5(double %X) {
+        %Y = fadd double %X, 123.0
+        call void asm sideeffect "frob ", "{st(0)},~{dirflag},~{fpsr},~{flags}"( double %Y)
+        ret void
+}
+
+define void @test6(double %A, double %B, double %C, 
+                   double %D, double %E) nounwind  {
+entry:
+	; Uses the same value twice, should have one fstp after the asm.
+	tail call void asm sideeffect "foo $0 $1", "f,f,~{dirflag},~{fpsr},~{flags}"( double %A, double %A ) nounwind 
+	; Uses two different values, should be in st(0)/st(1) and both be popped.
+	tail call void asm sideeffect "bar $0 $1", "f,f,~{dirflag},~{fpsr},~{flags}"( double %B, double %C ) nounwind 
+	; Uses two different values, one of which isn't killed in this asm, it
+	; should not be popped after the asm.
+	tail call void asm sideeffect "baz $0 $1", "f,f,~{dirflag},~{fpsr},~{flags}"( double %D, double %E ) nounwind 
+	; This is the last use of %D, so it should be popped after.
+	tail call void asm sideeffect "baz $0", "f,~{dirflag},~{fpsr},~{flags}"( double %D ) nounwind 
+	ret void
+}
+
diff --git a/test/CodeGen/X86/inline-asm-fpstack2.ll b/test/CodeGen/X86/inline-asm-fpstack2.ll
new file mode 100644
index 0000000..ffa6ee6
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-fpstack2.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86 > %t
+; RUN: grep {fld	%%st(0)} %t
+; PR4185
+
+define void @test() {
+return:
+	call void asm sideeffect "fistpl $0", "{st}"(double 1.000000e+06)
+	call void asm sideeffect "fistpl $0", "{st}"(double 1.000000e+06)
+	ret void
+}
diff --git a/test/CodeGen/X86/inline-asm-fpstack3.ll b/test/CodeGen/X86/inline-asm-fpstack3.ll
new file mode 100644
index 0000000..17945fe
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-fpstack3.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 > %t
+; RUN: grep {fld	%%st(0)} %t
+; PR4459
+
+declare x86_fp80 @ceil(x86_fp80)
+
+declare void @test(x86_fp80)
+
+define void @test2(x86_fp80 %a) {
+entry:
+	%0 = call x86_fp80 @ceil(x86_fp80 %a)
+	call void asm sideeffect "fistpl $0", "{st}"( x86_fp80 %0)
+	call void @test(x86_fp80 %0 )
+        ret void
+}
diff --git a/test/CodeGen/X86/inline-asm-fpstack4.ll b/test/CodeGen/X86/inline-asm-fpstack4.ll
new file mode 100644
index 0000000..bae2970
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-fpstack4.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86
+; PR4484
+
+declare x86_fp80 @ceil()
+
+declare void @test(x86_fp80)
+
+define void @test2(x86_fp80 %a) {
+entry:
+	%0 = call x86_fp80 @ceil()
+	call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %a)
+	call void @test(x86_fp80 %0)
+	ret void
+}
+
diff --git a/test/CodeGen/X86/inline-asm-fpstack5.ll b/test/CodeGen/X86/inline-asm-fpstack5.ll
new file mode 100644
index 0000000..8b219cf
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-fpstack5.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86
+; PR4485
+
+define void @test(x86_fp80* %a) {
+entry:
+	%0 = load x86_fp80* %a, align 16
+	%1 = fmul x86_fp80 %0, 0xK4006B400000000000000
+	%2 = fmul x86_fp80 %1, 0xK4012F424000000000000
+	tail call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %2)
+	%3 = load x86_fp80* %a, align 16
+	%4 = fmul x86_fp80 %3, 0xK4006B400000000000000
+	%5 = fmul x86_fp80 %4, 0xK4012F424000000000000
+	tail call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %5)
+	ret void
+}
diff --git a/test/CodeGen/X86/inline-asm-modifier-n.ll b/test/CodeGen/X86/inline-asm-modifier-n.ll
new file mode 100644
index 0000000..5e76b6c
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-modifier-n.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 | grep { 37}
+; rdar://7008959
+
+define void @bork() nounwind {
+entry:
+	tail call void asm sideeffect "BORK ${0:n}", "i,~{dirflag},~{fpsr},~{flags}"(i32 -37) nounwind
+	ret void
+}
diff --git a/test/CodeGen/X86/inline-asm-mrv.ll b/test/CodeGen/X86/inline-asm-mrv.ll
new file mode 100644
index 0000000..78d7e77
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-mrv.ll
@@ -0,0 +1,35 @@
+; PR2094
+; RUN: llc < %s -march=x86-64 | grep movslq
+; RUN: llc < %s -march=x86-64 | grep addps
+; RUN: llc < %s -march=x86-64 | grep paddd
+; RUN: llc < %s -march=x86-64 | not grep movq
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin8"
+
+define i32 @test1(i8* %v, i8* %blk2, i8* %blk1, i32 %stride, i32 %h) nounwind  {
+	%tmp12 = sext i32 %stride to i64		; <i64> [#uses=1]
+	%mrv = call {i32, i8*, i8*} asm sideeffect "$0 $1 $2 $3 $4 $5 $6",
+         "=r,=r,=r,r,r,r,r"( i64 %tmp12, i32 %h, i8* %blk1, i8* %blk2 ) nounwind
+        %tmp6 = getresult {i32, i8*, i8*} %mrv, 0
+	%tmp7 = call i32 asm sideeffect "set $0",
+             "=r,~{dirflag},~{fpsr},~{flags}"( ) nounwind
+	ret i32 %tmp7
+}
+
+define <4 x float> @test2() nounwind {
+	%mrv = call {<4 x float>, <4 x float>} asm "set $0, $1", "=x,=x"()
+	%a = getresult {<4 x float>, <4 x float>} %mrv, 0
+	%b = getresult {<4 x float>, <4 x float>} %mrv, 1
+	%c = fadd <4 x float> %a, %b
+	ret <4 x float> %c
+}
+
+define <4 x i32> @test3() nounwind {
+	%mrv = call {<4 x i32>, <4 x i32>} asm "set $0, $1", "=x,=x"()
+	%a = getresult {<4 x i32>, <4 x i32>} %mrv, 0
+	%b = getresult {<4 x i32>, <4 x i32>} %mrv, 1
+	%c = add <4 x i32> %a, %b
+	ret <4 x i32> %c
+}
+
diff --git a/test/CodeGen/X86/inline-asm-out-regs.ll b/test/CodeGen/X86/inline-asm-out-regs.ll
new file mode 100644
index 0000000..46966f5
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-out-regs.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu
+; PR3391
+
+@pci_indirect = external global { }             ; <{ }*> [#uses=1]
+@pcibios_last_bus = external global i32         ; <i32*> [#uses=2]
+
+define void @pci_pcbios_init() nounwind section ".init.text" {
+entry:
+        br label %bb1.i
+
+bb1.i:          ; preds = %bb6.i.i, %bb1.i, %entry
+        %0 = load i32* null, align 8            ; <i32> [#uses=1]
+        %1 = icmp ugt i32 %0, 1048575           ; <i1> [#uses=1]
+        br i1 %1, label %bb2.i, label %bb1.i
+
+bb2.i:          ; preds = %bb1.i
+        %asmtmp.i.i = tail call { i32, i32, i32, i32 } asm "lcall *(%edi); cld\0A\09jc 1f\0A\09xor %ah, %ah\0A1:", "={dx},={ax},={bx},={cx},1,{di},~{dirflag},~{fpsr},~{flags},~{memory}"(i32 45313, { }* @pci_indirect) nounwind             ; <{ i32, i32, i32, i32 }> [#uses=2]
+        %asmresult2.i.i = extractvalue { i32, i32, i32, i32 } %asmtmp.i.i, 1   
+        ; <i32> [#uses=1]
+        %2 = lshr i32 %asmresult2.i.i, 8                ; <i32> [#uses=1]
+        %3 = trunc i32 %2 to i8         ; <i8> [#uses=1]
+        %4 = load i32* @pcibios_last_bus, align 4               ; <i32> [#uses=1]
+        %5 = icmp slt i32 %4, 0         ; <i1> [#uses=1]
+        br i1 %5, label %bb5.i.i, label %bb6.i.i
+
+bb5.i.i:                ; preds = %bb2.i
+        %asmresult4.i.i = extractvalue { i32, i32, i32, i32 } %asmtmp.i.i, 3   
+        ; <i32> [#uses=1]
+        %6 = and i32 %asmresult4.i.i, 255               ; <i32> [#uses=1]
+        store i32 %6, i32* @pcibios_last_bus, align 4
+        br label %bb6.i.i
+
+bb6.i.i:                ; preds = %bb5.i.i, %bb2.i
+        %7 = icmp eq i8 %3, 0           ; <i1> [#uses=1]
+        %or.cond.i.i = and i1 %7, false         ; <i1> [#uses=1]
+        br i1 %or.cond.i.i, label %bb1.i, label %bb8.i.i
+
+bb8.i.i:                ; preds = %bb6.i.i
+        unreachable
+}
diff --git a/test/CodeGen/X86/inline-asm-pic.ll b/test/CodeGen/X86/inline-asm-pic.ll
new file mode 100644
index 0000000..0b5ff08
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-pic.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic | grep lea
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic | grep call
+
+@main_q = internal global i8* null		; <i8**> [#uses=1]
+
+define void @func2() nounwind {
+entry:
+	tail call void asm "mov $1,%gs:$0", "=*m,ri,~{dirflag},~{fpsr},~{flags}"(i8** inttoptr (i32 152 to i8**), i8* bitcast (i8** @main_q to i8*)) nounwind
+	ret void
+}
diff --git a/test/CodeGen/X86/inline-asm-q-regs.ll b/test/CodeGen/X86/inline-asm-q-regs.ll
new file mode 100644
index 0000000..ab44206
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-q-regs.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86-64
+; rdar://7066579
+
+	type { i64, i64, i64, i64, i64 }		; type %0
+
+define void @t() nounwind {
+entry:
+	%asmtmp = call %0 asm sideeffect "mov    %cr0, $0       \0Amov    %cr2, $1       \0Amov    %cr3, $2       \0Amov    %cr4, $3       \0Amov    %cr8, $0       \0A", "=q,=q,=q,=q,=q,~{dirflag},~{fpsr},~{flags}"() nounwind		; <%0> [#uses=0]
+	ret void
+}
diff --git a/test/CodeGen/X86/inline-asm-tied.ll b/test/CodeGen/X86/inline-asm-tied.ll
new file mode 100644
index 0000000..1f4a13f
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-tied.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin9 -O0 | grep {movl	%edx, 12(%esp)} | count 2
+; rdar://6992609
+
+target triple = "i386-apple-darwin9.0"
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i64 (i64)* @_OSSwapInt64 to i8*)], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define i64 @_OSSwapInt64(i64 %_data) nounwind {
+entry:
+	%retval = alloca i64		; <i64*> [#uses=2]
+	%_data.addr = alloca i64		; <i64*> [#uses=4]
+	store i64 %_data, i64* %_data.addr
+	%tmp = load i64* %_data.addr		; <i64> [#uses=1]
+	%0 = call i64 asm "bswap   %eax\0A\09bswap   %edx\0A\09xchgl   %eax, %edx", "=A,0,~{dirflag},~{fpsr},~{flags}"(i64 %tmp) nounwind		; <i64> [#uses=1]
+	store i64 %0, i64* %_data.addr
+	%tmp1 = load i64* %_data.addr		; <i64> [#uses=1]
+	store i64 %tmp1, i64* %retval
+	%1 = load i64* %retval		; <i64> [#uses=1]
+	ret i64 %1
+}
diff --git a/test/CodeGen/X86/inline-asm-x-scalar.ll b/test/CodeGen/X86/inline-asm-x-scalar.ll
new file mode 100644
index 0000000..5a9628b
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-x-scalar.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86 -mcpu=yonah
+
+define void @test1() {
+        tail call void asm sideeffect "ucomiss $0", "x"( float 0x41E0000000000000)
+        ret void
+}
+
+define void @test2() {
+        %tmp53 = tail call i32 asm "ucomiss $1, $3\0Acmovae  $2, $0 ", "=r,mx,mr,x,0,~{dirflag},~{fpsr},~{flags},~{cc}"( float 0x41E0000000000000, i32 2147483647, float 0.000000e+00, i32 0 )         ; <i32> [#uses
+        unreachable
+}
+
+define void @test3() {
+        tail call void asm sideeffect "ucomiss $0, $1", "mx,x,~{dirflag},~{fpsr},~{flags},~{cc}"( float 0x41E0000000000000, i32 65536 )
+        ret void
+}
+
+define void @test4() {
+        %tmp1 = tail call float asm "", "=x,0,~{dirflag},~{fpsr},~{flags}"( float 0x47EFFFFFE0000000 ); <float> [#uses=1]
+        %tmp4 = fsub float %tmp1, 0x3810000000000000             ; <float> [#uses=1]
+        tail call void asm sideeffect "", "x,~{dirflag},~{fpsr},~{flags}"( float %tmp4 )
+        ret void
+}
+
diff --git a/test/CodeGen/X86/inline-asm.ll b/test/CodeGen/X86/inline-asm.ll
new file mode 100644
index 0000000..c66d7a8
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86
+
+define i32 @test1() nounwind {
+	; Dest is AX, dest type = i32.
+        %tmp4 = call i32 asm sideeffect "FROB $0", "={ax}"()
+        ret i32 %tmp4
+}
+
+define void @test2(i32 %V) nounwind {
+	; input is AX, in type = i32.
+        call void asm sideeffect "FROB $0", "{ax}"(i32 %V)
+        ret void
+}
+
+define void @test3() nounwind {
+        ; FP constant as a memory operand.
+        tail call void asm sideeffect "frob $0", "m"( float 0x41E0000000000000)
+        ret void
+}
+
+define void @test4() nounwind {
+       ; J means a constant in range 0 to 63.
+       tail call void asm sideeffect "bork $0", "J"(i32 37) nounwind
+       ret void
+}
diff --git a/test/CodeGen/X86/ins_subreg_coalesce-1.ll b/test/CodeGen/X86/ins_subreg_coalesce-1.ll
new file mode 100644
index 0000000..2243f93
--- /dev/null
+++ b/test/CodeGen/X86/ins_subreg_coalesce-1.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86 | grep mov | count 3
+
+define fastcc i32 @sqlite3ExprResolveNames() nounwind  {
+entry:
+	br i1 false, label %UnifiedReturnBlock, label %bb4
+bb4:		; preds = %entry
+	br i1 false, label %bb17, label %bb22
+bb17:		; preds = %bb4
+	ret i32 1
+bb22:		; preds = %bb4
+	br i1 true, label %walkExprTree.exit, label %bb4.i
+bb4.i:		; preds = %bb22
+	ret i32 0
+walkExprTree.exit:		; preds = %bb22
+	%tmp83 = load i16* null, align 4		; <i16> [#uses=1]
+	%tmp84 = or i16 %tmp83, 2		; <i16> [#uses=2]
+	store i16 %tmp84, i16* null, align 4
+	%tmp98993 = zext i16 %tmp84 to i32		; <i32> [#uses=1]
+	%tmp1004 = lshr i32 %tmp98993, 3		; <i32> [#uses=1]
+	%tmp100.lobit5 = and i32 %tmp1004, 1		; <i32> [#uses=1]
+	ret i32 %tmp100.lobit5
+UnifiedReturnBlock:		; preds = %entry
+	ret i32 0
+}
diff --git a/test/CodeGen/X86/ins_subreg_coalesce-2.ll b/test/CodeGen/X86/ins_subreg_coalesce-2.ll
new file mode 100644
index 0000000..f2c9cc7
--- /dev/null
+++ b/test/CodeGen/X86/ins_subreg_coalesce-2.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=x86-64 | not grep movw
+
+define i16 @test5(i16 %f12) nounwind {
+	%f11 = shl i16 %f12, 2		; <i16> [#uses=1]
+	%tmp7.25 = ashr i16 %f11, 8		; <i16> [#uses=1]
+	ret i16 %tmp7.25
+}
diff --git a/test/CodeGen/X86/ins_subreg_coalesce-3.ll b/test/CodeGen/X86/ins_subreg_coalesce-3.ll
new file mode 100644
index 0000000..627edc5
--- /dev/null
+++ b/test/CodeGen/X86/ins_subreg_coalesce-3.ll
@@ -0,0 +1,93 @@
+; RUN: llc < %s -march=x86-64 | grep mov | count 5
+
+	%struct.COMPOSITE = type { i8, i16, i16 }
+	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+	%struct.FILE_POS = type { i8, i8, i16, i32 }
+	%struct.FIRST_UNION = type { %struct.FILE_POS }
+	%struct.FONT_INFO = type { %struct.metrics*, i8*, i16*, %struct.COMPOSITE*, i32, %struct.rec*, %struct.rec*, i16, i16, i16*, i8*, i8*, i16* }
+	%struct.FOURTH_UNION = type { %struct.STYLE }
+	%struct.GAP = type { i8, i8, i16 }
+	%struct.LIST = type { %struct.rec*, %struct.rec* }
+	%struct.SECOND_UNION = type { { i16, i8, i8 } }
+	%struct.STYLE = type { { %struct.GAP }, { %struct.GAP }, i16, i16, i32 }
+	%struct.THIRD_UNION = type { %struct.FILE*, [8 x i8] }
+	%struct.__sFILEX = type opaque
+	%struct.__sbuf = type { i8*, i32 }
+	%struct.head_type = type { [2 x %struct.LIST], %struct.FIRST_UNION, %struct.SECOND_UNION, %struct.THIRD_UNION, %struct.FOURTH_UNION, %struct.rec*, { %struct.rec* }, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, %struct.rec*, i32 }
+	%struct.metrics = type { i16, i16, i16, i16, i16 }
+	%struct.rec = type { %struct.head_type }
+
+define void @FontChange(i1 %foo) nounwind {
+entry:
+	br i1 %foo, label %bb298, label %bb49
+bb49:		; preds = %entry
+	ret void
+bb298:		; preds = %entry
+	br i1 %foo, label %bb304, label %bb366
+bb304:		; preds = %bb298
+	br i1 %foo, label %bb330, label %bb428
+bb330:		; preds = %bb366, %bb304
+	br label %bb366
+bb366:		; preds = %bb330, %bb298
+	br i1 %foo, label %bb330, label %bb428
+bb428:		; preds = %bb366, %bb304
+	br i1 %foo, label %bb650, label %bb433
+bb433:		; preds = %bb428
+	ret void
+bb650:		; preds = %bb650, %bb428
+	%tmp658 = load i8* null, align 8		; <i8> [#uses=1]
+	%tmp659 = icmp eq i8 %tmp658, 0		; <i1> [#uses=1]
+	br i1 %tmp659, label %bb650, label %bb662
+bb662:		; preds = %bb650
+	%tmp685 = icmp eq %struct.rec* null, null		; <i1> [#uses=1]
+	br i1 %tmp685, label %bb761, label %bb688
+bb688:		; preds = %bb662
+	ret void
+bb761:		; preds = %bb662
+	%tmp487248736542 = load i32* null, align 4		; <i32> [#uses=2]
+	%tmp487648776541 = and i32 %tmp487248736542, 57344		; <i32> [#uses=1]
+	%tmp4881 = icmp eq i32 %tmp487648776541, 8192		; <i1> [#uses=1]
+	br i1 %tmp4881, label %bb4884, label %bb4897
+bb4884:		; preds = %bb761
+	%tmp488948906540 = and i32 %tmp487248736542, 7168		; <i32> [#uses=1]
+	%tmp4894 = icmp eq i32 %tmp488948906540, 1024		; <i1> [#uses=1]
+	br i1 %tmp4894, label %bb4932, label %bb4897
+bb4897:		; preds = %bb4884, %bb761
+	ret void
+bb4932:		; preds = %bb4884
+	%tmp4933 = load i32* null, align 4		; <i32> [#uses=1]
+	br i1 %foo, label %bb5054, label %bb4940
+bb4940:		; preds = %bb4932
+	%tmp4943 = load i32* null, align 4		; <i32> [#uses=2]
+	switch i32 %tmp4933, label %bb5054 [
+		 i32 159, label %bb4970
+		 i32 160, label %bb5002
+	]
+bb4970:		; preds = %bb4940
+	%tmp49746536 = trunc i32 %tmp4943 to i16		; <i16> [#uses=1]
+	%tmp49764977 = and i16 %tmp49746536, 4095		; <i16> [#uses=1]
+	%mask498049814982 = zext i16 %tmp49764977 to i64		; <i64> [#uses=1]
+	%tmp4984 = getelementptr %struct.FONT_INFO* null, i64 %mask498049814982, i32 5		; <%struct.rec**> [#uses=1]
+	%tmp4985 = load %struct.rec** %tmp4984, align 8		; <%struct.rec*> [#uses=1]
+	%tmp4988 = getelementptr %struct.rec* %tmp4985, i64 0, i32 0, i32 3		; <%struct.THIRD_UNION*> [#uses=1]
+	%tmp4991 = bitcast %struct.THIRD_UNION* %tmp4988 to i32*		; <i32*> [#uses=1]
+	%tmp4992 = load i32* %tmp4991, align 8		; <i32> [#uses=1]
+	%tmp49924993 = trunc i32 %tmp4992 to i16		; <i16> [#uses=1]
+	%tmp4996 = add i16 %tmp49924993, 0		; <i16> [#uses=1]
+	br label %bb5054
+bb5002:		; preds = %bb4940
+	%tmp50066537 = trunc i32 %tmp4943 to i16		; <i16> [#uses=1]
+	%tmp50085009 = and i16 %tmp50066537, 4095		; <i16> [#uses=1]
+	%mask501250135014 = zext i16 %tmp50085009 to i64		; <i64> [#uses=1]
+	%tmp5016 = getelementptr %struct.FONT_INFO* null, i64 %mask501250135014, i32 5		; <%struct.rec**> [#uses=1]
+	%tmp5017 = load %struct.rec** %tmp5016, align 8		; <%struct.rec*> [#uses=1]
+	%tmp5020 = getelementptr %struct.rec* %tmp5017, i64 0, i32 0, i32 3		; <%struct.THIRD_UNION*> [#uses=1]
+	%tmp5023 = bitcast %struct.THIRD_UNION* %tmp5020 to i32*		; <i32*> [#uses=1]
+	%tmp5024 = load i32* %tmp5023, align 8		; <i32> [#uses=1]
+	%tmp50245025 = trunc i32 %tmp5024 to i16		; <i16> [#uses=1]
+	%tmp5028 = sub i16 %tmp50245025, 0		; <i16> [#uses=1]
+	br label %bb5054
+bb5054:		; preds = %bb5002, %bb4970, %bb4940, %bb4932
+	%flen.0.reg2mem.0 = phi i16 [ %tmp4996, %bb4970 ], [ %tmp5028, %bb5002 ], [ 0, %bb4932 ], [ undef, %bb4940 ]		; <i16> [#uses=0]
+	ret void
+}
diff --git a/test/CodeGen/X86/insertelement-copytoregs.ll b/test/CodeGen/X86/insertelement-copytoregs.ll
new file mode 100644
index 0000000..34a29ca
--- /dev/null
+++ b/test/CodeGen/X86/insertelement-copytoregs.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86-64 | grep -v IMPLICIT_DEF
+
+define void @foo(<2 x float>* %p) {
+  %t = insertelement <2 x float> undef, float 0.0, i32 0
+  %v = insertelement <2 x float> %t,   float 0.0, i32 1
+  br label %bb8
+
+bb8:
+  store <2 x float> %v, <2 x float>* %p
+  ret void
+}
diff --git a/test/CodeGen/X86/insertelement-legalize.ll b/test/CodeGen/X86/insertelement-legalize.ll
new file mode 100644
index 0000000..18aade2
--- /dev/null
+++ b/test/CodeGen/X86/insertelement-legalize.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86 -disable-mmx
+
+; Test to check that we properly legalize an insert vector element
+define void @test(<2 x i64> %val, <2 x i64>* %dst, i64 %x) nounwind {
+entry:
+	%tmp4 = insertelement <2 x i64> %val, i64 %x, i32 0		; <<2 x i64>> [#uses=1]
+	%add = add <2 x i64> %tmp4, %val		; <<2 x i64>> [#uses=1]
+	store <2 x i64> %add, <2 x i64>* %dst
+	ret void
+}
diff --git a/test/CodeGen/X86/invalid-shift-immediate.ll b/test/CodeGen/X86/invalid-shift-immediate.ll
new file mode 100644
index 0000000..77a9f7e
--- /dev/null
+++ b/test/CodeGen/X86/invalid-shift-immediate.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -march=x86
+; PR2098
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+
+define void @foo(i32 %x) {
+entry:
+	%x_addr = alloca i32		; <i32*> [#uses=2]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	store i32 %x, i32* %x_addr
+	%tmp = load i32* %x_addr, align 4		; <i32> [#uses=1]
+	%tmp1 = ashr i32 %tmp, -2		; <i32> [#uses=1]
+	%tmp2 = and i32 %tmp1, 1		; <i32> [#uses=1]
+	%tmp23 = trunc i32 %tmp2 to i8		; <i8> [#uses=1]
+	%toBool = icmp ne i8 %tmp23, 0		; <i1> [#uses=1]
+	br i1 %toBool, label %bb, label %bb5
+
+bb:		; preds = %entry
+	%tmp4 = call i32 (...)* @bar( ) nounwind 		; <i32> [#uses=0]
+	br label %bb5
+
+bb5:		; preds = %bb, %entry
+	br label %return
+
+return:		; preds = %bb5
+	ret void
+}
+
+declare i32 @bar(...)
diff --git a/test/CodeGen/X86/isel-sink.ll b/test/CodeGen/X86/isel-sink.ll
new file mode 100644
index 0000000..0f94b23
--- /dev/null
+++ b/test/CodeGen/X86/isel-sink.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=x86 | not grep lea
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin8 | \
+; RUN:   grep {movl	\$4, (.*,.*,4)}
+
+define i32 @test(i32* %X, i32 %B) {
+	; This gep should be sunk out of this block into the load/store users.
+	%P = getelementptr i32* %X, i32 %B
+	%G = icmp ult i32 %B, 1234
+	br i1 %G, label %T, label %F
+T:
+	store i32 4, i32* %P
+	ret i32 141
+F:
+	%V = load i32* %P
+	ret i32 %V
+}
+	
+	
diff --git a/test/CodeGen/X86/isel-sink2.ll b/test/CodeGen/X86/isel-sink2.ll
new file mode 100644
index 0000000..5ed0e00
--- /dev/null
+++ b/test/CodeGen/X86/isel-sink2.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86 > %t
+; RUN: grep {movb.7(%...)} %t
+; RUN: not grep leal %t
+
+define i8 @test(i32 *%P) nounwind {
+  %Q = getelementptr i32* %P, i32 1
+  %R = bitcast i32* %Q to i8*
+  %S = load i8* %R
+  %T = icmp eq i8 %S, 0
+  br i1 %T, label %TB, label %F
+TB:
+  ret i8 4
+F:
+  %U = getelementptr i8* %R, i32 3
+  %V = load i8* %U
+  ret i8 %V
+}
diff --git a/test/CodeGen/X86/isel-sink3.ll b/test/CodeGen/X86/isel-sink3.ll
new file mode 100644
index 0000000..8d3d97a
--- /dev/null
+++ b/test/CodeGen/X86/isel-sink3.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s | grep {addl.\$4, %ecx}
+; RUN: llc < %s | not grep leal
+; this should not sink %1 into bb1, that would increase reg pressure.
+
+; rdar://6399178
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin7"
+
+define i32 @bar(i32** %P) nounwind {
+entry:
+	%0 = load i32** %P, align 4		; <i32*> [#uses=2]
+	%1 = getelementptr i32* %0, i32 1		; <i32*> [#uses=1]
+	%2 = icmp ugt i32* %1, inttoptr (i64 1233 to i32*)		; <i1> [#uses=1]
+	br i1 %2, label %bb1, label %bb
+
+bb:		; preds = %entry
+	store i32* inttoptr (i64 123 to i32*), i32** %P, align 4
+	br label %bb1
+
+bb1:		; preds = %entry, %bb
+	%3 = getelementptr i32* %1, i32 1		; <i32*> [#uses=1]
+	%4 = load i32* %3, align 4		; <i32> [#uses=1]
+	ret i32 %4
+}
diff --git a/test/CodeGen/X86/isint.ll b/test/CodeGen/X86/isint.ll
new file mode 100644
index 0000000..507a328
--- /dev/null
+++ b/test/CodeGen/X86/isint.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
+; RUN: not grep cmp %t
+; RUN: not grep xor %t
+; RUN: grep jne %t | count 1
+; RUN: grep jp %t | count 1
+; RUN: grep setnp %t | count 1
+; RUN: grep sete %t | count 1
+; RUN: grep and %t | count 1
+; RUN: grep cvt %t | count 4
+
+define i32 @isint_return(double %d) nounwind {
+  %i = fptosi double %d to i32
+  %e = sitofp i32 %i to double
+  %c = fcmp oeq double %d, %e
+  %z = zext i1 %c to i32
+  ret i32 %z
+}
+
+declare void @foo()
+
+define void @isint_branch(double %d) nounwind {
+  %i = fptosi double %d to i32
+  %e = sitofp i32 %i to double
+  %c = fcmp oeq double %d, %e
+  br i1 %c, label %true, label %false
+true:
+  call void @foo()
+  ret void
+false:
+  ret void
+}
diff --git a/test/CodeGen/X86/isnan.ll b/test/CodeGen/X86/isnan.ll
new file mode 100644
index 0000000..4d465c0
--- /dev/null
+++ b/test/CodeGen/X86/isnan.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86 | not grep call
+
+declare i1 @llvm.isunordered.f64(double)
+
+define i1 @test_isnan(double %X) {
+        %R = fcmp uno double %X, %X             ; <i1> [#uses=1]
+        ret i1 %R
+}
+
diff --git a/test/CodeGen/X86/isnan2.ll b/test/CodeGen/X86/isnan2.ll
new file mode 100644
index 0000000..7753346
--- /dev/null
+++ b/test/CodeGen/X86/isnan2.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep pxor
+
+; This should not need to materialize 0.0 to evaluate the condition.
+
+define i32 @test(double %X) nounwind  {
+entry:
+	%tmp6 = fcmp uno double %X, 0.000000e+00		; <i1> [#uses=1]
+	%tmp67 = zext i1 %tmp6 to i32		; <i32> [#uses=1]
+	ret i32 %tmp67
+}
+
diff --git a/test/CodeGen/X86/ispositive.ll b/test/CodeGen/X86/ispositive.ll
new file mode 100644
index 0000000..8adf723
--- /dev/null
+++ b/test/CodeGen/X86/ispositive.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86 | grep {shrl.*31}
+
+define i32 @test1(i32 %X) {
+entry:
+        icmp slt i32 %X, 0              ; <i1>:0 [#uses=1]
+        zext i1 %0 to i32               ; <i32>:1 [#uses=1]
+        ret i32 %1
+}
+
diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll
new file mode 100644
index 0000000..c695c29
--- /dev/null
+++ b/test/CodeGen/X86/iv-users-in-other-loops.ll
@@ -0,0 +1,296 @@
+; RUN: llc < %s -march=x86-64 -o %t
+; RUN: grep inc %t | count 1
+; RUN: grep dec %t | count 2
+; RUN: grep addq %t | count 13
+; RUN: not grep addb %t
+; RUN: grep leaq %t | count 9
+; RUN: grep leal %t | count 3
+; RUN: grep movq %t | count 5
+
+; IV users in each of the loops from other loops shouldn't cause LSR
+; to insert new induction variables. Previously it would create a
+; flood of new induction variables.
+; Also, the loop reversal should kick in once.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @foo(float* %A, i32 %IA, float* %B, i32 %IB, float* nocapture %C, i32 %N) nounwind {
+entry:
+      %0 = xor i32 %IA, 1		; <i32> [#uses=1]
+      %1 = xor i32 %IB, 1		; <i32> [#uses=1]
+      %2 = or i32 %1, %0		; <i32> [#uses=1]
+      %3 = icmp eq i32 %2, 0		; <i1> [#uses=1]
+      br i1 %3, label %bb2, label %bb13
+
+bb:		; preds = %bb3
+      %4 = load float* %A_addr.0, align 4		; <float> [#uses=1]
+      %5 = load float* %B_addr.0, align 4		; <float> [#uses=1]
+      %6 = fmul float %4, %5		; <float> [#uses=1]
+      %7 = fadd float %6, %Sum0.0		; <float> [#uses=1]
+      %indvar.next154 = add i64 %B_addr.0.rec, 1		; <i64> [#uses=1]
+      br label %bb2
+
+bb2:		; preds = %entry, %bb
+      %B_addr.0.rec = phi i64 [ %indvar.next154, %bb ], [ 0, %entry ]		; <i64> [#uses=14]
+      %Sum0.0 = phi float [ %7, %bb ], [ 0.000000e+00, %entry ]		; <float> [#uses=5]
+      %indvar146 = trunc i64 %B_addr.0.rec to i32		; <i32> [#uses=1]
+      %N_addr.0 = sub i32 %N, %indvar146		; <i32> [#uses=6]
+      %A_addr.0 = getelementptr float* %A, i64 %B_addr.0.rec		; <float*> [#uses=4]
+      %B_addr.0 = getelementptr float* %B, i64 %B_addr.0.rec		; <float*> [#uses=4]
+      %8 = icmp sgt i32 %N_addr.0, 0		; <i1> [#uses=1]
+      br i1 %8, label %bb3, label %bb4
+
+bb3:		; preds = %bb2
+      %9 = ptrtoint float* %A_addr.0 to i64		; <i64> [#uses=1]
+      %10 = and i64 %9, 15		; <i64> [#uses=1]
+      %11 = icmp eq i64 %10, 0		; <i1> [#uses=1]
+      br i1 %11, label %bb4, label %bb
+
+bb4:		; preds = %bb3, %bb2
+      %12 = ptrtoint float* %B_addr.0 to i64		; <i64> [#uses=1]
+      %13 = and i64 %12, 15		; <i64> [#uses=1]
+      %14 = icmp eq i64 %13, 0		; <i1> [#uses=1]
+      %15 = icmp sgt i32 %N_addr.0, 15		; <i1> [#uses=2]
+      br i1 %14, label %bb6.preheader, label %bb10.preheader
+
+bb10.preheader:		; preds = %bb4
+      br i1 %15, label %bb9, label %bb12.loopexit
+
+bb6.preheader:		; preds = %bb4
+      br i1 %15, label %bb5, label %bb8.loopexit
+
+bb5:		; preds = %bb5, %bb6.preheader
+      %indvar143 = phi i64 [ 0, %bb6.preheader ], [ %indvar.next144, %bb5 ]		; <i64> [#uses=3]
+      %vSum0.072 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %21, %bb5 ]		; <<4 x float>> [#uses=1]
+	%vSum1.070 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %29, %bb5 ]		; <<4 x float>> [#uses=1]
+	%vSum2.069 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %37, %bb5 ]		; <<4 x float>> [#uses=1]
+	%vSum3.067 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %45, %bb5 ]		; <<4 x float>> [#uses=1]
+	%indvar145 = trunc i64 %indvar143 to i32		; <i32> [#uses=1]
+	%tmp150 = mul i32 %indvar145, -16		; <i32> [#uses=1]
+	%N_addr.268 = add i32 %tmp150, %N_addr.0		; <i32> [#uses=1]
+	%A_addr.273.rec = shl i64 %indvar143, 4		; <i64> [#uses=5]
+	%B_addr.0.sum180 = add i64 %B_addr.0.rec, %A_addr.273.rec		; <i64> [#uses=2]
+	%B_addr.271 = getelementptr float* %B, i64 %B_addr.0.sum180		; <float*> [#uses=1]
+	%A_addr.273 = getelementptr float* %A, i64 %B_addr.0.sum180		; <float*> [#uses=1]
+	tail call void asm sideeffect ";# foo", "~{dirflag},~{fpsr},~{flags}"() nounwind
+	%16 = bitcast float* %A_addr.273 to <4 x float>*		; <<4 x float>*> [#uses=1]
+	%17 = load <4 x float>* %16, align 16		; <<4 x float>> [#uses=1]
+	%18 = bitcast float* %B_addr.271 to <4 x float>*		; <<4 x float>*> [#uses=1]
+	%19 = load <4 x float>* %18, align 16		; <<4 x float>> [#uses=1]
+	%20 = fmul <4 x float> %17, %19		; <<4 x float>> [#uses=1]
+	%21 = fadd <4 x float> %20, %vSum0.072		; <<4 x float>> [#uses=2]
+	%A_addr.273.sum163 = or i64 %A_addr.273.rec, 4		; <i64> [#uses=1]
+	%A_addr.0.sum175 = add i64 %B_addr.0.rec, %A_addr.273.sum163		; <i64> [#uses=2]
+	%22 = getelementptr float* %A, i64 %A_addr.0.sum175		; <float*> [#uses=1]
+	%23 = bitcast float* %22 to <4 x float>*		; <<4 x float>*> [#uses=1]
+	%24 = load <4 x float>* %23, align 16		; <<4 x float>> [#uses=1]
+	%25 = getelementptr float* %B, i64 %A_addr.0.sum175		; <float*> [#uses=1]
+	%26 = bitcast float* %25 to <4 x float>*		; <<4 x float>*> [#uses=1]
+	%27 = load <4 x float>* %26, align 16		; <<4 x float>> [#uses=1]
+	%28 = fmul <4 x float> %24, %27		; <<4 x float>> [#uses=1]
+	%29 = fadd <4 x float> %28, %vSum1.070		; <<4 x float>> [#uses=2]
+	%A_addr.273.sum161 = or i64 %A_addr.273.rec, 8		; <i64> [#uses=1]
+	%A_addr.0.sum174 = add i64 %B_addr.0.rec, %A_addr.273.sum161		; <i64> [#uses=2]
+	%30 = getelementptr float* %A, i64 %A_addr.0.sum174		; <float*> [#uses=1]
+	%31 = bitcast float* %30 to <4 x float>*		; <<4 x float>*> [#uses=1]
+	%32 = load <4 x float>* %31, align 16		; <<4 x float>> [#uses=1]
+	%33 = getelementptr float* %B, i64 %A_addr.0.sum174		; <float*> [#uses=1]
+	%34 = bitcast float* %33 to <4 x float>*		; <<4 x float>*> [#uses=1]
+	%35 = load <4 x float>* %34, align 16		; <<4 x float>> [#uses=1]
+	%36 = fmul <4 x float> %32, %35		; <<4 x float>> [#uses=1]
+	%37 = fadd <4 x float> %36, %vSum2.069		; <<4 x float>> [#uses=2]
+	%A_addr.273.sum159 = or i64 %A_addr.273.rec, 12		; <i64> [#uses=1]
+	%A_addr.0.sum173 = add i64 %B_addr.0.rec, %A_addr.273.sum159		; <i64> [#uses=2]
+	%38 = getelementptr float* %A, i64 %A_addr.0.sum173		; <float*> [#uses=1]
+	%39 = bitcast float* %38 to <4 x float>*		; <<4 x float>*> [#uses=1]
+	%40 = load <4 x float>* %39, align 16		; <<4 x float>> [#uses=1]
+	%41 = getelementptr float* %B, i64 %A_addr.0.sum173		; <float*> [#uses=1]
+	%42 = bitcast float* %41 to <4 x float>*		; <<4 x float>*> [#uses=1]
+	%43 = load <4 x float>* %42, align 16		; <<4 x float>> [#uses=1]
+	%44 = fmul <4 x float> %40, %43		; <<4 x float>> [#uses=1]
+	%45 = fadd <4 x float> %44, %vSum3.067		; <<4 x float>> [#uses=2]
+	%.rec83 = add i64 %A_addr.273.rec, 16		; <i64> [#uses=1]
+	%A_addr.0.sum172 = add i64 %B_addr.0.rec, %.rec83		; <i64> [#uses=2]
+	%46 = getelementptr float* %A, i64 %A_addr.0.sum172		; <float*> [#uses=1]
+	%47 = getelementptr float* %B, i64 %A_addr.0.sum172		; <float*> [#uses=1]
+	%48 = add i32 %N_addr.268, -16		; <i32> [#uses=2]
+	%49 = icmp sgt i32 %48, 15		; <i1> [#uses=1]
+	%indvar.next144 = add i64 %indvar143, 1		; <i64> [#uses=1]
+	br i1 %49, label %bb5, label %bb8.loopexit
+
+bb7:		; preds = %bb7, %bb8.loopexit
+	%indvar130 = phi i64 [ 0, %bb8.loopexit ], [ %indvar.next131, %bb7 ]		; <i64> [#uses=3]
+	%vSum0.260 = phi <4 x float> [ %vSum0.0.lcssa, %bb8.loopexit ], [ %55, %bb7 ]		; <<4 x float>> [#uses=1]
+	%indvar132 = trunc i64 %indvar130 to i32		; <i32> [#uses=1]
+	%tmp133 = mul i32 %indvar132, -4		; <i32> [#uses=1]
+	%N_addr.358 = add i32 %tmp133, %N_addr.2.lcssa		; <i32> [#uses=1]
+	%A_addr.361.rec = shl i64 %indvar130, 2		; <i64> [#uses=3]
+	%B_addr.359 = getelementptr float* %B_addr.2.lcssa, i64 %A_addr.361.rec		; <float*> [#uses=1]
+	%A_addr.361 = getelementptr float* %A_addr.2.lcssa, i64 %A_addr.361.rec		; <float*> [#uses=1]
+	%50 = bitcast float* %A_addr.361 to <4 x float>*		; <<4 x float>*> [#uses=1]
+	%51 = load <4 x float>* %50, align 16		; <<4 x float>> [#uses=1]
+	%52 = bitcast float* %B_addr.359 to <4 x float>*		; <<4 x float>*> [#uses=1]
+	%53 = load <4 x float>* %52, align 16		; <<4 x float>> [#uses=1]
+	%54 = fmul <4 x float> %51, %53		; <<4 x float>> [#uses=1]
+	%55 = fadd <4 x float> %54, %vSum0.260		; <<4 x float>> [#uses=2]
+	%.rec85 = add i64 %A_addr.361.rec, 4		; <i64> [#uses=2]
+	%56 = getelementptr float* %A_addr.2.lcssa, i64 %.rec85		; <float*> [#uses=1]
+	%57 = getelementptr float* %B_addr.2.lcssa, i64 %.rec85		; <float*> [#uses=1]
+	%58 = add i32 %N_addr.358, -4		; <i32> [#uses=2]
+	%59 = icmp sgt i32 %58, 3		; <i1> [#uses=1]
+	%indvar.next131 = add i64 %indvar130, 1		; <i64> [#uses=1]
+	br i1 %59, label %bb7, label %bb13
+
+bb8.loopexit:		; preds = %bb5, %bb6.preheader
+	%A_addr.2.lcssa = phi float* [ %A_addr.0, %bb6.preheader ], [ %46, %bb5 ]		; <float*> [#uses=3]
+	%vSum0.0.lcssa = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %21, %bb5 ]		; <<4 x float>> [#uses=2]
+	%B_addr.2.lcssa = phi float* [ %B_addr.0, %bb6.preheader ], [ %47, %bb5 ]		; <float*> [#uses=3]
+	%vSum1.0.lcssa = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %29, %bb5 ]		; <<4 x float>> [#uses=2]
+	%vSum2.0.lcssa = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %37, %bb5 ]		; <<4 x float>> [#uses=2]
+	%N_addr.2.lcssa = phi i32 [ %N_addr.0, %bb6.preheader ], [ %48, %bb5 ]		; <i32> [#uses=3]
+	%vSum3.0.lcssa = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %45, %bb5 ]		; <<4 x float>> [#uses=2]
+	%60 = icmp sgt i32 %N_addr.2.lcssa, 3		; <i1> [#uses=1]
+	br i1 %60, label %bb7, label %bb13
+
+bb9:		; preds = %bb9, %bb10.preheader
+	%indvar106 = phi i64 [ 0, %bb10.preheader ], [ %indvar.next107, %bb9 ]		; <i64> [#uses=3]
+	%vSum0.339 = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %75, %bb9 ]		; <<4 x float>> [#uses=1]
+	%vSum1.237 = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %80, %bb9 ]		; <<4 x float>> [#uses=1]
+	%vSum2.236 = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %85, %bb9 ]		; <<4 x float>> [#uses=1]
+	%vSum3.234 = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %90, %bb9 ]		; <<4 x float>> [#uses=1]
+	%indvar108 = trunc i64 %indvar106 to i32		; <i32> [#uses=1]
+	%tmp113 = mul i32 %indvar108, -16		; <i32> [#uses=1]
+	%N_addr.435 = add i32 %tmp113, %N_addr.0		; <i32> [#uses=1]
+	%A_addr.440.rec = shl i64 %indvar106, 4		; <i64> [#uses=5]
+	%B_addr.0.sum = add i64 %B_addr.0.rec, %A_addr.440.rec		; <i64> [#uses=2]
+	%B_addr.438 = getelementptr float* %B, i64 %B_addr.0.sum		; <float*> [#uses=1]
+	%A_addr.440 = getelementptr float* %A, i64 %B_addr.0.sum		; <float*> [#uses=1]
+	%61 = bitcast float* %B_addr.438 to <4 x float>*		; <i8*> [#uses=1]
+	%62 = load <4 x float>* %61, align 1
+	%B_addr.438.sum169 = or i64 %A_addr.440.rec, 4		; <i64> [#uses=1]
+	%B_addr.0.sum187 = add i64 %B_addr.0.rec, %B_addr.438.sum169		; <i64> [#uses=2]
+	%63 = getelementptr float* %B, i64 %B_addr.0.sum187		; <float*> [#uses=1]
+	%64 = bitcast float* %63 to <4 x float>*		; <i8*> [#uses=1]
+	%65 = load <4 x float>* %64, align 1
+	%B_addr.438.sum168 = or i64 %A_addr.440.rec, 8		; <i64> [#uses=1]
+	%B_addr.0.sum186 = add i64 %B_addr.0.rec, %B_addr.438.sum168		; <i64> [#uses=2]
+	%66 = getelementptr float* %B, i64 %B_addr.0.sum186		; <float*> [#uses=1]
+	%67 = bitcast float* %66 to <4 x float>*		; <i8*> [#uses=1]
+	%68 = load <4 x float>* %67, align 1
+	%B_addr.438.sum167 = or i64 %A_addr.440.rec, 12		; <i64> [#uses=1]
+	%B_addr.0.sum185 = add i64 %B_addr.0.rec, %B_addr.438.sum167		; <i64> [#uses=2]
+	%69 = getelementptr float* %B, i64 %B_addr.0.sum185		; <float*> [#uses=1]
+	%70 = bitcast float* %69 to <4 x float>*		; <i8*> [#uses=1]
+	%71 = load <4 x float>* %70, align 1
+	%72 = bitcast float* %A_addr.440 to <4 x float>*		; <<4 x float>*> [#uses=1]
+	%73 = load <4 x float>* %72, align 16		; <<4 x float>> [#uses=1]
+	%74 = fmul <4 x float> %73, %62		; <<4 x float>> [#uses=1]
+	%75 = fadd <4 x float> %74, %vSum0.339		; <<4 x float>> [#uses=2]
+	%76 = getelementptr float* %A, i64 %B_addr.0.sum187		; <float*> [#uses=1]
+	%77 = bitcast float* %76 to <4 x float>*		; <<4 x float>*> [#uses=1]
+	%78 = load <4 x float>* %77, align 16		; <<4 x float>> [#uses=1]
+	%79 = fmul <4 x float> %78, %65		; <<4 x float>> [#uses=1]
+	%80 = fadd <4 x float> %79, %vSum1.237		; <<4 x float>> [#uses=2]
+	%81 = getelementptr float* %A, i64 %B_addr.0.sum186		; <float*> [#uses=1]
+	%82 = bitcast float* %81 to <4 x float>*		; <<4 x float>*> [#uses=1]
+	%83 = load <4 x float>* %82, align 16		; <<4 x float>> [#uses=1]
+	%84 = fmul <4 x float> %83, %68		; <<4 x float>> [#uses=1]
+	%85 = fadd <4 x float> %84, %vSum2.236		; <<4 x float>> [#uses=2]
+	%86 = getelementptr float* %A, i64 %B_addr.0.sum185		; <float*> [#uses=1]
+	%87 = bitcast float* %86 to <4 x float>*		; <<4 x float>*> [#uses=1]
+	%88 = load <4 x float>* %87, align 16		; <<4 x float>> [#uses=1]
+	%89 = fmul <4 x float> %88, %71		; <<4 x float>> [#uses=1]
+	%90 = fadd <4 x float> %89, %vSum3.234		; <<4 x float>> [#uses=2]
+	%.rec89 = add i64 %A_addr.440.rec, 16		; <i64> [#uses=1]
+	%A_addr.0.sum170 = add i64 %B_addr.0.rec, %.rec89		; <i64> [#uses=2]
+	%91 = getelementptr float* %A, i64 %A_addr.0.sum170		; <float*> [#uses=1]
+	%92 = getelementptr float* %B, i64 %A_addr.0.sum170		; <float*> [#uses=1]
+	%93 = add i32 %N_addr.435, -16		; <i32> [#uses=2]
+	%94 = icmp sgt i32 %93, 15		; <i1> [#uses=1]
+	%indvar.next107 = add i64 %indvar106, 1		; <i64> [#uses=1]
+	br i1 %94, label %bb9, label %bb12.loopexit
+
+bb11:		; preds = %bb11, %bb12.loopexit
+	%indvar = phi i64 [ 0, %bb12.loopexit ], [ %indvar.next, %bb11 ]		; <i64> [#uses=3]
+	%vSum0.428 = phi <4 x float> [ %vSum0.3.lcssa, %bb12.loopexit ], [ %100, %bb11 ]		; <<4 x float>> [#uses=1]
+	%indvar96 = trunc i64 %indvar to i32		; <i32> [#uses=1]
+	%tmp = mul i32 %indvar96, -4		; <i32> [#uses=1]
+	%N_addr.526 = add i32 %tmp, %N_addr.4.lcssa		; <i32> [#uses=1]
+	%A_addr.529.rec = shl i64 %indvar, 2		; <i64> [#uses=3]
+	%B_addr.527 = getelementptr float* %B_addr.4.lcssa, i64 %A_addr.529.rec		; <float*> [#uses=1]
+	%A_addr.529 = getelementptr float* %A_addr.4.lcssa, i64 %A_addr.529.rec		; <float*> [#uses=1]
+	%95 = bitcast float* %B_addr.527 to <4 x float>*		; <i8*> [#uses=1]
+	%96 = load <4 x float>* %95, align 1
+	%97 = bitcast float* %A_addr.529 to <4 x float>*		; <<4 x float>*> [#uses=1]
+	%98 = load <4 x float>* %97, align 16		; <<4 x float>> [#uses=1]
+	%99 = fmul <4 x float> %98, %96		; <<4 x float>> [#uses=1]
+	%100 = fadd <4 x float> %99, %vSum0.428		; <<4 x float>> [#uses=2]
+	%.rec91 = add i64 %A_addr.529.rec, 4		; <i64> [#uses=2]
+	%101 = getelementptr float* %A_addr.4.lcssa, i64 %.rec91		; <float*> [#uses=1]
+	%102 = getelementptr float* %B_addr.4.lcssa, i64 %.rec91		; <float*> [#uses=1]
+	%103 = add i32 %N_addr.526, -4		; <i32> [#uses=2]
+	%104 = icmp sgt i32 %103, 3		; <i1> [#uses=1]
+	%indvar.next = add i64 %indvar, 1		; <i64> [#uses=1]
+	br i1 %104, label %bb11, label %bb13
+
+bb12.loopexit:		; preds = %bb9, %bb10.preheader
+	%A_addr.4.lcssa = phi float* [ %A_addr.0, %bb10.preheader ], [ %91, %bb9 ]		; <float*> [#uses=3]
+	%vSum0.3.lcssa = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %75, %bb9 ]		; <<4 x float>> [#uses=2]
+	%B_addr.4.lcssa = phi float* [ %B_addr.0, %bb10.preheader ], [ %92, %bb9 ]		; <float*> [#uses=3]
+	%vSum1.2.lcssa = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %80, %bb9 ]		; <<4 x float>> [#uses=2]
+	%vSum2.2.lcssa = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %85, %bb9 ]		; <<4 x float>> [#uses=2]
+	%N_addr.4.lcssa = phi i32 [ %N_addr.0, %bb10.preheader ], [ %93, %bb9 ]		; <i32> [#uses=3]
+	%vSum3.2.lcssa = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %90, %bb9 ]		; <<4 x float>> [#uses=2]
+	%105 = icmp sgt i32 %N_addr.4.lcssa, 3		; <i1> [#uses=1]
+	br i1 %105, label %bb11, label %bb13
+
+bb13:		; preds = %bb12.loopexit, %bb11, %bb8.loopexit, %bb7, %entry
+	%Sum0.1 = phi float [ 0.000000e+00, %entry ], [ %Sum0.0, %bb7 ], [ %Sum0.0, %bb8.loopexit ], [ %Sum0.0, %bb11 ], [ %Sum0.0, %bb12.loopexit ]		; <float> [#uses=1]
+	%vSum3.1 = phi <4 x float> [ zeroinitializer, %entry ], [ %vSum3.0.lcssa, %bb7 ], [ %vSum3.0.lcssa, %bb8.loopexit ], [ %vSum3.2.lcssa, %bb11 ], [ %vSum3.2.lcssa, %bb12.loopexit ]		; <<4 x float>> [#uses=1]
+	%N_addr.1 = phi i32 [ %N, %entry ], [ %N_addr.2.lcssa, %bb8.loopexit ], [ %58, %bb7 ], [ %N_addr.4.lcssa, %bb12.loopexit ], [ %103, %bb11 ]		; <i32> [#uses=2]
+	%vSum2.1 = phi <4 x float> [ zeroinitializer, %entry ], [ %vSum2.0.lcssa, %bb7 ], [ %vSum2.0.lcssa, %bb8.loopexit ], [ %vSum2.2.lcssa, %bb11 ], [ %vSum2.2.lcssa, %bb12.loopexit ]		; <<4 x float>> [#uses=1]
+	%vSum1.1 = phi <4 x float> [ zeroinitializer, %entry ], [ %vSum1.0.lcssa, %bb7 ], [ %vSum1.0.lcssa, %bb8.loopexit ], [ %vSum1.2.lcssa, %bb11 ], [ %vSum1.2.lcssa, %bb12.loopexit ]		; <<4 x float>> [#uses=1]
+	%B_addr.1 = phi float* [ %B, %entry ], [ %B_addr.2.lcssa, %bb8.loopexit ], [ %57, %bb7 ], [ %B_addr.4.lcssa, %bb12.loopexit ], [ %102, %bb11 ]		; <float*> [#uses=1]
+	%vSum0.1 = phi <4 x float> [ zeroinitializer, %entry ], [ %vSum0.0.lcssa, %bb8.loopexit ], [ %55, %bb7 ], [ %vSum0.3.lcssa, %bb12.loopexit ], [ %100, %bb11 ]		; <<4 x float>> [#uses=1]
+	%A_addr.1 = phi float* [ %A, %entry ], [ %A_addr.2.lcssa, %bb8.loopexit ], [ %56, %bb7 ], [ %A_addr.4.lcssa, %bb12.loopexit ], [ %101, %bb11 ]		; <float*> [#uses=1]
+	%106 = fadd <4 x float> %vSum0.1, %vSum2.1		; <<4 x float>> [#uses=1]
+	%107 = fadd <4 x float> %vSum1.1, %vSum3.1		; <<4 x float>> [#uses=1]
+	%108 = fadd <4 x float> %106, %107		; <<4 x float>> [#uses=4]
+	%tmp23 = extractelement <4 x float> %108, i32 0		; <float> [#uses=1]
+	%tmp21 = extractelement <4 x float> %108, i32 1		; <float> [#uses=1]
+	%109 = fadd float %tmp23, %tmp21		; <float> [#uses=1]
+	%tmp19 = extractelement <4 x float> %108, i32 2		; <float> [#uses=1]
+	%tmp17 = extractelement <4 x float> %108, i32 3		; <float> [#uses=1]
+	%110 = fadd float %tmp19, %tmp17		; <float> [#uses=1]
+	%111 = fadd float %109, %110		; <float> [#uses=1]
+	%Sum0.254 = fadd float %111, %Sum0.1		; <float> [#uses=2]
+	%112 = icmp sgt i32 %N_addr.1, 0		; <i1> [#uses=1]
+	br i1 %112, label %bb.nph56, label %bb16
+
+bb.nph56:		; preds = %bb13
+	%tmp. = zext i32 %N_addr.1 to i64		; <i64> [#uses=1]
+	br label %bb14
+
+bb14:		; preds = %bb14, %bb.nph56
+	%indvar117 = phi i64 [ 0, %bb.nph56 ], [ %indvar.next118, %bb14 ]		; <i64> [#uses=3]
+	%Sum0.255 = phi float [ %Sum0.254, %bb.nph56 ], [ %Sum0.2, %bb14 ]		; <float> [#uses=1]
+	%tmp.122 = sext i32 %IB to i64		; <i64> [#uses=1]
+	%B_addr.652.rec = mul i64 %indvar117, %tmp.122		; <i64> [#uses=1]
+	%tmp.124 = sext i32 %IA to i64		; <i64> [#uses=1]
+	%A_addr.653.rec = mul i64 %indvar117, %tmp.124		; <i64> [#uses=1]
+	%B_addr.652 = getelementptr float* %B_addr.1, i64 %B_addr.652.rec		; <float*> [#uses=1]
+	%A_addr.653 = getelementptr float* %A_addr.1, i64 %A_addr.653.rec		; <float*> [#uses=1]
+	%113 = load float* %A_addr.653, align 4		; <float> [#uses=1]
+	%114 = load float* %B_addr.652, align 4		; <float> [#uses=1]
+	%115 = fmul float %113, %114		; <float> [#uses=1]
+	%Sum0.2 = fadd float %115, %Sum0.255		; <float> [#uses=2]
+	%indvar.next118 = add i64 %indvar117, 1		; <i64> [#uses=2]
+	%exitcond = icmp eq i64 %indvar.next118, %tmp.		; <i1> [#uses=1]
+	br i1 %exitcond, label %bb16, label %bb14
+
+bb16:		; preds = %bb14, %bb13
+	%Sum0.2.lcssa = phi float [ %Sum0.254, %bb13 ], [ %Sum0.2, %bb14 ]		; <float> [#uses=1]
+	store float %Sum0.2.lcssa, float* %C, align 4
+	ret void
+}
diff --git a/test/CodeGen/X86/jump_sign.ll b/test/CodeGen/X86/jump_sign.ll
new file mode 100644
index 0000000..5e8e162
--- /dev/null
+++ b/test/CodeGen/X86/jump_sign.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86 | grep jns
+
+define i32 @f(i32 %X) {
+entry:
+	%tmp1 = add i32 %X, 1		; <i32> [#uses=1]
+	%tmp = icmp slt i32 %tmp1, 0		; <i1> [#uses=1] sign test of the add result; RUN line expects this to fold into jns
+	br i1 %tmp, label %cond_true, label %cond_next
+
+cond_true:		; preds = %entry
+	%tmp2 = tail call i32 (...)* @bar( )		; <i32> [#uses=0] taken only when %X + 1 is negative
+	br label %cond_next
+
+cond_next:		; preds = %cond_true, %entry
+	%tmp3 = tail call i32 (...)* @baz( )		; <i32> [#uses=0]
+	ret i32 undef
+}
+
+declare i32 @bar(...)
+
+declare i32 @baz(...)
diff --git a/test/CodeGen/X86/large-gep-scale.ll b/test/CodeGen/X86/large-gep-scale.ll
new file mode 100644
index 0000000..143294e
--- /dev/null
+++ b/test/CodeGen/X86/large-gep-scale.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+; PR5281
+
+; After scaling, this type doesn't fit in memory. Codegen should generate
+; correct addressing still.
+
+; CHECK: shll $2, %edx
+
+define fastcc i32* @_ada_smkr([2147483647 x i32]* %u, i32 %t) nounwind {
+  %x = getelementptr [2147483647 x i32]* %u, i32 %t, i32 0  ; row stride is (2^31-1)*4 bytes, overflowing i32 once scaled (see PR5281 note above)
+  ret i32* %x
+}
diff --git a/test/CodeGen/X86/ldzero.ll b/test/CodeGen/X86/ldzero.ll
new file mode 100644
index 0000000..dab04bc
--- /dev/null
+++ b/test/CodeGen/X86/ldzero.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s
+; verify PR 1700 is still fixed
+; ModuleID = 'hh.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin8"
+
+define x86_fp80 @x() {
+entry:
+	%retval = alloca x86_fp80, align 16		; <x86_fp80*> [#uses=2]
+	%tmp = alloca x86_fp80, align 16		; <x86_fp80*> [#uses=2]
+	%d = alloca double, align 8		; <double*> [#uses=2]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	store double 0.000000e+00, double* %d, align 8
+	%tmp1 = load double* %d, align 8		; <double> [#uses=1] load of the +0.0 stored above (PR1700 pattern)
+	%tmp12 = fpext double %tmp1 to x86_fp80		; <x86_fp80> [#uses=1] widen double zero to 80-bit
+	store x86_fp80 %tmp12, x86_fp80* %tmp, align 16
+	%tmp3 = load x86_fp80* %tmp, align 16		; <x86_fp80> [#uses=1]
+	store x86_fp80 %tmp3, x86_fp80* %retval, align 16
+	br label %return
+
+return:		; preds = %entry
+	%retval4 = load x86_fp80* %retval		; <x86_fp80> [#uses=1]
+	ret x86_fp80 %retval4
+}
+
+define double @y() {
+entry:
+	%retval = alloca double, align 8		; <double*> [#uses=2]
+	%tmp = alloca double, align 8		; <double*> [#uses=2]
+	%ld = alloca x86_fp80, align 16		; <x86_fp80*> [#uses=2]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	store x86_fp80 0xK00000000000000000000, x86_fp80* %ld, align 16
+	%tmp1 = load x86_fp80* %ld, align 16		; <x86_fp80> [#uses=1] load of the 80-bit +0.0 stored above (mirror of @x)
+	%tmp12 = fptrunc x86_fp80 %tmp1 to double		; <double> [#uses=1] narrow 80-bit zero back to double
+	store double %tmp12, double* %tmp, align 8
+	%tmp3 = load double* %tmp, align 8		; <double> [#uses=1]
+	store double %tmp3, double* %retval, align 8
+	br label %return
+
+return:		; preds = %entry
+	%retval4 = load double* %retval		; <double> [#uses=1]
+	ret double %retval4
+}
diff --git a/test/CodeGen/X86/lea-2.ll b/test/CodeGen/X86/lea-2.ll
new file mode 100644
index 0000000..6930350
--- /dev/null
+++ b/test/CodeGen/X86/lea-2.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
+; RUN:   grep {lea	EAX, DWORD PTR \\\[... + 4\\*... - 5\\\]}
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
+; RUN:   not grep add
+
+define i32 @test1(i32 %A, i32 %B) {
+        %tmp1 = shl i32 %A, 2           ; <i32> [#uses=1] 4*%A
+        %tmp3 = add i32 %B, -5          ; <i32> [#uses=1]
+        %tmp4 = add i32 %tmp3, %tmp1            ; <i32> [#uses=1] %B + 4*%A - 5; RUN lines expect one lea and no add
+        ret i32 %tmp4
+}
+
+
diff --git a/test/CodeGen/X86/lea-3.ll b/test/CodeGen/X86/lea-3.ll
new file mode 100644
index 0000000..44413d6
--- /dev/null
+++ b/test/CodeGen/X86/lea-3.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86-64 | grep {leal	(%rdi,%rdi,2), %eax}
+define i32 @test(i32 %a) {
+        %tmp2 = mul i32 %a, 3           ; <i32> [#uses=1] RUN line above expects leal (%rdi,%rdi,2)
+        ret i32 %tmp2
+}
+
+; RUN: llc < %s -march=x86-64 | grep {leaq	(,%rdi,4), %rax}
+define i64 @test2(i64 %a) {
+        %tmp2 = shl i64 %a, 2           ; RUN line above expects leaq (,%rdi,4) for this shift
+	%tmp3 = or i64 %tmp2, %a
+        ret i64 %tmp3
+}
+
+;; TODO!  LEA instead of shift + copy.
+define i64 @test3(i64 %a) {
+        %tmp2 = shl i64 %a, 3           ; see TODO above: currently shift + copy rather than lea
+        ret i64 %tmp2
+}
+
diff --git a/test/CodeGen/X86/lea-4.ll b/test/CodeGen/X86/lea-4.ll
new file mode 100644
index 0000000..2171204
--- /dev/null
+++ b/test/CodeGen/X86/lea-4.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86-64 | grep lea | count 2
+
+define zeroext i16 @t1(i32 %on_off) nounwind {
+entry:
+	%0 = sub i32 %on_off, 1
+	%1 = mul i32 %0, 2              ; 2*(%on_off - 1); lea-expressible (counted by RUN line)
+	%2 = trunc i32 %1 to i16
+	%3 = zext i16 %2 to i32         ; trunc/zext/trunc round trip; net effect is the i16 truncation
+	%4 = trunc i32 %3 to i16
+	ret i16 %4
+}
+
+define i32 @t2(i32 %on_off) nounwind {
+entry:
+	%0 = sub i32 %on_off, 1
+	%1 = mul i32 %0, 2              ; 2*(%on_off - 1); lea-expressible (counted by RUN line)
+        %2 = and i32 %1, 65535          ; mask to low 16 bits (same result as @t1's trunc/zext)
+	ret i32 %2
+}
diff --git a/test/CodeGen/X86/lea-recursion.ll b/test/CodeGen/X86/lea-recursion.ll
new file mode 100644
index 0000000..3f32fd2
--- /dev/null
+++ b/test/CodeGen/X86/lea-recursion.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -march=x86-64 | grep lea | count 12
+
+; This testcase was written to demonstrate an instruction-selection problem,
+; however it also happens to expose a limitation in the DAGCombiner's
+; expression reassociation which causes it to miss opportunities for
+; constant folding due to the intermediate adds having multiple uses.
+; The Reassociate pass has similar limitations. If these limitations are
+; fixed, the test commands above will need to be updated to expect fewer
+; lea instructions.
+
+@g0 = weak global [1000 x i32] zeroinitializer, align 32		; <[1000 x i32]*> [#uses=8]
+@g1 = weak global [1000 x i32] zeroinitializer, align 32		; <[1000 x i32]*> [#uses=7]
+
+define void @foo() {
+entry:
+	%tmp4 = load i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 0)		; <i32> [#uses=1]
+	%tmp8 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 0)		; <i32> [#uses=1]
+	%tmp9 = add i32 %tmp4, 1		; <i32> [#uses=1]
+	%tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=2] g0[0] + g1[0] + 1; feeds next iteration too
+	store i32 %tmp10, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 1)
+	%tmp8.1 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 1)		; <i32> [#uses=1]
+	%tmp9.1 = add i32 %tmp10, 1		; <i32> [#uses=1] each stage chains on the previous sum (the multi-use adds noted above)
+	%tmp10.1 = add i32 %tmp9.1, %tmp8.1		; <i32> [#uses=2]
+	store i32 %tmp10.1, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 2)
+	%tmp8.2 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 2)		; <i32> [#uses=1]
+	%tmp9.2 = add i32 %tmp10.1, 1		; <i32> [#uses=1]
+	%tmp10.2 = add i32 %tmp9.2, %tmp8.2		; <i32> [#uses=2]
+	store i32 %tmp10.2, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 3)
+	%tmp8.3 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 3)		; <i32> [#uses=1]
+	%tmp9.3 = add i32 %tmp10.2, 1		; <i32> [#uses=1]
+	%tmp10.3 = add i32 %tmp9.3, %tmp8.3		; <i32> [#uses=2]
+	store i32 %tmp10.3, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 4)
+	%tmp8.4 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 4)		; <i32> [#uses=1]
+	%tmp9.4 = add i32 %tmp10.3, 1		; <i32> [#uses=1]
+	%tmp10.4 = add i32 %tmp9.4, %tmp8.4		; <i32> [#uses=2]
+	store i32 %tmp10.4, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 5)
+	%tmp8.5 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 5)		; <i32> [#uses=1]
+	%tmp9.5 = add i32 %tmp10.4, 1		; <i32> [#uses=1]
+	%tmp10.5 = add i32 %tmp9.5, %tmp8.5		; <i32> [#uses=2]
+	store i32 %tmp10.5, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 6)
+	%tmp8.6 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 6)		; <i32> [#uses=1]
+	%tmp9.6 = add i32 %tmp10.5, 1		; <i32> [#uses=1]
+	%tmp10.6 = add i32 %tmp9.6, %tmp8.6		; <i32> [#uses=1] last stage; sum is only stored, not chained
+	store i32 %tmp10.6, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 7)
+	ret void
+}
+
diff --git a/test/CodeGen/X86/lea.ll b/test/CodeGen/X86/lea.ll
new file mode 100644
index 0000000..22a9644
--- /dev/null
+++ b/test/CodeGen/X86/lea.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+define i32 @test1(i32 %x) nounwind {
+        %tmp1 = shl i32 %x, 3           ; 8*%x
+        %tmp2 = add i32 %tmp1, 7        ; 8*%x + 7: base+scale*index fits one lea (CHECK below)
+        ret i32 %tmp2
+; CHECK: test1:
+; CHECK:    leal 7(,%rdi,8), %eax
+}
+
+
+; ISel the add of -4 with a neg and use an lea for the rest of the
+; arithmetic.
+define i32 @test2(i32 %x_offs) nounwind readnone {
+entry:
+	%t0 = icmp sgt i32 %x_offs, 4
+	br i1 %t0, label %bb.nph, label %bb2
+
+bb.nph:
+	%tmp = add i32 %x_offs, -5      ; expected as leal -5(%rdi) per CHECK lines
+	%tmp6 = lshr i32 %tmp, 2
+	%tmp7 = mul i32 %tmp6, -4       ; lshr-then-mul -4: expected to become andl $-4 / negl
+	%tmp8 = add i32 %tmp7, %x_offs
+	%tmp9 = add i32 %tmp8, -4       ; final leal -4(%rdi,%rax) per CHECK lines
+	ret i32 %tmp9
+
+bb2:
+	ret i32 %x_offs
+; CHECK: test2:
+; CHECK:	leal	-5(%rdi), %eax
+; CHECK:	andl	$-4, %eax
+; CHECK:	negl	%eax
+; CHECK:	leal	-4(%rdi,%rax), %eax
+}
diff --git a/test/CodeGen/X86/legalize-fmp-oeq-vector-select.ll b/test/CodeGen/X86/legalize-fmp-oeq-vector-select.ll
new file mode 100644
index 0000000..6a8c154
--- /dev/null
+++ b/test/CodeGen/X86/legalize-fmp-oeq-vector-select.ll
@@ -0,0 +1,11 @@
+; RUN: llc -march=x86-64 -enable-legalize-types-checking < %s
+; PR5092
+
+define <4 x float> @bug(float %a) nounwind {
+entry:
+  %cmp = fcmp oeq float %a, 0.000000e+00          ; <i1> [#uses=1] scalar i1 selecting between vector operands (the PR5092 pattern)
+  %temp = select i1 %cmp, <4 x float> <float 1.000000e+00, float 0.000000e+00,
+float 0.000000e+00, float 0.000000e+00>, <4 x float> zeroinitializer  ; <1,0,0,0> when %a == 0.0, else all zeros
+  ret <4 x float> %temp
+}
+
diff --git a/test/CodeGen/X86/legalizedag_vec.ll b/test/CodeGen/X86/legalizedag_vec.ll
new file mode 100644
index 0000000..574b46a
--- /dev/null
+++ b/test/CodeGen/X86/legalizedag_vec.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 -mattr=sse2 -disable-mmx -o %t
+; RUN: grep {call.*divdi3}  %t | count 2
+
+
+; Test case for r63760 where we generate a legalization assert that an illegal
+; type has been inserted by LegalizeDAG after LegalizeType has run. With sse2,
+; v2i64 is a legal type but with mmx disabled, i64 is an illegal type. When
+; legalizing the divide in LegalizeDAG, we scalarize the vector divide and make
+; two 64 bit divide library calls, which introduce i64 nodes that need to be
+; promoted.
+
+define <2 x i64> @test_long_div(<2 x i64> %num, <2 x i64> %div) {
+  %div.r = sdiv <2 x i64> %num, %div  ; scalarized into two __divdi3 libcalls (RUN line counts 2)
+  ret <2 x i64>  %div.r
+}                                     
diff --git a/test/CodeGen/X86/lfence.ll b/test/CodeGen/X86/lfence.ll
new file mode 100644
index 0000000..7a96ca3
--- /dev/null
+++ b/test/CodeGen/X86/lfence.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep lfence
+
+declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
+
+define void @test() {
+	call void @llvm.memory.barrier( i1 true, i1 false, i1 false, i1 false, i1 true)  ; legacy barrier intrinsic; this flag combination lowers to lfence per the RUN line
+	ret void
+}
diff --git a/test/CodeGen/X86/limited-prec.ll b/test/CodeGen/X86/limited-prec.ll
new file mode 100644
index 0000000..7bf4ac2
--- /dev/null
+++ b/test/CodeGen/X86/limited-prec.ll
@@ -0,0 +1,60 @@
+; RUN: llc < %s -limit-float-precision=6 -march=x86 | \
+; RUN:    not grep exp | not grep log | not grep pow
+; RUN: llc < %s -limit-float-precision=12 -march=x86 | \
+; RUN:    not grep exp | not grep log | not grep pow
+; RUN: llc < %s -limit-float-precision=18 -march=x86 | \
+; RUN:    not grep exp | not grep log | not grep pow
+
+define float @f1(float %x) nounwind noinline {
+entry:
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	%0 = call float @llvm.exp.f32(float %x)		; <float> [#uses=1] must expand inline; RUN lines forbid an exp libcall
+	ret float %0
+}
+
+declare float @llvm.exp.f32(float) nounwind readonly
+
+define float @f2(float %x) nounwind noinline {
+entry:
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	%0 = call float @llvm.exp2.f32(float %x)		; <float> [#uses=1] must expand inline; RUN lines forbid an exp* libcall
+	ret float %0
+}
+
+declare float @llvm.exp2.f32(float) nounwind readonly
+
+define float @f3(float %x) nounwind noinline {
+entry:
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	%0 = call float @llvm.pow.f32(float 1.000000e+01, float %x)		; <float> [#uses=1] 10^x with constant base; RUN lines forbid a pow libcall
+	ret float %0
+}
+
+declare float @llvm.pow.f32(float, float) nounwind readonly
+
+define float @f4(float %x) nounwind noinline {
+entry:
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	%0 = call float @llvm.log.f32(float %x)		; <float> [#uses=1] must expand inline; RUN lines forbid a log libcall
+	ret float %0
+}
+
+declare float @llvm.log.f32(float) nounwind readonly
+
+define float @f5(float %x) nounwind noinline {
+entry:
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	%0 = call float @llvm.log2.f32(float %x)		; <float> [#uses=1] must expand inline; RUN lines forbid a log* libcall
+	ret float %0
+}
+
+declare float @llvm.log2.f32(float) nounwind readonly
+
+define float @f6(float %x) nounwind noinline {
+entry:
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	%0 = call float @llvm.log10.f32(float %x)		; <float> [#uses=1] must expand inline; RUN lines forbid a log* libcall
+	ret float %0
+}
+
+declare float @llvm.log10.f32(float) nounwind readonly
diff --git a/test/CodeGen/X86/live-out-reg-info.ll b/test/CodeGen/X86/live-out-reg-info.ll
new file mode 100644
index 0000000..8cd9774
--- /dev/null
+++ b/test/CodeGen/X86/live-out-reg-info.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86-64 | grep testb
+
+; Make sure dagcombine doesn't eliminate the comparison due
+; to an off-by-one bug with ComputeMaskedBits information.
+
+declare void @qux()
+
+define void @foo(i32 %a) {
+  %t0 = lshr i32 %a, 23
+  br label %next
+next:
+  %t1 = and i32 %t0, 256
+  %t2 = icmp eq i32 %t1, 0
+  br i1 %t2, label %true, label %false
+true:
+  call void @qux()
+  ret void
+false:
+  ret void
+}
diff --git a/test/CodeGen/X86/local-liveness.ll b/test/CodeGen/X86/local-liveness.ll
new file mode 100644
index 0000000..321f208
--- /dev/null
+++ b/test/CodeGen/X86/local-liveness.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=x86 -regalloc=local | grep {subl	%eax, %edx}
+
+; Local regalloc shouldn't assume that both the uses of the
+; sub instruction are kills, because one of them is tied
+; to an output. Previously, it was allocating both inputs
+; in the same register.
+
+define i32 @func_3() nounwind {
+entry:
+	%retval = alloca i32		; <i32*> [#uses=2]
+	%g_323 = alloca i8		; <i8*> [#uses=2]
+	%p_5 = alloca i64, align 8		; <i64*> [#uses=2]
+	%0 = alloca i32		; <i32*> [#uses=2]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	store i64 0, i64* %p_5, align 8
+	store i8 1, i8* %g_323, align 1
+	%1 = load i8* %g_323, align 1		; <i8> [#uses=1]
+	%2 = sext i8 %1 to i64		; <i64> [#uses=1]
+	%3 = load i64* %p_5, align 8		; <i64> [#uses=1]
+	%4 = sub i64 %3, %2		; <i64> [#uses=1]
+	%5 = icmp sge i64 %4, 0		; <i1> [#uses=1]
+	%6 = zext i1 %5 to i32		; <i32> [#uses=1]
+	store i32 %6, i32* %0, align 4
+	%7 = load i32* %0, align 4		; <i32> [#uses=1]
+	store i32 %7, i32* %retval, align 4
+	br label %return
+
+return:		; preds = %entry
+	%retval1 = load i32* %retval		; <i32> [#uses=1]
+	ret i32 %retval1
+}
diff --git a/test/CodeGen/X86/long-setcc.ll b/test/CodeGen/X86/long-setcc.ll
new file mode 100644
index 0000000..e0165fb
--- /dev/null
+++ b/test/CodeGen/X86/long-setcc.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=x86 | grep cmp | count 1
+; RUN: llc < %s -march=x86 | grep shr | count 1
+; RUN: llc < %s -march=x86 | grep xor | count 1
+
+define i1 @t1(i64 %x) nounwind {
+	%B = icmp slt i64 %x, 0
+	ret i1 %B
+}
+
+define i1 @t2(i64 %x) nounwind {
+	%tmp = icmp ult i64 %x, 4294967296
+	ret i1 %tmp
+}
+
+define i1 @t3(i32 %x) nounwind {
+	%tmp = icmp ugt i32 %x, -1
+	ret i1 %tmp
+}
diff --git a/test/CodeGen/X86/longlong-deadload.ll b/test/CodeGen/X86/longlong-deadload.ll
new file mode 100644
index 0000000..9a4c8f2
--- /dev/null
+++ b/test/CodeGen/X86/longlong-deadload.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86 | not grep '4{(%...)}
+; This should not load or store the top part of *P.
+
+define void @test(i64* %P) nounwind  {
+entry:
+	%tmp1 = load i64* %P, align 8		; <i64> [#uses=1]
+	%tmp2 = xor i64 %tmp1, 1		; <i64> [#uses=1]
+	store i64 %tmp2, i64* %P, align 8
+	ret void
+}
+
diff --git a/test/CodeGen/X86/loop-blocks.ll b/test/CodeGen/X86/loop-blocks.ll
new file mode 100644
index 0000000..a125e54
--- /dev/null
+++ b/test/CodeGen/X86/loop-blocks.ll
@@ -0,0 +1,207 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -asm-verbose=false | FileCheck %s
+
+; These tests check for loop branching structure, and that the loop align
+; directive is placed in the expected place.
+
+; CodeGen should insert a branch into the middle of the loop in
+; order to avoid a branch within the loop.
+
+; CHECK: simple:
+;      CHECK:   jmp   .LBB1_1
+; CHECK-NEXT:   align
+; CHECK-NEXT: .LBB1_2:
+; CHECK-NEXT:   callq loop_latch
+; CHECK-NEXT: .LBB1_1:
+; CHECK-NEXT:   callq loop_header
+
+define void @simple() nounwind {
+entry:
+  br label %loop
+
+loop:
+  call void @loop_header()
+  %t0 = tail call i32 @get()
+  %t1 = icmp slt i32 %t0, 0
+  br i1 %t1, label %done, label %bb
+
+bb:
+  call void @loop_latch()
+  br label %loop
+
+done:
+  call void @exit()
+  ret void
+}
+
+; CodeGen should move block_a to the top of the loop so that it
+; falls through into the loop, avoiding a branch within the loop.
+
+; CHECK: slightly_more_involved:
+;      CHECK:   jmp .LBB2_1
+; CHECK-NEXT:   align
+; CHECK-NEXT: .LBB2_4:
+; CHECK-NEXT:   callq bar99
+; CHECK-NEXT: .LBB2_1:
+; CHECK-NEXT:   callq body
+
+define void @slightly_more_involved() nounwind {
+entry:
+  br label %loop
+
+loop:
+  call void @body()
+  %t0 = call i32 @get()
+  %t1 = icmp slt i32 %t0, 2
+  br i1 %t1, label %block_a, label %bb
+
+bb:
+  %t2 = call i32 @get()
+  %t3 = icmp slt i32 %t2, 99
+  br i1 %t3, label %exit, label %loop
+
+block_a:
+  call void @bar99()
+  br label %loop
+
+exit:
+  call void @exit()
+  ret void
+}
+
+; Same as slightly_more_involved, but block_a is now a CFG diamond with
+; fallthrough edges which should be preserved.
+
+; CHECK: yet_more_involved:
+;      CHECK:   jmp .LBB3_1
+; CHECK-NEXT:   align
+; CHECK-NEXT: .LBB3_4:
+; CHECK-NEXT:   callq bar99
+; CHECK-NEXT:   callq get
+; CHECK-NEXT:   cmpl $2999, %eax
+; CHECK-NEXT:   jg .LBB3_6
+; CHECK-NEXT:   callq block_a_true_func
+; CHECK-NEXT:   jmp .LBB3_7
+; CHECK-NEXT: .LBB3_6:
+; CHECK-NEXT:   callq block_a_false_func
+; CHECK-NEXT: .LBB3_7:
+; CHECK-NEXT:   callq block_a_merge_func
+; CHECK-NEXT: .LBB3_1:
+; CHECK-NEXT:   callq body
+
+define void @yet_more_involved() nounwind {
+entry:
+  br label %loop
+
+loop:
+  call void @body()
+  %t0 = call i32 @get()
+  %t1 = icmp slt i32 %t0, 2
+  br i1 %t1, label %block_a, label %bb
+
+bb:
+  %t2 = call i32 @get()
+  %t3 = icmp slt i32 %t2, 99
+  br i1 %t3, label %exit, label %loop
+
+block_a:
+  call void @bar99()
+  %z0 = call i32 @get()
+  %z1 = icmp slt i32 %z0, 3000
+  br i1 %z1, label %block_a_true, label %block_a_false
+
+block_a_true:
+  call void @block_a_true_func()
+  br label %block_a_merge
+
+block_a_false:
+  call void @block_a_false_func()
+  br label %block_a_merge
+
+block_a_merge:
+  call void @block_a_merge_func()
+  br label %loop
+
+exit:
+  call void @exit()
+  ret void
+}
+
+; CodeGen should move the CFG islands that are part of the loop but don't
+; conveniently fit anywhere so that they are at least contiguous with the
+; loop.
+
+; CHECK: cfg_islands:
+;      CHECK:   jmp     .LBB4_1
+; CHECK-NEXT:   align
+; CHECK-NEXT: .LBB4_7:
+; CHECK-NEXT:   callq   bar100
+; CHECK-NEXT:   jmp     .LBB4_1
+; CHECK-NEXT: .LBB4_8:
+; CHECK-NEXT:   callq   bar101
+; CHECK-NEXT:   jmp     .LBB4_1
+; CHECK-NEXT: .LBB4_9:
+; CHECK-NEXT:   callq   bar102
+; CHECK-NEXT:   jmp     .LBB4_1
+; CHECK-NEXT: .LBB4_5:
+; CHECK-NEXT:   callq   loop_latch
+; CHECK-NEXT: .LBB4_1:
+; CHECK-NEXT:   callq   loop_header
+
+define void @cfg_islands() nounwind {
+entry:
+  br label %loop
+
+loop:
+  call void @loop_header()
+  %t0 = call i32 @get()
+  %t1 = icmp slt i32 %t0, 100
+  br i1 %t1, label %block100, label %bb
+
+bb:
+  %t2 = call i32 @get()
+  %t3 = icmp slt i32 %t2, 101
+  br i1 %t3, label %block101, label %bb1
+
+bb1:
+  %t4 = call i32 @get()
+  %t5 = icmp slt i32 %t4, 102
+  br i1 %t5, label %block102, label %bb2
+
+bb2:
+  %t6 = call i32 @get()
+  %t7 = icmp slt i32 %t6, 103
+  br i1 %t7, label %exit, label %bb3
+
+bb3:
+  call void @loop_latch()
+  br label %loop
+
+exit:
+  call void @exit()
+  ret void
+
+block100:
+  call void @bar100()
+  br label %loop
+
+block101:
+  call void @bar101()
+  br label %loop
+
+block102:
+  call void @bar102()
+  br label %loop
+}
+
+declare void @bar99() nounwind
+declare void @bar100() nounwind
+declare void @bar101() nounwind
+declare void @bar102() nounwind
+declare void @body() nounwind
+declare void @exit() nounwind
+declare void @loop_header() nounwind
+declare void @loop_latch() nounwind
+declare i32 @get() nounwind
+declare void @block_a_true_func() nounwind
+declare void @block_a_false_func() nounwind
+declare void @block_a_merge_func() nounwind
diff --git a/test/CodeGen/X86/loop-hoist.ll b/test/CodeGen/X86/loop-hoist.ll
new file mode 100644
index 0000000..b9008e5
--- /dev/null
+++ b/test/CodeGen/X86/loop-hoist.ll
@@ -0,0 +1,27 @@
+; LSR should hoist the load from the "Arr" stub out of the loop.
+
+; RUN: llc < %s -relocation-model=dynamic-no-pic -mtriple=i686-apple-darwin8.7.2 | FileCheck %s
+
+; CHECK: _foo:
+; CHECK:    L_Arr$non_lazy_ptr
+; CHECK: LBB1_1:
+
+@Arr = external global [0 x i32]		; <[0 x i32]*> [#uses=1]
+
+define void @foo(i32 %N.in, i32 %x) nounwind {
+entry:
+	%N = bitcast i32 %N.in to i32		; <i32> [#uses=1]
+	br label %cond_true
+
+cond_true:		; preds = %cond_true, %entry
+	%indvar = phi i32 [ %x, %entry ], [ %indvar.next, %cond_true ]		; <i32> [#uses=2]
+	%i.0.0 = bitcast i32 %indvar to i32		; <i32> [#uses=2]
+	%tmp = getelementptr [0 x i32]* @Arr, i32 0, i32 %i.0.0		; <i32*> [#uses=1]
+	store i32 %i.0.0, i32* %tmp
+	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next, %N		; <i1> [#uses=1]
+	br i1 %exitcond, label %return, label %cond_true
+
+return:		; preds = %cond_true
+	ret void
+}
diff --git a/test/CodeGen/X86/loop-strength-reduce-2.ll b/test/CodeGen/X86/loop-strength-reduce-2.ll
new file mode 100644
index 0000000..30b5114
--- /dev/null
+++ b/test/CodeGen/X86/loop-strength-reduce-2.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=x86 -relocation-model=pic | \
+; RUN:   grep {, 4} | count 1
+; RUN: llc < %s -march=x86 | not grep lea
+;
+; Make sure the common loop invariant A is hoisted up to preheader,
+; since too many registers are needed to subsume it into the addressing modes.
+; It's safe to sink A in when it's not pic.
+
+@A = global [16 x [16 x i32]] zeroinitializer, align 32		; <[16 x [16 x i32]]*> [#uses=2]
+
+define void @test(i32 %row, i32 %N.in) nounwind {
+entry:
+	%N = bitcast i32 %N.in to i32		; <i32> [#uses=1]
+	%tmp5 = icmp sgt i32 %N.in, 0		; <i1> [#uses=1]
+	br i1 %tmp5, label %cond_true, label %return
+
+cond_true:		; preds = %cond_true, %entry
+	%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %cond_true ]		; <i32> [#uses=2]
+	%i.0.0 = bitcast i32 %indvar to i32		; <i32> [#uses=2]
+	%tmp2 = add i32 %i.0.0, 1		; <i32> [#uses=1]
+	%tmp = getelementptr [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp2		; <i32*> [#uses=1]
+	store i32 4, i32* %tmp
+	%tmp5.upgrd.1 = add i32 %i.0.0, 2		; <i32> [#uses=1]
+	%tmp7 = getelementptr [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp5.upgrd.1		; <i32*> [#uses=1]
+	store i32 5, i32* %tmp7
+	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next, %N		; <i1> [#uses=1]
+	br i1 %exitcond, label %return, label %cond_true
+
+return:		; preds = %cond_true, %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/loop-strength-reduce-3.ll b/test/CodeGen/X86/loop-strength-reduce-3.ll
new file mode 100644
index 0000000..70c9134
--- /dev/null
+++ b/test/CodeGen/X86/loop-strength-reduce-3.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=dynamic-no-pic | \
+; RUN:   grep {A+} | count 2
+;
+; Make sure the common loop invariant A is not hoisted up to preheader,
+; since it can be subsumed it into the addressing modes.
+
+@A = global [16 x [16 x i32]] zeroinitializer, align 32		; <[16 x [16 x i32]]*> [#uses=2]
+
+define void @test(i32 %row, i32 %N.in) nounwind {
+entry:
+	%N = bitcast i32 %N.in to i32		; <i32> [#uses=1]
+	%tmp5 = icmp sgt i32 %N.in, 0		; <i1> [#uses=1]
+	br i1 %tmp5, label %cond_true, label %return
+
+cond_true:		; preds = %cond_true, %entry
+	%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %cond_true ]		; <i32> [#uses=2]
+	%i.0.0 = bitcast i32 %indvar to i32		; <i32> [#uses=2]
+	%tmp2 = add i32 %i.0.0, 1		; <i32> [#uses=1]
+	%tmp = getelementptr [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp2		; <i32*> [#uses=1]
+	store i32 4, i32* %tmp
+	%tmp5.upgrd.1 = add i32 %i.0.0, 2		; <i32> [#uses=1]
+	%tmp7 = getelementptr [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp5.upgrd.1		; <i32*> [#uses=1]
+	store i32 5, i32* %tmp7
+	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next, %N		; <i1> [#uses=1]
+	br i1 %exitcond, label %return, label %cond_true
+
+return:		; preds = %cond_true, %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/loop-strength-reduce.ll b/test/CodeGen/X86/loop-strength-reduce.ll
new file mode 100644
index 0000000..4cb56ca
--- /dev/null
+++ b/test/CodeGen/X86/loop-strength-reduce.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -march=x86 -relocation-model=static | \
+; RUN:   grep {A+} | count 2
+;
+; Make sure the common loop invariant A is not hoisted up to preheader,
+; since it can be subsumed into the addressing mode in all uses.
+
+@A = internal global [16 x [16 x i32]] zeroinitializer, align 32		; <[16 x [16 x i32]]*> [#uses=2]
+
+define void @test(i32 %row, i32 %N.in) nounwind {
+entry:
+	%N = bitcast i32 %N.in to i32		; <i32> [#uses=1]
+	%tmp5 = icmp sgt i32 %N.in, 0		; <i1> [#uses=1]
+	br i1 %tmp5, label %cond_true, label %return
+
+cond_true:		; preds = %cond_true, %entry
+	%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %cond_true ]		; <i32> [#uses=2]
+	%i.0.0 = bitcast i32 %indvar to i32		; <i32> [#uses=2]
+	%tmp2 = add i32 %i.0.0, 1		; <i32> [#uses=1]
+	%tmp = getelementptr [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp2		; <i32*> [#uses=1]
+	store i32 4, i32* %tmp
+	%tmp5.upgrd.1 = add i32 %i.0.0, 2		; <i32> [#uses=1]
+	%tmp7 = getelementptr [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp5.upgrd.1		; <i32*> [#uses=1]
+	store i32 5, i32* %tmp7
+	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next, %N		; <i1> [#uses=1]
+	br i1 %exitcond, label %return, label %cond_true
+
+return:		; preds = %cond_true, %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/loop-strength-reduce2.ll b/test/CodeGen/X86/loop-strength-reduce2.ll
new file mode 100644
index 0000000..9b53adb
--- /dev/null
+++ b/test/CodeGen/X86/loop-strength-reduce2.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -mtriple=i686-apple-darwin -relocation-model=pic | grep {\$pb} | grep mov
+;
+; Make sure the PIC label flags2-"L1$pb" is not moved up to the preheader.
+
+@flags2 = internal global [8193 x i8] zeroinitializer, align 32		; <[8193 x i8]*> [#uses=1]
+
+define void @test(i32 %k, i32 %i) nounwind {
+entry:
+	%k_addr.012 = shl i32 %i, 1		; <i32> [#uses=1]
+	%tmp14 = icmp sgt i32 %k_addr.012, 8192		; <i1> [#uses=1]
+	br i1 %tmp14, label %return, label %bb
+
+bb:		; preds = %bb, %entry
+	%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]		; <i32> [#uses=2]
+	%tmp. = shl i32 %i, 1		; <i32> [#uses=1]
+	%tmp.15 = mul i32 %indvar, %i		; <i32> [#uses=1]
+	%tmp.16 = add i32 %tmp.15, %tmp.		; <i32> [#uses=2]
+	%k_addr.0.0 = bitcast i32 %tmp.16 to i32		; <i32> [#uses=1]
+	%gep.upgrd.1 = zext i32 %tmp.16 to i64		; <i64> [#uses=1]
+	%tmp = getelementptr [8193 x i8]* @flags2, i32 0, i64 %gep.upgrd.1		; <i8*> [#uses=1]
+	store i8 0, i8* %tmp
+	%k_addr.0 = add i32 %k_addr.0.0, %i		; <i32> [#uses=1]
+	%tmp.upgrd.2 = icmp sgt i32 %k_addr.0, 8192		; <i1> [#uses=1]
+	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=1]
+	br i1 %tmp.upgrd.2, label %return, label %bb
+
+return:		; preds = %bb, %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/loop-strength-reduce3.ll b/test/CodeGen/X86/loop-strength-reduce3.ll
new file mode 100644
index 0000000..c45a374
--- /dev/null
+++ b/test/CodeGen/X86/loop-strength-reduce3.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=x86 | grep cmp | grep 240
+; RUN: llc < %s -march=x86 | grep inc | count 1
+
+define i32 @foo(i32 %A, i32 %B, i32 %C, i32 %D) nounwind {
+entry:
+	%tmp2955 = icmp sgt i32 %C, 0		; <i1> [#uses=1]
+	br i1 %tmp2955, label %bb26.outer.us, label %bb40.split
+
+bb26.outer.us:		; preds = %bb26.bb32_crit_edge.us, %entry
+	%i.044.0.ph.us = phi i32 [ 0, %entry ], [ %indvar.next57, %bb26.bb32_crit_edge.us ]		; <i32> [#uses=2]
+	%k.1.ph.us = phi i32 [ 0, %entry ], [ %k.0.us, %bb26.bb32_crit_edge.us ]		; <i32> [#uses=1]
+	%tmp3.us = mul i32 %i.044.0.ph.us, 6		; <i32> [#uses=1]
+	br label %bb1.us
+
+bb1.us:		; preds = %bb1.us, %bb26.outer.us
+	%j.053.us = phi i32 [ 0, %bb26.outer.us ], [ %tmp25.us, %bb1.us ]		; <i32> [#uses=2]
+	%k.154.us = phi i32 [ %k.1.ph.us, %bb26.outer.us ], [ %k.0.us, %bb1.us ]		; <i32> [#uses=1]
+	%tmp5.us = add i32 %tmp3.us, %j.053.us		; <i32> [#uses=1]
+	%tmp7.us = shl i32 %D, %tmp5.us		; <i32> [#uses=2]
+	%tmp9.us = icmp eq i32 %tmp7.us, %B		; <i1> [#uses=1]
+	%tmp910.us = zext i1 %tmp9.us to i32		; <i32> [#uses=1]
+	%tmp12.us = and i32 %tmp7.us, %A		; <i32> [#uses=1]
+	%tmp19.us = and i32 %tmp12.us, %tmp910.us		; <i32> [#uses=1]
+	%k.0.us = add i32 %tmp19.us, %k.154.us		; <i32> [#uses=3]
+	%tmp25.us = add i32 %j.053.us, 1		; <i32> [#uses=2]
+	%tmp29.us = icmp slt i32 %tmp25.us, %C		; <i1> [#uses=1]
+	br i1 %tmp29.us, label %bb1.us, label %bb26.bb32_crit_edge.us
+
+bb26.bb32_crit_edge.us:		; preds = %bb1.us
+	%indvar.next57 = add i32 %i.044.0.ph.us, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next57, 40		; <i1> [#uses=1]
+	br i1 %exitcond, label %bb40.split, label %bb26.outer.us
+
+bb40.split:		; preds = %bb26.bb32_crit_edge.us, %entry
+	%k.1.lcssa.lcssa.us-lcssa = phi i32 [ %k.0.us, %bb26.bb32_crit_edge.us ], [ 0, %entry ]		; <i32> [#uses=1]
+	ret i32 %k.1.lcssa.lcssa.us-lcssa
+}
diff --git a/test/CodeGen/X86/loop-strength-reduce4.ll b/test/CodeGen/X86/loop-strength-reduce4.ll
new file mode 100644
index 0000000..07e46ec
--- /dev/null
+++ b/test/CodeGen/X86/loop-strength-reduce4.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s -march=x86 | grep cmp | grep 64
+; RUN: llc < %s -march=x86 | not grep inc
+
+@state = external global [0 x i32]		; <[0 x i32]*> [#uses=4]
+@S = external global [0 x i32]		; <[0 x i32]*> [#uses=4]
+
+define i32 @foo() nounwind {
+entry:
+	br label %bb
+
+bb:		; preds = %bb, %entry
+	%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]		; <i32> [#uses=2]
+	%t.063.0 = phi i32 [ 0, %entry ], [ %tmp47, %bb ]		; <i32> [#uses=1]
+	%j.065.0 = shl i32 %indvar, 2		; <i32> [#uses=4]
+	%tmp3 = getelementptr [0 x i32]* @state, i32 0, i32 %j.065.0		; <i32*> [#uses=2]
+	%tmp4 = load i32* %tmp3, align 4		; <i32> [#uses=1]
+	%tmp6 = getelementptr [0 x i32]* @S, i32 0, i32 %t.063.0		; <i32*> [#uses=1]
+	%tmp7 = load i32* %tmp6, align 4		; <i32> [#uses=1]
+	%tmp8 = xor i32 %tmp7, %tmp4		; <i32> [#uses=2]
+	store i32 %tmp8, i32* %tmp3, align 4
+	%tmp1378 = or i32 %j.065.0, 1		; <i32> [#uses=1]
+	%tmp16 = getelementptr [0 x i32]* @state, i32 0, i32 %tmp1378		; <i32*> [#uses=2]
+	%tmp17 = load i32* %tmp16, align 4		; <i32> [#uses=1]
+	%tmp19 = getelementptr [0 x i32]* @S, i32 0, i32 %tmp8		; <i32*> [#uses=1]
+	%tmp20 = load i32* %tmp19, align 4		; <i32> [#uses=1]
+	%tmp21 = xor i32 %tmp20, %tmp17		; <i32> [#uses=2]
+	store i32 %tmp21, i32* %tmp16, align 4
+	%tmp2680 = or i32 %j.065.0, 2		; <i32> [#uses=1]
+	%tmp29 = getelementptr [0 x i32]* @state, i32 0, i32 %tmp2680		; <i32*> [#uses=2]
+	%tmp30 = load i32* %tmp29, align 4		; <i32> [#uses=1]
+	%tmp32 = getelementptr [0 x i32]* @S, i32 0, i32 %tmp21		; <i32*> [#uses=1]
+	%tmp33 = load i32* %tmp32, align 4		; <i32> [#uses=1]
+	%tmp34 = xor i32 %tmp33, %tmp30		; <i32> [#uses=2]
+	store i32 %tmp34, i32* %tmp29, align 4
+	%tmp3982 = or i32 %j.065.0, 3		; <i32> [#uses=1]
+	%tmp42 = getelementptr [0 x i32]* @state, i32 0, i32 %tmp3982		; <i32*> [#uses=2]
+	%tmp43 = load i32* %tmp42, align 4		; <i32> [#uses=1]
+	%tmp45 = getelementptr [0 x i32]* @S, i32 0, i32 %tmp34		; <i32*> [#uses=1]
+	%tmp46 = load i32* %tmp45, align 4		; <i32> [#uses=1]
+	%tmp47 = xor i32 %tmp46, %tmp43		; <i32> [#uses=3]
+	store i32 %tmp47, i32* %tmp42, align 4
+	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next, 4		; <i1> [#uses=1]
+	br i1 %exitcond, label %bb57, label %bb
+
+bb57:		; preds = %bb
+	%tmp59 = and i32 %tmp47, 255		; <i32> [#uses=1]
+	ret i32 %tmp59
+}
diff --git a/test/CodeGen/X86/loop-strength-reduce5.ll b/test/CodeGen/X86/loop-strength-reduce5.ll
new file mode 100644
index 0000000..b07eeb6
--- /dev/null
+++ b/test/CodeGen/X86/loop-strength-reduce5.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=x86 | grep inc | count 1
+
+@X = weak global i16 0		; <i16*> [#uses=1]
+@Y = weak global i16 0		; <i16*> [#uses=1]
+
+define void @foo(i32 %N) nounwind {
+entry:
+	%tmp1019 = icmp sgt i32 %N, 0		; <i1> [#uses=1]
+	br i1 %tmp1019, label %bb, label %return
+
+bb:		; preds = %bb, %entry
+	%i.014.0 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]		; <i32> [#uses=2]
+	%tmp1 = trunc i32 %i.014.0 to i16		; <i16> [#uses=2]
+	volatile store i16 %tmp1, i16* @X, align 2
+	%tmp34 = shl i16 %tmp1, 2		; <i16> [#uses=1]
+	volatile store i16 %tmp34, i16* @Y, align 2
+	%indvar.next = add i32 %i.014.0, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next, %N		; <i1> [#uses=1]
+	br i1 %exitcond, label %return, label %bb
+
+return:		; preds = %bb, %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/loop-strength-reduce6.ll b/test/CodeGen/X86/loop-strength-reduce6.ll
new file mode 100644
index 0000000..bbafcf7
--- /dev/null
+++ b/test/CodeGen/X86/loop-strength-reduce6.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -march=x86-64 | not grep inc
+
+define fastcc i32 @decodeMP3(i32 %isize, i32* %done) nounwind {
+entry:
+	br i1 false, label %cond_next191, label %cond_true189
+
+cond_true189:		; preds = %entry
+	ret i32 0
+
+cond_next191:		; preds = %entry
+	br i1 false, label %cond_next37.i, label %cond_false.i9
+
+cond_false.i9:		; preds = %cond_next191
+	ret i32 0
+
+cond_next37.i:		; preds = %cond_next191
+	br i1 false, label %cond_false50.i, label %cond_true44.i
+
+cond_true44.i:		; preds = %cond_next37.i
+	br i1 false, label %cond_true11.i.i, label %bb414.preheader.i
+
+cond_true11.i.i:		; preds = %cond_true44.i
+	ret i32 0
+
+cond_false50.i:		; preds = %cond_next37.i
+	ret i32 0
+
+bb414.preheader.i:		; preds = %cond_true44.i
+	br i1 false, label %bb.i18, label %do_layer3.exit
+
+bb.i18:		; preds = %bb414.preheader.i
+	br i1 false, label %bb358.i, label %cond_true79.i
+
+cond_true79.i:		; preds = %bb.i18
+	ret i32 0
+
+bb331.i:		; preds = %bb358.i, %cond_true.i149.i
+	br i1 false, label %cond_true.i149.i, label %cond_false.i151.i
+
+cond_true.i149.i:		; preds = %bb331.i
+	br i1 false, label %bb178.preheader.i.i, label %bb331.i
+
+cond_false.i151.i:		; preds = %bb331.i
+	ret i32 0
+
+bb163.i.i:		; preds = %bb178.preheader.i.i, %bb163.i.i
+	%rawout2.451.rec.i.i = phi i64 [ 0, %bb178.preheader.i.i ], [ %indvar.next260.i, %bb163.i.i ]		; <i64> [#uses=2]
+	%i.052.i.i = trunc i64 %rawout2.451.rec.i.i to i32		; <i32> [#uses=1]
+	%tmp165.i144.i = shl i32 %i.052.i.i, 5		; <i32> [#uses=1]
+	%tmp165169.i.i = sext i32 %tmp165.i144.i to i64		; <i64> [#uses=0]
+	%indvar.next260.i = add i64 %rawout2.451.rec.i.i, 1		; <i64> [#uses=2]
+	%exitcond261.i = icmp eq i64 %indvar.next260.i, 18		; <i1> [#uses=1]
+	br i1 %exitcond261.i, label %bb178.preheader.i.i, label %bb163.i.i
+
+bb178.preheader.i.i:		; preds = %bb163.i.i, %cond_true.i149.i
+	br label %bb163.i.i
+
+bb358.i:		; preds = %bb.i18
+	br i1 false, label %bb331.i, label %bb406.i
+
+bb406.i:		; preds = %bb358.i
+	ret i32 0
+
+do_layer3.exit:		; preds = %bb414.preheader.i
+	ret i32 0
+}
diff --git a/test/CodeGen/X86/loop-strength-reduce7.ll b/test/CodeGen/X86/loop-strength-reduce7.ll
new file mode 100644
index 0000000..4b565a6
--- /dev/null
+++ b/test/CodeGen/X86/loop-strength-reduce7.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -march=x86 | not grep imul
+
+target triple = "i386-apple-darwin9.6"
+	%struct.III_psy_xmin = type { [22 x double], [13 x [3 x double]] }
+	%struct.III_scalefac_t = type { [22 x i32], [13 x [3 x i32]] }
+	%struct.gr_info = type { i32, i32, i32, i32, i32, i32, i32, i32, [3 x i32], [3 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, [4 x i32] }
+	%struct.lame_global_flags = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, float, float, float, float, i32, i32, i32, i32, i32, i32, i32, i32 }
+
+define fastcc void @outer_loop(%struct.lame_global_flags* nocapture %gfp, double* nocapture %xr, i32 %targ_bits, double* nocapture %best_noise, %struct.III_psy_xmin* nocapture %l3_xmin, i32* nocapture %l3_enc, %struct.III_scalefac_t* nocapture %scalefac, %struct.gr_info* nocapture %cod_info, i32 %ch) nounwind {
+entry:
+	br label %bb4
+
+bb4:		; preds = %bb4, %entry
+	br i1 true, label %bb5, label %bb4
+
+bb5:		; preds = %bb4
+	br i1 true, label %bb28.i37, label %bb.i4
+
+bb.i4:		; preds = %bb.i4, %bb5
+	br label %bb.i4
+
+bb28.i37:		; preds = %bb33.i47, %bb5
+	%i.1.reg2mem.0.i = phi i32 [ %0, %bb33.i47 ], [ 0, %bb5 ]		; <i32> [#uses=2]
+	%0 = add i32 %i.1.reg2mem.0.i, 1		; <i32> [#uses=2]
+	br label %bb29.i38
+
+bb29.i38:		; preds = %bb33.i47, %bb28.i37
+	%indvar32.i = phi i32 [ %indvar.next33.i, %bb33.i47 ], [ 0, %bb28.i37 ]		; <i32> [#uses=2]
+	%sfb.314.i = add i32 %indvar32.i, 0		; <i32> [#uses=3]
+	%1 = getelementptr [4 x [21 x double]]* null, i32 0, i32 %0, i32 %sfb.314.i		; <double*> [#uses=1]
+	%2 = load double* %1, align 8		; <double> [#uses=0]
+	br i1 false, label %bb30.i41, label %bb33.i47
+
+bb30.i41:		; preds = %bb29.i38
+	%3 = getelementptr %struct.III_scalefac_t* null, i32 0, i32 1, i32 %sfb.314.i, i32 %i.1.reg2mem.0.i		; <i32*> [#uses=1]
+	store i32 0, i32* %3, align 4
+	br label %bb33.i47
+
+bb33.i47:		; preds = %bb30.i41, %bb29.i38
+	%4 = add i32 %sfb.314.i, 1		; <i32> [#uses=1]
+	%phitmp.i46 = icmp ugt i32 %4, 11		; <i1> [#uses=1]
+	%indvar.next33.i = add i32 %indvar32.i, 1		; <i32> [#uses=1]
+	br i1 %phitmp.i46, label %bb28.i37, label %bb29.i38
+}
diff --git a/test/CodeGen/X86/loop-strength-reduce8.ll b/test/CodeGen/X86/loop-strength-reduce8.ll
new file mode 100644
index 0000000..e14cd8a
--- /dev/null
+++ b/test/CodeGen/X86/loop-strength-reduce8.ll
@@ -0,0 +1,78 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep leal | not grep 16
+
+	%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32 }
+	%struct.bitmap_element = type { %struct.bitmap_element*, %struct.bitmap_element*, i32, [2 x i64] }
+	%struct.bitmap_head_def = type { %struct.bitmap_element*, %struct.bitmap_element*, i32 }
+	%struct.branch_path = type { %struct.rtx_def*, i32 }
+	%struct.c_lang_decl = type <{ i8, [3 x i8] }>
+	%struct.constant_descriptor = type { %struct.constant_descriptor*, i8*, %struct.rtx_def*, { x86_fp80 } }
+	%struct.eh_region = type { %struct.eh_region*, %struct.eh_region*, %struct.eh_region*, i32, %struct.bitmap_head_def*, i32, { { %struct.eh_region*, %struct.eh_region*, %struct.eh_region*, %struct.rtx_def* } }, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
+	%struct.eh_status = type { %struct.eh_region*, %struct.eh_region**, %struct.eh_region*, %struct.eh_region*, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, i32, i32, %struct.varray_head_tag*, %struct.varray_head_tag*, %struct.varray_head_tag*, %struct.branch_path*, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
+	%struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.sequence_stack*, i32, i32, i8*, i32, i8*, %struct.tree_node**, %struct.rtx_def** }
+	%struct.equiv_table = type { %struct.rtx_def*, %struct.rtx_def* }
+	%struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
+	%struct.function = type { %struct.eh_status*, %struct.stmt_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, i8*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, i8*, %struct.initial_value_struct*, i32, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, i32, %struct.rtx_def**, %struct.temp_slot*, i32, i32, i32, %struct.var_refs_queue*, i32, i32, i8*, %struct.tree_node*, %struct.rtx_def*, i32, i32, %struct.machine_function*, i32, i32, %struct.language_function*, %struct.rtx_def*, i8, i8, i8 }
+	%struct.goto_fixup = type { %struct.goto_fixup*, %struct.rtx_def*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.rtx_def*, %struct.tree_node* }
+	%struct.initial_value_struct = type { i32, i32, %struct.equiv_table* }
+	%struct.label_chain = type { %struct.label_chain*, %struct.tree_node* }
+	%struct.lang_decl = type { %struct.c_lang_decl, %struct.tree_node* }
+	%struct.language_function = type { %struct.stmt_tree_s, %struct.tree_node* }
+	%struct.machine_function = type { [59 x [3 x %struct.rtx_def*]], i32, i32 }
+	%struct.nesting = type { %struct.nesting*, %struct.nesting*, i32, %struct.rtx_def*, { { i32, %struct.rtx_def*, %struct.rtx_def*, %struct.nesting*, %struct.tree_node*, %struct.tree_node*, %struct.label_chain*, i32, i32, i32, i32, %struct.rtx_def*, %struct.tree_node** } } }
+	%struct.pool_constant = type { %struct.constant_descriptor*, %struct.pool_constant*, %struct.pool_constant*, %struct.rtx_def*, i32, i32, i32, i64, i32 }
+	%struct.rtunion = type { i64 }
+	%struct.rtx_def = type { i16, i8, i8, [1 x %struct.rtunion] }
+	%struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.sequence_stack* }
+	%struct.stmt_status = type { %struct.nesting*, %struct.nesting*, %struct.nesting*, %struct.nesting*, %struct.nesting*, %struct.nesting*, i32, i32, %struct.tree_node*, %struct.rtx_def*, i32, i8*, i32, %struct.goto_fixup* }
+	%struct.stmt_tree_s = type { %struct.tree_node*, %struct.tree_node*, i8*, i32 }
+	%struct.temp_slot = type { %struct.temp_slot*, %struct.rtx_def*, %struct.rtx_def*, i32, i64, %struct.tree_node*, %struct.tree_node*, i8, i8, i32, i32, i64, i64 }
+	%struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, i8, i8, i8, i8 }
+	%struct.tree_decl = type { %struct.tree_common, i8*, i32, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, %struct.rtunion, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, { %struct.function* }, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* }
+	%struct.tree_exp = type { %struct.tree_common, i32, [1 x %struct.tree_node*] }
+	%struct.tree_node = type { %struct.tree_decl }
+	%struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
+	%struct.varasm_status = type { %struct.constant_descriptor**, %struct.pool_constant**, %struct.pool_constant*, %struct.pool_constant*, i64, %struct.rtx_def* }
+	%struct.varray_data = type { [1 x i64] }
+	%struct.varray_head_tag = type { i32, i32, i32, i8*, %struct.varray_data }
+@lineno = internal global i32 0		; <i32*> [#uses=1]
+@tree_code_length = internal global [256 x i32] zeroinitializer
+@llvm.used = appending global [1 x i8*] [ i8* bitcast (%struct.tree_node* (i32, ...)* @build_stmt to i8*) ], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define %struct.tree_node* @build_stmt(i32 %code, ...) nounwind {
+entry:
+	%p = alloca i8*		; <i8**> [#uses=3]
+	%p1 = bitcast i8** %p to i8*		; <i8*> [#uses=2]
+	call void @llvm.va_start(i8* %p1)
+	%0 = call fastcc %struct.tree_node* @make_node(i32 %code) nounwind		; <%struct.tree_node*> [#uses=2]
+	%1 = getelementptr [256 x i32]* @tree_code_length, i32 0, i32 %code		; <i32*> [#uses=1]
+	%2 = load i32* %1, align 4		; <i32> [#uses=2]
+	%3 = load i32* @lineno, align 4		; <i32> [#uses=1]
+	%4 = bitcast %struct.tree_node* %0 to %struct.tree_exp*		; <%struct.tree_exp*> [#uses=2]
+	%5 = getelementptr %struct.tree_exp* %4, i32 0, i32 1		; <i32*> [#uses=1]
+	store i32 %3, i32* %5, align 4
+	%6 = icmp sgt i32 %2, 0		; <i1> [#uses=1]
+	br i1 %6, label %bb, label %bb3
+
+bb:		; preds = %bb, %entry
+	%i.01 = phi i32 [ %indvar.next, %bb ], [ 0, %entry ]		; <i32> [#uses=2]
+	%7 = load i8** %p, align 4		; <i8*> [#uses=2]
+	%8 = getelementptr i8* %7, i32 4		; <i8*> [#uses=1]
+	store i8* %8, i8** %p, align 4
+	%9 = bitcast i8* %7 to %struct.tree_node**		; <%struct.tree_node**> [#uses=1]
+	%10 = load %struct.tree_node** %9, align 4		; <%struct.tree_node*> [#uses=1]
+	%11 = getelementptr %struct.tree_exp* %4, i32 0, i32 2, i32 %i.01		; <%struct.tree_node**> [#uses=1]
+	store %struct.tree_node* %10, %struct.tree_node** %11, align 4
+	%indvar.next = add i32 %i.01, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next, %2		; <i1> [#uses=1]
+	br i1 %exitcond, label %bb3, label %bb
+
+bb3:		; preds = %bb, %entry
+	call void @llvm.va_end(i8* %p1)
+	ret %struct.tree_node* %0
+}
+
+declare void @llvm.va_start(i8*) nounwind
+
+declare void @llvm.va_end(i8*) nounwind
+
+declare fastcc %struct.tree_node* @make_node(i32) nounwind
diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll
new file mode 100644
index 0000000..474450a
--- /dev/null
+++ b/test/CodeGen/X86/lsr-loop-exit-cond.ll
@@ -0,0 +1,137 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+; CHECK: decq
+; CHECK-NEXT: jne
+
+@Te0 = external global [256 x i32]		; <[256 x i32]*> [#uses=5]
+@Te1 = external global [256 x i32]		; <[256 x i32]*> [#uses=4]
+@Te3 = external global [256 x i32]		; <[256 x i32]*> [#uses=2]
+
+define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r) nounwind ssp {	; AES-style round loop; LSR should test the count with decq/jne (see CHECK above)
+entry:
+	%0 = load i32* %rk, align 4		; <i32> [#uses=1]
+	%1 = getelementptr i32* %rk, i64 1		; <i32*> [#uses=1]
+	%2 = load i32* %1, align 4		; <i32> [#uses=1]
+	%tmp15 = add i32 %r, -1		; <i32> [#uses=1]
+	%tmp.16 = zext i32 %tmp15 to i64		; <i64> [#uses=2]  trip count = r-1
+	br label %bb
+
+bb:		; preds = %bb1, %entry  -- per-round table lookups and key xors
+	%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %bb1 ]		; <i64> [#uses=3]
+	%s1.0 = phi i32 [ %2, %entry ], [ %56, %bb1 ]		; <i32> [#uses=2]
+	%s0.0 = phi i32 [ %0, %entry ], [ %43, %bb1 ]		; <i32> [#uses=2]
+	%tmp18 = shl i64 %indvar, 4		; <i64> [#uses=4]  byte offset of this round's key schedule entry
+	%rk26 = bitcast i32* %rk to i8*		; <i8*> [#uses=6]
+	%3 = lshr i32 %s0.0, 24		; <i32> [#uses=1]
+	%4 = zext i32 %3 to i64		; <i64> [#uses=1]
+	%5 = getelementptr [256 x i32]* @Te0, i64 0, i64 %4		; <i32*> [#uses=1]
+	%6 = load i32* %5, align 4		; <i32> [#uses=1]
+	%7 = lshr i32 %s1.0, 16		; <i32> [#uses=1]
+	%8 = and i32 %7, 255		; <i32> [#uses=1]
+	%9 = zext i32 %8 to i64		; <i64> [#uses=1]
+	%10 = getelementptr [256 x i32]* @Te1, i64 0, i64 %9		; <i32*> [#uses=1]
+	%11 = load i32* %10, align 4		; <i32> [#uses=1]
+	%ctg2.sum2728 = or i64 %tmp18, 8		; <i64> [#uses=1]
+	%12 = getelementptr i8* %rk26, i64 %ctg2.sum2728		; <i8*> [#uses=1]
+	%13 = bitcast i8* %12 to i32*		; <i32*> [#uses=1]
+	%14 = load i32* %13, align 4		; <i32> [#uses=1]
+	%15 = xor i32 %11, %6		; <i32> [#uses=1]
+	%16 = xor i32 %15, %14		; <i32> [#uses=3]
+	%17 = lshr i32 %s1.0, 24		; <i32> [#uses=1]
+	%18 = zext i32 %17 to i64		; <i64> [#uses=1]
+	%19 = getelementptr [256 x i32]* @Te0, i64 0, i64 %18		; <i32*> [#uses=1]
+	%20 = load i32* %19, align 4		; <i32> [#uses=1]
+	%21 = and i32 %s0.0, 255		; <i32> [#uses=1]
+	%22 = zext i32 %21 to i64		; <i64> [#uses=1]
+	%23 = getelementptr [256 x i32]* @Te3, i64 0, i64 %22		; <i32*> [#uses=1]
+	%24 = load i32* %23, align 4		; <i32> [#uses=1]
+	%ctg2.sum2930 = or i64 %tmp18, 12		; <i64> [#uses=1]
+	%25 = getelementptr i8* %rk26, i64 %ctg2.sum2930		; <i8*> [#uses=1]
+	%26 = bitcast i8* %25 to i32*		; <i32*> [#uses=1]
+	%27 = load i32* %26, align 4		; <i32> [#uses=1]
+	%28 = xor i32 %24, %20		; <i32> [#uses=1]
+	%29 = xor i32 %28, %27		; <i32> [#uses=4]
+	%30 = lshr i32 %16, 24		; <i32> [#uses=1]
+	%31 = zext i32 %30 to i64		; <i64> [#uses=1]
+	%32 = getelementptr [256 x i32]* @Te0, i64 0, i64 %31		; <i32*> [#uses=1]
+	%33 = load i32* %32, align 4		; <i32> [#uses=2]
+	%exitcond = icmp eq i64 %indvar, %tmp.16		; <i1> [#uses=1]  last round?
+	br i1 %exitcond, label %bb2, label %bb1
+
+bb1:		; preds = %bb  -- loop latch: compute next round's state words
+	%ctg2.sum31 = add i64 %tmp18, 16		; <i64> [#uses=1]
+	%34 = getelementptr i8* %rk26, i64 %ctg2.sum31		; <i8*> [#uses=1]
+	%35 = bitcast i8* %34 to i32*		; <i32*> [#uses=1]
+	%36 = lshr i32 %29, 16		; <i32> [#uses=1]
+	%37 = and i32 %36, 255		; <i32> [#uses=1]
+	%38 = zext i32 %37 to i64		; <i64> [#uses=1]
+	%39 = getelementptr [256 x i32]* @Te1, i64 0, i64 %38		; <i32*> [#uses=1]
+	%40 = load i32* %39, align 4		; <i32> [#uses=1]
+	%41 = load i32* %35, align 4		; <i32> [#uses=1]
+	%42 = xor i32 %40, %33		; <i32> [#uses=1]
+	%43 = xor i32 %42, %41		; <i32> [#uses=1]
+	%44 = lshr i32 %29, 24		; <i32> [#uses=1]
+	%45 = zext i32 %44 to i64		; <i64> [#uses=1]
+	%46 = getelementptr [256 x i32]* @Te0, i64 0, i64 %45		; <i32*> [#uses=1]
+	%47 = load i32* %46, align 4		; <i32> [#uses=1]
+	%48 = and i32 %16, 255		; <i32> [#uses=1]
+	%49 = zext i32 %48 to i64		; <i64> [#uses=1]
+	%50 = getelementptr [256 x i32]* @Te3, i64 0, i64 %49		; <i32*> [#uses=1]
+	%51 = load i32* %50, align 4		; <i32> [#uses=1]
+	%ctg2.sum32 = add i64 %tmp18, 20		; <i64> [#uses=1]
+	%52 = getelementptr i8* %rk26, i64 %ctg2.sum32		; <i8*> [#uses=1]
+	%53 = bitcast i8* %52 to i32*		; <i32*> [#uses=1]
+	%54 = load i32* %53, align 4		; <i32> [#uses=1]
+	%55 = xor i32 %51, %47		; <i32> [#uses=1]
+	%56 = xor i32 %55, %54		; <i32> [#uses=1]
+	%indvar.next = add i64 %indvar, 1		; <i64> [#uses=1]
+	br label %bb
+
+bb2:		; preds = %bb  -- final round: assemble and store output bytes
+	%tmp10 = shl i64 %tmp.16, 4		; <i64> [#uses=2]
+	%ctg2.sum = add i64 %tmp10, 16		; <i64> [#uses=1]
+	%tmp1213 = getelementptr i8* %rk26, i64 %ctg2.sum		; <i8*> [#uses=1]
+	%57 = bitcast i8* %tmp1213 to i32*		; <i32*> [#uses=1]
+	%58 = and i32 %33, -16777216		; <i32> [#uses=1]
+	%59 = lshr i32 %29, 16		; <i32> [#uses=1]
+	%60 = and i32 %59, 255		; <i32> [#uses=1]
+	%61 = zext i32 %60 to i64		; <i64> [#uses=1]
+	%62 = getelementptr [256 x i32]* @Te1, i64 0, i64 %61		; <i32*> [#uses=1]
+	%63 = load i32* %62, align 4		; <i32> [#uses=1]
+	%64 = and i32 %63, 16711680		; <i32> [#uses=1]
+	%65 = or i32 %64, %58		; <i32> [#uses=1]
+	%66 = load i32* %57, align 4		; <i32> [#uses=1]
+	%67 = xor i32 %65, %66		; <i32> [#uses=2]
+	%68 = lshr i32 %29, 8		; <i32> [#uses=1]
+	%69 = zext i32 %68 to i64		; <i64> [#uses=1]
+	%70 = getelementptr [256 x i32]* @Te0, i64 0, i64 %69		; <i32*> [#uses=1]
+	%71 = load i32* %70, align 4		; <i32> [#uses=1]
+	%72 = and i32 %71, -16777216		; <i32> [#uses=1]
+	%73 = and i32 %16, 255		; <i32> [#uses=1]
+	%74 = zext i32 %73 to i64		; <i64> [#uses=1]
+	%75 = getelementptr [256 x i32]* @Te1, i64 0, i64 %74		; <i32*> [#uses=1]
+	%76 = load i32* %75, align 4		; <i32> [#uses=1]
+	%77 = and i32 %76, 16711680		; <i32> [#uses=1]
+	%78 = or i32 %77, %72		; <i32> [#uses=1]
+	%ctg2.sum25 = add i64 %tmp10, 20		; <i64> [#uses=1]
+	%79 = getelementptr i8* %rk26, i64 %ctg2.sum25		; <i8*> [#uses=1]
+	%80 = bitcast i8* %79 to i32*		; <i32*> [#uses=1]
+	%81 = load i32* %80, align 4		; <i32> [#uses=1]
+	%82 = xor i32 %78, %81		; <i32> [#uses=2]
+	%83 = lshr i32 %67, 24		; <i32> [#uses=1]
+	%84 = trunc i32 %83 to i8		; <i8> [#uses=1]
+	store i8 %84, i8* %out, align 1
+	%85 = lshr i32 %67, 16		; <i32> [#uses=1]
+	%86 = trunc i32 %85 to i8		; <i8> [#uses=1]
+	%87 = getelementptr i8* %out, i64 1		; <i8*> [#uses=1]
+	store i8 %86, i8* %87, align 1
+	%88 = getelementptr i8* %out, i64 4		; <i8*> [#uses=1]
+	%89 = lshr i32 %82, 24		; <i32> [#uses=1]
+	%90 = trunc i32 %89 to i8		; <i8> [#uses=1]
+	store i8 %90, i8* %88, align 1
+	%91 = lshr i32 %82, 16		; <i32> [#uses=1]
+	%92 = trunc i32 %91 to i8		; <i8> [#uses=1]
+	%93 = getelementptr i8* %out, i64 5		; <i8*> [#uses=1]
+	store i8 %92, i8* %93, align 1
+	ret void
+}
diff --git a/test/CodeGen/X86/lsr-negative-stride.ll b/test/CodeGen/X86/lsr-negative-stride.ll
new file mode 100644
index 0000000..b08356c
--- /dev/null
+++ b/test/CodeGen/X86/lsr-negative-stride.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -march=x86 > %t
+; RUN: not grep neg %t
+; RUN: not grep sub.*esp %t
+; RUN: not grep esi %t
+; RUN: not grep push %t
+
+; This corresponds to:
+;int t(int a, int b) {
+;  while (a != b) {
+;    if (a > b)
+;      a -= b;
+;    else
+;      b -= a;
+;  }
+;  return a;
+;}
+
+
+define i32 @t(i32 %a, i32 %b) nounwind {	; GCD by repeated subtraction (C source above); LSR must not emit negated strides
+entry:
+	%tmp1434 = icmp eq i32 %a, %b		; <i1> [#uses=1]
+	br i1 %tmp1434, label %bb17, label %bb.outer		; a == b: the answer is a itself
+
+bb.outer:		; preds = %cond_false, %entry
+	%b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ]		; <i32> [#uses=5]
+	%a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ]		; <i32> [#uses=1]
+	br label %bb
+
+bb:		; preds = %cond_true, %bb.outer
+	%indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ]		; <i32> [#uses=2]
+	%tmp. = sub i32 0, %b_addr.021.0.ph		; <i32> [#uses=1]
+	%tmp.40 = mul i32 %indvar, %tmp.		; <i32> [#uses=1]
+	%a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph		; <i32> [#uses=6]  a after indvar subtractions of b
+	%tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph		; <i1> [#uses=1]
+	br i1 %tmp3, label %cond_true, label %cond_false
+
+cond_true:		; preds = %bb  -- a > b: a -= b
+	%tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph		; <i32> [#uses=2]
+	%tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph		; <i1> [#uses=1]
+	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=1]
+	br i1 %tmp1437, label %bb17, label %bb
+
+cond_false:		; preds = %bb  -- a <= b: b -= a
+	%tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0		; <i32> [#uses=2]
+	%tmp14 = icmp eq i32 %a_addr.026.0, %tmp10		; <i1> [#uses=1]
+	br i1 %tmp14, label %bb17, label %bb.outer
+
+bb17:		; preds = %cond_false, %cond_true, %entry
+	%a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ]		; <i32> [#uses=1]
+	ret i32 %a_addr.026.1
+}
diff --git a/test/CodeGen/X86/lsr-sort.ll b/test/CodeGen/X86/lsr-sort.ll
new file mode 100644
index 0000000..1f3b59a
--- /dev/null
+++ b/test/CodeGen/X86/lsr-sort.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=x86-64 > %t
+; RUN: grep inc %t | count 1
+; RUN: not grep incw %t
+
+@X = common global i16 0		; <i16*> [#uses=1]
+
+define i32 @foo(i32 %N) nounwind {	; the truncated i16 store must not force a 16-bit (incw) induction variable
+entry:
+	%0 = icmp sgt i32 %N, 0		; <i1> [#uses=1]
+	br i1 %0, label %bb, label %return
+
+bb:		; preds = %bb, %entry
+	%i.03 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]		; <i32> [#uses=2]
+	%1 = trunc i32 %i.03 to i16		; <i16> [#uses=1]
+	volatile store i16 %1, i16* @X, align 2		; volatile keeps the store in the loop
+	%indvar.next = add i32 %i.03, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next, %N		; <i1> [#uses=1]
+	br i1 %exitcond, label %return, label %bb
+
+return:		; preds = %bb, %entry
+        %h = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]
+	ret i32 %h
+}
diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll
new file mode 100644
index 0000000..bc493bd
--- /dev/null
+++ b/test/CodeGen/X86/masked-iv-safe.ll
@@ -0,0 +1,244 @@
+; RUN: llc < %s -march=x86-64 > %t
+; RUN: not grep and %t
+; RUN: not grep movz %t
+; RUN: not grep sar %t
+; RUN: not grep shl %t
+; RUN: grep add %t | count 2
+; RUN: grep inc %t | count 4
+; RUN: grep dec %t | count 2
+; RUN: grep lea %t | count 2
+
+; Optimize away zext-inreg and sext-inreg on the loop induction
+; variable using trip-count information.
+
+define void @count_up(double* %d, i64 %n) nounwind {	; IV stays in [0,9]: both masks are no-ops
+entry:
+	br label %loop
+
+loop:
+	%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
+	%indvar.i8 = and i64 %indvar, 255
+	%t0 = getelementptr double* %d, i64 %indvar.i8
+	%t1 = load double* %t0
+	%t2 = fmul double %t1, 0.1
+	store double %t2, double* %t0
+	%indvar.i24 = and i64 %indvar, 16777215
+	%t3 = getelementptr double* %d, i64 %indvar.i24
+	%t4 = load double* %t3
+	%t5 = fmul double %t4, 2.3
+	store double %t5, double* %t3
+	%t6 = getelementptr double* %d, i64 %indvar
+	%t7 = load double* %t6
+	%t8 = fmul double %t7, 4.5
+	store double %t8, double* %t6
+	%indvar.next = add i64 %indvar, 1
+	%exitcond = icmp eq i64 %indvar.next, 10
+	br i1 %exitcond, label %return, label %loop
+
+return:
+	ret void
+}
+
+define void @count_down(double* %d, i64 %n) nounwind {	; IV stays in [1,10]: masks are no-ops
+entry:
+	br label %loop
+
+loop:
+	%indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
+	%indvar.i8 = and i64 %indvar, 255
+	%t0 = getelementptr double* %d, i64 %indvar.i8
+	%t1 = load double* %t0
+	%t2 = fmul double %t1, 0.1
+	store double %t2, double* %t0
+	%indvar.i24 = and i64 %indvar, 16777215
+	%t3 = getelementptr double* %d, i64 %indvar.i24
+	%t4 = load double* %t3
+	%t5 = fmul double %t4, 2.3
+	store double %t5, double* %t3
+	%t6 = getelementptr double* %d, i64 %indvar
+	%t7 = load double* %t6
+	%t8 = fmul double %t7, 4.5
+	store double %t8, double* %t6
+	%indvar.next = sub i64 %indvar, 1
+	%exitcond = icmp eq i64 %indvar.next, 0
+	br i1 %exitcond, label %return, label %loop
+
+return:
+	ret void
+}
+
+define void @count_up_signed(double* %d, i64 %n) nounwind {	; IV in [0,9]: the shl/ashr sign-extensions are no-ops
+entry:
+	br label %loop
+
+loop:
+	%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
+        %s0 = shl i64 %indvar, 8
+	%indvar.i8 = ashr i64 %s0, 8
+	%t0 = getelementptr double* %d, i64 %indvar.i8
+	%t1 = load double* %t0
+	%t2 = fmul double %t1, 0.1
+	store double %t2, double* %t0
+	%s1 = shl i64 %indvar, 24
+	%indvar.i24 = ashr i64 %s1, 24
+	%t3 = getelementptr double* %d, i64 %indvar.i24
+	%t4 = load double* %t3
+	%t5 = fmul double %t4, 2.3
+	store double %t5, double* %t3
+	%t6 = getelementptr double* %d, i64 %indvar
+	%t7 = load double* %t6
+	%t8 = fmul double %t7, 4.5
+	store double %t8, double* %t6
+	%indvar.next = add i64 %indvar, 1
+	%exitcond = icmp eq i64 %indvar.next, 10
+	br i1 %exitcond, label %return, label %loop
+
+return:
+	ret void
+}
+
+define void @count_down_signed(double* %d, i64 %n) nounwind {	; IV in [1,10]: sign-extensions are no-ops
+entry:
+	br label %loop
+
+loop:
+	%indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
+        %s0 = shl i64 %indvar, 8
+	%indvar.i8 = ashr i64 %s0, 8
+	%t0 = getelementptr double* %d, i64 %indvar.i8
+	%t1 = load double* %t0
+	%t2 = fmul double %t1, 0.1
+	store double %t2, double* %t0
+	%s1 = shl i64 %indvar, 24
+	%indvar.i24 = ashr i64 %s1, 24
+	%t3 = getelementptr double* %d, i64 %indvar.i24
+	%t4 = load double* %t3
+	%t5 = fmul double %t4, 2.3
+	store double %t5, double* %t3
+	%t6 = getelementptr double* %d, i64 %indvar
+	%t7 = load double* %t6
+	%t8 = fmul double %t7, 4.5
+	store double %t8, double* %t6
+	%indvar.next = sub i64 %indvar, 1
+	%exitcond = icmp eq i64 %indvar.next, 0
+	br i1 %exitcond, label %return, label %loop
+
+return:
+	ret void
+}
+
+define void @another_count_up(double* %d, i64 %n) nounwind {	; starts at -1 and exits when the IV reaches 0
+entry:
+	br label %loop
+
+loop:
+	%indvar = phi i64 [ 18446744073709551615, %entry ], [ %indvar.next, %loop ]
+	%indvar.i8 = and i64 %indvar, 255
+	%t0 = getelementptr double* %d, i64 %indvar.i8
+	%t1 = load double* %t0
+	%t2 = fmul double %t1, 0.1
+	store double %t2, double* %t0
+	%indvar.i24 = and i64 %indvar, 16777215
+	%t3 = getelementptr double* %d, i64 %indvar.i24
+	%t4 = load double* %t3
+	%t5 = fmul double %t4, 2.3
+	store double %t5, double* %t3
+	%t6 = getelementptr double* %d, i64 %indvar
+	%t7 = load double* %t6
+	%t8 = fmul double %t7, 4.5
+	store double %t8, double* %t6
+	%indvar.next = add i64 %indvar, 1
+	%exitcond = icmp eq i64 %indvar.next, 0
+	br i1 %exitcond, label %return, label %loop
+
+return:
+	ret void
+}
+
+define void @another_count_down(double* %d, i64 %n) nounwind {	; starts at 0 and exits when the IV reaches -1
+entry:
+	br label %loop
+
+loop:
+	%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
+	%indvar.i8 = and i64 %indvar, 255
+	%t0 = getelementptr double* %d, i64 %indvar.i8
+	%t1 = load double* %t0
+	%t2 = fmul double %t1, 0.1
+	store double %t2, double* %t0
+	%indvar.i24 = and i64 %indvar, 16777215
+	%t3 = getelementptr double* %d, i64 %indvar.i24
+	%t4 = load double* %t3
+	%t5 = fmul double %t4, 2.3
+	store double %t5, double* %t3
+	%t6 = getelementptr double* %d, i64 %indvar
+	%t7 = load double* %t6
+	%t8 = fmul double %t7, 4.5
+	store double %t8, double* %t6
+	%indvar.next = sub i64 %indvar, 1
+	%exitcond = icmp eq i64 %indvar.next, 18446744073709551615
+	br i1 %exitcond, label %return, label %loop
+
+return:
+	ret void
+}
+
+define void @another_count_up_signed(double* %d, i64 %n) nounwind {	; starts at -1 and exits at 0
+entry:
+	br label %loop
+
+loop:
+	%indvar = phi i64 [ 18446744073709551615, %entry ], [ %indvar.next, %loop ]
+        %s0 = shl i64 %indvar, 8
+	%indvar.i8 = ashr i64 %s0, 8
+	%t0 = getelementptr double* %d, i64 %indvar.i8
+	%t1 = load double* %t0
+	%t2 = fmul double %t1, 0.1
+	store double %t2, double* %t0
+	%s1 = shl i64 %indvar, 24
+	%indvar.i24 = ashr i64 %s1, 24
+	%t3 = getelementptr double* %d, i64 %indvar.i24
+	%t4 = load double* %t3
+	%t5 = fmul double %t4, 2.3
+	store double %t5, double* %t3
+	%t6 = getelementptr double* %d, i64 %indvar
+	%t7 = load double* %t6
+	%t8 = fmul double %t7, 4.5
+	store double %t8, double* %t6
+	%indvar.next = add i64 %indvar, 1
+	%exitcond = icmp eq i64 %indvar.next, 0
+	br i1 %exitcond, label %return, label %loop
+
+return:
+	ret void
+}
+
+define void @another_count_down_signed(double* %d, i64 %n) nounwind {	; starts at 0 and exits at -1
+entry:
+	br label %loop
+
+loop:
+	%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
+        %s0 = shl i64 %indvar, 8
+	%indvar.i8 = ashr i64 %s0, 8
+	%t0 = getelementptr double* %d, i64 %indvar.i8
+	%t1 = load double* %t0
+	%t2 = fmul double %t1, 0.1
+	store double %t2, double* %t0
+	%s1 = shl i64 %indvar, 24
+	%indvar.i24 = ashr i64 %s1, 24
+	%t3 = getelementptr double* %d, i64 %indvar.i24
+	%t4 = load double* %t3
+	%t5 = fmul double %t4, 2.3
+	store double %t5, double* %t3
+	%t6 = getelementptr double* %d, i64 %indvar
+	%t7 = load double* %t6
+	%t8 = fmul double %t7, 4.5
+	store double %t8, double* %t6
+	%indvar.next = sub i64 %indvar, 1
+	%exitcond = icmp eq i64 %indvar.next, 18446744073709551615
+	br i1 %exitcond, label %return, label %loop
+
+return:
+	ret void
+}
diff --git a/test/CodeGen/X86/masked-iv-unsafe.ll b/test/CodeGen/X86/masked-iv-unsafe.ll
new file mode 100644
index 0000000..f23c020
--- /dev/null
+++ b/test/CodeGen/X86/masked-iv-unsafe.ll
@@ -0,0 +1,386 @@
+; RUN: llc < %s -march=x86-64 > %t
+; RUN: grep and %t | count 6
+; RUN: grep movzb %t | count 6
+; RUN: grep sar %t | count 12
+
+; Don't optimize away zext-inreg and sext-inreg on the loop induction
+; variable, because it isn't safe to do so in these cases.
+
+define void @count_up(double* %d, i64 %n) nounwind {	; starts at 10 counting up toward an exit value of 0: wraps through the i64 range
+entry:
+	br label %loop
+
+loop:
+	%indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
+	%indvar.i8 = and i64 %indvar, 255
+	%t0 = getelementptr double* %d, i64 %indvar.i8
+	%t1 = load double* %t0
+	%t2 = fmul double %t1, 0.1
+	store double %t2, double* %t0
+	%indvar.i24 = and i64 %indvar, 16777215
+	%t3 = getelementptr double* %d, i64 %indvar.i24
+	%t4 = load double* %t3
+	%t5 = fmul double %t4, 2.3
+	store double %t5, double* %t3
+	%t6 = getelementptr double* %d, i64 %indvar
+	%t7 = load double* %t6
+	%t8 = fmul double %t7, 4.5
+	store double %t8, double* %t6
+	%indvar.next = add i64 %indvar, 1
+	%exitcond = icmp eq i64 %indvar.next, 0
+	br i1 %exitcond, label %return, label %loop
+
+return:
+	ret void
+}
+
+define void @count_down(double* %d, i64 %n) nounwind {	; counts down from 10 toward 20: passes through negative values first
+entry:
+	br label %loop
+
+loop:
+	%indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
+	%indvar.i8 = and i64 %indvar, 255
+	%t0 = getelementptr double* %d, i64 %indvar.i8
+	%t1 = load double* %t0
+	%t2 = fmul double %t1, 0.1
+	store double %t2, double* %t0
+	%indvar.i24 = and i64 %indvar, 16777215
+	%t3 = getelementptr double* %d, i64 %indvar.i24
+	%t4 = load double* %t3
+	%t5 = fmul double %t4, 2.3
+	store double %t5, double* %t3
+	%t6 = getelementptr double* %d, i64 %indvar
+	%t7 = load double* %t6
+	%t8 = fmul double %t7, 4.5
+	store double %t8, double* %t6
+	%indvar.next = sub i64 %indvar, 1
+	%exitcond = icmp eq i64 %indvar.next, 20
+	br i1 %exitcond, label %return, label %loop
+
+return:
+	ret void
+}
+
+define void @count_up_signed(double* %d, i64 %n) nounwind {	; wraps through the i64 range, so the sext-inreg pairs matter
+entry:
+	br label %loop
+
+loop:
+	%indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
+        %s0 = shl i64 %indvar, 8
+	%indvar.i8 = ashr i64 %s0, 8
+	%t0 = getelementptr double* %d, i64 %indvar.i8
+	%t1 = load double* %t0
+	%t2 = fmul double %t1, 0.1
+	store double %t2, double* %t0
+	%s1 = shl i64 %indvar, 24
+	%indvar.i24 = ashr i64 %s1, 24
+	%t3 = getelementptr double* %d, i64 %indvar.i24
+	%t4 = load double* %t3
+	%t5 = fmul double %t4, 2.3
+	store double %t5, double* %t3
+	%t6 = getelementptr double* %d, i64 %indvar
+	%t7 = load double* %t6
+	%t8 = fmul double %t7, 4.5
+	store double %t8, double* %t6
+	%indvar.next = add i64 %indvar, 1
+	%exitcond = icmp eq i64 %indvar.next, 0
+	br i1 %exitcond, label %return, label %loop
+
+return:
+	ret void
+}
+
+define void @count_down_signed(double* %d, i64 %n) nounwind {	; counts down past zero before reaching 20
+entry:
+	br label %loop
+
+loop:
+	%indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
+        %s0 = shl i64 %indvar, 8
+	%indvar.i8 = ashr i64 %s0, 8
+	%t0 = getelementptr double* %d, i64 %indvar.i8
+	%t1 = load double* %t0
+	%t2 = fmul double %t1, 0.1
+	store double %t2, double* %t0
+	%s1 = shl i64 %indvar, 24
+	%indvar.i24 = ashr i64 %s1, 24
+	%t3 = getelementptr double* %d, i64 %indvar.i24
+	%t4 = load double* %t3
+	%t5 = fmul double %t4, 2.3
+	store double %t5, double* %t3
+	%t6 = getelementptr double* %d, i64 %indvar
+	%t7 = load double* %t6
+	%t8 = fmul double %t7, 4.5
+	store double %t8, double* %t6
+	%indvar.next = sub i64 %indvar, 1
+	%exitcond = icmp eq i64 %indvar.next, 20
+	br i1 %exitcond, label %return, label %loop
+
+return:
+	ret void
+}
+
+define void @another_count_up(double* %d, i64 %n) nounwind {	; trip count %n is not known to fit the masked widths
+entry:
+        br label %loop
+
+loop:
+        %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
+        %indvar.i8 = and i64 %indvar, 255
+        %t0 = getelementptr double* %d, i64 %indvar.i8
+        %t1 = load double* %t0
+        %t2 = fmul double %t1, 0.1
+        store double %t2, double* %t0
+        %indvar.i24 = and i64 %indvar, 16777215
+        %t3 = getelementptr double* %d, i64 %indvar.i24
+        %t4 = load double* %t3
+        %t5 = fmul double %t4, 2.3
+        store double %t5, double* %t3
+        %t6 = getelementptr double* %d, i64 %indvar
+        %t7 = load double* %t6
+        %t8 = fmul double %t7, 4.5
+        store double %t8, double* %t6
+        %indvar.next = add i64 %indvar, 1
+        %exitcond = icmp eq i64 %indvar.next, %n
+        br i1 %exitcond, label %return, label %loop
+
+return:
+        ret void
+}
+
+define void @another_count_down(double* %d, i64 %n) nounwind {	; starts at the unknown value %n
+entry:
+        br label %loop
+
+loop:
+        %indvar = phi i64 [ %n, %entry ], [ %indvar.next, %loop ]
+        %indvar.i8 = and i64 %indvar, 255
+        %t0 = getelementptr double* %d, i64 %indvar.i8
+        %t1 = load double* %t0
+        %t2 = fmul double %t1, 0.1
+        store double %t2, double* %t0
+        %indvar.i24 = and i64 %indvar, 16777215
+        %t3 = getelementptr double* %d, i64 %indvar.i24
+        %t4 = load double* %t3
+        %t5 = fmul double %t4, 2.3
+        store double %t5, double* %t3
+        %t6 = getelementptr double* %d, i64 %indvar
+        %t7 = load double* %t6
+        %t8 = fmul double %t7, 4.5
+        store double %t8, double* %t6
+        %indvar.next = sub i64 %indvar, 1
+        %exitcond = icmp eq i64 %indvar.next, 10
+        br i1 %exitcond, label %return, label %loop
+
+return:
+        ret void
+}
+
+define void @another_count_up_signed(double* %d, i64 %n) nounwind {	; trip count %n is not known to fit the sext-inreg widths
+entry:
+        br label %loop
+
+loop:
+        %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
+        %s0 = shl i64 %indvar, 8
+        %indvar.i8 = ashr i64 %s0, 8
+        %t0 = getelementptr double* %d, i64 %indvar.i8
+        %t1 = load double* %t0
+        %t2 = fmul double %t1, 0.1
+        store double %t2, double* %t0
+        %s1 = shl i64 %indvar, 24
+        %indvar.i24 = ashr i64 %s1, 24
+        %t3 = getelementptr double* %d, i64 %indvar.i24
+        %t4 = load double* %t3
+        %t5 = fmul double %t4, 2.3
+        store double %t5, double* %t3
+        %t6 = getelementptr double* %d, i64 %indvar
+        %t7 = load double* %t6
+        %t8 = fmul double %t7, 4.5
+        store double %t8, double* %t6
+        %indvar.next = add i64 %indvar, 1
+        %exitcond = icmp eq i64 %indvar.next, %n
+        br i1 %exitcond, label %return, label %loop
+
+return:
+        ret void
+}
+
+define void @another_count_down_signed(double* %d, i64 %n) nounwind {	; starts at the unknown value %n
+entry:
+        br label %loop
+
+loop:
+        %indvar = phi i64 [ %n, %entry ], [ %indvar.next, %loop ]
+        %s0 = shl i64 %indvar, 8
+        %indvar.i8 = ashr i64 %s0, 8
+        %t0 = getelementptr double* %d, i64 %indvar.i8
+        %t1 = load double* %t0
+        %t2 = fmul double %t1, 0.1
+        store double %t2, double* %t0
+        %s1 = shl i64 %indvar, 24
+        %indvar.i24 = ashr i64 %s1, 24
+        %t3 = getelementptr double* %d, i64 %indvar.i24
+        %t4 = load double* %t3
+        %t5 = fmul double %t4, 2.3
+        store double %t5, double* %t3
+        %t6 = getelementptr double* %d, i64 %indvar
+        %t7 = load double* %t6
+        %t8 = fmul double %t7, 4.5
+        store double %t8, double* %t6
+        %indvar.next = sub i64 %indvar, 1
+        %exitcond = icmp eq i64 %indvar.next, 10
+        br i1 %exitcond, label %return, label %loop
+
+return:
+        ret void
+}
+
+define void @yet_another_count_down(double* %d, i64 %n) nounwind {	; IV immediately wraps below zero
+entry:
+	br label %loop
+
+loop:
+	%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
+	%indvar.i8 = and i64 %indvar, 255
+	%t0 = getelementptr double* %d, i64 %indvar.i8
+	%t1 = load double* %t0
+	%t2 = fmul double %t1, 0.1
+	store double %t2, double* %t0
+	%indvar.i24 = and i64 %indvar, 16777215
+	%t3 = getelementptr double* %d, i64 %indvar.i24
+	%t4 = load double* %t3
+	%t5 = fmul double %t4, 2.3
+	store double %t5, double* %t3
+	%t6 = getelementptr double* %d, i64 %indvar
+	%t7 = load double* %t6
+	%t8 = fmul double %t7, 4.5
+	store double %t8, double* %t6
+	%indvar.next = sub i64 %indvar, 1
+	%exitcond = icmp eq i64 %indvar.next, 18446744073709551615
+	br i1 %exitcond, label %return, label %loop
+
+return:
+	ret void
+}
+
+define void @yet_another_count_up(double* %d, i64 %n) nounwind {	; non-unit step of 3
+entry:
+        br label %loop
+
+loop:
+        %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
+        %indvar.i8 = and i64 %indvar, 255
+        %t0 = getelementptr double* %d, i64 %indvar.i8
+        %t1 = load double* %t0
+        %t2 = fmul double %t1, 0.1
+        store double %t2, double* %t0
+        %indvar.i24 = and i64 %indvar, 16777215
+        %t3 = getelementptr double* %d, i64 %indvar.i24
+        %t4 = load double* %t3
+        %t5 = fmul double %t4, 2.3
+        store double %t5, double* %t3
+        %t6 = getelementptr double* %d, i64 %indvar
+        %t7 = load double* %t6
+        %t8 = fmul double %t7, 4.5
+        store double %t8, double* %t6
+        %indvar.next = add i64 %indvar, 3
+        %exitcond = icmp eq i64 %indvar.next, 10
+        br i1 %exitcond, label %return, label %loop
+
+return:
+        ret void
+}
+
+define void @still_another_count_down(double* %d, i64 %n) nounwind {	; non-unit step of 3
+entry:
+        br label %loop
+
+loop:
+        %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
+        %indvar.i8 = and i64 %indvar, 255
+        %t0 = getelementptr double* %d, i64 %indvar.i8
+        %t1 = load double* %t0
+        %t2 = fmul double %t1, 0.1
+        store double %t2, double* %t0
+        %indvar.i24 = and i64 %indvar, 16777215
+        %t3 = getelementptr double* %d, i64 %indvar.i24
+        %t4 = load double* %t3
+        %t5 = fmul double %t4, 2.3
+        store double %t5, double* %t3
+        %t6 = getelementptr double* %d, i64 %indvar
+        %t7 = load double* %t6
+        %t8 = fmul double %t7, 4.5
+        store double %t8, double* %t6
+        %indvar.next = sub i64 %indvar, 3
+        %exitcond = icmp eq i64 %indvar.next, 0
+        br i1 %exitcond, label %return, label %loop
+
+return:
+        ret void
+}
+
+define void @yet_another_count_up_signed(double* %d, i64 %n) nounwind {	; non-unit step of 3
+entry:
+        br label %loop
+
+loop:
+        %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
+        %s0 = shl i64 %indvar, 8
+        %indvar.i8 = ashr i64 %s0, 8
+        %t0 = getelementptr double* %d, i64 %indvar.i8
+        %t1 = load double* %t0
+        %t2 = fmul double %t1, 0.1
+        store double %t2, double* %t0
+        %s1 = shl i64 %indvar, 24
+        %indvar.i24 = ashr i64 %s1, 24
+        %t3 = getelementptr double* %d, i64 %indvar.i24
+        %t4 = load double* %t3
+        %t5 = fmul double %t4, 2.3
+        store double %t5, double* %t3
+        %t6 = getelementptr double* %d, i64 %indvar
+        %t7 = load double* %t6
+        %t8 = fmul double %t7, 4.5
+        store double %t8, double* %t6
+        %indvar.next = add i64 %indvar, 3
+        %exitcond = icmp eq i64 %indvar.next, 10
+        br i1 %exitcond, label %return, label %loop
+
+return:
+        ret void
+}
+
+define void @yet_another_count_down_signed(double* %d, i64 %n) nounwind {	; non-unit step of 3
+entry:
+        br label %loop
+
+loop:
+        %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
+        %s0 = shl i64 %indvar, 8
+        %indvar.i8 = ashr i64 %s0, 8
+        %t0 = getelementptr double* %d, i64 %indvar.i8
+        %t1 = load double* %t0
+        %t2 = fmul double %t1, 0.1
+        store double %t2, double* %t0
+        %s1 = shl i64 %indvar, 24
+        %indvar.i24 = ashr i64 %s1, 24
+        %t3 = getelementptr double* %d, i64 %indvar.i24
+        %t4 = load double* %t3
+        %t5 = fmul double %t4, 2.3
+        store double %t5, double* %t3
+        %t6 = getelementptr double* %d, i64 %indvar
+        %t7 = load double* %t6
+        %t8 = fmul double %t7, 4.5
+        store double %t8, double* %t6
+        %indvar.next = sub i64 %indvar, 3
+        %exitcond = icmp eq i64 %indvar.next, 0
+        br i1 %exitcond, label %return, label %loop
+
+return:
+        ret void
+}
+
+
+
diff --git a/test/CodeGen/X86/maskmovdqu.ll b/test/CodeGen/X86/maskmovdqu.ll
new file mode 100644
index 0000000..7796f0e
--- /dev/null
+++ b/test/CodeGen/X86/maskmovdqu.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86    -mattr=+sse2 | grep -i EDI
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep -i RDI
+; rdar://6573467
+
+define void @test(<16 x i8> %a, <16 x i8> %b, i32 %dummy, i8* %c) nounwind {	; maskmovdqu addresses memory through the implicit EDI/RDI register (rdar://6573467)
+entry:
+	tail call void @llvm.x86.sse2.maskmov.dqu( <16 x i8> %a, <16 x i8> %b, i8* %c )
+	ret void
+}
+
+declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind
diff --git a/test/CodeGen/X86/memcmp.ll b/test/CodeGen/X86/memcmp.ll
new file mode 100644
index 0000000..b90d2e2
--- /dev/null
+++ b/test/CodeGen/X86/memcmp.ll
@@ -0,0 +1,110 @@
+; RUN: llc %s -o - -march=x86-64 | FileCheck %s
+
+; This tests codegen time inlining/optimization of memcmp
+; rdar://6480398
+
+@.str = private constant [23 x i8] c"fooooooooooooooooooooo\00", align 1 ; <[23 x i8]*> [#uses=1]
+
+declare i32 @memcmp(...)
+
+define void @memcmp2(i8* %X, i8* %Y, i32* nocapture %P) nounwind {	; length-2 equality-only memcmp folds to a single 16-bit compare
+entry:
+  %0 = tail call i32 (...)* @memcmp(i8* %X, i8* %Y, i32 2) nounwind ; <i32> [#uses=1]
+  %1 = icmp eq i32 %0, 0                          ; <i1> [#uses=1]  only equality is observed
+  br i1 %1, label %return, label %bb
+
+bb:                                               ; preds = %entry
+  store i32 4, i32* %P, align 4
+  ret void
+
+return:                                           ; preds = %entry
+  ret void
+; CHECK: memcmp2:
+; CHECK: movw    (%rsi), %ax
+; CHECK: cmpw    %ax, (%rdi)
+}
+
+define void @memcmp2a(i8* %X, i32* nocapture %P) nounwind {	; constant RHS ("oo" at @.str+1) folds to a cmpw against 28527 (0x6F6F)
+entry:
+  %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 1), i32 2) nounwind ; <i32> [#uses=1]
+  %1 = icmp eq i32 %0, 0                          ; <i1> [#uses=1]
+  br i1 %1, label %return, label %bb
+
+bb:                                               ; preds = %entry
+  store i32 4, i32* %P, align 4
+  ret void
+
+return:                                           ; preds = %entry
+  ret void
+; CHECK: memcmp2a:
+; CHECK: cmpw    $28527, (%rdi)
+}
+
+
+define void @memcmp4(i8* %X, i8* %Y, i32* nocapture %P) nounwind {	; length-4 equality-only memcmp folds to a single 32-bit compare
+entry:
+  %0 = tail call i32 (...)* @memcmp(i8* %X, i8* %Y, i32 4) nounwind ; <i32> [#uses=1]
+  %1 = icmp eq i32 %0, 0                          ; <i1> [#uses=1]
+  br i1 %1, label %return, label %bb
+
+bb:                                               ; preds = %entry
+  store i32 4, i32* %P, align 4
+  ret void
+
+return:                                           ; preds = %entry
+  ret void
+; CHECK: memcmp4:
+; CHECK: movl    (%rsi), %eax
+; CHECK: cmpl    %eax, (%rdi)
+}
+
+define void @memcmp4a(i8* %X, i32* nocapture %P) nounwind {	; constant RHS ("oooo") folds to cmpl against 1869573999 (0x6F6F6F6F)
+entry:
+  %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 1), i32 4) nounwind ; <i32> [#uses=1]
+  %1 = icmp eq i32 %0, 0                          ; <i1> [#uses=1]
+  br i1 %1, label %return, label %bb
+
+bb:                                               ; preds = %entry
+  store i32 4, i32* %P, align 4
+  ret void
+
+return:                                           ; preds = %entry
+  ret void
+; CHECK: memcmp4a:
+; CHECK: cmpl $1869573999, (%rdi)
+}
+
+define void @memcmp8(i8* %X, i8* %Y, i32* nocapture %P) nounwind {	; length-8 equality-only memcmp folds to a single 64-bit compare
+entry:
+  %0 = tail call i32 (...)* @memcmp(i8* %X, i8* %Y, i32 8) nounwind ; <i32> [#uses=1]
+  %1 = icmp eq i32 %0, 0                          ; <i1> [#uses=1]
+  br i1 %1, label %return, label %bb
+
+bb:                                               ; preds = %entry
+  store i32 4, i32* %P, align 4
+  ret void
+
+return:                                           ; preds = %entry
+  ret void
+; CHECK: memcmp8:
+; CHECK: movq    (%rsi), %rax
+; CHECK: cmpq    %rax, (%rdi)
+}
+
+define void @memcmp8a(i8* %X, i32* nocapture %P) nounwind {	; 8-byte constant RHS needs a movabsq immediate, then one cmpq
+entry:
+  %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 0), i32 8) nounwind ; <i32> [#uses=1]
+  %1 = icmp eq i32 %0, 0                          ; <i1> [#uses=1]
+  br i1 %1, label %return, label %bb
+
+bb:                                               ; preds = %entry
+  store i32 4, i32* %P, align 4
+  ret void
+
+return:                                           ; preds = %entry
+  ret void
+; CHECK: memcmp8a:
+; CHECK: movabsq $8029759185026510694, %rax
+; CHECK: cmpq	%rax, (%rdi)
+}
+
diff --git a/test/CodeGen/X86/memcpy-2.ll b/test/CodeGen/X86/memcpy-2.ll
new file mode 100644
index 0000000..2dc939e
--- /dev/null
+++ b/test/CodeGen/X86/memcpy-2.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 -mattr=-sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 7
+; RUN: llc < %s -march=x86 -mattr=+sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 5
+
+	%struct.ParmT = type { [25 x i8], i8, i8* }
+@.str12 = internal constant [25 x i8] c"image\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00"		; <[25 x i8]*> [#uses=1]
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind 
+
+define void @t(i32 %argc, i8** %argv) nounwind  {
+entry:
+	%parms.i = alloca [13 x %struct.ParmT]		; <[13 x %struct.ParmT]*> [#uses=1]
+	%parms1.i = getelementptr [13 x %struct.ParmT]* %parms.i, i32 0, i32 0, i32 0, i32 0		; <i8*> [#uses=1]
+	call void @llvm.memcpy.i32( i8* %parms1.i, i8* getelementptr ([25 x i8]* @.str12, i32 0, i32 0), i32 25, i32 1 ) nounwind 
+	unreachable
+}
diff --git a/test/CodeGen/X86/memcpy.ll b/test/CodeGen/X86/memcpy.ll
new file mode 100644
index 0000000..24530cd
--- /dev/null
+++ b/test/CodeGen/X86/memcpy.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86-64 | grep call.*memcpy | count 2
+
+declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
+
+define i8* @my_memcpy(i8* %a, i8* %b, i64 %n) {
+entry:
+	tail call void @llvm.memcpy.i64( i8* %a, i8* %b, i64 %n, i32 1 )
+	ret i8* %a
+}
+
+define i8* @my_memcpy2(i64* %a, i64* %b, i64 %n) {
+entry:
+	%tmp14 = bitcast i64* %a to i8*
+	%tmp25 = bitcast i64* %b to i8*
+	tail call void @llvm.memcpy.i64(i8* %tmp14, i8* %tmp25, i64 %n, i32 8 )
+	ret i8* %tmp14
+}
diff --git a/test/CodeGen/X86/memmove-0.ll b/test/CodeGen/X86/memmove-0.ll
new file mode 100644
index 0000000..d405068
--- /dev/null
+++ b/test/CodeGen/X86/memmove-0.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep {call	memcpy}
+
+declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
+
+define void @foo(i8* noalias %d, i8* noalias %s, i64 %l)
+{
+  call void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 1)
+  ret void
+}
diff --git a/test/CodeGen/X86/memmove-1.ll b/test/CodeGen/X86/memmove-1.ll
new file mode 100644
index 0000000..2057be8
--- /dev/null
+++ b/test/CodeGen/X86/memmove-1.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep {call	memmove}
+
+declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
+
+define void @foo(i8* %d, i8* %s, i64 %l)
+{
+  call void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 1)
+  ret void
+}
diff --git a/test/CodeGen/X86/memmove-2.ll b/test/CodeGen/X86/memmove-2.ll
new file mode 100644
index 0000000..68a9f4d
--- /dev/null
+++ b/test/CodeGen/X86/memmove-2.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | not grep call
+
+declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
+
+define void @foo(i8* noalias %d, i8* noalias %s)
+{
+  call void @llvm.memmove.i64(i8* %d, i8* %s, i64 32, i32 1)
+  ret void
+}
diff --git a/test/CodeGen/X86/memmove-3.ll b/test/CodeGen/X86/memmove-3.ll
new file mode 100644
index 0000000..d8a419c
--- /dev/null
+++ b/test/CodeGen/X86/memmove-3.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep {call	memmove}
+
+declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
+
+define void @foo(i8* %d, i8* %s)
+{
+  call void @llvm.memmove.i64(i8* %d, i8* %s, i64 32, i32 1)
+  ret void
+}
diff --git a/test/CodeGen/X86/memmove-4.ll b/test/CodeGen/X86/memmove-4.ll
new file mode 100644
index 0000000..027db1f
--- /dev/null
+++ b/test/CodeGen/X86/memmove-4.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s | not grep call
+
+target triple = "i686-pc-linux-gnu"
+
+define void @a(i8* %a, i8* %b) nounwind {
+        %tmp2 = bitcast i8* %a to i8*
+        %tmp3 = bitcast i8* %b to i8*
+        tail call void @llvm.memmove.i32( i8* %tmp2, i8* %tmp3, i32 12, i32 4 )
+        ret void
+}
+
+declare void @llvm.memmove.i32(i8*, i8*, i32, i32)
diff --git a/test/CodeGen/X86/memset-2.ll b/test/CodeGen/X86/memset-2.ll
new file mode 100644
index 0000000..7deb52f
--- /dev/null
+++ b/test/CodeGen/X86/memset-2.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s | not grep rep
+; RUN: llc < %s | grep memset
+
+target triple = "i386"
+
+declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
+
+define fastcc i32 @cli_scanzip(i32 %desc) nounwind {
+entry:
+	br label %bb8.i.i.i.i
+
+bb8.i.i.i.i:		; preds = %bb8.i.i.i.i, %entry
+	icmp eq i32 0, 0		; <i1>:0 [#uses=1]
+	br i1 %0, label %bb61.i.i.i, label %bb8.i.i.i.i
+
+bb32.i.i.i:		; preds = %bb61.i.i.i
+	ptrtoint i8* %tail.0.i.i.i to i32		; <i32>:1 [#uses=1]
+	sub i32 0, %1		; <i32>:2 [#uses=1]
+	icmp sgt i32 %2, 19		; <i1>:3 [#uses=1]
+	br i1 %3, label %bb34.i.i.i, label %bb61.i.i.i
+
+bb34.i.i.i:		; preds = %bb32.i.i.i
+	load i32* null, align 4		; <i32>:4 [#uses=1]
+	icmp eq i32 %4, 101010256		; <i1>:5 [#uses=1]
+	br i1 %5, label %bb8.i11.i.i.i, label %bb61.i.i.i
+
+bb8.i11.i.i.i:		; preds = %bb8.i11.i.i.i, %bb34.i.i.i
+	icmp eq i32 0, 0		; <i1>:6 [#uses=1]
+	br i1 %6, label %cli_dbgmsg.exit49.i, label %bb8.i11.i.i.i
+
+cli_dbgmsg.exit49.i:		; preds = %bb8.i11.i.i.i
+	icmp eq [32768 x i8]* null, null		; <i1>:7 [#uses=1]
+	br i1 %7, label %bb1.i28.i, label %bb8.i.i
+
+bb61.i.i.i:		; preds = %bb61.i.i.i, %bb34.i.i.i, %bb32.i.i.i, %bb8.i.i.i.i
+	%tail.0.i.i.i = getelementptr [1024 x i8]* null, i32 0, i32 0		; <i8*> [#uses=2]
+	load i8* %tail.0.i.i.i, align 1		; <i8>:8 [#uses=1]
+	icmp eq i8 %8, 80		; <i1>:9 [#uses=1]
+	br i1 %9, label %bb32.i.i.i, label %bb61.i.i.i
+
+bb1.i28.i:		; preds = %cli_dbgmsg.exit49.i
+	call void @llvm.memset.i32( i8* null, i8 0, i32 88, i32 1 ) nounwind
+	unreachable
+
+bb8.i.i:		; preds = %bb8.i.i, %cli_dbgmsg.exit49.i
+	br label %bb8.i.i
+}
diff --git a/test/CodeGen/X86/memset.ll b/test/CodeGen/X86/memset.ll
new file mode 100644
index 0000000..cf7464d
--- /dev/null
+++ b/test/CodeGen/X86/memset.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=x86 -mattr=-sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 9
+; RUN: llc < %s -march=x86 -mattr=+sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 3
+
+	%struct.x = type { i16, i16 }
+
+define void @t() nounwind  {
+entry:
+	%up_mvd = alloca [8 x %struct.x]		; <[8 x %struct.x]*> [#uses=2]
+	%up_mvd116 = getelementptr [8 x %struct.x]* %up_mvd, i32 0, i32 0		; <%struct.x*> [#uses=1]
+	%tmp110117 = bitcast [8 x %struct.x]* %up_mvd to i8*		; <i8*> [#uses=1]
+	call void @llvm.memset.i64( i8* %tmp110117, i8 0, i64 32, i32 8 )
+	call void @foo( %struct.x* %up_mvd116 ) nounwind 
+	ret void
+}
+
+declare void @foo(%struct.x*)
+
+declare void @llvm.memset.i64(i8*, i8, i64, i32) nounwind 
diff --git a/test/CodeGen/X86/memset64-on-x86-32.ll b/test/CodeGen/X86/memset64-on-x86-32.ll
new file mode 100644
index 0000000..da8fc51
--- /dev/null
+++ b/test/CodeGen/X86/memset64-on-x86-32.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep stosl
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movq | count 10
+
+define void @bork() nounwind {
+entry:
+        call void @llvm.memset.i64( i8* null, i8 0, i64 80, i32 4 )
+        ret void
+}
+
+declare void @llvm.memset.i64(i8*, i8, i64, i32) nounwind
+
diff --git a/test/CodeGen/X86/mfence.ll b/test/CodeGen/X86/mfence.ll
new file mode 100644
index 0000000..a1b2283
--- /dev/null
+++ b/test/CodeGen/X86/mfence.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep sfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep lfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep mfence
+
+
+declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
+
+define void @test() {
+	call void @llvm.memory.barrier( i1 true, i1 true,  i1 false, i1 false, i1 true)
+	call void @llvm.memory.barrier( i1 true, i1 false, i1 true,  i1 false, i1 true)
+	call void @llvm.memory.barrier( i1 true, i1 false, i1 false, i1 true,  i1 true)
+
+	call void @llvm.memory.barrier( i1 true, i1 true,  i1 true,  i1 false, i1 true)
+	call void @llvm.memory.barrier( i1 true, i1 true,  i1 false, i1 true,  i1 true)
+	call void @llvm.memory.barrier( i1 true, i1 false, i1 true,  i1 true,  i1 true)
+
+	call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 true , i1 true)
+	call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 false , i1 true)
+	ret void
+}
diff --git a/test/CodeGen/X86/mingw-alloca.ll b/test/CodeGen/X86/mingw-alloca.ll
new file mode 100644
index 0000000..7dcd84d
--- /dev/null
+++ b/test/CodeGen/X86/mingw-alloca.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i386-pc-mingw32"
+
+define void @foo1(i32 %N) nounwind {
+entry:
+; CHECK: _foo1:
+; CHECK: call __alloca
+	%tmp14 = alloca i32, i32 %N		; <i32*> [#uses=1]
+	call void @bar1( i32* %tmp14 )
+	ret void
+}
+
+declare void @bar1(i32*)
+
+define void @foo2(i32 inreg  %N) nounwind {
+entry:
+; CHECK: _foo2:
+; CHECK: andl $-16, %esp
+; CHECK: pushl %eax
+; CHECK: call __alloca
+; CHECK: movl	8028(%esp), %eax
+	%A2 = alloca [2000 x i32], align 16		; <[2000 x i32]*> [#uses=1]
+	%A2.sub = getelementptr [2000 x i32]* %A2, i32 0, i32 0		; <i32*> [#uses=1]
+	call void @bar2( i32* %A2.sub, i32 %N )
+	ret void
+}
+
+declare void @bar2(i32*, i32)
diff --git a/test/CodeGen/X86/mmx-arg-passing.ll b/test/CodeGen/X86/mmx-arg-passing.ll
new file mode 100644
index 0000000..426e98e
--- /dev/null
+++ b/test/CodeGen/X86/mmx-arg-passing.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep mm0 | count 3
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep esp | count 1
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep xmm0
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep rdi
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | not grep movups
+;
+; On Darwin x86-32, v8i8, v4i16, v2i32 values are passed in MM[0-2].
+; On Darwin x86-32, v1i64 values are passed in memory.
+; On Darwin x86-64, v8i8, v4i16, v2i32 values are passed in XMM[0-7].
+; On Darwin x86-64, v1i64 values are passed in 64-bit GPRs.
+
+@u1 = external global <8 x i8>
+
+define void @t1(<8 x i8> %v1) nounwind  {
+	store <8 x i8> %v1, <8 x i8>* @u1, align 8
+	ret void
+}
+
+@u2 = external global <1 x i64>
+
+define void @t2(<1 x i64> %v1) nounwind  {
+	store <1 x i64> %v1, <1 x i64>* @u2, align 8
+	ret void
+}
diff --git a/test/CodeGen/X86/mmx-arg-passing2.ll b/test/CodeGen/X86/mmx-arg-passing2.ll
new file mode 100644
index 0000000..c42af08
--- /dev/null
+++ b/test/CodeGen/X86/mmx-arg-passing2.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep movq2dq | count 1
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep movdq2q | count 2
+
+@g_v8qi = external global <8 x i8>
+
+define void @t1() nounwind  {
+	%tmp3 = load <8 x i8>* @g_v8qi, align 8
+	%tmp4 = tail call i32 (...)* @pass_v8qi( <8 x i8> %tmp3 ) nounwind
+	ret void
+}
+
+define void @t2(<8 x i8> %v1, <8 x i8> %v2) nounwind  {
+       %tmp3 = add <8 x i8> %v1, %v2
+       %tmp4 = tail call i32 (...)* @pass_v8qi( <8 x i8> %tmp3 ) nounwind
+       ret void
+}
+
+define void @t3() nounwind  {
+	call void @pass_v1di( <1 x i64> zeroinitializer )
+        ret void
+}
+
+declare i32 @pass_v8qi(...)
+declare void @pass_v1di(<1 x i64>)
diff --git a/test/CodeGen/X86/mmx-arith.ll b/test/CodeGen/X86/mmx-arith.ll
new file mode 100644
index 0000000..e4dfdbf
--- /dev/null
+++ b/test/CodeGen/X86/mmx-arith.ll
@@ -0,0 +1,131 @@
+; RUN: llc < %s -march=x86 -mattr=+mmx
+
+;; A basic sanity check to make sure that MMX arithmetic actually compiles.
+
+define void @foo(<8 x i8>* %A, <8 x i8>* %B) {
+entry:
+	%tmp1 = load <8 x i8>* %A		; <<8 x i8>> [#uses=1]
+	%tmp3 = load <8 x i8>* %B		; <<8 x i8>> [#uses=1]
+	%tmp4 = add <8 x i8> %tmp1, %tmp3		; <<8 x i8>> [#uses=2]
+	store <8 x i8> %tmp4, <8 x i8>* %A
+	%tmp7 = load <8 x i8>* %B		; <<8 x i8>> [#uses=1]
+	%tmp12 = tail call <8 x i8> @llvm.x86.mmx.padds.b( <8 x i8> %tmp4, <8 x i8> %tmp7 )		; <<8 x i8>> [#uses=2]
+	store <8 x i8> %tmp12, <8 x i8>* %A
+	%tmp16 = load <8 x i8>* %B		; <<8 x i8>> [#uses=1]
+	%tmp21 = tail call <8 x i8> @llvm.x86.mmx.paddus.b( <8 x i8> %tmp12, <8 x i8> %tmp16 )		; <<8 x i8>> [#uses=2]
+	store <8 x i8> %tmp21, <8 x i8>* %A
+	%tmp27 = load <8 x i8>* %B		; <<8 x i8>> [#uses=1]
+	%tmp28 = sub <8 x i8> %tmp21, %tmp27		; <<8 x i8>> [#uses=2]
+	store <8 x i8> %tmp28, <8 x i8>* %A
+	%tmp31 = load <8 x i8>* %B		; <<8 x i8>> [#uses=1]
+	%tmp36 = tail call <8 x i8> @llvm.x86.mmx.psubs.b( <8 x i8> %tmp28, <8 x i8> %tmp31 )		; <<8 x i8>> [#uses=2]
+	store <8 x i8> %tmp36, <8 x i8>* %A
+	%tmp40 = load <8 x i8>* %B		; <<8 x i8>> [#uses=1]
+	%tmp45 = tail call <8 x i8> @llvm.x86.mmx.psubus.b( <8 x i8> %tmp36, <8 x i8> %tmp40 )		; <<8 x i8>> [#uses=2]
+	store <8 x i8> %tmp45, <8 x i8>* %A
+	%tmp51 = load <8 x i8>* %B		; <<8 x i8>> [#uses=1]
+	%tmp52 = mul <8 x i8> %tmp45, %tmp51		; <<8 x i8>> [#uses=2]
+	store <8 x i8> %tmp52, <8 x i8>* %A
+	%tmp57 = load <8 x i8>* %B		; <<8 x i8>> [#uses=1]
+	%tmp58 = and <8 x i8> %tmp52, %tmp57		; <<8 x i8>> [#uses=2]
+	store <8 x i8> %tmp58, <8 x i8>* %A
+	%tmp63 = load <8 x i8>* %B		; <<8 x i8>> [#uses=1]
+	%tmp64 = or <8 x i8> %tmp58, %tmp63		; <<8 x i8>> [#uses=2]
+	store <8 x i8> %tmp64, <8 x i8>* %A
+	%tmp69 = load <8 x i8>* %B		; <<8 x i8>> [#uses=1]
+	%tmp70 = xor <8 x i8> %tmp64, %tmp69		; <<8 x i8>> [#uses=1]
+	store <8 x i8> %tmp70, <8 x i8>* %A
+	tail call void @llvm.x86.mmx.emms( )
+	ret void
+}
+
+define void @baz(<2 x i32>* %A, <2 x i32>* %B) {
+entry:
+	%tmp1 = load <2 x i32>* %A		; <<2 x i32>> [#uses=1]
+	%tmp3 = load <2 x i32>* %B		; <<2 x i32>> [#uses=1]
+	%tmp4 = add <2 x i32> %tmp1, %tmp3		; <<2 x i32>> [#uses=2]
+	store <2 x i32> %tmp4, <2 x i32>* %A
+	%tmp9 = load <2 x i32>* %B		; <<2 x i32>> [#uses=1]
+	%tmp10 = sub <2 x i32> %tmp4, %tmp9		; <<2 x i32>> [#uses=2]
+	store <2 x i32> %tmp10, <2 x i32>* %A
+	%tmp15 = load <2 x i32>* %B		; <<2 x i32>> [#uses=1]
+	%tmp16 = mul <2 x i32> %tmp10, %tmp15		; <<2 x i32>> [#uses=2]
+	store <2 x i32> %tmp16, <2 x i32>* %A
+	%tmp21 = load <2 x i32>* %B		; <<2 x i32>> [#uses=1]
+	%tmp22 = and <2 x i32> %tmp16, %tmp21		; <<2 x i32>> [#uses=2]
+	store <2 x i32> %tmp22, <2 x i32>* %A
+	%tmp27 = load <2 x i32>* %B		; <<2 x i32>> [#uses=1]
+	%tmp28 = or <2 x i32> %tmp22, %tmp27		; <<2 x i32>> [#uses=2]
+	store <2 x i32> %tmp28, <2 x i32>* %A
+	%tmp33 = load <2 x i32>* %B		; <<2 x i32>> [#uses=1]
+	%tmp34 = xor <2 x i32> %tmp28, %tmp33		; <<2 x i32>> [#uses=1]
+	store <2 x i32> %tmp34, <2 x i32>* %A
+	tail call void @llvm.x86.mmx.emms( )
+	ret void
+}
+
+define void @bar(<4 x i16>* %A, <4 x i16>* %B) {
+entry:
+	%tmp1 = load <4 x i16>* %A		; <<4 x i16>> [#uses=1]
+	%tmp3 = load <4 x i16>* %B		; <<4 x i16>> [#uses=1]
+	%tmp4 = add <4 x i16> %tmp1, %tmp3		; <<4 x i16>> [#uses=2]
+	store <4 x i16> %tmp4, <4 x i16>* %A
+	%tmp7 = load <4 x i16>* %B		; <<4 x i16>> [#uses=1]
+	%tmp12 = tail call <4 x i16> @llvm.x86.mmx.padds.w( <4 x i16> %tmp4, <4 x i16> %tmp7 )		; <<4 x i16>> [#uses=2]
+	store <4 x i16> %tmp12, <4 x i16>* %A
+	%tmp16 = load <4 x i16>* %B		; <<4 x i16>> [#uses=1]
+	%tmp21 = tail call <4 x i16> @llvm.x86.mmx.paddus.w( <4 x i16> %tmp12, <4 x i16> %tmp16 )		; <<4 x i16>> [#uses=2]
+	store <4 x i16> %tmp21, <4 x i16>* %A
+	%tmp27 = load <4 x i16>* %B		; <<4 x i16>> [#uses=1]
+	%tmp28 = sub <4 x i16> %tmp21, %tmp27		; <<4 x i16>> [#uses=2]
+	store <4 x i16> %tmp28, <4 x i16>* %A
+	%tmp31 = load <4 x i16>* %B		; <<4 x i16>> [#uses=1]
+	%tmp36 = tail call <4 x i16> @llvm.x86.mmx.psubs.w( <4 x i16> %tmp28, <4 x i16> %tmp31 )		; <<4 x i16>> [#uses=2]
+	store <4 x i16> %tmp36, <4 x i16>* %A
+	%tmp40 = load <4 x i16>* %B		; <<4 x i16>> [#uses=1]
+	%tmp45 = tail call <4 x i16> @llvm.x86.mmx.psubus.w( <4 x i16> %tmp36, <4 x i16> %tmp40 )		; <<4 x i16>> [#uses=2]
+	store <4 x i16> %tmp45, <4 x i16>* %A
+	%tmp51 = load <4 x i16>* %B		; <<4 x i16>> [#uses=1]
+	%tmp52 = mul <4 x i16> %tmp45, %tmp51		; <<4 x i16>> [#uses=2]
+	store <4 x i16> %tmp52, <4 x i16>* %A
+	%tmp55 = load <4 x i16>* %B		; <<4 x i16>> [#uses=1]
+	%tmp60 = tail call <4 x i16> @llvm.x86.mmx.pmulh.w( <4 x i16> %tmp52, <4 x i16> %tmp55 )		; <<4 x i16>> [#uses=2]
+	store <4 x i16> %tmp60, <4 x i16>* %A
+	%tmp64 = load <4 x i16>* %B		; <<4 x i16>> [#uses=1]
+	%tmp69 = tail call <2 x i32> @llvm.x86.mmx.pmadd.wd( <4 x i16> %tmp60, <4 x i16> %tmp64 )		; <<2 x i32>> [#uses=1]
+	%tmp70 = bitcast <2 x i32> %tmp69 to <4 x i16>		; <<4 x i16>> [#uses=2]
+	store <4 x i16> %tmp70, <4 x i16>* %A
+	%tmp75 = load <4 x i16>* %B		; <<4 x i16>> [#uses=1]
+	%tmp76 = and <4 x i16> %tmp70, %tmp75		; <<4 x i16>> [#uses=2]
+	store <4 x i16> %tmp76, <4 x i16>* %A
+	%tmp81 = load <4 x i16>* %B		; <<4 x i16>> [#uses=1]
+	%tmp82 = or <4 x i16> %tmp76, %tmp81		; <<4 x i16>> [#uses=2]
+	store <4 x i16> %tmp82, <4 x i16>* %A
+	%tmp87 = load <4 x i16>* %B		; <<4 x i16>> [#uses=1]
+	%tmp88 = xor <4 x i16> %tmp82, %tmp87		; <<4 x i16>> [#uses=1]
+	store <4 x i16> %tmp88, <4 x i16>* %A
+	tail call void @llvm.x86.mmx.emms( )
+	ret void
+}
+
+declare <8 x i8> @llvm.x86.mmx.padds.b(<8 x i8>, <8 x i8>)
+
+declare <8 x i8> @llvm.x86.mmx.paddus.b(<8 x i8>, <8 x i8>)
+
+declare <8 x i8> @llvm.x86.mmx.psubs.b(<8 x i8>, <8 x i8>)
+
+declare <8 x i8> @llvm.x86.mmx.psubus.b(<8 x i8>, <8 x i8>)
+
+declare <4 x i16> @llvm.x86.mmx.padds.w(<4 x i16>, <4 x i16>)
+
+declare <4 x i16> @llvm.x86.mmx.paddus.w(<4 x i16>, <4 x i16>)
+
+declare <4 x i16> @llvm.x86.mmx.psubs.w(<4 x i16>, <4 x i16>)
+
+declare <4 x i16> @llvm.x86.mmx.psubus.w(<4 x i16>, <4 x i16>)
+
+declare <4 x i16> @llvm.x86.mmx.pmulh.w(<4 x i16>, <4 x i16>)
+
+declare <2 x i32> @llvm.x86.mmx.pmadd.wd(<4 x i16>, <4 x i16>)
+
+declare void @llvm.x86.mmx.emms()
diff --git a/test/CodeGen/X86/mmx-bitcast-to-i64.ll b/test/CodeGen/X86/mmx-bitcast-to-i64.ll
new file mode 100644
index 0000000..1fd8f67
--- /dev/null
+++ b/test/CodeGen/X86/mmx-bitcast-to-i64.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=x86-64 | grep movd | count 4
+
+define i64 @foo(<1 x i64>* %p) {
+  %t = load <1 x i64>* %p
+  %u = add <1 x i64> %t, %t
+  %s = bitcast <1 x i64> %u to i64
+  ret i64 %s
+}
+define i64 @goo(<2 x i32>* %p) {
+  %t = load <2 x i32>* %p
+  %u = add <2 x i32> %t, %t
+  %s = bitcast <2 x i32> %u to i64
+  ret i64 %s
+}
+define i64 @hoo(<4 x i16>* %p) {
+  %t = load <4 x i16>* %p
+  %u = add <4 x i16> %t, %t
+  %s = bitcast <4 x i16> %u to i64
+  ret i64 %s
+}
+define i64 @ioo(<8 x i8>* %p) {
+  %t = load <8 x i8>* %p
+  %u = add <8 x i8> %t, %t
+  %s = bitcast <8 x i8> %u to i64
+  ret i64 %s
+}
diff --git a/test/CodeGen/X86/mmx-copy-gprs.ll b/test/CodeGen/X86/mmx-copy-gprs.ll
new file mode 100644
index 0000000..3607043
--- /dev/null
+++ b/test/CodeGen/X86/mmx-copy-gprs.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86-64           | grep {movq.*(%rsi), %rax}
+; RUN: llc < %s -march=x86 -mattr=-sse2 | grep {movl.*4(%eax),}
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {movsd.(%eax),}
+
+; This test should use GPRs to copy the mmx value, not MMX regs.  Using mmx regs,
+; increases the places that need to use emms.
+
+; rdar://5741668
+
+define void @foo(<1 x i64>* %x, <1 x i64>* %y) nounwind  {
+entry:
+	%tmp1 = load <1 x i64>* %y, align 8		; <<1 x i64>> [#uses=1]
+	store <1 x i64> %tmp1, <1 x i64>* %x, align 8
+	ret void
+}
diff --git a/test/CodeGen/X86/mmx-emms.ll b/test/CodeGen/X86/mmx-emms.ll
new file mode 100644
index 0000000..5ff2588
--- /dev/null
+++ b/test/CodeGen/X86/mmx-emms.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep emms
+define void @foo() {
+entry:
+	call void @llvm.x86.mmx.emms( )
+	br label %return
+
+return:		; preds = %entry
+	ret void
+}
+
+declare void @llvm.x86.mmx.emms()
diff --git a/test/CodeGen/X86/mmx-insert-element.ll b/test/CodeGen/X86/mmx-insert-element.ll
new file mode 100644
index 0000000..a063ee1
--- /dev/null
+++ b/test/CodeGen/X86/mmx-insert-element.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=x86 -mattr=+mmx | not grep movq
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep psllq
+
+define <2 x i32> @qux(i32 %A) nounwind {
+	%tmp3 = insertelement <2 x i32> < i32 0, i32 undef >, i32 %A, i32 1		; <<2 x i32>> [#uses=1]
+	ret <2 x i32> %tmp3
+}
diff --git a/test/CodeGen/X86/mmx-pinsrw.ll b/test/CodeGen/X86/mmx-pinsrw.ll
new file mode 100644
index 0000000..3af09f4
--- /dev/null
+++ b/test/CodeGen/X86/mmx-pinsrw.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep pinsrw | count 1
+; PR2562
+
+external global i16		; <i16*>:0 [#uses=1]
+external global <4 x i16>		; <<4 x i16>*>:1 [#uses=2]
+
+declare void @abort()
+
+define void @""() {
+	load i16* @0		; <i16>:1 [#uses=1]
+	load <4 x i16>* @1		; <<4 x i16>>:2 [#uses=1]
+	insertelement <4 x i16> %2, i16 %1, i32 0		; <<4 x i16>>:3 [#uses=1]
+	store <4 x i16> %3, <4 x i16>* @1
+	ret void
+}
diff --git a/test/CodeGen/X86/mmx-punpckhdq.ll b/test/CodeGen/X86/mmx-punpckhdq.ll
new file mode 100644
index 0000000..0af7e01
--- /dev/null
+++ b/test/CodeGen/X86/mmx-punpckhdq.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep punpckhdq | count 1
+
+define void @bork(<1 x i64>* %x) {
+entry:
+	%tmp2 = load <1 x i64>* %x		; <<1 x i64>> [#uses=1]
+	%tmp6 = bitcast <1 x i64> %tmp2 to <2 x i32>		; <<2 x i32>> [#uses=1]
+	%tmp9 = shufflevector <2 x i32> %tmp6, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >		; <<2 x i32>> [#uses=1]
+	%tmp10 = bitcast <2 x i32> %tmp9 to <1 x i64>		; <<1 x i64>> [#uses=1]
+	store <1 x i64> %tmp10, <1 x i64>* %x
+	tail call void @llvm.x86.mmx.emms( )
+	ret void
+}
+
+declare void @llvm.x86.mmx.emms()
diff --git a/test/CodeGen/X86/mmx-s2v.ll b/test/CodeGen/X86/mmx-s2v.ll
new file mode 100644
index 0000000..c98023c
--- /dev/null
+++ b/test/CodeGen/X86/mmx-s2v.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 -mattr=+mmx
+; PR2574
+
+define void @entry(i32 %m_task_id, i32 %start_x, i32 %end_x) {; <label>:0
+        br i1 true, label %bb.nph, label %._crit_edge
+
+bb.nph:         ; preds = %bb.nph, %0
+        %t2206f2.0 = phi <2 x float> [ %2, %bb.nph ], [ undef, %0 ]             ; <<2 x float>> [#uses=1]
+        insertelement <2 x float> %t2206f2.0, float 0.000000e+00, i32 0         ; <<2 x float>>:1 [#uses=1]
+        insertelement <2 x float> %1, float 0.000000e+00, i32 1         ; <<2 x float>>:2 [#uses=1]
+        br label %bb.nph
+
+._crit_edge:            ; preds = %0
+        ret void
+}
diff --git a/test/CodeGen/X86/mmx-shift.ll b/test/CodeGen/X86/mmx-shift.ll
new file mode 100644
index 0000000..dd0aa2c
--- /dev/null
+++ b/test/CodeGen/X86/mmx-shift.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep psllq | grep 32
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep psllq | grep 32
+; RUN: llc < %s -march=x86 -mattr=+mmx | grep psrad
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep psrlw
+
+define i64 @t1(<1 x i64> %mm1) nounwind  {
+entry:
+	%tmp6 = tail call <1 x i64> @llvm.x86.mmx.pslli.q( <1 x i64> %mm1, i32 32 )		; <<1 x i64>> [#uses=1]
+	%retval1112 = bitcast <1 x i64> %tmp6 to i64		; <i64> [#uses=1]
+	ret i64 %retval1112
+}
+
+declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32) nounwind readnone 
+
+define i64 @t2(<2 x i32> %mm1, <2 x i32> %mm2) nounwind  {
+entry:
+	%tmp7 = tail call <2 x i32> @llvm.x86.mmx.psra.d( <2 x i32> %mm1, <2 x i32> %mm2 ) nounwind readnone 		; <<2 x i32>> [#uses=1]
+	%retval1112 = bitcast <2 x i32> %tmp7 to i64		; <i64> [#uses=1]
+	ret i64 %retval1112
+}
+
+declare <2 x i32> @llvm.x86.mmx.psra.d(<2 x i32>, <2 x i32>) nounwind readnone 
+
+define i64 @t3(<1 x i64> %mm1, i32 %bits) nounwind  {
+entry:
+	%tmp6 = bitcast <1 x i64> %mm1 to <4 x i16>		; <<4 x i16>> [#uses=1]
+	%tmp8 = tail call <4 x i16> @llvm.x86.mmx.psrli.w( <4 x i16> %tmp6, i32 %bits ) nounwind readnone 		; <<4 x i16>> [#uses=1]
+	%retval1314 = bitcast <4 x i16> %tmp8 to i64		; <i64> [#uses=1]
+	ret i64 %retval1314
+}
+
+declare <4 x i16> @llvm.x86.mmx.psrli.w(<4 x i16>, i32) nounwind readnone 
diff --git a/test/CodeGen/X86/mmx-shuffle.ll b/test/CodeGen/X86/mmx-shuffle.ll
new file mode 100644
index 0000000..e3125c7
--- /dev/null
+++ b/test/CodeGen/X86/mmx-shuffle.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -mcpu=yonah
+; PR1427
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-pc-linux-gnu"
+	%struct.DrawHelper = type { void (i32, %struct.QT_FT_Span*, i8*)*, void (i32, %struct.QT_FT_Span*, i8*)*, void (%struct.QRasterBuffer*, i32, i32, i32, i8*, i32, i32, i32)*, void (%struct.QRasterBuffer*, i32, i32, i32, i8*, i32, i32, i32)*, void (%struct.QRasterBuffer*, i32, i32, i32, i32, i32)* }
+	%struct.QBasicAtomic = type { i32 }
+	%struct.QClipData = type { i32, "struct.QClipData::ClipLine"*, i32, i32, %struct.QT_FT_Span*, i32, i32, i32, i32 }
+	"struct.QClipData::ClipLine" = type { i32, %struct.QT_FT_Span* }
+	%struct.QRasterBuffer = type { %struct.QRect, %struct.QRegion, %struct.QClipData*, %struct.QClipData*, i8, i32, i32, %struct.DrawHelper*, i32, i32, i32, i8* }
+	%struct.QRect = type { i32, i32, i32, i32 }
+	%struct.QRegion = type { "struct.QRegion::QRegionData"* }
+	"struct.QRegion::QRegionData" = type { %struct.QBasicAtomic, %struct._XRegion*, i8*, %struct.QRegionPrivate* }
+	%struct.QRegionPrivate = type opaque
+	%struct.QT_FT_Span = type { i16, i16, i16, i8 }
+	%struct._XRegion = type opaque
+
+define void @_Z19qt_bitmapblit16_sseP13QRasterBufferiijPKhiii(%struct.QRasterBuffer* %rasterBuffer, i32 %x, i32 %y, i32 %color, i8* %src, i32 %width, i32 %height, i32 %stride) {
+entry:
+	%tmp528 = bitcast <8 x i8> zeroinitializer to <2 x i32>		; <<2 x i32>> [#uses=1]
+	%tmp529 = and <2 x i32> %tmp528, bitcast (<4 x i16> < i16 -32640, i16 16448, i16 8224, i16 4112 > to <2 x i32>)		; <<2 x i32>> [#uses=1]
+	%tmp542 = bitcast <2 x i32> %tmp529 to <4 x i16>		; <<4 x i16>> [#uses=1]
+	%tmp543 = add <4 x i16> %tmp542, < i16 0, i16 16448, i16 24672, i16 28784 >		; <<4 x i16>> [#uses=1]
+	%tmp555 = bitcast <4 x i16> %tmp543 to <8 x i8>		; <<8 x i8>> [#uses=1]
+	tail call void @llvm.x86.mmx.maskmovq( <8 x i8> zeroinitializer, <8 x i8> %tmp555, i8* null )
+	ret void
+}
+
+declare void @llvm.x86.mmx.maskmovq(<8 x i8>, <8 x i8>, i8*)
diff --git a/test/CodeGen/X86/mmx-vzmovl-2.ll b/test/CodeGen/X86/mmx-vzmovl-2.ll
new file mode 100644
index 0000000..8253c20
--- /dev/null
+++ b/test/CodeGen/X86/mmx-vzmovl-2.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep pxor
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep punpckldq
+
+	%struct.vS1024 = type { [8 x <4 x i32>] }
+	%struct.vS512 = type { [4 x <4 x i32>] }
+
+declare <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64>, i32) nounwind readnone
+
+define void @t() nounwind {
+entry:
+	br label %bb554
+
+bb554:		; preds = %bb554, %entry
+	%sum.0.reg2mem.0 = phi <1 x i64> [ %tmp562, %bb554 ], [ zeroinitializer, %entry ]		; <<1 x i64>> [#uses=1]
+	%0 = load <1 x i64>* null, align 8		; <<1 x i64>> [#uses=2]
+	%1 = bitcast <1 x i64> %0 to <2 x i32>		; <<2 x i32>> [#uses=1]
+	%tmp555 = and <2 x i32> %1, < i32 -1, i32 0 >		; <<2 x i32>> [#uses=1]
+	%2 = bitcast <2 x i32> %tmp555 to <1 x i64>		; <<1 x i64>> [#uses=1]
+	%3 = call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> %0, i32 32) nounwind readnone		; <<1 x i64>> [#uses=1]
+        store <1 x i64> %sum.0.reg2mem.0, <1 x i64>* null
+	%tmp558 = add <1 x i64> %sum.0.reg2mem.0, %2		; <<1 x i64>> [#uses=1]
+	%4 = call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> %tmp558, i32 32) nounwind readnone		; <<1 x i64>> [#uses=1]
+	%tmp562 = add <1 x i64> %4, %3		; <<1 x i64>> [#uses=1]
+	br label %bb554
+}
diff --git a/test/CodeGen/X86/mmx-vzmovl.ll b/test/CodeGen/X86/mmx-vzmovl.ll
new file mode 100644
index 0000000..d21e240
--- /dev/null
+++ b/test/CodeGen/X86/mmx-vzmovl.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep movd
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep movq
+
+define void @foo(<1 x i64>* %a, <1 x i64>* %b) nounwind {
+entry:
+	%0 = load <1 x i64>* %a, align 8		; <<1 x i64>> [#uses=1]
+	%1 = bitcast <1 x i64> %0 to <2 x i32>		; <<2 x i32>> [#uses=1]
+	%2 = and <2 x i32> %1, < i32 -1, i32 0 >		; <<2 x i32>> [#uses=1]
+	%3 = bitcast <2 x i32> %2 to <1 x i64>		; <<1 x i64>> [#uses=1]
+	store <1 x i64> %3, <1 x i64>* %b, align 8
+	br label %bb2
+
+bb2:		; preds = %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/movfs.ll b/test/CodeGen/X86/movfs.ll
new file mode 100644
index 0000000..823e986
--- /dev/null
+++ b/test/CodeGen/X86/movfs.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 | grep fs
+
+define i32 @foo() nounwind readonly {
+entry:
+	%tmp = load i32* addrspace(257)* getelementptr (i32* addrspace(257)* inttoptr (i32 72 to i32* addrspace(257)*), i32 31)		; <i32*> [#uses=1]
+	%tmp1 = load i32* %tmp		; <i32> [#uses=1]
+	ret i32 %tmp1
+}
diff --git a/test/CodeGen/X86/movgs.ll b/test/CodeGen/X86/movgs.ll
new file mode 100644
index 0000000..b04048b
--- /dev/null
+++ b/test/CodeGen/X86/movgs.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 | grep gs
+
+define i32 @foo() nounwind readonly {
+entry:
+	%tmp = load i32* addrspace(256)* getelementptr (i32* addrspace(256)* inttoptr (i32 72 to i32* addrspace(256)*), i32 31)		; <i32*> [#uses=1]
+	%tmp1 = load i32* %tmp		; <i32> [#uses=1]
+	ret i32 %tmp1
+}
diff --git a/test/CodeGen/X86/mul-legalize.ll b/test/CodeGen/X86/mul-legalize.ll
new file mode 100644
index 0000000..069737d
--- /dev/null
+++ b/test/CodeGen/X86/mul-legalize.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86 | grep 24576
+; PR2135
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+@.str = constant [13 x i8] c"c45531m.adb\00\00"		
+
+define void @main() nounwind {
+entry:
+	%tmp1 = call i1 @report__equal( i32 3, i32 3 )		
+	%b.0 = select i1 %tmp1, i64 35184372088832, i64 0		
+	%tmp7 = mul i64 3, %b.0		
+	%tmp32 = icmp eq i64 %tmp7, 105553116266496		
+	br i1 %tmp32, label %return, label %bb35
+bb35:		
+	call void @abort( )
+	unreachable
+return:		
+	ret void
+}
+
+declare i1 @report__equal(i32 %x, i32 %y) nounwind
+
+declare void @abort()
diff --git a/test/CodeGen/X86/mul-remat.ll b/test/CodeGen/X86/mul-remat.ll
new file mode 100644
index 0000000..3fa0050
--- /dev/null
+++ b/test/CodeGen/X86/mul-remat.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 | grep mov | count 1
+; PR1874
+	
+define i32 @test(i32 %a, i32 %b) {
+entry:
+	%tmp3 = mul i32 %b, %a
+	ret i32 %tmp3
+}
diff --git a/test/CodeGen/X86/mul-shift-reassoc.ll b/test/CodeGen/X86/mul-shift-reassoc.ll
new file mode 100644
index 0000000..3777d8b
--- /dev/null
+++ b/test/CodeGen/X86/mul-shift-reassoc.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 | grep lea
+; RUN: llc < %s -march=x86 | not grep add
+
+define i32 @test(i32 %X, i32 %Y) {
+	; Push the shl through the mul to allow an LEA to be formed, instead
+        ; of using a shift and add separately.
+        %tmp.2 = shl i32 %X, 1          ; <i32> [#uses=1]
+        %tmp.3 = mul i32 %tmp.2, %Y             ; <i32> [#uses=1]
+        %tmp.5 = add i32 %tmp.3, %Y             ; <i32> [#uses=1]
+        ret i32 %tmp.5
+}
+
diff --git a/test/CodeGen/X86/mul128.ll b/test/CodeGen/X86/mul128.ll
new file mode 100644
index 0000000..6825b99
--- /dev/null
+++ b/test/CodeGen/X86/mul128.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=x86-64 | grep mul | count 3
+
+define i128 @foo(i128 %t, i128 %u) {
+  %k = mul i128 %t, %u
+  ret i128 %k
+}
diff --git a/test/CodeGen/X86/mul64.ll b/test/CodeGen/X86/mul64.ll
new file mode 100644
index 0000000..5a25c5d
--- /dev/null
+++ b/test/CodeGen/X86/mul64.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=x86 | grep mul | count 3
+
+define i64 @foo(i64 %t, i64 %u) {
+  %k = mul i64 %t, %u
+  ret i64 %k
+}
diff --git a/test/CodeGen/X86/multiple-return-values-cross-block.ll b/test/CodeGen/X86/multiple-return-values-cross-block.ll
new file mode 100644
index 0000000..e9837d0
--- /dev/null
+++ b/test/CodeGen/X86/multiple-return-values-cross-block.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86
+
+declare {x86_fp80, x86_fp80} @test()
+
+define void @call2(x86_fp80 *%P1, x86_fp80 *%P2) {	; uses one getresult in each of two blocks: cross-block use of an aggregate return must codegen
+  %a = call {x86_fp80,x86_fp80} @test()
+  %b = getresult {x86_fp80,x86_fp80} %a, 1	; second element, consumed in the defining block
+  store x86_fp80 %b, x86_fp80* %P1
+br label %L
+
+L:
+  %c = getresult {x86_fp80,x86_fp80} %a, 0	; first element, consumed in a different block than the call
+  store x86_fp80 %c, x86_fp80* %P2
+  ret void
+}
diff --git a/test/CodeGen/X86/multiple-return-values.ll b/test/CodeGen/X86/multiple-return-values.ll
new file mode 100644
index 0000000..018d997
--- /dev/null
+++ b/test/CodeGen/X86/multiple-return-values.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=x86
+
+define {i64, float} @bar(i64 %a, float %b) {	; returns (a+7, b+7.0) via the era's multi-operand ret syntax
+        %y = add i64 %a, 7
+        %z = fadd float %b, 7.0
+	ret i64 %y, float %z	; two-value return (pre-aggregate-return IR form)
+}
+
+define i64 @foo() {	; extracts both results of bar(21, 21.0): N=28, O=28.0, so returns 28+28 = 56
+	%M = call {i64, float} @bar(i64 21, float 21.0)
+        %N = getresult {i64, float} %M, 0	; i64 half = 21+7 = 28
+        %O = getresult {i64, float} %M, 1	; float half = 21.0+7.0 = 28.0
+        %P = fptosi float %O to i64	; 28
+        %Q = add i64 %P, %N	; 56
+	ret i64 %Q
+}
diff --git a/test/CodeGen/X86/nancvt.ll b/test/CodeGen/X86/nancvt.ll
new file mode 100644
index 0000000..82b7331
--- /dev/null
+++ b/test/CodeGen/X86/nancvt.ll
@@ -0,0 +1,183 @@
+; RUN: opt < %s -std-compile-opts | llc > %t
+; RUN: grep 2147027116 %t | count 3
+; RUN: grep 2147228864 %t | count 3
+; RUN: grep 2146502828 %t | count 3
+; RUN: grep 2143034560 %t | count 3
+; Compile time conversions of NaNs.
+; ModuleID = 'nan2.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin8"
+	%struct..0anon = type { float }
+	%struct..1anon = type { double }
+@fnan = constant [3 x i32] [ i32 2143831397, i32 2143831396, i32 2143831398 ]		; <[3 x i32]*> [#uses=1]
+@dnan = constant [3 x i64] [ i64 9223235251041752696, i64 9223235251041752697, i64 9223235250773317239 ], align 8		; <[3 x i64]*> [#uses=1]
+@fsnan = constant [3 x i32] [ i32 2139637093, i32 2139637092, i32 2139637094 ]		; <[3 x i32]*> [#uses=1]
+@dsnan = constant [3 x i64] [ i64 9220983451228067448, i64 9220983451228067449, i64 9220983450959631991 ], align 8		; <[3 x i64]*> [#uses=1]
+@.str = internal constant [10 x i8] c"%08x%08x\0A\00"		; <[10 x i8]*> [#uses=2]
+@.str1 = internal constant [6 x i8] c"%08x\0A\00"		; <[6 x i8]*> [#uses=2]
+
+@var = external global i32
+
+define i32 @main() {	; four i=0..2 loops: float<->double NaN conversions whose results the RUN greps expect folded to constants
+entry:
+	%retval = alloca i32, align 4		; <i32*> [#uses=1]
+	%i = alloca i32, align 4		; <i32*> [#uses=20] - shared loop counter for all four loops
+	%uf = alloca %struct..0anon, align 4		; <%struct..0anon*> [#uses=8] - union-of-float scratch
+	%ud = alloca %struct..1anon, align 8		; <%struct..1anon*> [#uses=10] - union-of-double scratch
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	store i32 0, i32* %i, align 4	; i = 0 for loop 1
+	br label %bb23
+
+bb:		; preds = %bb23 - loop 1 body: widen fnan[i] (QNaN float bits) to double, emit both 32-bit halves
+	%tmp = load i32* %i, align 4		; <i32> [#uses=1]
+	%tmp1 = getelementptr [3 x i32]* @fnan, i32 0, i32 %tmp		; <i32*> [#uses=1]
+	%tmp2 = load i32* %tmp1, align 4		; <i32> [#uses=1]
+	%tmp3 = getelementptr %struct..0anon* %uf, i32 0, i32 0		; <float*> [#uses=1]
+	%tmp34 = bitcast float* %tmp3 to i32*		; <i32*> [#uses=1]
+	store i32 %tmp2, i32* %tmp34, align 4	; type-pun: store raw bits into uf.f
+	%tmp5 = getelementptr %struct..0anon* %uf, i32 0, i32 0		; <float*> [#uses=1]
+	%tmp6 = load float* %tmp5, align 4		; <float> [#uses=1]
+	%tmp67 = fpext float %tmp6 to double		; <double> [#uses=1] - the conversion under test
+	%tmp8 = getelementptr %struct..1anon* %ud, i32 0, i32 0		; <double*> [#uses=1]
+	store double %tmp67, double* %tmp8, align 8
+	%tmp9 = getelementptr %struct..1anon* %ud, i32 0, i32 0		; <double*> [#uses=1]
+	%tmp910 = bitcast double* %tmp9 to i64*		; <i64*> [#uses=1]
+	%tmp11 = load i64* %tmp910, align 8		; <i64> [#uses=1] - raw bits of the double back out
+	%tmp1112 = trunc i64 %tmp11 to i32		; <i32> [#uses=1]
+	%tmp13 = and i32 %tmp1112, -1		; <i32> [#uses=1] - no-op mask; low 32 bits
+	%tmp14 = getelementptr %struct..1anon* %ud, i32 0, i32 0		; <double*> [#uses=1]
+	%tmp1415 = bitcast double* %tmp14 to i64*		; <i64*> [#uses=1]
+	%tmp16 = load i64* %tmp1415, align 8		; <i64> [#uses=1]
+	%.cast = zext i32 32 to i64		; <i64> [#uses=1]
+	%tmp17 = ashr i64 %tmp16, %.cast		; <i64> [#uses=1] - high 32 bits
+	%tmp1718 = trunc i64 %tmp17 to i32		; <i32> [#uses=1]
+	%tmp19 = getelementptr [10 x i8]* @.str, i32 0, i32 0		; <i8*> [#uses=1] - leftover printf format pointer; presumably dead after printf removal - TODO confirm
+	volatile store i32 %tmp1718, i32* @var	; volatile keeps the constant-folded value in the output for the greps
+	volatile store i32 %tmp13, i32* @var
+	%tmp21 = load i32* %i, align 4		; <i32> [#uses=1]
+	%tmp22 = add i32 %tmp21, 1		; <i32> [#uses=1] - i++
+	store i32 %tmp22, i32* %i, align 4
+	br label %bb23
+
+bb23:		; preds = %bb, %entry - loop 1 test: continue while i <= 2
+	%tmp24 = load i32* %i, align 4		; <i32> [#uses=1]
+	%tmp25 = icmp sle i32 %tmp24, 2		; <i1> [#uses=1]
+	%tmp2526 = zext i1 %tmp25 to i8		; <i8> [#uses=1]
+	%toBool = icmp ne i8 %tmp2526, 0		; <i1> [#uses=1]
+	br i1 %toBool, label %bb, label %bb27
+
+bb27:		; preds = %bb23 - reset counter for loop 2
+	store i32 0, i32* %i, align 4
+	br label %bb46
+
+bb28:		; preds = %bb46 - loop 2 body: narrow dnan[i] (QNaN double bits) to float, emit its 32 bits
+	%tmp29 = load i32* %i, align 4		; <i32> [#uses=1]
+	%tmp30 = getelementptr [3 x i64]* @dnan, i32 0, i32 %tmp29		; <i64*> [#uses=1]
+	%tmp31 = load i64* %tmp30, align 8		; <i64> [#uses=1]
+	%tmp32 = getelementptr %struct..1anon* %ud, i32 0, i32 0		; <double*> [#uses=1]
+	%tmp3233 = bitcast double* %tmp32 to i64*		; <i64*> [#uses=1]
+	store i64 %tmp31, i64* %tmp3233, align 8	; type-pun: store raw bits into ud.d
+	%tmp35 = getelementptr %struct..1anon* %ud, i32 0, i32 0		; <double*> [#uses=1]
+	%tmp36 = load double* %tmp35, align 8		; <double> [#uses=1]
+	%tmp3637 = fptrunc double %tmp36 to float		; <float> [#uses=1] - the conversion under test
+	%tmp38 = getelementptr %struct..0anon* %uf, i32 0, i32 0		; <float*> [#uses=1]
+	store float %tmp3637, float* %tmp38, align 4
+	%tmp39 = getelementptr %struct..0anon* %uf, i32 0, i32 0		; <float*> [#uses=1]
+	%tmp3940 = bitcast float* %tmp39 to i32*		; <i32*> [#uses=1]
+	%tmp41 = load i32* %tmp3940, align 4		; <i32> [#uses=1] - raw bits of the float
+	%tmp42 = getelementptr [6 x i8]* @.str1, i32 0, i32 0		; <i8*> [#uses=1] - leftover printf format pointer; presumably dead - TODO confirm
+	volatile store i32 %tmp41, i32* @var
+	%tmp44 = load i32* %i, align 4		; <i32> [#uses=1]
+	%tmp45 = add i32 %tmp44, 1		; <i32> [#uses=1] - i++
+	store i32 %tmp45, i32* %i, align 4
+	br label %bb46
+
+bb46:		; preds = %bb28, %bb27 - loop 2 test: continue while i <= 2
+	%tmp47 = load i32* %i, align 4		; <i32> [#uses=1]
+	%tmp48 = icmp sle i32 %tmp47, 2		; <i1> [#uses=1]
+	%tmp4849 = zext i1 %tmp48 to i8		; <i8> [#uses=1]
+	%toBool50 = icmp ne i8 %tmp4849, 0		; <i1> [#uses=1]
+	br i1 %toBool50, label %bb28, label %bb51
+
+bb51:		; preds = %bb46 - reset counter for loop 3
+	store i32 0, i32* %i, align 4
+	br label %bb78
+
+bb52:		; preds = %bb78 - loop 3 body: same as loop 1 but from fsnan (signalling NaN float bits)
+	%tmp53 = load i32* %i, align 4		; <i32> [#uses=1]
+	%tmp54 = getelementptr [3 x i32]* @fsnan, i32 0, i32 %tmp53		; <i32*> [#uses=1]
+	%tmp55 = load i32* %tmp54, align 4		; <i32> [#uses=1]
+	%tmp56 = getelementptr %struct..0anon* %uf, i32 0, i32 0		; <float*> [#uses=1]
+	%tmp5657 = bitcast float* %tmp56 to i32*		; <i32*> [#uses=1]
+	store i32 %tmp55, i32* %tmp5657, align 4
+	%tmp58 = getelementptr %struct..0anon* %uf, i32 0, i32 0		; <float*> [#uses=1]
+	%tmp59 = load float* %tmp58, align 4		; <float> [#uses=1]
+	%tmp5960 = fpext float %tmp59 to double		; <double> [#uses=1]
+	%tmp61 = getelementptr %struct..1anon* %ud, i32 0, i32 0		; <double*> [#uses=1]
+	store double %tmp5960, double* %tmp61, align 8
+	%tmp62 = getelementptr %struct..1anon* %ud, i32 0, i32 0		; <double*> [#uses=1]
+	%tmp6263 = bitcast double* %tmp62 to i64*		; <i64*> [#uses=1]
+	%tmp64 = load i64* %tmp6263, align 8		; <i64> [#uses=1]
+	%tmp6465 = trunc i64 %tmp64 to i32		; <i32> [#uses=1]
+	%tmp66 = and i32 %tmp6465, -1		; <i32> [#uses=1] - no-op mask; low 32 bits
+	%tmp68 = getelementptr %struct..1anon* %ud, i32 0, i32 0		; <double*> [#uses=1]
+	%tmp6869 = bitcast double* %tmp68 to i64*		; <i64*> [#uses=1]
+	%tmp70 = load i64* %tmp6869, align 8		; <i64> [#uses=1]
+	%.cast71 = zext i32 32 to i64		; <i64> [#uses=1]
+	%tmp72 = ashr i64 %tmp70, %.cast71		; <i64> [#uses=1] - high 32 bits
+	%tmp7273 = trunc i64 %tmp72 to i32		; <i32> [#uses=1]
+	%tmp74 = getelementptr [10 x i8]* @.str, i32 0, i32 0		; <i8*> [#uses=1] - leftover printf format pointer; presumably dead - TODO confirm
+	volatile store i32 %tmp7273, i32* @var
+	volatile store i32 %tmp66, i32* @var
+	%tmp76 = load i32* %i, align 4		; <i32> [#uses=1]
+	%tmp77 = add i32 %tmp76, 1		; <i32> [#uses=1] - i++
+	store i32 %tmp77, i32* %i, align 4
+	br label %bb78
+
+bb78:		; preds = %bb52, %bb51 - loop 3 test: continue while i <= 2
+	%tmp79 = load i32* %i, align 4		; <i32> [#uses=1]
+	%tmp80 = icmp sle i32 %tmp79, 2		; <i1> [#uses=1]
+	%tmp8081 = zext i1 %tmp80 to i8		; <i8> [#uses=1]
+	%toBool82 = icmp ne i8 %tmp8081, 0		; <i1> [#uses=1]
+	br i1 %toBool82, label %bb52, label %bb83
+
+bb83:		; preds = %bb78 - reset counter for loop 4
+	store i32 0, i32* %i, align 4
+	br label %bb101
+
+bb84:		; preds = %bb101 - loop 4 body: same as loop 2 but from dsnan (signalling NaN double bits)
+	%tmp85 = load i32* %i, align 4		; <i32> [#uses=1]
+	%tmp86 = getelementptr [3 x i64]* @dsnan, i32 0, i32 %tmp85		; <i64*> [#uses=1]
+	%tmp87 = load i64* %tmp86, align 8		; <i64> [#uses=1]
+	%tmp88 = getelementptr %struct..1anon* %ud, i32 0, i32 0		; <double*> [#uses=1]
+	%tmp8889 = bitcast double* %tmp88 to i64*		; <i64*> [#uses=1]
+	store i64 %tmp87, i64* %tmp8889, align 8
+	%tmp90 = getelementptr %struct..1anon* %ud, i32 0, i32 0		; <double*> [#uses=1]
+	%tmp91 = load double* %tmp90, align 8		; <double> [#uses=1]
+	%tmp9192 = fptrunc double %tmp91 to float		; <float> [#uses=1]
+	%tmp93 = getelementptr %struct..0anon* %uf, i32 0, i32 0		; <float*> [#uses=1]
+	store float %tmp9192, float* %tmp93, align 4
+	%tmp94 = getelementptr %struct..0anon* %uf, i32 0, i32 0		; <float*> [#uses=1]
+	%tmp9495 = bitcast float* %tmp94 to i32*		; <i32*> [#uses=1]
+	%tmp96 = load i32* %tmp9495, align 4		; <i32> [#uses=1]
+	%tmp97 = getelementptr [6 x i8]* @.str1, i32 0, i32 0		; <i8*> [#uses=1] - leftover printf format pointer; presumably dead - TODO confirm
+	volatile store i32 %tmp96, i32* @var
+	%tmp99 = load i32* %i, align 4		; <i32> [#uses=1]
+	%tmp100 = add i32 %tmp99, 1		; <i32> [#uses=1] - i++
+	store i32 %tmp100, i32* %i, align 4
+	br label %bb101
+
+bb101:		; preds = %bb84, %bb83 - loop 4 test: continue while i <= 2
+	%tmp102 = load i32* %i, align 4		; <i32> [#uses=1]
+	%tmp103 = icmp sle i32 %tmp102, 2		; <i1> [#uses=1]
+	%tmp103104 = zext i1 %tmp103 to i8		; <i8> [#uses=1]
+	%toBool105 = icmp ne i8 %tmp103104, 0		; <i1> [#uses=1]
+	br i1 %toBool105, label %bb84, label %bb106
+
+bb106:		; preds = %bb101
+	br label %return
+
+bb106:		; preds = %bb101
+	br label %return
+
+return:		; preds = %bb106 - note: %retval is read but never stored; returns whatever the alloca holds
+	%retval107 = load i32* %retval		; <i32> [#uses=1]
+	ret i32 %retval107
+}
diff --git a/test/CodeGen/X86/narrow_op-1.ll b/test/CodeGen/X86/narrow_op-1.ll
new file mode 100644
index 0000000..18f1108
--- /dev/null
+++ b/test/CodeGen/X86/narrow_op-1.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=x86-64 | grep orb | count 1
+; RUN: llc < %s -march=x86-64 | grep orb | grep 1
+; RUN: llc < %s -march=x86-64 | grep orl | count 1
+; RUN: llc < %s -march=x86-64 | grep orl | grep 16842752
+
+	%struct.bf = type { i64, i16, i16, i32 }
+@bfi = common global %struct.bf zeroinitializer, align 16
+
+define void @t1() nounwind optsize ssp {	; i32 RMW over a 16-bit field; single set bit (65536) should narrow to a byte "orb $1"
+entry:
+	%0 = load i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
+	%1 = or i32 %0, 65536	; bit 16 only - lands entirely in one byte of the wide load
+	store i32 %1, i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
+	ret void
+}
+
+define void @t2() nounwind optsize ssp {	; same RMW shape but mask 16842752 spans bytes, so it must stay a 32-bit "orl"
+entry:
+	%0 = load i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
+	%1 = or i32 %0, 16842752	; 0x01010000 - two set bits in different bytes
+	store i32 %1, i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
+	ret void
+}
diff --git a/test/CodeGen/X86/narrow_op-2.ll b/test/CodeGen/X86/narrow_op-2.ll
new file mode 100644
index 0000000..796ef7a
--- /dev/null
+++ b/test/CodeGen/X86/narrow_op-2.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+	%struct.bf = type { i64, i16, i16, i32 }
+@bfi = external global %struct.bf*
+
+define void @t1() nounwind ssp {	; two and-with-single-clear-bit RMWs through a pointer; each should narrow to a byte "andb" at offset 10
+entry:
+
+; CHECK: andb	$-2, 10(
+; CHECK: andb	$-3, 10(
+
+	%0 = load %struct.bf** @bfi, align 8
+	%1 = getelementptr %struct.bf* %0, i64 0, i32 1	; the i16 field at byte offset 8; i32 access overlaps it
+	%2 = bitcast i16* %1 to i32*
+	%3 = load i32* %2, align 1
+	%4 = and i32 %3, -65537	; clears bit 16 only (byte at offset 10, mask 0xFE)
+	store i32 %4, i32* %2, align 1
+	%5 = load %struct.bf** @bfi, align 8
+	%6 = getelementptr %struct.bf* %5, i64 0, i32 1
+	%7 = bitcast i16* %6 to i32*
+	%8 = load i32* %7, align 1
+	%9 = and i32 %8, -131073	; clears bit 17 only (same byte, mask 0xFD)
+	store i32 %9, i32* %7, align 1
+	ret void
+}
diff --git a/test/CodeGen/X86/neg-shl-add.ll b/test/CodeGen/X86/neg-shl-add.ll
new file mode 100644
index 0000000..7aebc38
--- /dev/null
+++ b/test/CodeGen/X86/neg-shl-add.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=x86-64 < %s | not grep negq
+
+; These sequences don't need neg instructions; they can be done with
+; a single shift and sub each.
+
+define i64 @foo(i64 %x, i64 %y, i64 %n) nounwind {	; (0-y)<<n + x == x - (y<<n); codegen should use shl+sub, no negq
+  %a = sub i64 0, %y
+  %b = shl i64 %a, %n
+  %c = add i64 %b, %x	; negated term on the left
+  ret i64 %c
+}
+define i64 @boo(i64 %x, i64 %y, i64 %n) nounwind {	; same as @foo with add operands commuted; must also avoid negq
+  %a = sub i64 0, %y
+  %b = shl i64 %a, %n
+  %c = add i64 %x, %b	; negated term on the right
+  ret i64 %c
+}
diff --git a/test/CodeGen/X86/neg_fp.ll b/test/CodeGen/X86/neg_fp.ll
new file mode 100644
index 0000000..57164f2
--- /dev/null
+++ b/test/CodeGen/X86/neg_fp.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mattr=+sse41 -o %t
+; RUN: grep xorps %t | count 1
+
+; Test that when we don't -enable-unsafe-fp-math, we don't do the optimization
+; -0 - (A - B) to (B - A) because A==B, -0 != 0
+
+define float @negfp(float %a, float %b) {	; -(a-b) must keep an explicit negate (one xorps), not fold to b-a: -0.0 != 0.0 without unsafe-fp-math
+entry:
+	%sub = fsub float %a, %b		; <float> [#uses=1]
+	%neg = fsub float -0.000000e+00, %sub		; <float> [#uses=1] - IEEE negation via subtract from -0.0
+	ret float %neg
+}
diff --git a/test/CodeGen/X86/negate-add-zero.ll b/test/CodeGen/X86/negate-add-zero.ll
new file mode 100644
index 0000000..c3f412e
--- /dev/null
+++ b/test/CodeGen/X86/negate-add-zero.ll
@@ -0,0 +1,1145 @@
+; RUN: llc < %s -enable-unsafe-fp-math -march=x86 | not grep xor
+; PR3374
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin7"
+	%struct.AtomList = type { %"struct.CDSListRep<IVMAtom*>"* }
+	%struct.AtomTree = type { %struct.IVM*, %"struct.CDSList<CDSList<HingeNode*> >" }
+	%"struct.CDS::DefaultAlloc" = type <{ i8 }>
+	%"struct.CDS::SingularError" = type { %"struct.CDS::exception" }
+	%"struct.CDS::auto_ptr<IVMAtom>" = type { %struct.IVMAtom* }
+	%"struct.CDS::exception" = type { [300 x i8] }
+	%"struct.CDSList<CDSList<HingeNode*> >" = type { %"struct.CDSListRep<CDSList<HingeNode*> >"* }
+	%"struct.CDSList<CDSList<int> >" = type { %"struct.CDSListRep<CDSList<int> >"* }
+	%"struct.CDSList<HingeNode*>" = type { %"struct.CDSListRep<HingeNode*>"* }
+	%"struct.CDSList<InternalDynamics::HingeSpec>" = type { %"struct.CDSListRep<InternalDynamics::HingeSpec>"* }
+	%"struct.CDSList<Loop>" = type { %"struct.CDSListRep<Loop>"* }
+	%"struct.CDSList<Pair<int, int> >" = type { %"struct.CDSListRep<Pair<int, int> >"* }
+	%"struct.CDSList<int>" = type { %"struct.CDSListRep<int>"* }
+	%"struct.CDSListRep<CDSList<HingeNode*> >" = type opaque
+	%"struct.CDSListRep<CDSList<int> >" = type opaque
+	%"struct.CDSListRep<HingeNode*>" = type { i32, i32, %struct.HingeNode**, i32 }
+	%"struct.CDSListRep<IVMAtom*>" = type { i32, i32, %struct.IVMAtom**, i32 }
+	%"struct.CDSListRep<InternalDynamics::HingeSpec>" = type opaque
+	%"struct.CDSListRep<Loop>" = type opaque
+	%"struct.CDSListRep<Pair<int, int> >" = type opaque
+	%"struct.CDSListRep<int>" = type { i32, i32, i32*, i32 }
+	%"struct.CDSMatrixBase<double>" = type { %"struct.CDSMatrixRep<double>"* }
+	%"struct.CDSMatrixRep<double>" = type opaque
+	%"struct.CDSStringRep<char>" = type { i8*, i32, i32, i32, i32 }
+	%"struct.CDSVector<Vec3,0,CDS::DefaultAlloc>" = type { %"struct.CDSVectorBase<Vec3,CDS::DefaultAlloc>" }
+	%"struct.CDSVector<double,0,CDS::DefaultAlloc>" = type { %"struct.CDSVectorBase<double,CDS::DefaultAlloc>" }
+	%"struct.CDSVectorBase<Vec3,CDS::DefaultAlloc>" = type { %"struct.CDSVectorRep<Vec3,CDS::DefaultAlloc>"* }
+	%"struct.CDSVectorBase<double,CDS::DefaultAlloc>" = type { %"struct.CDSVectorRep<double,CDS::DefaultAlloc>"* }
+	%"struct.CDSVectorRep<Vec3,CDS::DefaultAlloc>" = type { i32, %"struct.CDS::DefaultAlloc", %struct.Vec3*, i32 }
+	%"struct.CDSVectorRep<double,CDS::DefaultAlloc>" = type { i32, %"struct.CDS::DefaultAlloc", double*, i32 }
+	%"struct.FixedMatrix<double,1,1,0,0>" = type { %"struct.FixedMatrixBase<double,1,1>" }
+	%"struct.FixedMatrix<double,1,3,0,0>" = type { %"struct.FixedMatrixBase<double,1,3>" }
+	%"struct.FixedMatrix<double,1,6,0,0>" = type { %"struct.FixedMatrixBase<double,1,6>" }
+	%"struct.FixedMatrix<double,2,2,0,0>" = type { %"struct.FixedMatrixBase<double,2,2>" }
+	%"struct.FixedMatrix<double,2,6,0,0>" = type { %"struct.FixedMatrixBase<double,2,6>" }
+	%"struct.FixedMatrix<double,3,3,0,0>" = type { %"struct.FixedMatrixBase<double,3,3>" }
+	%"struct.FixedMatrix<double,3,6,0,0>" = type { %"struct.FixedMatrixBase<double,3,6>" }
+	%"struct.FixedMatrix<double,5,5,0,0>" = type { %"struct.FixedMatrixBase<double,5,5>" }
+	%"struct.FixedMatrix<double,5,6,0,0>" = type { %"struct.FixedMatrixBase<double,5,6>" }
+	%"struct.FixedMatrixBase<double,1,1>" = type { [1 x double] }
+	%"struct.FixedMatrixBase<double,1,3>" = type { [3 x double] }
+	%"struct.FixedMatrixBase<double,1,6>" = type { [6 x double] }
+	%"struct.FixedMatrixBase<double,2,2>" = type { [4 x double] }
+	%"struct.FixedMatrixBase<double,2,6>" = type { [12 x double] }
+	%"struct.FixedMatrixBase<double,3,3>" = type { [9 x double] }
+	%"struct.FixedMatrixBase<double,3,6>" = type { [18 x double] }
+	%"struct.FixedMatrixBase<double,5,5>" = type { [25 x double] }
+	%"struct.FixedMatrixBase<double,5,6>" = type { [30 x double] }
+	%"struct.FixedMatrixBase<double,6,6>" = type { [36 x double] }
+	%"struct.FixedVector<double,2,0>" = type { %"struct.FixedVectorBase<double,2>" }
+	%"struct.FixedVector<double,5,0>" = type { %"struct.FixedVectorBase<double,5>" }
+	%"struct.FixedVectorBase<double,2>" = type { [2 x double] }
+	%"struct.FixedVectorBase<double,5>" = type { [5 x double] }
+	%struct.HNodeOrigin = type { %struct.HingeNode }
+	%struct.HNodeRotate2 = type { %"struct.HingeNodeSpec<2>", %struct.Vec3, %struct.Vec3, %struct.Vec3, %struct.Vec3, %struct.Vec3, %struct.Mat3, %struct.Mat3, %struct.Vec3, %"struct.CDS::auto_ptr<IVMAtom>", %"struct.CDSVector<Vec3,0,CDS::DefaultAlloc>" }
+	%struct.HNodeRotate3 = type { %"struct.HingeNodeSpec<3>", %struct.Vec4, %struct.Vec4, %struct.Vec4, %struct.Vec3, %"struct.CDS::auto_ptr<IVMAtom>", %"struct.CDSVector<Vec3,0,CDS::DefaultAlloc>", double, double, double, double, double, double, i8 }
+	%struct.HNodeTorsion = type { %"struct.HingeNodeSpec<1>", %struct.Vec3, %"struct.CDSVector<Vec3,0,CDS::DefaultAlloc>", %struct.Vec3, %struct.Mat3 }
+	%struct.HNodeTranslate = type { %"struct.HingeNodeSpec<3>", %struct.IVMAtom*, %struct.Vec3, %"struct.CDSVector<Vec3,0,CDS::DefaultAlloc>" }
+	%struct.HNodeTranslateRotate2 = type { %"struct.HingeNodeSpec<5>", %struct.Vec3, %struct.Vec3, %struct.Vec3, %struct.Vec3, %struct.Vec3, %struct.Mat3, %struct.Mat3, %struct.Vec3, %"struct.CDS::auto_ptr<IVMAtom>", %"struct.CDSVector<Vec3,0,CDS::DefaultAlloc>" }
+	%struct.HNodeTranslateRotate3 = type { %"struct.HingeNodeSpec<6>", %struct.Vec4, %struct.Vec4, %struct.Vec4, %struct.Vec3, %"struct.CDS::auto_ptr<IVMAtom>", %"struct.CDSVector<Vec3,0,CDS::DefaultAlloc>", double, double, double, double, double, double, i8 }
+	%struct.HingeNode = type { i32 (...)**, %struct.HingeNode*, %"struct.CDSList<HingeNode*>", i32, %struct.AtomList, %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %struct.PhiMatrix, %struct.Mat6, %struct.Mat6, %"struct.FixedMatrix<double,1,6,0,0>", %struct.Mat6, %"struct.FixedMatrix<double,1,6,0,0>", %struct.Mat3, %struct.Mat6, %struct.IVM*, %struct.IVMAtom* }
+	%"struct.HingeNodeSpec<1>" = type { %struct.HingeNode, i32, double, %struct.InertiaTensor, %struct.Mat6, %struct.Vec3, %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,1,0,0>", %"struct.FixedMatrix<double,1,1,0,0>", %"struct.FixedMatrix<double,1,1,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,1,0,0>", %"struct.FixedMatrix<double,1,1,0,0>", %"struct.FixedMatrix<double,1,1,0,0>", %"struct.FixedMatrix<double,1,1,0,0>", %"struct.FixedMatrix<double,1,6,0,0>" }
+	%"struct.HingeNodeSpec<2>" = type { %struct.HingeNode, i32, double, %struct.InertiaTensor, %struct.Mat6, %struct.Vec3, %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedVector<double,2,0>", %"struct.FixedVector<double,2,0>", %"struct.FixedVector<double,2,0>", %"struct.FixedMatrix<double,2,6,0,0>", %"struct.FixedVector<double,2,0>", %"struct.FixedVector<double,2,0>", %"struct.FixedVector<double,2,0>", %"struct.FixedMatrix<double,2,2,0,0>", %"struct.FixedMatrix<double,2,6,0,0>" }
+	%"struct.HingeNodeSpec<3>" = type { %struct.HingeNode, i32, double, %struct.InertiaTensor, %struct.Mat6, %struct.Vec3, %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,3,0,0>", %"struct.FixedMatrix<double,1,3,0,0>", %"struct.FixedMatrix<double,1,3,0,0>", %"struct.FixedMatrix<double,3,6,0,0>", %"struct.FixedMatrix<double,1,3,0,0>", %"struct.FixedMatrix<double,1,3,0,0>", %"struct.FixedMatrix<double,1,3,0,0>", %"struct.FixedMatrix<double,3,3,0,0>", %"struct.FixedMatrix<double,3,6,0,0>" }
+	%"struct.HingeNodeSpec<5>" = type { %struct.HingeNode, i32, double, %struct.InertiaTensor, %struct.Mat6, %struct.Vec3, %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedVector<double,5,0>", %"struct.FixedVector<double,5,0>", %"struct.FixedVector<double,5,0>", %"struct.FixedMatrix<double,5,6,0,0>", %"struct.FixedVector<double,5,0>", %"struct.FixedVector<double,5,0>", %"struct.FixedVector<double,5,0>", %"struct.FixedMatrix<double,5,5,0,0>", %"struct.FixedMatrix<double,5,6,0,0>" }
+	%"struct.HingeNodeSpec<6>" = type { %struct.HingeNode, i32, double, %struct.InertiaTensor, %struct.Mat6, %struct.Vec3, %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %struct.Mat6, %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %"struct.FixedMatrix<double,1,6,0,0>", %struct.Mat6, %struct.Mat6 }
+	%struct.IVM = type { i32 (...)**, %struct.AtomTree*, %struct.Integrator*, %struct.LengthConstraints*, i32, i32, i32, i8, i8, i8, i8, double, double, double, double, double, double, double, double, double, i32, double, double, double, double, double, double, %"struct.CDSList<Loop>", %"struct.CDSList<Pair<int, int> >", %struct.AtomList, %"struct.CDSList<CDSList<int> >", %"struct.CDSList<InternalDynamics::HingeSpec>", %struct.String, %"struct.CDSList<int>", i32 (%"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)*, double (%"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)*, i32 (%"struct.CDSVector<Vec3,0,CDS::DefaultAlloc>"*)*, double (%"struct.CDSVector<Vec3,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<Vec3,0,CDS::DefaultAlloc>"*)* }
+	%struct.IVMAtom = type { i32, %struct.HingeNode*, %struct.AtomList, %struct.Vec3, %struct.Vec3, %struct.Vec3, double, double }
+	%struct.InertiaTensor = type { %struct.Mat3 }
+	%struct.Integrator = type { i32 (...)**, %"struct.CDSVector<double,0,CDS::DefaultAlloc>", %"struct.CDSVector<double,0,CDS::DefaultAlloc>", %struct.IVM* }
+	%"struct.InternalDynamics::HingeSpec" = type { %struct.String, i32, i32, %"struct.CDSList<int>" }
+	%struct.LengthConstraints = type { double, i32, i32, %struct.IVM*, %struct.LengthConstraintsPrivates* }
+	%struct.LengthConstraintsPrivates = type opaque
+	%struct.Mat3 = type { %"struct.FixedMatrix<double,3,3,0,0>" }
+	%struct.Mat6 = type { %"struct.FixedMatrixBase<double,6,6>" }
+	%"struct.MatrixTools::InverseResults<FullMatrix<double> >" = type { %"struct.CDSVector<double,0,CDS::DefaultAlloc>", i32 }
+	%struct.PhiMatrix = type { %struct.Vec3 }
+	%struct.PhiMatrixTranspose = type { %struct.PhiMatrix* }
+	%struct.RMat = type { %"struct.CDSMatrixBase<double>" }
+	%struct.String = type { %"struct.CDSStringRep<char>"* }
+	%"struct.SubMatrix<FixedMatrix<double, 6, 6, 0, 0> >" = type { %struct.Mat6*, i32, i32, i32, i32 }
+	%"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >" = type { %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, i32, i32 }
+	%"struct.SubVector<FixedVector<double, 6, 0> >" = type { %"struct.FixedMatrix<double,1,6,0,0>"*, i32, i32 }
+	%struct.Vec3 = type { %"struct.FixedMatrix<double,1,3,0,0>" }
+	%struct.Vec4 = type { %"struct.FixedMatrix<double,2,2,0,0>" }
+	%struct.__class_type_info_pseudo = type { %struct.__type_info_pseudo }
+	%struct.__si_class_type_info_pseudo = type { %struct.__type_info_pseudo, %"struct.std::type_info"* }
+	%struct.__type_info_pseudo = type { i8*, i8* }
+	%"struct.std::basic_ios<char,std::char_traits<char> >" = type { %"struct.std::ios_base", %"struct.std::basic_ostream<char,std::char_traits<char> >"*, i8, i8, %"struct.std::basic_streambuf<char,std::char_traits<char> >"*, %"struct.std::ctype<char>"*, %"struct.std::num_get<char,std::istreambuf_iterator<char, std::char_traits<char> > >"*, %"struct.std::num_get<char,std::istreambuf_iterator<char, std::char_traits<char> > >"* }
+	%"struct.std::basic_ostream<char,std::char_traits<char> >" = type { i32 (...)**, %"struct.std::basic_ios<char,std::char_traits<char> >" }
+	%"struct.std::basic_streambuf<char,std::char_traits<char> >" = type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %"struct.std::locale" }
+	%"struct.std::ctype<char>" = type { %"struct.std::locale::facet", i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }
+	%"struct.std::ios_base" = type { i32 (...)**, i32, i32, i32, i32, i32, %"struct.std::ios_base::_Callback_list"*, %"struct.std::ios_base::_Words", [8 x %"struct.std::ios_base::_Words"], i32, %"struct.std::ios_base::_Words"*, %"struct.std::locale" }
+	%"struct.std::ios_base::_Callback_list" = type { %"struct.std::ios_base::_Callback_list"*, void (i32, %"struct.std::ios_base"*, i32)*, i32, i32 }
+	%"struct.std::ios_base::_Words" = type { i8*, i32 }
+	%"struct.std::locale" = type { %"struct.std::locale::_Impl"* }
+	%"struct.std::locale::_Impl" = type { i32, %"struct.std::locale::facet"**, i32, %"struct.std::locale::facet"**, i8** }
+	%"struct.std::locale::facet" = type { i32 (...)**, i32 }
+	%"struct.std::num_get<char,std::istreambuf_iterator<char, std::char_traits<char> > >" = type { %"struct.std::locale::facet" }
+	%"struct.std::type_info" = type { i32 (...)**, i8* }
+@_ZN9HingeNode7DEG2RADE = external constant double, align 8		; <double*> [#uses=0]
+@"\01LC" = external constant [8 x i8]		; <[8 x i8]*> [#uses=0]
+@"\01LC1" = external constant [7 x i8]		; <[7 x i8]*> [#uses=0]
+@"\01LC2" = external constant [10 x i8]		; <[10 x i8]*> [#uses=0]
+@"\01LC3" = external constant [5 x i8]		; <[5 x i8]*> [#uses=0]
+@"\01LC4" = external constant [8 x i8]		; <[8 x i8]*> [#uses=0]
+@"\01LC5" = external constant [8 x i8]		; <[8 x i8]*> [#uses=0]
+@"\01LC6" = external constant [7 x i8]		; <[7 x i8]*> [#uses=0]
+@"\01LC7" = external constant [8 x i8]		; <[8 x i8]*> [#uses=0]
+@"\01LC8" = external constant [3 x i8]		; <[3 x i8]*> [#uses=0]
+@"\01LC9" = external constant [3 x i8]		; <[3 x i8]*> [#uses=0]
+@"\01LC10" = external constant [3 x i8]		; <[3 x i8]*> [#uses=0]
+@_ZStL8__ioinit = external global %"struct.CDS::DefaultAlloc"		; <%"struct.CDS::DefaultAlloc"*> [#uses=0]
+@__dso_handle = external global i8*		; <i8**> [#uses=0]
+@_ZTIN9HingeNode17VirtualBaseMethodE = external constant %struct.__class_type_info_pseudo		; <%struct.__class_type_info_pseudo*> [#uses=0]
+@_ZTVN10__cxxabiv117__class_type_infoE = external constant [0 x i32 (...)*]		; <[0 x i32 (...)*]*> [#uses=0]
+@_ZTSN9HingeNode17VirtualBaseMethodE = external constant [32 x i8], align 4		; <[32 x i8]*> [#uses=0]
+@_ZTV9HingeNode = external constant [31 x i32 (...)*], align 32		; <[31 x i32 (...)*]*> [#uses=0]
+@_ZTI9HingeNode = external constant %struct.__class_type_info_pseudo		; <%struct.__class_type_info_pseudo*> [#uses=0]
+@_ZTS9HingeNode = external constant [11 x i8]		; <[11 x i8]*> [#uses=0]
+@_ZTV11HNodeOrigin = external constant [31 x i32 (...)*], align 32		; <[31 x i32 (...)*]*> [#uses=0]
+@_ZTI11HNodeOrigin = external constant %struct.__si_class_type_info_pseudo		; <%struct.__si_class_type_info_pseudo*> [#uses=0]
+@_ZTVN10__cxxabiv120__si_class_type_infoE = external constant [0 x i32 (...)*]		; <[0 x i32 (...)*]*> [#uses=0]
+@_ZTS11HNodeOrigin = external constant [14 x i8]		; <[14 x i8]*> [#uses=0]
+@_ZTV13HingeNodeSpecILi1EE = external constant [33 x i32 (...)*], align 32		; <[33 x i32 (...)*]*> [#uses=0]
+@_ZTI13HingeNodeSpecILi1EE = external constant %struct.__si_class_type_info_pseudo		; <%struct.__si_class_type_info_pseudo*> [#uses=0]
+@_ZTS13HingeNodeSpecILi1EE = external constant [22 x i8]		; <[22 x i8]*> [#uses=0]
+@_ZTV13HingeNodeSpecILi3EE = external constant [33 x i32 (...)*], align 32		; <[33 x i32 (...)*]*> [#uses=0]
+@_ZTI13HingeNodeSpecILi3EE = external constant %struct.__si_class_type_info_pseudo		; <%struct.__si_class_type_info_pseudo*> [#uses=0]
+@_ZTS13HingeNodeSpecILi3EE = external constant [22 x i8]		; <[22 x i8]*> [#uses=0]
+@_ZTV13HingeNodeSpecILi2EE = external constant [33 x i32 (...)*], align 32		; <[33 x i32 (...)*]*> [#uses=0]
+@_ZTI13HingeNodeSpecILi2EE = external constant %struct.__si_class_type_info_pseudo		; <%struct.__si_class_type_info_pseudo*> [#uses=0]
+@_ZTS13HingeNodeSpecILi2EE = external constant [22 x i8]		; <[22 x i8]*> [#uses=0]
+@_ZTV13HingeNodeSpecILi6EE = external constant [33 x i32 (...)*], align 32		; <[33 x i32 (...)*]*> [#uses=0]
+@_ZTI13HingeNodeSpecILi6EE = external constant %struct.__si_class_type_info_pseudo		; <%struct.__si_class_type_info_pseudo*> [#uses=0]
+@_ZTS13HingeNodeSpecILi6EE = external constant [22 x i8]		; <[22 x i8]*> [#uses=0]
+@_ZTV13HingeNodeSpecILi5EE = external constant [33 x i32 (...)*], align 32		; <[33 x i32 (...)*]*> [#uses=0]
+@_ZTI13HingeNodeSpecILi5EE = external constant %struct.__si_class_type_info_pseudo		; <%struct.__si_class_type_info_pseudo*> [#uses=0]
+@_ZTS13HingeNodeSpecILi5EE = external constant [22 x i8]		; <[22 x i8]*> [#uses=0]
+@_ZSt4cout = external global %"struct.std::basic_ostream<char,std::char_traits<char> >"		; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=0]
+@"\01LC11" = external constant [10 x i8]		; <[10 x i8]*> [#uses=0]
+@"\01LC12" = external constant [8 x i8]		; <[8 x i8]*> [#uses=0]
+@"\01LC13" = external constant [10 x i8]		; <[10 x i8]*> [#uses=0]
+@_ZSt4cerr = external global %"struct.std::basic_ostream<char,std::char_traits<char> >"		; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=0]
+@"\01LC14" = external constant [29 x i8]		; <[29 x i8]*> [#uses=0]
+@"\01LC15" = external constant [11 x i8]		; <[11 x i8]*> [#uses=0]
+@"\01LC16" = external constant [13 x i8]		; <[13 x i8]*> [#uses=0]
+@"\01LC17" = external constant [21 x i8]		; <[21 x i8]*> [#uses=0]
+@"\01LC18" = external constant [8 x i8]		; <[8 x i8]*> [#uses=0]
+@"\01LC19" = external constant [4 x i8]		; <[4 x i8]*> [#uses=0]
+@"\01LC20" = external constant [42 x i8]		; <[42 x i8]*> [#uses=0]
+@_ZTIN16InternalDynamics9ExceptionE = external constant %struct.__class_type_info_pseudo		; <%struct.__class_type_info_pseudo*> [#uses=0]
+@_ZTSN16InternalDynamics9ExceptionE = external constant [31 x i8], align 4		; <[31 x i8]*> [#uses=0]
+@_ZTIN3CDS13SingularErrorE = external constant %struct.__si_class_type_info_pseudo		; <%struct.__si_class_type_info_pseudo*> [#uses=0]
+@_ZTSN3CDS13SingularErrorE = external constant [22 x i8]		; <[22 x i8]*> [#uses=0]
+@_ZTIN3CDS9exceptionE = external constant %struct.__class_type_info_pseudo		; <%struct.__class_type_info_pseudo*> [#uses=0]
+@_ZTSN3CDS9exceptionE = external constant [17 x i8]		; <[17 x i8]*> [#uses=0]
+@_ZTV12HNodeTorsion = external constant [33 x i32 (...)*], align 32		; <[33 x i32 (...)*]*> [#uses=0]
+@_ZTI12HNodeTorsion = external constant %struct.__si_class_type_info_pseudo		; <%struct.__si_class_type_info_pseudo*> [#uses=0]
+@_ZTS12HNodeTorsion = external constant [15 x i8]		; <[15 x i8]*> [#uses=0]
+@_ZTV12HNodeRotate3 = external constant [33 x i32 (...)*], align 32		; <[33 x i32 (...)*]*> [#uses=0]
+@_ZTI12HNodeRotate3 = external constant %struct.__si_class_type_info_pseudo		; <%struct.__si_class_type_info_pseudo*> [#uses=0]
+@_ZTS12HNodeRotate3 = external constant [15 x i8]		; <[15 x i8]*> [#uses=0]
+@_ZTV12HNodeRotate2 = external constant [33 x i32 (...)*], align 32		; <[33 x i32 (...)*]*> [#uses=0]
+@_ZTI12HNodeRotate2 = external constant %struct.__si_class_type_info_pseudo		; <%struct.__si_class_type_info_pseudo*> [#uses=0]
+@_ZTS12HNodeRotate2 = external constant [15 x i8]		; <[15 x i8]*> [#uses=0]
+@_ZTV21HNodeTranslateRotate3 = external constant [33 x i32 (...)*], align 32		; <[33 x i32 (...)*]*> [#uses=0]
+@_ZTI21HNodeTranslateRotate3 = external constant %struct.__si_class_type_info_pseudo		; <%struct.__si_class_type_info_pseudo*> [#uses=0]
+@_ZTS21HNodeTranslateRotate3 = external constant [24 x i8]		; <[24 x i8]*> [#uses=0]
+@_ZTV21HNodeTranslateRotate2 = external constant [33 x i32 (...)*], align 32		; <[33 x i32 (...)*]*> [#uses=0]
+@_ZTI21HNodeTranslateRotate2 = external constant %struct.__si_class_type_info_pseudo		; <%struct.__si_class_type_info_pseudo*> [#uses=0]
+@_ZTS21HNodeTranslateRotate2 = external constant [24 x i8]		; <[24 x i8]*> [#uses=0]
+@_ZTV14HNodeTranslate = external constant [33 x i32 (...)*], align 32		; <[33 x i32 (...)*]*> [#uses=0]
+@_ZTI14HNodeTranslate = external constant %struct.__si_class_type_info_pseudo		; <%struct.__si_class_type_info_pseudo*> [#uses=0]
+@_ZTS14HNodeTranslate = external constant [17 x i8]		; <[17 x i8]*> [#uses=0]
+@"\01LC21" = external constant [31 x i8]		; <[31 x i8]*> [#uses=0]
+@"\01LC22" = external constant [6 x i8]		; <[6 x i8]*> [#uses=0]
+@"\01LC23" = external constant [12 x i8]		; <[12 x i8]*> [#uses=0]
+@"\01LC24" = external constant [5 x i8]		; <[5 x i8]*> [#uses=0]
+@"\01LC25" = external constant [7 x i8]		; <[7 x i8]*> [#uses=0]
+@"\01LC26" = external constant [7 x i8]		; <[7 x i8]*> [#uses=0]
+@"\01LC27" = external constant [43 x i8]		; <[43 x i8]*> [#uses=0]
+@"\01LC28" = external constant [15 x i8]		; <[15 x i8]*> [#uses=0]
+@"\01LC29" = external constant [20 x i8]		; <[20 x i8]*> [#uses=0]
+@"\01LC30" = external constant [41 x i8]		; <[41 x i8]*> [#uses=0]
+@llvm.global_ctors = external global [1 x { i32, void ()* }]		; <[1 x { i32, void ()* }]*> [#uses=0]
+
+declare void @_GLOBAL__I__ZN9HingeNode7DEG2RADE() section "__TEXT,__StaticInit,regular,pure_instructions"
+
+declare void @_ZN9HingeNode16velFromCartesianEv(%struct.HingeNode*) nounwind
+
+declare i32 @_ZNK9HingeNode6offsetEv(%struct.HingeNode*) nounwind
+
+declare i32 @_ZNK9HingeNode6getDOFEv(%struct.HingeNode*) nounwind
+
+declare i32 @_ZNK9HingeNode6getDimEv(%struct.HingeNode*) nounwind
+
+declare double @_ZN9HingeNode8kineticEEv(%struct.HingeNode*) nounwind
+
+declare double @_ZN9HingeNode8approxKEEv(%struct.HingeNode*) nounwind
+
+declare i8* @_ZN9HingeNode4typeEv(%struct.HingeNode*) nounwind
+
+declare i8* @_ZN11HNodeOrigin4typeEv(%struct.HNodeOrigin*) nounwind
+
+declare void @_ZN11HNodeOrigin5calcPEv(%struct.HNodeOrigin*) nounwind
+
+declare void @_ZN11HNodeOrigin5calcZEv(%struct.HNodeOrigin*) nounwind
+
+declare void @_ZN11HNodeOrigin9calcPandZEv(%struct.HNodeOrigin*) nounwind
+
+declare void @_ZN11HNodeOrigin9calcAccelEv(%struct.HNodeOrigin*) nounwind
+
+declare void @_ZN11HNodeOrigin17calcInternalForceEv(%struct.HNodeOrigin*) nounwind
+
+declare void @_ZN11HNodeOrigin18prepareVelInternalEv(%struct.HNodeOrigin*) nounwind
+
+declare void @_ZN11HNodeOrigin13propagateSVelERK11FixedVectorIdLi6ELi0EE(%struct.HNodeOrigin*, %"struct.FixedMatrix<double,1,6,0,0>"*) nounwind
+
+declare void @_ZN11HNodeOrigin9setPosVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEES5_(%struct.HNodeOrigin*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) nounwind
+
+declare void @_ZN11HNodeOrigin6setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeOrigin*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) nounwind
+
+declare void @_ZN11HNodeOrigin14setVelFromSVelERK11FixedVectorIdLi6ELi0EE(%struct.HNodeOrigin*, %"struct.FixedMatrix<double,1,6,0,0>"*) nounwind
+
+declare void @_ZN11HNodeOrigin18enforceConstraintsER9CDSVectorIdLi1EN3CDS12DefaultAllocEES4_(%struct.HNodeOrigin*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) nounwind
+
+declare void @_ZN11HNodeOrigin5printEi(%struct.HNodeOrigin*, i32) nounwind
+
+declare void @_ZN11HNodeOrigin6getPosER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeOrigin*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) nounwind
+
+declare void @_ZN11HNodeOrigin6getVelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeOrigin*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) nounwind
+
+declare void @_ZN11HNodeOrigin8getAccelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeOrigin*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) nounwind
+
+declare void @_ZN11HNodeOrigin16getInternalForceER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeOrigin*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) nounwind
+
+declare void @_ZN11HNodeOrigin5calcYEv(%struct.HNodeOrigin*) nounwind
+
+declare i8* @_ZN14HNodeTranslate4typeEv(%struct.HNodeTranslate*) nounwind
+
+declare i8* @_ZN21HNodeTranslateRotate34typeEv(%struct.HNodeTranslateRotate3*) nounwind
+
+declare i32 @_ZNK21HNodeTranslateRotate36getDimEv(%struct.HNodeTranslateRotate3*) nounwind
+
+declare i8* @_ZN12HNodeRotate34typeEv(%struct.HNodeRotate3*) nounwind
+
+declare i32 @_ZNK12HNodeRotate36getDimEv(%struct.HNodeRotate3*) nounwind
+
+declare i8* @_ZN12HNodeRotate24typeEv(%struct.HNodeRotate2*) nounwind
+
+declare i32 @_ZNK12HNodeRotate26getDimEv(%struct.HNodeRotate2*) nounwind
+
+declare i8* @_ZN21HNodeTranslateRotate24typeEv(%struct.HNodeTranslateRotate2*) nounwind
+
+declare i32 @_ZNK21HNodeTranslateRotate26getDimEv(%struct.HNodeTranslateRotate2*) nounwind
+
+declare i8* @_ZN12HNodeTorsion4typeEv(%struct.HNodeTorsion*) nounwind
+
+declare fastcc double @_ZL12sumMassToTipPK9HingeNode(%struct.HingeNode*)
+
+declare void @_ZN13InertiaTensor4calcERK4Vec3RK7CDSListIP7IVMAtomE(%struct.InertiaTensor*, %struct.Vec3*, %struct.AtomList*) nounwind
+
+declare fastcc double @_ZL15sumInertiaToTipPK9HingeNodeRK4Vec3S4_(%struct.HingeNode*, %struct.Vec3*, %struct.Vec3*)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsI11FixedVectorIdLi6ELi0EEERSoS2_RK9SubVectorIT_E(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.SubVector<FixedVector<double, 6, 0> >"*)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_St5_Setw(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, i32)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, i8*)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEd(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, double)
+
+declare void @_Z14orthoTransformIdLi3ELi3EE11FixedMatrixIT_XT1_EXT1_ELi0ELi0EERKS0_IS1_XT0_EXT0_ELi0ELi0EERKS0_IS1_XT1_EXT0_ELi0ELi0EE(%"struct.FixedMatrix<double,3,3,0,0>"* noalias sret, %"struct.FixedMatrix<double,3,3,0,0>"*, %"struct.FixedMatrix<double,3,3,0,0>"*)
+
+declare void @_ZN12HNodeRotate27calcRotEv(%struct.HNodeRotate2*)
+
+declare void @_ZN21HNodeTranslateRotate27calcRotEv(%struct.HNodeTranslateRotate2*)
+
+declare void @_ZmlIdLi6ELi6EE11FixedVectorIT_XT0_ELi0EERK11FixedMatrixIS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT1_ELi0EE(%"struct.FixedMatrix<double,1,6,0,0>"* noalias sret, %struct.Mat6*, %"struct.FixedMatrix<double,1,6,0,0>"*)
+
+declare void @_ZmlIdLi6ELi6ELi6EE11FixedMatrixIT_XT0_EXT2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT1_EXT2_ELi0ELi0EE(%struct.Mat6* noalias sret, %struct.Mat6*, %struct.Mat6*)
+
+declare void @_ZmlIdLi6ELi6ELi3EE11FixedMatrixIT_XT0_EXT2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT1_EXT2_ELi0ELi0EE(%"struct.FixedMatrix<double,3,6,0,0>"* noalias sret, %struct.Mat6*, %"struct.FixedMatrix<double,3,6,0,0>"*)
+
+declare void @_ZmlIdLi6ELi6ELi2EE11FixedMatrixIT_XT0_EXT2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT1_EXT2_ELi0ELi0EE(%"struct.FixedMatrix<double,2,6,0,0>"* noalias sret, %struct.Mat6*, %"struct.FixedMatrix<double,2,6,0,0>"*)
+
+declare void @_ZmlIdLi5ELi6EE11FixedVectorIT_XT0_ELi0EERK11FixedMatrixIS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT1_ELi0EE(%"struct.FixedVector<double,5,0>"* noalias sret, %"struct.FixedMatrix<double,5,6,0,0>"*, %"struct.FixedMatrix<double,1,6,0,0>"*)
+
+declare void @_ZmlIdLi6ELi6ELi5EE11FixedMatrixIT_XT0_EXT2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT1_EXT2_ELi0ELi0EE(%"struct.FixedMatrix<double,5,6,0,0>"* noalias sret, %struct.Mat6*, %"struct.FixedMatrix<double,5,6,0,0>"*)
+
+declare void @_ZN12HNodeRotate39setPosVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEES5_(%struct.HNodeRotate3*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN12HNodeRotate29setPosVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEES5_(%struct.HNodeRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN21HNodeTranslateRotate39setPosVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEES5_(%struct.HNodeTranslateRotate3*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN21HNodeTranslateRotate29setPosVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEES5_(%struct.HNodeTranslateRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare i32 @_ZNK13HingeNodeSpecILi1EE6offsetEv(%"struct.HingeNodeSpec<1>"*) nounwind
+
+declare %struct.Vec3* @_ZNK13HingeNodeSpecILi1EE5posCMEv(%"struct.HingeNodeSpec<1>"*) nounwind
+
+declare double* @_ZNK13HingeNodeSpecILi1EE4massEv(%"struct.HingeNodeSpec<1>"*) nounwind
+
+declare void @_ZN13HingeNodeSpecILi1EE9calcPandZEv(%"struct.HingeNodeSpec<1>"*)
+
+declare i32 @_ZNK13HingeNodeSpecILi1EE6getDOFEv(%"struct.HingeNodeSpec<1>"*) nounwind
+
+declare i32 @_ZNK13HingeNodeSpecILi1EE6getDimEv(%"struct.HingeNodeSpec<1>"*) nounwind
+
+declare void @_ZN13HingeNodeSpecILi1EE18enforceConstraintsER9CDSVectorIdLi1EN3CDS12DefaultAllocEES5_(%"struct.HingeNodeSpec<1>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) nounwind
+
+declare i32 @_ZNK13HingeNodeSpecILi5EE6offsetEv(%"struct.HingeNodeSpec<5>"*) nounwind
+
+declare %struct.Vec3* @_ZNK13HingeNodeSpecILi5EE5posCMEv(%"struct.HingeNodeSpec<5>"*) nounwind
+
+declare double* @_ZNK13HingeNodeSpecILi5EE4massEv(%"struct.HingeNodeSpec<5>"*) nounwind
+
+declare void @_ZN13HingeNodeSpecILi5EE9calcPandZEv(%"struct.HingeNodeSpec<5>"*)
+
+declare i32 @_ZNK13HingeNodeSpecILi5EE6getDOFEv(%"struct.HingeNodeSpec<5>"*) nounwind
+
+declare i32 @_ZNK13HingeNodeSpecILi5EE6getDimEv(%"struct.HingeNodeSpec<5>"*) nounwind
+
+declare void @_ZN13HingeNodeSpecILi5EE18enforceConstraintsER9CDSVectorIdLi1EN3CDS12DefaultAllocEES5_(%"struct.HingeNodeSpec<5>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) nounwind
+
+declare i32 @_ZNK13HingeNodeSpecILi2EE6offsetEv(%"struct.HingeNodeSpec<2>"*) nounwind
+
+declare %struct.Vec3* @_ZNK13HingeNodeSpecILi2EE5posCMEv(%"struct.HingeNodeSpec<2>"*) nounwind
+
+declare double* @_ZNK13HingeNodeSpecILi2EE4massEv(%"struct.HingeNodeSpec<2>"*) nounwind
+
+declare void @_ZN13HingeNodeSpecILi2EE9calcPandZEv(%"struct.HingeNodeSpec<2>"*)
+
+declare i32 @_ZNK13HingeNodeSpecILi2EE6getDOFEv(%"struct.HingeNodeSpec<2>"*) nounwind
+
+declare i32 @_ZNK13HingeNodeSpecILi2EE6getDimEv(%"struct.HingeNodeSpec<2>"*) nounwind
+
+declare void @_ZN13HingeNodeSpecILi2EE18enforceConstraintsER9CDSVectorIdLi1EN3CDS12DefaultAllocEES5_(%"struct.HingeNodeSpec<2>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) nounwind
+
+declare i32 @_ZNK13HingeNodeSpecILi3EE6offsetEv(%"struct.HingeNodeSpec<3>"*) nounwind
+
+declare %struct.Vec3* @_ZNK13HingeNodeSpecILi3EE5posCMEv(%"struct.HingeNodeSpec<3>"*) nounwind
+
+declare double* @_ZNK13HingeNodeSpecILi3EE4massEv(%"struct.HingeNodeSpec<3>"*) nounwind
+
+declare void @_ZN13HingeNodeSpecILi3EE9calcPandZEv(%"struct.HingeNodeSpec<3>"*)
+
+declare i32 @_ZNK13HingeNodeSpecILi3EE6getDOFEv(%"struct.HingeNodeSpec<3>"*) nounwind
+
+declare i32 @_ZNK13HingeNodeSpecILi6EE6offsetEv(%"struct.HingeNodeSpec<6>"*) nounwind
+
+declare %struct.Vec3* @_ZNK13HingeNodeSpecILi6EE5posCMEv(%"struct.HingeNodeSpec<6>"*) nounwind
+
+declare double* @_ZNK13HingeNodeSpecILi6EE4massEv(%"struct.HingeNodeSpec<6>"*) nounwind
+
+declare void @_ZN13HingeNodeSpecILi6EE9calcPandZEv(%"struct.HingeNodeSpec<6>"*)
+
+declare i32 @_ZNK13HingeNodeSpecILi6EE6getDOFEv(%"struct.HingeNodeSpec<6>"*) nounwind
+
+declare i32 @_ZNK13HingeNodeSpecILi6EE6getDimEv(%"struct.HingeNodeSpec<6>"*) nounwind
+
+declare void @_ZN13HingeNodeSpecILi6EE9setPosVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEES6_(%"struct.HingeNodeSpec<6>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi6EE18enforceConstraintsER9CDSVectorIdLi1EN3CDS12DefaultAllocEES5_(%"struct.HingeNodeSpec<6>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) nounwind
+
+declare i32 @_ZNK13HingeNodeSpecILi3EE6getDimEv(%"struct.HingeNodeSpec<3>"*) nounwind
+
+declare void @_ZN13HingeNodeSpecILi3EE9setPosVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEES6_(%"struct.HingeNodeSpec<3>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi3EE18enforceConstraintsER9CDSVectorIdLi1EN3CDS12DefaultAllocEES5_(%"struct.HingeNodeSpec<3>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*) nounwind
+
+declare void @_Z14orthoTransformIdLi6ELi6EE11FixedMatrixIT_XT1_EXT1_ELi0ELi0EERKS0_IS1_XT0_EXT0_ELi0ELi0EERKS0_IS1_XT1_EXT0_ELi0ELi0EE(%struct.Mat6* noalias sret, %struct.Mat6*, %struct.Mat6*)
+
+declare double @_ZN13HingeNodeSpecILi1EE8kineticEEv(%"struct.HingeNodeSpec<1>"*)
+
+declare double @_ZN13HingeNodeSpecILi3EE8kineticEEv(%"struct.HingeNodeSpec<3>"*)
+
+declare double @_ZN13HingeNodeSpecILi2EE8kineticEEv(%"struct.HingeNodeSpec<2>"*)
+
+declare double @_ZN13HingeNodeSpecILi6EE8kineticEEv(%"struct.HingeNodeSpec<6>"*)
+
+declare double @_ZN13HingeNodeSpecILi5EE8kineticEEv(%"struct.HingeNodeSpec<5>"*)
+
+declare void @_ZmlIdLi6ELi5ELi6EE11FixedMatrixIT_XT0_EXT2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT1_EXT2_ELi0ELi0EE(%struct.Mat6* noalias sret, %"struct.FixedMatrix<double,5,6,0,0>"*, %"struct.FixedMatrix<double,5,6,0,0>"*)
+
+declare void @_ZN13HingeNodeSpecILi1EE9setPosVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEES6_(%"struct.HingeNodeSpec<1>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi5EE9setPosVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEES6_(%"struct.HingeNodeSpec<5>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi2EE9setPosVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEES6_(%"struct.HingeNodeSpec<2>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_Z14orthoTransformIdLi3ELi6EE11FixedMatrixIT_XT1_EXT1_ELi0ELi0EERKS0_IS1_XT0_EXT0_ELi0ELi0EERKS0_IS1_XT1_EXT0_ELi0ELi0EE(%struct.Mat6* noalias sret, %"struct.FixedMatrix<double,3,3,0,0>"*, %"struct.FixedMatrix<double,3,6,0,0>"*)
+
+declare void @_ZmlIdLi6ELi1ELi6EE11FixedMatrixIT_XT0_EXT2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT1_EXT2_ELi0ELi0EE(%struct.Mat6* noalias sret, %"struct.FixedMatrix<double,1,6,0,0>"*, %"struct.FixedMatrix<double,1,6,0,0>"*)
+
+declare void @_ZmlIdLi6ELi5ELi5EE11FixedMatrixIT_XT0_EXT2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT1_EXT2_ELi0ELi0EE(%"struct.FixedMatrix<double,5,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,5,6,0,0>"*, %"struct.FixedMatrix<double,5,5,0,0>"*)
+
+declare void @_Z14orthoTransformIdLi5ELi6EE11FixedMatrixIT_XT1_EXT1_ELi0ELi0EERKS0_IS1_XT0_EXT0_ELi0ELi0EERKS0_IS1_XT1_EXT0_ELi0ELi0EE(%struct.Mat6* noalias sret, %"struct.FixedMatrix<double,5,5,0,0>"*, %"struct.FixedMatrix<double,5,6,0,0>"*)
+
+declare void @_Z14orthoTransformIdLi2ELi6EE11FixedMatrixIT_XT1_EXT1_ELi0ELi0EERKS0_IS1_XT0_EXT0_ELi0ELi0EERKS0_IS1_XT1_EXT0_ELi0ELi0EE(%struct.Mat6* noalias sret, %"struct.FixedMatrix<double,2,2,0,0>"*, %"struct.FixedMatrix<double,2,6,0,0>"*)
+
+declare void @_ZmlIdLi1ELi6ELi6EE11FixedMatrixIT_XT0_EXT2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT1_EXT2_ELi0ELi0EE(%"struct.FixedMatrix<double,1,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,1,6,0,0>"*, %struct.Mat6*)
+
+declare void @_ZmlIdLi5ELi6ELi6EE11FixedMatrixIT_XT0_EXT2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT1_EXT2_ELi0ELi0EE(%"struct.FixedMatrix<double,5,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,5,6,0,0>"*, %struct.Mat6*)
+
+declare void @_Z14orthoTransformIdLi6ELi5EE11FixedMatrixIT_XT1_EXT1_ELi0ELi0EERKS0_IS1_XT0_EXT0_ELi0ELi0EERKS0_IS1_XT1_EXT0_ELi0ELi0EE(%"struct.FixedMatrix<double,5,5,0,0>"* noalias sret, %struct.Mat6*, %"struct.FixedMatrix<double,5,6,0,0>"*)
+
+declare void @_ZmlIdLi2ELi6ELi6EE11FixedMatrixIT_XT0_EXT2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT1_EXT2_ELi0ELi0EE(%"struct.FixedMatrix<double,2,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,2,6,0,0>"*, %struct.Mat6*)
+
+declare void @_Z14orthoTransformIdLi6ELi2EE11FixedMatrixIT_XT1_EXT1_ELi0ELi0EERKS0_IS1_XT0_EXT0_ELi0ELi0EERKS0_IS1_XT1_EXT0_ELi0ELi0EE(%"struct.FixedMatrix<double,2,2,0,0>"* noalias sret, %struct.Mat6*, %"struct.FixedMatrix<double,2,6,0,0>"*)
+
+declare void @_ZmlIdLi3ELi6ELi6EE11FixedMatrixIT_XT0_EXT2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT1_EXT2_ELi0ELi0EE(%"struct.FixedMatrix<double,3,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,3,6,0,0>"*, %struct.Mat6*)
+
+declare void @_Z14orthoTransformIdLi6ELi3EE11FixedMatrixIT_XT1_EXT1_ELi0ELi0EERKS0_IS1_XT0_EXT0_ELi0ELi0EERKS0_IS1_XT1_EXT0_ELi0ELi0EE(%"struct.FixedMatrix<double,3,3,0,0>"* noalias sret, %struct.Mat6*, %"struct.FixedMatrix<double,3,6,0,0>"*)
+
+declare void @_ZNSt8ios_base4InitC1Ev(%"struct.CDS::DefaultAlloc"*)
+
+declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*) nounwind
+
+declare void @__tcf_0(i8* nocapture)
+
+declare void @_ZNSt8ios_base4InitD1Ev(%"struct.CDS::DefaultAlloc"*)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsRSoRK9HingeNode(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %struct.HingeNode*)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsRSoPK7IVMAtom(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %struct.IVMAtom*)
+
+declare void @_ZN9HingeNode8addChildEPS_(%struct.HingeNode*, %struct.HingeNode*)
+
+declare void @_ZN7CDSListIP9HingeNodeE6appendES1_(%"struct.CDSList<HingeNode*>"*, %struct.HingeNode*)
+
+declare void @_ZN9HingeNode4getHEv(%struct.RMat* noalias sret, %struct.HingeNode*)
+
+declare i8* @__cxa_allocate_exception(i32) nounwind
+
+declare void @__cxa_throw(i8*, i8*, void (i8*)*) noreturn
+
+declare void @_ZN9HingeNode16getInternalForceER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HingeNode*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN9HingeNode9calcAccelEv(%struct.HingeNode*)
+
+declare void @_ZN9HingeNode8getAccelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HingeNode*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN9HingeNode6getVelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HingeNode*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN9HingeNode6getPosER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HingeNode*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN9HingeNode5printEi(%struct.HingeNode*, i32)
+
+declare void @_ZN9HingeNode18enforceConstraintsER9CDSVectorIdLi1EN3CDS12DefaultAllocEES4_(%struct.HingeNode*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN9HingeNode14setVelFromSVelERK11FixedVectorIdLi6ELi0EE(%struct.HingeNode*, %"struct.FixedMatrix<double,1,6,0,0>"*)
+
+declare void @_ZN9HingeNode6setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HingeNode*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN9HingeNode9setPosVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEES5_(%struct.HingeNode*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN9HingeNode13propagateSVelERK11FixedVectorIdLi6ELi0EE(%struct.HingeNode*, %"struct.FixedMatrix<double,1,6,0,0>"*)
+
+declare void @_ZN9HingeNode18prepareVelInternalEv(%struct.HingeNode*)
+
+declare void @_ZN9HingeNode17calcInternalForceEv(%struct.HingeNode*)
+
+declare void @_ZN9HingeNode5calcYEv(%struct.HingeNode*)
+
+declare void @_ZN9HingeNode9calcPandZEv(%struct.HingeNode*)
+
+declare void @_ZN9HingeNode5calcZEv(%struct.HingeNode*)
+
+declare void @_ZN9HingeNode5calcPEv(%struct.HingeNode*)
+
+declare double* @_ZNK9HingeNode4massEv(%struct.HingeNode*)
+
+declare %struct.Vec3* @_ZNK9HingeNode5posCMEv(%struct.HingeNode*)
+
+declare i8* @_Znam(i32)
+
+declare void @_ZN7CDSListIP9HingeNodeEC1Eii(%"struct.CDSList<HingeNode*>"*, i32, i32)
+
+declare i8* @_Znwm(i32)
+
+declare i8* @llvm.eh.exception() nounwind
+
+declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) nounwind
+
+declare i32 @llvm.eh.typeid.for.i32(i8*) nounwind
+
+declare void @_ZdlPv(i8*) nounwind
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @_Unwind_Resume_or_Rethrow(i8*)
+
+declare void @_ZN7CDSListIP7IVMAtomEC1Eii(%struct.AtomList*, i32, i32)
+
+declare void @_ZN13CDSVectorBaseIdN3CDS12DefaultAllocEE8splitRepEv(%"struct.CDSVectorBase<double,CDS::DefaultAlloc>"*)
+
+declare void @_ZN12HNodeTorsion16getInternalForceER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTorsion*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi1EE8getAccelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<1>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi1EE6getVelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<1>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi1EE6getPosER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<1>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi1EE16getInternalForceER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<1>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN12HNodeRotate316getInternalForceER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeRotate3*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi3EE16getInternalForceER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<3>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi3EE8getAccelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<3>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi3EE6getVelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<3>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi3EE6getPosER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<3>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN12HNodeRotate216getInternalForceER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN12HNodeRotate28getAccelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN12HNodeRotate26getVelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN12HNodeRotate26getPosER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN12HNodeRotate38getAccelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeRotate3*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN12HNodeRotate36getVelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeRotate3*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN12HNodeRotate36getPosER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeRotate3*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi2EE16getInternalForceER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<2>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi2EE8getAccelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<2>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi2EE6getVelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<2>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi2EE6getPosER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<2>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN21HNodeTranslateRotate316getInternalForceER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate3*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN21HNodeTranslateRotate38getAccelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate3*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN21HNodeTranslateRotate36getVelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate3*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN21HNodeTranslateRotate36getPosER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate3*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi6EE16getInternalForceER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<6>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi6EE8getAccelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<6>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi6EE6getVelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<6>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi6EE6getPosER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<6>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN21HNodeTranslateRotate216getInternalForceER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN21HNodeTranslateRotate28getAccelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN21HNodeTranslateRotate26getVelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN21HNodeTranslateRotate26getPosER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi5EE16getInternalForceER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<5>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi5EE8getAccelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<5>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi5EE6getVelER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<5>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi5EE6getPosER9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<5>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13CDSVectorBaseI4Vec3N3CDS12DefaultAllocEE8splitRepEv(%"struct.CDSVectorBase<Vec3,CDS::DefaultAlloc>"*)
+
+declare void @_ZN7CDSListIP7IVMAtomE8splitRepEv(%struct.AtomList*)
+
+declare void @_ZN7CDSListIP9HingeNodeE8splitRepEv(%"struct.CDSList<HingeNode*>"*)
+
+declare void @_ZdaPv(i8*) nounwind
+
+declare void @_ZSt9terminatev() noreturn nounwind
+
+declare void @_ZN9HingeNodeC2EPK3IVMP7IVMAtomPKS3_PS_(%struct.HingeNode*, %struct.IVM*, %struct.IVMAtom*, %struct.IVMAtom*, %struct.HingeNode*)
+
+declare void @_ZN9HingeNodeD1Ev(%struct.HingeNode*)
+
+declare void @_ZN9HingeNodeD0Ev(%struct.HingeNode*)
+
+declare void @_ZN7CDSListIP7IVMAtomE6appendES1_(%struct.AtomList*, %struct.IVMAtom*)
+
+declare void @_ZN9HingeNodeC1EPK3IVMP7IVMAtomPKS3_PS_(%struct.HingeNode*, %struct.IVM*, %struct.IVMAtom*, %struct.IVMAtom*, %struct.HingeNode*)
+
+declare void @_ZN9HingeNodeD2Ev(%struct.HingeNode*)
+
+declare void @_ZN11HNodeOriginD0Ev(%struct.HNodeOrigin*)
+
+declare void @_ZN11HNodeOriginD1Ev(%struct.HNodeOrigin*)
+
+declare void @_ZN13HingeNodeSpecILi1EED0Ev(%"struct.HingeNodeSpec<1>"*)
+
+declare void @_ZN13HingeNodeSpecILi1EED1Ev(%"struct.HingeNodeSpec<1>"*)
+
+declare void @_ZN13HingeNodeSpecILi1EE5calcPEv(%"struct.HingeNodeSpec<1>"*)
+
+declare void @_ZN13HingeNodeSpecILi1EE5calcZEv(%"struct.HingeNodeSpec<1>"*)
+
+declare void @_ZN13HingeNodeSpecILi1EE5calcYEv(%"struct.HingeNodeSpec<1>"*)
+
+declare void @_ZN13HingeNodeSpecILi1EE17calcInternalForceEv(%"struct.HingeNodeSpec<1>"*)
+
+declare void @_ZN13HingeNodeSpecILi1EE18prepareVelInternalEv(%"struct.HingeNodeSpec<1>"*)
+
+declare void @_ZN13HingeNodeSpecILi1EE13propagateSVelERK11FixedVectorIdLi6ELi0EE(%"struct.HingeNodeSpec<1>"*, %"struct.FixedMatrix<double,1,6,0,0>"*)
+
+declare double @_ZN13HingeNodeSpecILi1EE8approxKEEv(%"struct.HingeNodeSpec<1>"*)
+
+declare void @_ZN13HingeNodeSpecILi1EE6setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<1>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi1EE14setVelFromSVelERK11FixedVectorIdLi6ELi0EE(%"struct.HingeNodeSpec<1>"*, %"struct.FixedMatrix<double,1,6,0,0>"*)
+
+declare void @_ZN13HingeNodeSpecILi1EE5printEi(%"struct.HingeNodeSpec<1>"*, i32)
+
+declare void @_ZN13HingeNodeSpecILi1EE9calcAccelEv(%"struct.HingeNodeSpec<1>"*)
+
+declare void @_ZN13HingeNodeSpecILi1EE4getHEv(%struct.RMat* noalias sret, %"struct.HingeNodeSpec<1>"*)
+
+declare void @__cxa_pure_virtual() nounwind
+
+declare void @_ZN13HingeNodeSpecILi3EED0Ev(%"struct.HingeNodeSpec<3>"*)
+
+declare void @_ZN13HingeNodeSpecILi3EED1Ev(%"struct.HingeNodeSpec<3>"*)
+
+declare void @_ZN13HingeNodeSpecILi3EE5calcPEv(%"struct.HingeNodeSpec<3>"*)
+
+declare void @_ZN13HingeNodeSpecILi3EE5calcZEv(%"struct.HingeNodeSpec<3>"*)
+
+declare void @_ZN13HingeNodeSpecILi3EE5calcYEv(%"struct.HingeNodeSpec<3>"*)
+
+declare void @_ZN13HingeNodeSpecILi3EE17calcInternalForceEv(%"struct.HingeNodeSpec<3>"*)
+
+declare void @_ZN13HingeNodeSpecILi3EE18prepareVelInternalEv(%"struct.HingeNodeSpec<3>"*)
+
+declare void @_ZN13HingeNodeSpecILi3EE13propagateSVelERK11FixedVectorIdLi6ELi0EE(%"struct.HingeNodeSpec<3>"*, %"struct.FixedMatrix<double,1,6,0,0>"*)
+
+declare double @_ZN13HingeNodeSpecILi3EE8approxKEEv(%"struct.HingeNodeSpec<3>"*)
+
+declare void @_ZN13HingeNodeSpecILi3EE6setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<3>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi3EE14setVelFromSVelERK11FixedVectorIdLi6ELi0EE(%"struct.HingeNodeSpec<3>"*, %"struct.FixedMatrix<double,1,6,0,0>"*)
+
+declare void @_ZN13HingeNodeSpecILi3EE5printEi(%"struct.HingeNodeSpec<3>"*, i32)
+
+declare void @_ZN13HingeNodeSpecILi3EE9calcAccelEv(%"struct.HingeNodeSpec<3>"*)
+
+declare void @_ZN13HingeNodeSpecILi3EE4getHEv(%struct.RMat* noalias sret, %"struct.HingeNodeSpec<3>"*)
+
+declare void @_ZN13HingeNodeSpecILi2EED0Ev(%"struct.HingeNodeSpec<2>"*)
+
+declare void @_ZN13HingeNodeSpecILi2EED1Ev(%"struct.HingeNodeSpec<2>"*)
+
+declare void @_ZN13HingeNodeSpecILi2EE5calcPEv(%"struct.HingeNodeSpec<2>"*)
+
+declare void @_ZN13HingeNodeSpecILi2EE5calcZEv(%"struct.HingeNodeSpec<2>"*)
+
+declare void @_ZN13HingeNodeSpecILi2EE5calcYEv(%"struct.HingeNodeSpec<2>"*)
+
+declare void @_ZN13HingeNodeSpecILi2EE17calcInternalForceEv(%"struct.HingeNodeSpec<2>"*)
+
+declare void @_ZN13HingeNodeSpecILi2EE18prepareVelInternalEv(%"struct.HingeNodeSpec<2>"*)
+
+declare void @_ZN13HingeNodeSpecILi2EE13propagateSVelERK11FixedVectorIdLi6ELi0EE(%"struct.HingeNodeSpec<2>"*, %"struct.FixedMatrix<double,1,6,0,0>"*)
+
+declare double @_ZN13HingeNodeSpecILi2EE8approxKEEv(%"struct.HingeNodeSpec<2>"*)
+
+declare void @_ZN13HingeNodeSpecILi2EE6setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<2>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi2EE14setVelFromSVelERK11FixedVectorIdLi6ELi0EE(%"struct.HingeNodeSpec<2>"*, %"struct.FixedMatrix<double,1,6,0,0>"*)
+
+declare void @_ZN13HingeNodeSpecILi2EE5printEi(%"struct.HingeNodeSpec<2>"*, i32)
+
+declare void @_ZN13HingeNodeSpecILi2EE9calcAccelEv(%"struct.HingeNodeSpec<2>"*)
+
+declare void @_ZN13HingeNodeSpecILi2EE4getHEv(%struct.RMat* noalias sret, %"struct.HingeNodeSpec<2>"*)
+
+declare void @_ZN13HingeNodeSpecILi6EED0Ev(%"struct.HingeNodeSpec<6>"*)
+
+declare void @_ZN13HingeNodeSpecILi6EED1Ev(%"struct.HingeNodeSpec<6>"*)
+
+declare void @_ZN13HingeNodeSpecILi6EE5calcPEv(%"struct.HingeNodeSpec<6>"*)
+
+declare void @_ZN13HingeNodeSpecILi6EE5calcZEv(%"struct.HingeNodeSpec<6>"*)
+
+declare void @_ZN13HingeNodeSpecILi6EE5calcYEv(%"struct.HingeNodeSpec<6>"*)
+
+declare void @_ZN13HingeNodeSpecILi6EE17calcInternalForceEv(%"struct.HingeNodeSpec<6>"*)
+
+declare void @_ZN13HingeNodeSpecILi6EE18prepareVelInternalEv(%"struct.HingeNodeSpec<6>"*)
+
+declare void @_ZN13HingeNodeSpecILi6EE13propagateSVelERK11FixedVectorIdLi6ELi0EE(%"struct.HingeNodeSpec<6>"*, %"struct.FixedMatrix<double,1,6,0,0>"*)
+
+declare double @_ZN13HingeNodeSpecILi6EE8approxKEEv(%"struct.HingeNodeSpec<6>"*)
+
+declare void @_ZN13HingeNodeSpecILi6EE6setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<6>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi6EE14setVelFromSVelERK11FixedVectorIdLi6ELi0EE(%"struct.HingeNodeSpec<6>"*, %"struct.FixedMatrix<double,1,6,0,0>"*)
+
+declare void @_ZN13HingeNodeSpecILi6EE5printEi(%"struct.HingeNodeSpec<6>"*, i32)
+
+declare void @_ZN13HingeNodeSpecILi6EE9calcAccelEv(%"struct.HingeNodeSpec<6>"*)
+
+declare void @_ZN13HingeNodeSpecILi6EE4getHEv(%struct.RMat* noalias sret, %"struct.HingeNodeSpec<6>"*)
+
+declare void @_ZN13HingeNodeSpecILi5EED0Ev(%"struct.HingeNodeSpec<5>"*)
+
+declare void @_ZN13HingeNodeSpecILi5EED1Ev(%"struct.HingeNodeSpec<5>"*)
+
+declare void @_ZN13HingeNodeSpecILi5EE5calcPEv(%"struct.HingeNodeSpec<5>"*)
+
+declare void @_ZN13HingeNodeSpecILi5EE5calcZEv(%"struct.HingeNodeSpec<5>"*)
+
+declare void @_ZN13HingeNodeSpecILi5EE5calcYEv(%"struct.HingeNodeSpec<5>"*)
+
+declare void @_ZN13HingeNodeSpecILi5EE17calcInternalForceEv(%"struct.HingeNodeSpec<5>"*)
+
+declare void @_ZN13HingeNodeSpecILi5EE18prepareVelInternalEv(%"struct.HingeNodeSpec<5>"*)
+
+declare void @_ZN13HingeNodeSpecILi5EE13propagateSVelERK11FixedVectorIdLi6ELi0EE(%"struct.HingeNodeSpec<5>"*, %"struct.FixedMatrix<double,1,6,0,0>"*)
+
+declare double @_ZN13HingeNodeSpecILi5EE8approxKEEv(%"struct.HingeNodeSpec<5>"*)
+
+declare void @_ZN13HingeNodeSpecILi5EE6setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%"struct.HingeNodeSpec<5>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi5EE14setVelFromSVelERK11FixedVectorIdLi6ELi0EE(%"struct.HingeNodeSpec<5>"*, %"struct.FixedMatrix<double,1,6,0,0>"*)
+
+declare void @_ZN13HingeNodeSpecILi5EE5printEi(%"struct.HingeNodeSpec<5>"*, i32)
+
+declare void @_ZN13HingeNodeSpecILi5EE9calcAccelEv(%"struct.HingeNodeSpec<5>"*)
+
+declare void @_ZN13HingeNodeSpecILi5EE4getHEv(%struct.RMat* noalias sret, %"struct.HingeNodeSpec<5>"*)
+
+declare void @_ZN12HNodeTorsion7calcRotEv(%struct.HNodeTorsion*)
+
+declare double @sin(double) nounwind readnone
+
+declare double @cos(double) nounwind readnone
+
+declare void @_ZN12HNodeRotate37calcRotEv(%struct.HNodeRotate3*)
+
+declare void @_ZN21HNodeTranslateRotate37calcRotEv(%struct.HNodeTranslateRotate3*)
+
+declare void @_ZN9HingeNodeC2ERKS_(%struct.HingeNode*, %struct.HingeNode*)
+
+declare void @_ZN7CDSListIP9HingeNodeEC1ERKS2_(%"struct.CDSList<HingeNode*>"*, %"struct.CDSList<HingeNode*>"*)
+
+declare void @_ZN7CDSListIP7IVMAtomEC1ERKS2_(%struct.AtomList*, %struct.AtomList*)
+
+declare void @_ZN11HNodeOriginC2EPK9HingeNode(%struct.HNodeOrigin*, %struct.HingeNode*)
+
+declare void @_ZN13HingeNodeSpecILi1EEC2EPK9HingeNodeRi(%"struct.HingeNodeSpec<1>"*, %struct.HingeNode*, i32*)
+
+declare void @_ZN13HingeNodeSpecILi3EEC2EPK9HingeNodeRi(%"struct.HingeNodeSpec<3>"*, %struct.HingeNode*, i32*)
+
+declare void @_ZN13HingeNodeSpecILi2EEC2EPK9HingeNodeRi(%"struct.HingeNodeSpec<2>"*, %struct.HingeNode*, i32*)
+
+declare void @_ZN13HingeNodeSpecILi6EEC2EPK9HingeNodeRi(%"struct.HingeNodeSpec<6>"*, %struct.HingeNode*, i32*)
+
+declare void @_ZN13HingeNodeSpecILi5EEC2EPK9HingeNodeRi(%"struct.HingeNodeSpec<5>"*, %struct.HingeNode*, i32*)
+
+declare void @_ZplI4Vec3K11FixedVectorIdLi6ELi0EEET_RK9SubVectorIT0_ERKS4_(%struct.Vec3* noalias sret, %"struct.SubVector<FixedVector<double, 6, 0> >"*, %struct.Vec3*)
+
+declare void @_ZN11MatrixTools9transposeI11FixedMatrixIdLi1ELi6ELi0ELi0EEEENT_13TransposeTypeERKS3_(%"struct.FixedMatrix<double,1,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,1,6,0,0>"*)
+
+declare void @_ZN12HNodeRotate314setVelFromSVelERK11FixedVectorIdLi6ELi0EE(%struct.HNodeRotate3*, %"struct.FixedMatrix<double,1,6,0,0>"*)
+
+declare void @_ZN12HNodeRotate214setVelFromSVelERK11FixedVectorIdLi6ELi0EE(%struct.HNodeRotate2*, %"struct.FixedMatrix<double,1,6,0,0>"*)
+
+declare void @_ZN21HNodeTranslateRotate314setVelFromSVelERK11FixedVectorIdLi6ELi0EE(%struct.HNodeTranslateRotate3*, %"struct.FixedMatrix<double,1,6,0,0>"*)
+
+declare void @_ZN21HNodeTranslateRotate214setVelFromSVelERK11FixedVectorIdLi6ELi0EE(%struct.HNodeTranslateRotate2*, %"struct.FixedMatrix<double,1,6,0,0>"*)
+
+declare void @_ZN13HingeNodeSpecILi1EE9calcPropsEv(%"struct.HingeNodeSpec<1>"*)
+
+declare zeroext i8 @_ZNK3IVM12minimizationEv(%struct.IVM*)
+
+declare void @_Z8blockVecIdLi3ELi3EE11FixedVectorIT_XplT0_T1_ELi0EERKS0_IS1_XT0_ELi0EERKS0_IS1_XT1_ELi0EE(%"struct.FixedMatrix<double,1,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,1,3,0,0>"*, %"struct.FixedMatrix<double,1,3,0,0>"*)
+
+declare void @_ZN12HNodeTorsion11toCartesianEv(%struct.HNodeTorsion*)
+
+declare void @_ZN13HingeNodeSpecILi1EE18calcCartesianForceEv(%"struct.HingeNodeSpec<1>"*)
+
+declare void @_ZN13HingeNodeSpecILi3EE18calcCartesianForceEv(%"struct.HingeNodeSpec<3>"*)
+
+declare void @_ZN13HingeNodeSpecILi2EE18calcCartesianForceEv(%"struct.HingeNodeSpec<2>"*)
+
+declare void @_ZN13HingeNodeSpecILi6EE18calcCartesianForceEv(%"struct.HingeNodeSpec<6>"*)
+
+declare void @_ZN13HingeNodeSpecILi5EE18calcCartesianForceEv(%"struct.HingeNodeSpec<5>"*)
+
+declare void @_ZN12HNodeTorsion5calcHEv(%struct.HNodeTorsion*)
+
+declare void @_Z10blockMat12IdLi1ELi3ELi3EE11FixedMatrixIT_XT0_EXplT1_T2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT0_EXT2_ELi0ELi0EE(%"struct.FixedMatrix<double,1,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,1,3,0,0>"*, %"struct.FixedMatrix<double,1,3,0,0>"*)
+
+declare void @_ZN13CDSMatrixBaseIdEC2I11FixedMatrixIdLi1ELi6ELi0ELi0EEEERKT_(%"struct.CDSMatrixBase<double>"*, %"struct.FixedMatrix<double,1,6,0,0>"*)
+
+declare void @_ZN11MatrixTools9transposeI11FixedMatrixIdLi6ELi1ELi0ELi0EEEENT_13TransposeTypeERKS3_(%"struct.FixedMatrix<double,1,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,1,6,0,0>"*)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_St13_Setprecision(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, i32)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi6EERSoS0_RK15FixedVectorBaseIT_XT0_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedMatrixBase<double,1,6>"*)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_c(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, i8 signext)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi3EERSoS0_RK15FixedVectorBaseIT_XT0_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedMatrixBase<double,1,3>"*)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi1EERSoS0_RK15FixedVectorBaseIT_XT0_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedMatrixBase<double,1,1>"*)
+
+declare void @_ZN11FixedVectorIdLi3ELi0EE6subColILi6ELi1ELi0ELi0EEES0_RK11FixedMatrixIdXT_EXT0_EXT1_EXT2_EEiii(%"struct.FixedMatrix<double,1,3,0,0>"* noalias sret, %"struct.FixedMatrix<double,1,6,0,0>"*, i32, i32, i32)
+
+declare %"struct.FixedMatrixBase<double,6,6>"* @_ZN15FixedMatrixBaseIdLi6ELi6EEpLERKS0_(%"struct.FixedMatrixBase<double,6,6>"*, %"struct.FixedMatrixBase<double,6,6>"*)
+
+declare void @_ZN13HingeNodeSpecILi6EE9calcPropsEv(%"struct.HingeNodeSpec<6>"*)
+
+declare void @_ZN11MatrixTools9transposeI11FixedMatrixIdLi6ELi6ELi0ELi0EEEENT_13TransposeTypeERKS3_(%struct.Mat6* noalias sret, %struct.Mat6*)
+
+declare void @_ZN21HNodeTranslateRotate311toCartesianEv(%struct.HNodeTranslateRotate3*)
+
+define linkonce void @_ZN21HNodeTranslateRotate36setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate3* %this, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"* %velv) {	; reduced test body: null loads/GEPs, dead GEPs and the terminating unreachable are intentional artifacts of test reduction
+entry:	; single basic block; never returns normally in this reduced form
+	%0 = add i32 0, -1		; <i32> [#uses=1]
+	%1 = getelementptr double* null, i32 %0		; <double*> [#uses=1]
+	%2 = load double* %1, align 8		; <double> [#uses=1]
+	%3 = load double* null, align 8		; <double> [#uses=2]
+	%4 = load double* null, align 8		; <double> [#uses=2]
+	%5 = load double* null, align 8		; <double> [#uses=3]
+	%6 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0		; <double*> [#uses=0]
+	%7 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 2, i32 0, i32 0, i32 0, i32 1		; <double*> [#uses=0]
+	%8 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 2, i32 0, i32 0, i32 0, i32 2		; <double*> [#uses=0]
+	%9 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 2, i32 0, i32 0, i32 0, i32 3		; <double*> [#uses=0]
+	%10 = load double* null, align 8		; <double> [#uses=2]
+	%11 = fsub double -0.000000e+00, %10		; <double> [#uses=1]
+	%12 = load double* null, align 8		; <double> [#uses=2]
+	%13 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 1, i32 0, i32 0, i32 0, i32 3		; <double*> [#uses=1]
+	%14 = load double* %13, align 8		; <double> [#uses=2]
+	%15 = fsub double -0.000000e+00, %14		; <double> [#uses=1]
+	%16 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 1, i32 0, i32 0, i32 0, i32 2		; <double*> [#uses=1]
+	%17 = load double* %16, align 8		; <double> [#uses=2]
+	%18 = fsub double -0.000000e+00, %17		; <double> [#uses=1]
+	%19 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 0		; <double*> [#uses=0]
+	%20 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 3		; <double*> [#uses=0]
+	%21 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 6		; <double*> [#uses=0]
+	%22 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 9		; <double*> [#uses=0]
+	%23 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 1		; <double*> [#uses=0]
+	%24 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 4		; <double*> [#uses=0]
+	%25 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 7		; <double*> [#uses=0]
+	%26 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 10		; <double*> [#uses=0]
+	%27 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 2		; <double*> [#uses=0]
+	%28 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 5		; <double*> [#uses=0]
+	%29 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 8		; <double*> [#uses=0]
+	%30 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 11		; <double*> [#uses=0]
+	%31 = getelementptr %"struct.FixedMatrix<double,1,3,0,0>"* null, i32 0, i32 0, i32 0, i32 0		; <double*> [#uses=0]
+	%32 = getelementptr %"struct.FixedMatrix<double,1,3,0,0>"* null, i32 0, i32 0, i32 0, i32 1		; <double*> [#uses=1]
+	%33 = getelementptr %"struct.FixedMatrix<double,1,3,0,0>"* null, i32 0, i32 0, i32 0, i32 2		; <double*> [#uses=1]
+	%34 = fmul double %17, %5		; <double> [#uses=1]
+	%35 = fadd double 0.000000e+00, %34		; <double> [#uses=1]
+	%36 = fadd double 0.000000e+00, 0.000000e+00		; <double> [#uses=1]
+	%37 = fmul double %14, %3		; <double> [#uses=1]
+	%38 = fadd double %36, %37		; <double> [#uses=1]
+	%39 = fmul double %12, %4		; <double> [#uses=1]
+	%40 = fadd double %38, %39		; <double> [#uses=1]
+	%41 = fmul double %5, %11		; <double> [#uses=1]
+	%42 = fadd double %40, %41		; <double> [#uses=2]
+	store double %42, double* %32, align 8	; store sum-of-products into null-based vector element 1 (reduced)
+	%43 = fmul double %2, %15		; <double> [#uses=1]
+	%44 = fadd double %43, 0.000000e+00		; <double> [#uses=1]
+	%45 = fmul double %3, %18		; <double> [#uses=1]
+	%46 = fadd double %44, %45		; <double> [#uses=1]
+	%47 = fmul double %10, %4		; <double> [#uses=1]
+	%48 = fadd double %46, %47		; <double> [#uses=1]
+	%49 = fmul double %12, %5		; <double> [#uses=1]
+	%50 = fadd double %48, %49		; <double> [#uses=2]
+	store double %50, double* %33, align 8	; store sum-of-products into null-based vector element 2 (reduced)
+	%51 = fmul double %35, 2.000000e+00		; <double> [#uses=1]
+	%52 = fmul double %42, 2.000000e+00		; <double> [#uses=1]
+	%53 = fmul double %50, 2.000000e+00		; <double> [#uses=1]
+	%54 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0		; <double*> [#uses=1]
+	store double %51, double* %54, align 8
+	%55 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 0, i32 10, i32 0, i32 0, i32 1		; <double*> [#uses=1]
+	store double %52, double* %55, align 8
+	%56 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 0, i32 10, i32 0, i32 0, i32 2		; <double*> [#uses=1]
+	store double %53, double* %56, align 8
+	%57 = add i32 0, 4		; <i32> [#uses=1]
+	%58 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 0		; <%"struct.CDSVector<double,0,CDS::DefaultAlloc>"**> [#uses=1]
+	store %"struct.CDSVector<double,0,CDS::DefaultAlloc>"* %velv, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"** %58, align 8
+	%59 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 1		; <i32*> [#uses=1]
+	store i32 %57, i32* %59, align 4	; SubVector offset = 4; presumably from the original setVel logic -- not verifiable here
+	%60 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 2		; <i32*> [#uses=1]
+	store i32 3, i32* %60, align 8	; SubVector length = 3
+	unreachable	; reduced: control never reaches a ret; only code generation up to here matters
+}
+
+declare void @_ZmlRK11FixedMatrixIdLi6ELi6ELi0ELi0EERK18PhiMatrixTranspose(%struct.Mat6* noalias sret, %struct.Mat6*, %struct.PhiMatrixTranspose*)
+
+declare void @_ZmlI4Mat3K11FixedMatrixIdLi6ELi6ELi0ELi0EEET_RK9SubMatrixIT0_ERKS4_(%struct.Mat3* noalias sret, %"struct.SubMatrix<FixedMatrix<double, 6, 6, 0, 0> >"*, %struct.Mat3*)
+
+declare void @_ZmiI4Mat3K11FixedMatrixIdLi6ELi6ELi0ELi0EEET_RK9SubMatrixIT0_ERKS4_(%struct.Mat3* noalias sret, %"struct.SubMatrix<FixedMatrix<double, 6, 6, 0, 0> >"*, %struct.Mat3*)
+
+declare %"struct.FixedMatrixBase<double,3,3>"* @_ZN15FixedMatrixBaseIdLi3ELi3EEmIERKS0_(%"struct.FixedMatrixBase<double,3,3>"*, %"struct.FixedMatrixBase<double,3,3>"*)
+
+declare void @_ZplI4Mat311FixedMatrixIdLi6ELi6ELi0ELi0EEET_RKS3_RK9SubMatrixIT0_E(%struct.Mat3* noalias sret, %struct.Mat3*, %"struct.SubMatrix<FixedMatrix<double, 6, 6, 0, 0> >"*)
+
+declare void @_ZN13CDSVectorBaseIdN3CDS12DefaultAllocEED2Ev(%"struct.CDSVectorBase<double,CDS::DefaultAlloc>"*)
+
+declare void @_ZN13HingeNodeSpecILi1EE7calcD_GERK11FixedMatrixIdLi6ELi6ELi0ELi0EE(%"struct.HingeNodeSpec<1>"*, %struct.Mat6*)
+
+declare void @_ZN11MatrixTools7inverseI11FixedMatrixIdLi1ELi1ELi0ELi0EEEET_RKS3_NS_14InverseResultsINS3_10MatrixTypeEEE(%"struct.FixedMatrix<double,1,1,0,0>"* noalias sret, %"struct.FixedMatrix<double,1,1,0,0>"*, %"struct.MatrixTools::InverseResults<FullMatrix<double> >"*)
+
+declare i8* @__cxa_get_exception_ptr(i8*) nounwind
+
+declare i8* @__cxa_begin_catch(i8*) nounwind
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi1ELi1EERSoS0_RK15FixedMatrixBaseIT_XT0_EXT1_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedMatrixBase<double,1,1>"*)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi1ELi6EERSoS0_RK15FixedMatrixBaseIT_XT0_EXT1_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedMatrixBase<double,1,6>"*)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEi(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, i32)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIcERSoS0_RK9CDSStringIT_E(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %struct.String*)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEPFRSoS_E(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.std::basic_ostream<char,std::char_traits<char> >"* (%"struct.std::basic_ostream<char,std::char_traits<char> >"*)*)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_(%"struct.std::basic_ostream<char,std::char_traits<char> >"*)
+
+declare void @__cxa_end_catch()
+
+declare void @_ZmlI4Mat311FixedMatrixIdLi6ELi6ELi0ELi0EEET_RKS3_RK9SubMatrixIT0_E(%struct.Mat3* noalias sret, %struct.Mat3*, %"struct.SubMatrix<FixedMatrix<double, 6, 6, 0, 0> >"*)
+
+declare void @_ZmlI4Mat311FixedMatrixIdLi6ELi6ELi0ELi0EEET_RK9SubMatrixIT0_ERKS3_(%struct.Mat3* noalias sret, %"struct.SubMatrix<FixedMatrix<double, 6, 6, 0, 0> >"*, %struct.Mat3*)
+
+declare void @_ZmiI4Mat311FixedMatrixIdLi6ELi6ELi0ELi0EEET_RK9SubMatrixIT0_ERKS3_(%struct.Mat3* noalias sret, %"struct.SubMatrix<FixedMatrix<double, 6, 6, 0, 0> >"*, %struct.Mat3*)
+
+declare %"struct.FixedMatrixBase<double,6,6>"* @_ZN15FixedMatrixBaseIdLi6ELi6EEmIERKS0_(%"struct.FixedMatrixBase<double,6,6>"*, %"struct.FixedMatrixBase<double,6,6>"*)
+
+declare void @_ZN13CDSVectorBaseI4Vec3N3CDS12DefaultAllocEEC2EiS2_(%"struct.CDSVectorBase<Vec3,CDS::DefaultAlloc>"*, i32, %"struct.CDS::DefaultAlloc"* byval align 4)
+
+declare void @_ZN13CDSVectorBaseI4Vec3N3CDS12DefaultAllocEED2Ev(%"struct.CDSVectorBase<Vec3,CDS::DefaultAlloc>"*)
+
+declare void @_ZN12HNodeTorsionD0Ev(%struct.HNodeTorsion*)
+
+declare void @_ZN12HNodeTorsionD1Ev(%struct.HNodeTorsion*)
+
+declare void @_ZN12HNodeRotate3D0Ev(%struct.HNodeRotate3*)
+
+declare void @_ZN12HNodeRotate3D1Ev(%struct.HNodeRotate3*)
+
+declare void @_ZN12HNodeRotate36setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeRotate3*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN12HNodeRotate318enforceConstraintsER9CDSVectorIdLi1EN3CDS12DefaultAllocEES4_(%struct.HNodeRotate3*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN12HNodeRotate35printEi(%struct.HNodeRotate3*, i32)
+
+declare void @_ZN12HNodeRotate35calcHEv(%struct.HNodeRotate3*)
+
+declare void @_ZN12HNodeRotate311toCartesianEv(%struct.HNodeRotate3*)
+
+declare void @_ZN12HNodeRotate2D0Ev(%struct.HNodeRotate2*)
+
+declare void @_ZN12HNodeRotate2D1Ev(%struct.HNodeRotate2*)
+
+declare void @_ZN12HNodeRotate26setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN12HNodeRotate218enforceConstraintsER9CDSVectorIdLi1EN3CDS12DefaultAllocEES4_(%struct.HNodeRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN12HNodeRotate25printEi(%struct.HNodeRotate2*, i32)
+
+declare void @_ZN12HNodeRotate25calcHEv(%struct.HNodeRotate2*)
+
+declare void @_ZN12HNodeRotate211toCartesianEv(%struct.HNodeRotate2*)
+
+declare void @_ZN21HNodeTranslateRotate3D0Ev(%struct.HNodeTranslateRotate3*)
+
+declare void @_ZN21HNodeTranslateRotate3D1Ev(%struct.HNodeTranslateRotate3*)
+
+declare void @_ZN21HNodeTranslateRotate318enforceConstraintsER9CDSVectorIdLi1EN3CDS12DefaultAllocEES4_(%struct.HNodeTranslateRotate3*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN21HNodeTranslateRotate35printEi(%struct.HNodeTranslateRotate3*, i32)
+
+declare void @_ZN21HNodeTranslateRotate35calcHEv(%struct.HNodeTranslateRotate3*)
+
+declare void @_ZN21HNodeTranslateRotate2D0Ev(%struct.HNodeTranslateRotate2*)
+
+declare void @_ZN21HNodeTranslateRotate2D1Ev(%struct.HNodeTranslateRotate2*)
+
+declare void @_ZN21HNodeTranslateRotate26setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN21HNodeTranslateRotate218enforceConstraintsER9CDSVectorIdLi1EN3CDS12DefaultAllocEES4_(%struct.HNodeTranslateRotate2*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"*)
+
+declare void @_ZN21HNodeTranslateRotate25printEi(%struct.HNodeTranslateRotate2*, i32)
+
+declare void @_ZN21HNodeTranslateRotate25calcHEv(%struct.HNodeTranslateRotate2*)
+
+declare void @_ZN21HNodeTranslateRotate211toCartesianEv(%struct.HNodeTranslateRotate2*)
+
+declare void @_ZN14HNodeTranslateC2EPK9HingeNodeP7IVMAtomRi(%struct.HNodeTranslate*, %struct.HingeNode*, %struct.IVMAtom*, i32*)
+
+declare void @_ZN14HNodeTranslateD1Ev(%struct.HNodeTranslate*)
+
+declare void @_ZN14HNodeTranslateD0Ev(%struct.HNodeTranslate*)
+
+declare void @_ZN14HNodeTranslate5calcHEv(%struct.HNodeTranslate*)
+
+declare void @_ZN14HNodeTranslate11toCartesianEv(%struct.HNodeTranslate*)
+
+declare void @_ZN12HNodeRotate3C2EPK9HingeNodeP7IVMAtomRib(%struct.HNodeRotate3*, %struct.HingeNode*, %struct.IVMAtom*, i32*, i8 zeroext)
+
+declare void @_ZN8AtomTree6findCMEPK9HingeNode(%struct.Vec3* noalias sret, %struct.HingeNode*)
+
+declare %struct.IVMAtom** @_ZN7CDSListIP7IVMAtomE7prependERKS1_(%struct.AtomList*, %struct.IVMAtom**)
+
+declare %"struct.CDSVectorBase<Vec3,CDS::DefaultAlloc>"* @_ZN13CDSVectorBaseI4Vec3N3CDS12DefaultAllocEE6resizeEi(%"struct.CDSVectorBase<Vec3,CDS::DefaultAlloc>"*, i32)
+
+declare void @_ZN12HNodeRotate2C2EPK9HingeNodeRK4Vec3Ri(%struct.HNodeRotate2*, %struct.HingeNode*, %struct.Vec3*, i32*)
+
+declare void @_ZN21HNodeTranslateRotate3C2EPK9HingeNodeP7IVMAtomRib(%struct.HNodeTranslateRotate3*, %struct.HingeNode*, %struct.IVMAtom*, i32*, i8 zeroext)
+
+declare void @_ZN13HingeNodeSpecILi3EE9calcPropsEv(%"struct.HingeNodeSpec<3>"*)
+
+declare void @_ZN11MatrixTools9transposeI11FixedMatrixIdLi3ELi6ELi0ELi0EEEENT_13TransposeTypeERKS3_(%"struct.FixedMatrix<double,3,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,3,6,0,0>"*)
+
+declare void @_ZN11MatrixTools9transposeI4Mat3EENT_13TransposeTypeERKS2_(%struct.Mat3* noalias sret, %struct.Mat3*)
+
+declare void @_Z10blockMat12IdLi3ELi3ELi3EE11FixedMatrixIT_XT0_EXplT1_T2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT0_EXT2_ELi0ELi0EE(%"struct.FixedMatrix<double,3,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,3,3,0,0>"*, %"struct.FixedMatrix<double,3,3,0,0>"*)
+
+declare void @_ZN13CDSMatrixBaseIdEC2I11FixedMatrixIdLi3ELi6ELi0ELi0EEEERKT_(%"struct.CDSMatrixBase<double>"*, %"struct.FixedMatrix<double,3,6,0,0>"*)
+
+declare void @_ZN11MatrixTools9transposeI11FixedMatrixIdLi6ELi3ELi0ELi0EEEENT_13TransposeTypeERKS3_(%"struct.FixedMatrix<double,3,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,3,6,0,0>"*)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi4EERSoS0_RK15FixedVectorBaseIT_XT0_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedMatrixBase<double,2,2>"*)
+
+declare double @_Z4normIdLi4EET_RK11FixedVectorIS0_XT0_ELi0EE(%"struct.FixedMatrix<double,2,2,0,0>"*)
+
+declare %"struct.FixedMatrixBase<double,2,2>"* @_ZN15FixedVectorBaseIdLi4EEdVERKd(%"struct.FixedMatrixBase<double,2,2>"*, double*)
+
+declare %"struct.FixedMatrixBase<double,2,2>"* @_ZN15FixedVectorBaseIdLi4EEmIERKS0_(%"struct.FixedMatrixBase<double,2,2>"*, %"struct.FixedMatrixBase<double,2,2>"*)
+
+declare void @_ZN11FixedVectorIdLi3ELi0EE6subColILi6ELi3ELi0ELi0EEES0_RK11FixedMatrixIdXT_EXT0_EXT1_EXT2_EEiii(%"struct.FixedMatrix<double,1,3,0,0>"* noalias sret, %"struct.FixedMatrix<double,3,6,0,0>"*, i32, i32, i32)
+
+declare void @_ZN13HingeNodeSpecILi3EE7calcD_GERK11FixedMatrixIdLi6ELi6ELi0ELi0EE(%"struct.HingeNodeSpec<3>"*, %struct.Mat6*)
+
+declare void @_ZN11MatrixTools7inverseI11FixedMatrixIdLi3ELi3ELi0ELi0EEEET_RKS3_NS_14InverseResultsINS3_10MatrixTypeEEE(%"struct.FixedMatrix<double,3,3,0,0>"* noalias sret, %"struct.FixedMatrix<double,3,3,0,0>"*, %"struct.MatrixTools::InverseResults<FullMatrix<double> >"*)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi3ELi3EERSoS0_RK15FixedMatrixBaseIT_XT0_EXT1_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedMatrixBase<double,3,3>"*)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi3ELi6EERSoS0_RK15FixedMatrixBaseIT_XT0_EXT1_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedMatrixBase<double,3,6>"*)
+
+declare void @_Z7unitVecRK4Vec3(%struct.Vec3* noalias sret, %struct.Vec3*)
+
+declare double @_Z4normIdLi3EET_RK11FixedVectorIS0_XT0_ELi0EE(%"struct.FixedMatrix<double,1,3,0,0>"*)
+
+declare void @_ZN12HNodeTorsionC2EPK9HingeNodeRK4Vec3Ri(%struct.HNodeTorsion*, %struct.HingeNode*, %struct.Vec3*, i32*)
+
+declare double @acos(double) nounwind readnone
+
+declare double @atan2(double, double) nounwind readnone
+
+declare void @_ZN21HNodeTranslateRotate2C2EPK9HingeNodeRi(%struct.HNodeTranslateRotate2*, %struct.HingeNode*, i32*)
+
+declare void @_ZN13HingeNodeSpecILi2EE9calcPropsEv(%"struct.HingeNodeSpec<2>"*)
+
+declare void @_ZN11MatrixTools9transposeI11FixedMatrixIdLi2ELi6ELi0ELi0EEEENT_13TransposeTypeERKS3_(%"struct.FixedMatrix<double,2,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,2,6,0,0>"*)
+
+declare void @_Z10blockMat21IdLi1ELi3ELi1EE11FixedMatrixIT_XplT0_T2_EXT1_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT2_EXT1_ELi0ELi0EE(%"struct.FixedMatrix<double,1,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,1,3,0,0>"*, %"struct.FixedMatrix<double,1,3,0,0>"*)
+
+declare void @_Z10blockMat12IdLi2ELi3ELi3EE11FixedMatrixIT_XT0_EXplT1_T2_ELi0ELi0EERKS0_IS1_XT0_EXT1_ELi0ELi0EERKS0_IS1_XT0_EXT2_ELi0ELi0EE(%"struct.FixedMatrix<double,2,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,1,6,0,0>"*, %"struct.FixedMatrix<double,1,6,0,0>"*)
+
+declare void @_ZN13CDSMatrixBaseIdEC2I11FixedMatrixIdLi2ELi6ELi0ELi0EEEERKT_(%"struct.CDSMatrixBase<double>"*, %"struct.FixedMatrix<double,2,6,0,0>"*)
+
+declare void @_ZN11MatrixTools9transposeI11FixedMatrixIdLi6ELi2ELi0ELi0EEEENT_13TransposeTypeERKS3_(%"struct.FixedMatrix<double,2,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,2,6,0,0>"*)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi2EERSoS0_RK15FixedVectorBaseIT_XT0_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedVectorBase<double,2>"*)
+
+declare %"struct.FixedMatrixBase<double,1,3>"* @_ZN15FixedVectorBaseIdLi3EEdVERKd(%"struct.FixedMatrixBase<double,1,3>"*, double*)
+
+declare %"struct.FixedMatrixBase<double,1,3>"* @_ZN15FixedVectorBaseIdLi3EEmIERKS0_(%"struct.FixedMatrixBase<double,1,3>"*, %"struct.FixedMatrixBase<double,1,3>"*)
+
+declare void @_ZN11FixedVectorIdLi3ELi0EE6subColILi6ELi2ELi0ELi0EEES0_RK11FixedMatrixIdXT_EXT0_EXT1_EXT2_EEiii(%"struct.FixedMatrix<double,1,3,0,0>"* noalias sret, %"struct.FixedMatrix<double,2,6,0,0>"*, i32, i32, i32)
+
+declare void @_ZN13HingeNodeSpecILi2EE7calcD_GERK11FixedMatrixIdLi6ELi6ELi0ELi0EE(%"struct.HingeNodeSpec<2>"*, %struct.Mat6*)
+
+declare void @_ZN11MatrixTools7inverseI11FixedMatrixIdLi2ELi2ELi0ELi0EEEET_RKS3_NS_14InverseResultsINS3_10MatrixTypeEEE(%"struct.FixedMatrix<double,2,2,0,0>"* noalias sret, %"struct.FixedMatrix<double,2,2,0,0>"*, %"struct.MatrixTools::InverseResults<FullMatrix<double> >"*)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi2ELi2EERSoS0_RK15FixedMatrixBaseIT_XT0_EXT1_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedMatrixBase<double,2,2>"*)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi2ELi6EERSoS0_RK15FixedMatrixBaseIT_XT0_EXT1_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedMatrixBase<double,2,6>"*)
+
+declare zeroext i8 @_ZNK9CDSStringIcE7matchesEPKcb(%struct.String*, i8*, i8 zeroext)
+
+declare %struct.HingeNode* @_Z9constructP9HingeNodeRKN16InternalDynamics9HingeSpecERi(%struct.HingeNode*, %"struct.InternalDynamics::HingeSpec"*, i32*)
+
+declare void @_ZN9CDSStringIcEC1ERKS0_(%struct.String*, %struct.String*)
+
+declare void @_ZN9CDSStringIcE8downcaseEv(%struct.String*)
+
+declare %struct.String* @_ZN9CDSStringIcEaSEPKc(%struct.String*, i8*)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIP7IVMAtomERSoS2_RK7CDSListIT_E(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %struct.AtomList*)
+
+declare i32 @_ZNK7CDSListIP9HingeNodeE8getIndexERKS1_(%"struct.CDSList<HingeNode*>"*, %struct.HingeNode**)
+
+declare void @_ZN13CDSMatrixBaseIdEC2I11FixedMatrixIdLi6ELi6ELi0ELi0EEEERKT_(%"struct.CDSMatrixBase<double>"*, %struct.Mat6*)
+
+declare void @_ZN11FixedVectorIdLi3ELi0EE6subColILi6ELi6ELi0ELi0EEES0_RK11FixedMatrixIdXT_EXT0_EXT1_EXT2_EEiii(%"struct.FixedMatrix<double,1,3,0,0>"* noalias sret, %struct.Mat6*, i32, i32, i32)
+
+declare void @_ZN13HingeNodeSpecILi6EE7calcD_GERK11FixedMatrixIdLi6ELi6ELi0ELi0EE(%"struct.HingeNodeSpec<6>"*, %struct.Mat6*)
+
+declare void @_ZN11MatrixTools7inverseI11FixedMatrixIdLi6ELi6ELi0ELi0EEEET_RKS3_NS_14InverseResultsINS3_10MatrixTypeEEE(%struct.Mat6* noalias sret, %struct.Mat6*, %"struct.MatrixTools::InverseResults<FullMatrix<double> >"*)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi6ELi6EERSoS0_RK15FixedMatrixBaseIT_XT0_EXT1_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedMatrixBase<double,6,6>"*)
+
+declare void @_ZN13HingeNodeSpecILi5EE9calcPropsEv(%"struct.HingeNodeSpec<5>"*)
+
+declare void @_ZN11MatrixTools9transposeI11FixedMatrixIdLi5ELi6ELi0ELi0EEEENT_13TransposeTypeERKS3_(%"struct.FixedMatrix<double,5,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,5,6,0,0>"*)
+
+declare void @_ZN13CDSMatrixBaseIdEC2I11FixedMatrixIdLi5ELi6ELi0ELi0EEEERKT_(%"struct.CDSMatrixBase<double>"*, %"struct.FixedMatrix<double,5,6,0,0>"*)
+
+declare void @_ZN11MatrixTools9transposeI11FixedMatrixIdLi6ELi5ELi0ELi0EEEENT_13TransposeTypeERKS3_(%"struct.FixedMatrix<double,5,6,0,0>"* noalias sret, %"struct.FixedMatrix<double,5,6,0,0>"*)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi5EERSoS0_RK15FixedVectorBaseIT_XT0_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedVectorBase<double,5>"*)
+
+declare void @_ZN11FixedVectorIdLi3ELi0EE6subColILi6ELi5ELi0ELi0EEES0_RK11FixedMatrixIdXT_EXT0_EXT1_EXT2_EEiii(%"struct.FixedMatrix<double,1,3,0,0>"* noalias sret, %"struct.FixedMatrix<double,5,6,0,0>"*, i32, i32, i32)
+
+declare void @_ZN13HingeNodeSpecILi5EE7calcD_GERK11FixedMatrixIdLi6ELi6ELi0ELi0EE(%"struct.HingeNodeSpec<5>"*, %struct.Mat6*)
+
+declare void @_ZN11MatrixTools7inverseI11FixedMatrixIdLi5ELi5ELi0ELi0EEEET_RKS3_NS_14InverseResultsINS3_10MatrixTypeEEE(%"struct.FixedMatrix<double,5,5,0,0>"* noalias sret, %"struct.FixedMatrix<double,5,5,0,0>"*, %"struct.MatrixTools::InverseResults<FullMatrix<double> >"*)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi5ELi5EERSoS0_RK15FixedMatrixBaseIT_XT0_EXT1_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedMatrixBase<double,5,5>"*)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZlsIdLi5ELi6EERSoS0_RK15FixedMatrixBaseIT_XT0_EXT1_EE(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, %"struct.FixedMatrixBase<double,5,6>"*)
+
+declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
diff --git a/test/CodeGen/X86/negative-sin.ll b/test/CodeGen/X86/negative-sin.ll
new file mode 100644
index 0000000..7842eb8
--- /dev/null
+++ b/test/CodeGen/X86/negative-sin.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -enable-unsafe-fp-math -march=x86-64 | \
+; RUN:   not egrep {addsd|subsd|xor}
+
+declare double @sin(double %f)
+
+define double @foo(double %e)
+{
+  %f = fsub double 0.0, %e
+  %g = call double @sin(double %f) readonly
+  %h = fsub double 0.0, %g
+  ret double %h
+}
diff --git a/test/CodeGen/X86/negative-stride-fptosi-user.ll b/test/CodeGen/X86/negative-stride-fptosi-user.ll
new file mode 100644
index 0000000..332e0b9
--- /dev/null
+++ b/test/CodeGen/X86/negative-stride-fptosi-user.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86-64 | grep cvtsi2sd
+
+; LSR previously eliminated the sitofp by introducing an induction
+; variable which stepped by a bogus ((double)UINT32_C(-1)). It's theoretically
+; possible to eliminate the sitofp using a proper -1.0 step though; this
+; test should be changed if that is done.
+
+define void @foo(i32 %N) nounwind {
+entry:
+  %0 = icmp slt i32 %N, 0                         ; <i1> [#uses=1]
+  br i1 %0, label %bb, label %return
+
+bb:                                               ; preds = %bb, %entry
+  %i.03 = phi i32 [ 0, %entry ], [ %2, %bb ]      ; <i32> [#uses=2]
+  %1 = sitofp i32 %i.03 to double                  ; <double> [#uses=1]
+  tail call void @bar(double %1) nounwind
+  %2 = add nsw i32 %i.03, -1                       ; <i32> [#uses=2]
+  %exitcond = icmp eq i32 %2, %N                  ; <i1> [#uses=1]
+  br i1 %exitcond, label %return, label %bb
+
+return:                                           ; preds = %bb, %entry
+  ret void
+}
+
+declare void @bar(double)
diff --git a/test/CodeGen/X86/negative-subscript.ll b/test/CodeGen/X86/negative-subscript.ll
new file mode 100644
index 0000000..28f7d6b
--- /dev/null
+++ b/test/CodeGen/X86/negative-subscript.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86
+; rdar://6559995
+
+@a = external global [255 x i8*], align 32
+
+define i32 @main() nounwind {
+entry:
+	store i8* bitcast (i8** getelementptr ([255 x i8*]* @a, i32 0, i32 -2147483624) to i8*), i8** getelementptr ([255 x i8*]* @a, i32 0, i32 16), align 32
+	ret i32 0
+}
diff --git a/test/CodeGen/X86/negative_zero.ll b/test/CodeGen/X86/negative_zero.ll
new file mode 100644
index 0000000..29474c2
--- /dev/null
+++ b/test/CodeGen/X86/negative_zero.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3 | grep fchs
+
+
+define double @T() {
+	ret double -1.0   ;; codegen as fld1/fchs, not as a load from cst pool
+}
diff --git a/test/CodeGen/X86/nobt.ll b/test/CodeGen/X86/nobt.ll
new file mode 100644
index 0000000..35090e3
--- /dev/null
+++ b/test/CodeGen/X86/nobt.ll
@@ -0,0 +1,70 @@
+; RUN: llc < %s -march=x86 | not grep btl
+
+; This tests some cases where BT must not be generated.  See also bt.ll.
+; Fixes 20040709-[12].c in gcc testsuite.
+
+define void @test2(i32 %x, i32 %n) nounwind {
+entry:
+        %tmp1 = and i32 %x, 1
+        %tmp2 = urem i32 %tmp1, 15
+	%tmp3 = and i32 %tmp2, 1		; <i32> [#uses=1]
+	%tmp4 = icmp eq i32 %tmp3, %tmp2	; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @test3(i32 %x, i32 %n) nounwind {
+entry:
+        %tmp1 = and i32 %x, 1
+        %tmp2 = urem i32 %tmp1, 15
+	%tmp3 = and i32 %tmp2, 1		; <i32> [#uses=1]
+	%tmp4 = icmp eq i32 %tmp2, %tmp3	; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @test4(i32 %x, i32 %n) nounwind {
+entry:
+        %tmp1 = and i32 %x, 1
+        %tmp2 = urem i32 %tmp1, 15
+	%tmp3 = and i32 %tmp2, 1		; <i32> [#uses=1]
+	%tmp4 = icmp ne i32 %tmp2, %tmp3	; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @test5(i32 %x, i32 %n) nounwind {
+entry:
+        %tmp1 = and i32 %x, 1
+        %tmp2 = urem i32 %tmp1, 15
+	%tmp3 = and i32 %tmp2, 1		; <i32> [#uses=1]
+	%tmp4 = icmp ne i32 %tmp2, %tmp3	; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+declare void @foo()
diff --git a/test/CodeGen/X86/nofence.ll b/test/CodeGen/X86/nofence.ll
new file mode 100644
index 0000000..244d2e9
--- /dev/null
+++ b/test/CodeGen/X86/nofence.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep fence
+
+declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
+
+define void @test() {
+	call void @llvm.memory.barrier( i1 true,  i1 false, i1 false, i1 false, i1 false)
+	call void @llvm.memory.barrier( i1 false, i1 true,  i1 false, i1 false, i1 false)
+	call void @llvm.memory.barrier( i1 false, i1 false, i1 true,  i1 false, i1 false)
+	call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true,  i1 false)
+
+	call void @llvm.memory.barrier( i1 true,  i1 true,  i1 false, i1 false, i1 false)
+	call void @llvm.memory.barrier( i1 true,  i1 false, i1 true,  i1 false, i1 false)
+	call void @llvm.memory.barrier( i1 true,  i1 false, i1 false, i1 true,  i1 false)
+	call void @llvm.memory.barrier( i1 false, i1 true,  i1 true,  i1 false, i1 false)
+	call void @llvm.memory.barrier( i1 false, i1 true,  i1 false, i1 true,  i1 false)
+	call void @llvm.memory.barrier( i1 false, i1 false, i1 true,  i1 true,  i1 false)
+
+	call void @llvm.memory.barrier( i1 true,  i1 true,  i1 true,  i1 false,  i1 false)
+	call void @llvm.memory.barrier( i1 true,  i1 true,  i1 false,  i1 true,  i1 false)
+	call void @llvm.memory.barrier( i1 true,  i1 false,  i1 true,  i1 true,  i1 false)
+	call void @llvm.memory.barrier( i1 false,  i1 true,  i1 true,  i1 true,  i1 false)
+
+
+	call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 true , i1 false)
+	call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 false , i1 false)
+	ret void
+}
diff --git a/test/CodeGen/X86/nosse-error1.ll b/test/CodeGen/X86/nosse-error1.ll
new file mode 100644
index 0000000..16cbb73
--- /dev/null
+++ b/test/CodeGen/X86/nosse-error1.ll
@@ -0,0 +1,33 @@
+; RUN: llvm-as < %s > %t1
+; RUN: not llc -march=x86-64 -mattr=-sse < %t1 2> %t2
+; RUN: grep "SSE register return with SSE disabled" %t2
+; RUN: llc -march=x86-64 < %t1 | grep xmm
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+@f = external global float		; <float*> [#uses=4]
+@d = external global double		; <double*> [#uses=4]
+
+define void @test() nounwind {
+entry:
+	%0 = load float* @f, align 4		; <float> [#uses=1]
+	%1 = tail call float @foo1(float %0) nounwind		; <float> [#uses=1]
+	store float %1, float* @f, align 4
+	%2 = load double* @d, align 8		; <double> [#uses=1]
+	%3 = tail call double @foo2(double %2) nounwind		; <double> [#uses=1]
+	store double %3, double* @d, align 8
+	%4 = load float* @f, align 4		; <float> [#uses=1]
+	%5 = tail call float @foo3(float %4) nounwind		; <float> [#uses=1]
+	store float %5, float* @f, align 4
+	%6 = load double* @d, align 8		; <double> [#uses=1]
+	%7 = tail call double @foo4(double %6) nounwind		; <double> [#uses=1]
+	store double %7, double* @d, align 8
+	ret void
+}
+
+declare float @foo1(float)
+
+declare double @foo2(double)
+
+declare float @foo3(float)
+
+declare double @foo4(double)
diff --git a/test/CodeGen/X86/nosse-error2.ll b/test/CodeGen/X86/nosse-error2.ll
new file mode 100644
index 0000000..45a5eaf
--- /dev/null
+++ b/test/CodeGen/X86/nosse-error2.ll
@@ -0,0 +1,33 @@
+; RUN: llvm-as < %s > %t1
+; RUN: not llc -march=x86 -mcpu=i686 -mattr=-sse < %t1 2> %t2
+; RUN: grep "SSE register return with SSE disabled" %t2
+; RUN: llc -march=x86 -mcpu=i686 -mattr=+sse < %t1 | grep xmm
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-unknown-linux-gnu"
+@f = external global float		; <float*> [#uses=4]
+@d = external global double		; <double*> [#uses=4]
+
+define void @test() nounwind {
+entry:
+	%0 = load float* @f, align 4		; <float> [#uses=1]
+	%1 = tail call inreg float @foo1(float inreg %0) nounwind		; <float> [#uses=1]
+	store float %1, float* @f, align 4
+	%2 = load double* @d, align 8		; <double> [#uses=1]
+	%3 = tail call inreg double @foo2(double inreg %2) nounwind		; <double> [#uses=1]
+	store double %3, double* @d, align 8
+	%4 = load float* @f, align 4		; <float> [#uses=1]
+	%5 = tail call inreg float @foo3(float inreg %4) nounwind		; <float> [#uses=1]
+	store float %5, float* @f, align 4
+	%6 = load double* @d, align 8		; <double> [#uses=1]
+	%7 = tail call inreg double @foo4(double inreg %6) nounwind		; <double> [#uses=1]
+	store double %7, double* @d, align 8
+	ret void
+}
+
+declare inreg float @foo1(float inreg)
+
+declare inreg double @foo2(double inreg)
+
+declare inreg float @foo3(float inreg)
+
+declare inreg double @foo4(double inreg)
diff --git a/test/CodeGen/X86/nosse-varargs.ll b/test/CodeGen/X86/nosse-varargs.ll
new file mode 100644
index 0000000..e6da0ab
--- /dev/null
+++ b/test/CodeGen/X86/nosse-varargs.ll
@@ -0,0 +1,46 @@
+; RUN: llvm-as < %s > %t
+; RUN: llc -march=x86-64 -mattr=-sse < %t | not grep xmm
+; RUN: llc -march=x86-64 < %t | grep xmm
+; PR3403
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+	%struct.__va_list_tag = type { i32, i32, i8*, i8* }
+
+define i32 @foo(float %a, i8* nocapture %fmt, ...) nounwind {
+entry:
+	%ap = alloca [1 x %struct.__va_list_tag], align 8		; <[1 x %struct.__va_list_tag]*> [#uses=4]
+	%ap12 = bitcast [1 x %struct.__va_list_tag]* %ap to i8*		; <i8*> [#uses=2]
+	call void @llvm.va_start(i8* %ap12)
+	%0 = getelementptr [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 0		; <i32*> [#uses=2]
+	%1 = load i32* %0, align 8		; <i32> [#uses=3]
+	%2 = icmp ult i32 %1, 48		; <i1> [#uses=1]
+	br i1 %2, label %bb, label %bb3
+
+bb:		; preds = %entry
+	%3 = getelementptr [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 3		; <i8**> [#uses=1]
+	%4 = load i8** %3, align 8		; <i8*> [#uses=1]
+	%5 = inttoptr i32 %1 to i8*		; <i8*> [#uses=1]
+	%6 = ptrtoint i8* %5 to i64		; <i64> [#uses=1]
+	%ctg2 = getelementptr i8* %4, i64 %6		; <i8*> [#uses=1]
+	%7 = add i32 %1, 8		; <i32> [#uses=1]
+	store i32 %7, i32* %0, align 8
+	br label %bb4
+
+bb3:		; preds = %entry
+	%8 = getelementptr [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 2		; <i8**> [#uses=2]
+	%9 = load i8** %8, align 8		; <i8*> [#uses=2]
+	%10 = getelementptr i8* %9, i64 8		; <i8*> [#uses=1]
+	store i8* %10, i8** %8, align 8
+	br label %bb4
+
+bb4:		; preds = %bb3, %bb
+	%addr.0.0 = phi i8* [ %ctg2, %bb ], [ %9, %bb3 ]		; <i8*> [#uses=1]
+	%11 = bitcast i8* %addr.0.0 to i32*		; <i32*> [#uses=1]
+	%12 = load i32* %11, align 4		; <i32> [#uses=1]
+	call void @llvm.va_end(i8* %ap12)
+	ret i32 %12
+}
+
+declare void @llvm.va_start(i8*) nounwind
+
+declare void @llvm.va_end(i8*) nounwind
diff --git a/test/CodeGen/X86/object-size.ll b/test/CodeGen/X86/object-size.ll
new file mode 100644
index 0000000..eed3cfc
--- /dev/null
+++ b/test/CodeGen/X86/object-size.ll
@@ -0,0 +1,55 @@
+; RUN: llc -O0 < %s -march=x86-64 | FileCheck %s -check-prefix=X64
+
+; ModuleID = 'ts.c'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0"
+
+@p = common global i8* null, align 8              ; <i8**> [#uses=4]
+@.str = private constant [3 x i8] c"Hi\00"        ; <[3 x i8]*> [#uses=1]
+
+define void @bar() nounwind ssp {
+entry:
+  %tmp = load i8** @p                             ; <i8*> [#uses=1]
+  %0 = call i64 @llvm.objectsize.i64(i8* %tmp, i1 0) ; <i64> [#uses=1]
+  %cmp = icmp ne i64 %0, -1                       ; <i1> [#uses=1]
+; X64: movq    $-1, %rax
+; X64: cmpq    $-1, %rax
+  br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %entry
+  %tmp1 = load i8** @p                            ; <i8*> [#uses=1]
+  %tmp2 = load i8** @p                            ; <i8*> [#uses=1]
+  %1 = call i64 @llvm.objectsize.i64(i8* %tmp2, i1 1) ; <i64> [#uses=1]
+  %call = call i8* @__strcpy_chk(i8* %tmp1, i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i64 %1) ssp ; <i8*> [#uses=1]
+  br label %cond.end
+
+cond.false:                                       ; preds = %entry
+  %tmp3 = load i8** @p                            ; <i8*> [#uses=1]
+  %call4 = call i8* @__inline_strcpy_chk(i8* %tmp3, i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0)) ssp ; <i8*> [#uses=1]
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i8* [ %call, %cond.true ], [ %call4, %cond.false ] ; <i8*> [#uses=0]
+  ret void
+}
+
+declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readonly
+
+declare i8* @__strcpy_chk(i8*, i8*, i64) ssp
+
+define internal i8* @__inline_strcpy_chk(i8* %__dest, i8* %__src) nounwind ssp {
+entry:
+  %retval = alloca i8*                            ; <i8**> [#uses=2]
+  %__dest.addr = alloca i8*                       ; <i8**> [#uses=3]
+  %__src.addr = alloca i8*                        ; <i8**> [#uses=2]
+  store i8* %__dest, i8** %__dest.addr
+  store i8* %__src, i8** %__src.addr
+  %tmp = load i8** %__dest.addr                   ; <i8*> [#uses=1]
+  %tmp1 = load i8** %__src.addr                   ; <i8*> [#uses=1]
+  %tmp2 = load i8** %__dest.addr                  ; <i8*> [#uses=1]
+  %0 = call i64 @llvm.objectsize.i64(i8* %tmp2, i1 1) ; <i64> [#uses=1]
+  %call = call i8* @__strcpy_chk(i8* %tmp, i8* %tmp1, i64 %0) ssp ; <i8*> [#uses=1]
+  store i8* %call, i8** %retval
+  %1 = load i8** %retval                          ; <i8*> [#uses=1]
+  ret i8* %1
+}
diff --git a/test/CodeGen/X86/omit-label.ll b/test/CodeGen/X86/omit-label.ll
new file mode 100644
index 0000000..0ec03eb
--- /dev/null
+++ b/test/CodeGen/X86/omit-label.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -asm-verbose=false -mtriple=x86_64-linux-gnu | FileCheck %s
+; PR4126
+; PR4732
+
+; Don't omit these labels' definitions.
+
+; CHECK: bux:
+; CHECK: LBB1_1:
+
+define void @bux(i32 %p_53) nounwind optsize {
+entry:
+	%0 = icmp eq i32 %p_53, 0		; <i1> [#uses=1]
+	%1 = icmp sgt i32 %p_53, 0		; <i1> [#uses=1]
+	%or.cond = and i1 %0, %1		; <i1> [#uses=1]
+	br i1 %or.cond, label %bb.i, label %bb3
+
+bb.i:		; preds = %entry
+	%2 = add i32 %p_53, 1		; <i32> [#uses=1]
+	%3 = icmp slt i32 %2, 0		; <i1> [#uses=0]
+	br label %bb3
+
+bb3:		; preds = %bb.i, %entry
+	%4 = tail call i32 (...)* @baz(i32 0) nounwind		; <i32> [#uses=0]
+	ret void
+}
+
+declare i32 @baz(...)
+
+; Don't omit this label in the assembly output.
+; CHECK: int321:
+; CHECK: LBB2_1
+; CHECK: LBB2_1
+; CHECK: LBB2_1:
+
+define void @int321(i8 signext %p_103, i32 %uint8p_104) nounwind readnone {
+entry:
+  %tobool = icmp eq i8 %p_103, 0                  ; <i1> [#uses=1]
+  %cmp.i = icmp sgt i8 %p_103, 0                  ; <i1> [#uses=1]
+  %or.cond = and i1 %tobool, %cmp.i               ; <i1> [#uses=1]
+  br i1 %or.cond, label %land.end.i, label %for.cond.preheader
+
+land.end.i:                                       ; preds = %entry
+  %conv3.i = sext i8 %p_103 to i32                ; <i32> [#uses=1]
+  %div.i = sdiv i32 1, %conv3.i                   ; <i32> [#uses=1]
+  %tobool.i = icmp eq i32 %div.i, -2147483647     ; <i1> [#uses=0]
+  br label %for.cond.preheader
+
+for.cond.preheader:                               ; preds = %land.end.i, %entry
+  %cmp = icmp sgt i8 %p_103, 1                    ; <i1> [#uses=1]
+  br i1 %cmp, label %for.end.split, label %for.cond
+
+for.cond:                                         ; preds = %for.cond.preheader, %for.cond
+  br label %for.cond
+
+for.end.split:                                    ; preds = %for.cond.preheader
+  ret void
+}
diff --git a/test/CodeGen/X86/opt-ext-uses.ll b/test/CodeGen/X86/opt-ext-uses.ll
new file mode 100644
index 0000000..fa2aef5
--- /dev/null
+++ b/test/CodeGen/X86/opt-ext-uses.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86 | grep movw | count 1
+
+define i16 @t() signext  {
+entry:
+        %tmp180 = load i16* null, align 2               ; <i16> [#uses=3]
+        %tmp180181 = sext i16 %tmp180 to i32            ; <i32> [#uses=1]
+        %tmp182 = add i16 %tmp180, 10
+        %tmp185 = icmp slt i16 %tmp182, 0               ; <i1> [#uses=1]
+        br i1 %tmp185, label %cond_true188, label %cond_next245
+
+cond_true188:           ; preds = %entry
+        %tmp195196 = trunc i16 %tmp180 to i8            ; <i8> [#uses=0]
+        ret i16 %tmp180
+
+cond_next245:           ; preds = %entry
+        %tmp256 = and i32 %tmp180181, 15                ; <i32> [#uses=0]
+        %tmp3 = trunc i32 %tmp256 to i16
+        ret i16 %tmp3
+}
diff --git a/test/CodeGen/X86/optimize-max-0.ll b/test/CodeGen/X86/optimize-max-0.ll
new file mode 100644
index 0000000..162c7a5
--- /dev/null
+++ b/test/CodeGen/X86/optimize-max-0.ll
@@ -0,0 +1,461 @@
+; RUN: llc < %s -march=x86 | not grep cmov
+
+; LSR should be able to eliminate the max computations by
+; making the loops use slt/ult comparisons instead of ne comparisons.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9"
+
+define void @foo(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
+entry:
+	%0 = mul i32 %x, %w		; <i32> [#uses=2]
+	%1 = mul i32 %x, %w		; <i32> [#uses=1]
+	%2 = sdiv i32 %1, 4		; <i32> [#uses=1]
+	%.sum2 = add i32 %2, %0		; <i32> [#uses=2]
+	%cond = icmp eq i32 %d, 1		; <i1> [#uses=1]
+	br i1 %cond, label %bb29, label %bb10.preheader
+
+bb10.preheader:		; preds = %entry
+	%3 = icmp sgt i32 %x, 0		; <i1> [#uses=1]
+	br i1 %3, label %bb.nph9, label %bb18.loopexit
+
+bb.nph7:		; preds = %bb7.preheader
+	%4 = mul i32 %y.08, %w		; <i32> [#uses=1]
+	%5 = mul i32 %y.08, %s		; <i32> [#uses=1]
+	%6 = add i32 %5, 1		; <i32> [#uses=1]
+	%tmp8 = icmp sgt i32 1, %w		; <i1> [#uses=1]
+	%smax9 = select i1 %tmp8, i32 1, i32 %w		; <i32> [#uses=1]
+	br label %bb6
+
+bb6:		; preds = %bb7, %bb.nph7
+	%x.06 = phi i32 [ 0, %bb.nph7 ], [ %indvar.next7, %bb7 ]		; <i32> [#uses=3]
+	%7 = add i32 %x.06, %4		; <i32> [#uses=1]
+	%8 = shl i32 %x.06, 1		; <i32> [#uses=1]
+	%9 = add i32 %6, %8		; <i32> [#uses=1]
+	%10 = getelementptr i8* %r, i32 %9		; <i8*> [#uses=1]
+	%11 = load i8* %10, align 1		; <i8> [#uses=1]
+	%12 = getelementptr i8* %j, i32 %7		; <i8*> [#uses=1]
+	store i8 %11, i8* %12, align 1
+	br label %bb7
+
+bb7:		; preds = %bb6
+	%indvar.next7 = add i32 %x.06, 1		; <i32> [#uses=2]
+	%exitcond10 = icmp ne i32 %indvar.next7, %smax9		; <i1> [#uses=1]
+	br i1 %exitcond10, label %bb6, label %bb7.bb9_crit_edge
+
+bb7.bb9_crit_edge:		; preds = %bb7
+	br label %bb9
+
+bb9:		; preds = %bb7.preheader, %bb7.bb9_crit_edge
+	br label %bb10
+
+bb10:		; preds = %bb9
+	%indvar.next11 = add i32 %y.08, 1		; <i32> [#uses=2]
+	%exitcond12 = icmp ne i32 %indvar.next11, %x		; <i1> [#uses=1]
+	br i1 %exitcond12, label %bb7.preheader, label %bb10.bb18.loopexit_crit_edge
+
+bb10.bb18.loopexit_crit_edge:		; preds = %bb10
+	br label %bb10.bb18.loopexit_crit_edge.split
+
+bb10.bb18.loopexit_crit_edge.split:		; preds = %bb.nph9, %bb10.bb18.loopexit_crit_edge
+	br label %bb18.loopexit
+
+bb.nph9:		; preds = %bb10.preheader
+	%13 = icmp sgt i32 %w, 0		; <i1> [#uses=1]
+	br i1 %13, label %bb.nph9.split, label %bb10.bb18.loopexit_crit_edge.split
+
+bb.nph9.split:		; preds = %bb.nph9
+	br label %bb7.preheader
+
+bb7.preheader:		; preds = %bb.nph9.split, %bb10
+	%y.08 = phi i32 [ 0, %bb.nph9.split ], [ %indvar.next11, %bb10 ]		; <i32> [#uses=3]
+	br i1 true, label %bb.nph7, label %bb9
+
+bb.nph5:		; preds = %bb18.loopexit
+	%14 = sdiv i32 %w, 2		; <i32> [#uses=1]
+	%15 = icmp slt i32 %w, 2		; <i1> [#uses=1]
+	%16 = sdiv i32 %x, 2		; <i32> [#uses=2]
+	br i1 %15, label %bb18.bb20_crit_edge.split, label %bb.nph5.split
+
+bb.nph5.split:		; preds = %bb.nph5
+	%tmp2 = icmp sgt i32 1, %16		; <i1> [#uses=1]
+	%smax3 = select i1 %tmp2, i32 1, i32 %16		; <i32> [#uses=1]
+	br label %bb13
+
+bb13:		; preds = %bb18, %bb.nph5.split
+	%y.14 = phi i32 [ 0, %bb.nph5.split ], [ %indvar.next1, %bb18 ]		; <i32> [#uses=4]
+	%17 = mul i32 %14, %y.14		; <i32> [#uses=2]
+	%18 = shl i32 %y.14, 1		; <i32> [#uses=1]
+	%19 = srem i32 %y.14, 2		; <i32> [#uses=1]
+	%20 = add i32 %19, %18		; <i32> [#uses=1]
+	%21 = mul i32 %20, %s		; <i32> [#uses=2]
+	br i1 true, label %bb.nph3, label %bb17
+
+bb.nph3:		; preds = %bb13
+	%22 = add i32 %17, %0		; <i32> [#uses=1]
+	%23 = add i32 %17, %.sum2		; <i32> [#uses=1]
+	%24 = sdiv i32 %w, 2		; <i32> [#uses=2]
+	%tmp = icmp sgt i32 1, %24		; <i1> [#uses=1]
+	%smax = select i1 %tmp, i32 1, i32 %24		; <i32> [#uses=1]
+	br label %bb14
+
+bb14:		; preds = %bb15, %bb.nph3
+	%x.12 = phi i32 [ 0, %bb.nph3 ], [ %indvar.next, %bb15 ]		; <i32> [#uses=5]
+	%25 = shl i32 %x.12, 2		; <i32> [#uses=1]
+	%26 = add i32 %25, %21		; <i32> [#uses=1]
+	%27 = getelementptr i8* %r, i32 %26		; <i8*> [#uses=1]
+	%28 = load i8* %27, align 1		; <i8> [#uses=1]
+	%.sum = add i32 %22, %x.12		; <i32> [#uses=1]
+	%29 = getelementptr i8* %j, i32 %.sum		; <i8*> [#uses=1]
+	store i8 %28, i8* %29, align 1
+	%30 = shl i32 %x.12, 2		; <i32> [#uses=1]
+	%31 = or i32 %30, 2		; <i32> [#uses=1]
+	%32 = add i32 %31, %21		; <i32> [#uses=1]
+	%33 = getelementptr i8* %r, i32 %32		; <i8*> [#uses=1]
+	%34 = load i8* %33, align 1		; <i8> [#uses=1]
+	%.sum6 = add i32 %23, %x.12		; <i32> [#uses=1]
+	%35 = getelementptr i8* %j, i32 %.sum6		; <i8*> [#uses=1]
+	store i8 %34, i8* %35, align 1
+	br label %bb15
+
+bb15:		; preds = %bb14
+	%indvar.next = add i32 %x.12, 1		; <i32> [#uses=2]
+	%exitcond = icmp ne i32 %indvar.next, %smax		; <i1> [#uses=1]
+	br i1 %exitcond, label %bb14, label %bb15.bb17_crit_edge
+
+bb15.bb17_crit_edge:		; preds = %bb15
+	br label %bb17
+
+bb17:		; preds = %bb15.bb17_crit_edge, %bb13
+	br label %bb18
+
+bb18.loopexit:		; preds = %bb10.bb18.loopexit_crit_edge.split, %bb10.preheader
+	%36 = icmp slt i32 %x, 2		; <i1> [#uses=1]
+	br i1 %36, label %bb20, label %bb.nph5
+
+bb18:		; preds = %bb17
+	%indvar.next1 = add i32 %y.14, 1		; <i32> [#uses=2]
+	%exitcond4 = icmp ne i32 %indvar.next1, %smax3		; <i1> [#uses=1]
+	br i1 %exitcond4, label %bb13, label %bb18.bb20_crit_edge
+
+bb18.bb20_crit_edge:		; preds = %bb18
+	br label %bb18.bb20_crit_edge.split
+
+bb18.bb20_crit_edge.split:		; preds = %bb18.bb20_crit_edge, %bb.nph5
+	br label %bb20
+
+bb20:		; preds = %bb18.bb20_crit_edge.split, %bb18.loopexit
+	switch i32 %d, label %return [
+		i32 3, label %bb22
+		i32 1, label %bb29
+	]
+
+bb22:		; preds = %bb20
+	%37 = mul i32 %x, %w		; <i32> [#uses=1]
+	%38 = sdiv i32 %37, 4		; <i32> [#uses=1]
+	%.sum3 = add i32 %38, %.sum2		; <i32> [#uses=2]
+	%39 = add i32 %x, 15		; <i32> [#uses=1]
+	%40 = and i32 %39, -16		; <i32> [#uses=1]
+	%41 = add i32 %w, 15		; <i32> [#uses=1]
+	%42 = and i32 %41, -16		; <i32> [#uses=1]
+	%43 = mul i32 %40, %s		; <i32> [#uses=1]
+	%44 = icmp sgt i32 %x, 0		; <i1> [#uses=1]
+	br i1 %44, label %bb.nph, label %bb26
+
+bb.nph:		; preds = %bb22
+	br label %bb23
+
+bb23:		; preds = %bb24, %bb.nph
+	%y.21 = phi i32 [ 0, %bb.nph ], [ %indvar.next5, %bb24 ]		; <i32> [#uses=3]
+	%45 = mul i32 %y.21, %42		; <i32> [#uses=1]
+	%.sum1 = add i32 %45, %43		; <i32> [#uses=1]
+	%46 = getelementptr i8* %r, i32 %.sum1		; <i8*> [#uses=1]
+	%47 = mul i32 %y.21, %w		; <i32> [#uses=1]
+	%.sum5 = add i32 %47, %.sum3		; <i32> [#uses=1]
+	%48 = getelementptr i8* %j, i32 %.sum5		; <i8*> [#uses=1]
+	tail call void @llvm.memcpy.i32(i8* %48, i8* %46, i32 %w, i32 1)
+	br label %bb24
+
+bb24:		; preds = %bb23
+	%indvar.next5 = add i32 %y.21, 1		; <i32> [#uses=2]
+	%exitcond6 = icmp ne i32 %indvar.next5, %x		; <i1> [#uses=1]
+	br i1 %exitcond6, label %bb23, label %bb24.bb26_crit_edge
+
+bb24.bb26_crit_edge:		; preds = %bb24
+	br label %bb26
+
+bb26:		; preds = %bb24.bb26_crit_edge, %bb22
+	%49 = mul i32 %x, %w		; <i32> [#uses=1]
+	%.sum4 = add i32 %.sum3, %49		; <i32> [#uses=1]
+	%50 = getelementptr i8* %j, i32 %.sum4		; <i8*> [#uses=1]
+	%51 = mul i32 %x, %w		; <i32> [#uses=1]
+	%52 = sdiv i32 %51, 2		; <i32> [#uses=1]
+	tail call void @llvm.memset.i32(i8* %50, i8 -128, i32 %52, i32 1)
+	ret void
+
+bb29:		; preds = %bb20, %entry
+	%53 = add i32 %w, 15		; <i32> [#uses=1]
+	%54 = and i32 %53, -16		; <i32> [#uses=1]
+	%55 = icmp sgt i32 %x, 0		; <i1> [#uses=1]
+	br i1 %55, label %bb.nph11, label %bb33
+
+bb.nph11:		; preds = %bb29
+	br label %bb30
+
+bb30:		; preds = %bb31, %bb.nph11
+	%y.310 = phi i32 [ 0, %bb.nph11 ], [ %indvar.next13, %bb31 ]		; <i32> [#uses=3]
+	%56 = mul i32 %y.310, %54		; <i32> [#uses=1]
+	%57 = getelementptr i8* %r, i32 %56		; <i8*> [#uses=1]
+	%58 = mul i32 %y.310, %w		; <i32> [#uses=1]
+	%59 = getelementptr i8* %j, i32 %58		; <i8*> [#uses=1]
+	tail call void @llvm.memcpy.i32(i8* %59, i8* %57, i32 %w, i32 1)
+	br label %bb31
+
+bb31:		; preds = %bb30
+	%indvar.next13 = add i32 %y.310, 1		; <i32> [#uses=2]
+	%exitcond14 = icmp ne i32 %indvar.next13, %x		; <i1> [#uses=1]
+	br i1 %exitcond14, label %bb30, label %bb31.bb33_crit_edge
+
+bb31.bb33_crit_edge:		; preds = %bb31
+	br label %bb33
+
+bb33:		; preds = %bb31.bb33_crit_edge, %bb29
+	%60 = mul i32 %x, %w		; <i32> [#uses=1]
+	%61 = getelementptr i8* %j, i32 %60		; <i8*> [#uses=1]
+	%62 = mul i32 %x, %w		; <i32> [#uses=1]
+	%63 = sdiv i32 %62, 2		; <i32> [#uses=1]
+	tail call void @llvm.memset.i32(i8* %61, i8 -128, i32 %63, i32 1)
+	ret void
+
+return:		; preds = %bb20
+	ret void
+}
+
+define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
+entry:
+	%0 = mul i32 %x, %w		; <i32> [#uses=2]
+	%1 = mul i32 %x, %w		; <i32> [#uses=1]
+	%2 = udiv i32 %1, 4		; <i32> [#uses=1]
+	%.sum2 = add i32 %2, %0		; <i32> [#uses=2]
+	%cond = icmp eq i32 %d, 1		; <i1> [#uses=1]
+	br i1 %cond, label %bb29, label %bb10.preheader
+
+bb10.preheader:		; preds = %entry
+	%3 = icmp ne i32 %x, 0		; <i1> [#uses=1]
+	br i1 %3, label %bb.nph9, label %bb18.loopexit
+
+bb.nph7:		; preds = %bb7.preheader
+	%4 = mul i32 %y.08, %w		; <i32> [#uses=1]
+	%5 = mul i32 %y.08, %s		; <i32> [#uses=1]
+	%6 = add i32 %5, 1		; <i32> [#uses=1]
+	%tmp8 = icmp ugt i32 1, %w		; <i1> [#uses=1]
+	%smax9 = select i1 %tmp8, i32 1, i32 %w		; <i32> [#uses=1]
+	br label %bb6
+
+bb6:		; preds = %bb7, %bb.nph7
+	%x.06 = phi i32 [ 0, %bb.nph7 ], [ %indvar.next7, %bb7 ]		; <i32> [#uses=3]
+	%7 = add i32 %x.06, %4		; <i32> [#uses=1]
+	%8 = shl i32 %x.06, 1		; <i32> [#uses=1]
+	%9 = add i32 %6, %8		; <i32> [#uses=1]
+	%10 = getelementptr i8* %r, i32 %9		; <i8*> [#uses=1]
+	%11 = load i8* %10, align 1		; <i8> [#uses=1]
+	%12 = getelementptr i8* %j, i32 %7		; <i8*> [#uses=1]
+	store i8 %11, i8* %12, align 1
+	br label %bb7
+
+bb7:		; preds = %bb6
+	%indvar.next7 = add i32 %x.06, 1		; <i32> [#uses=2]
+	%exitcond10 = icmp ne i32 %indvar.next7, %smax9		; <i1> [#uses=1]
+	br i1 %exitcond10, label %bb6, label %bb7.bb9_crit_edge
+
+bb7.bb9_crit_edge:		; preds = %bb7
+	br label %bb9
+
+bb9:		; preds = %bb7.preheader, %bb7.bb9_crit_edge
+	br label %bb10
+
+bb10:		; preds = %bb9
+	%indvar.next11 = add i32 %y.08, 1		; <i32> [#uses=2]
+	%exitcond12 = icmp ne i32 %indvar.next11, %x		; <i1> [#uses=1]
+	br i1 %exitcond12, label %bb7.preheader, label %bb10.bb18.loopexit_crit_edge
+
+bb10.bb18.loopexit_crit_edge:		; preds = %bb10
+	br label %bb10.bb18.loopexit_crit_edge.split
+
+bb10.bb18.loopexit_crit_edge.split:		; preds = %bb.nph9, %bb10.bb18.loopexit_crit_edge
+	br label %bb18.loopexit
+
+bb.nph9:		; preds = %bb10.preheader
+	%13 = icmp ugt i32 %w, 0		; <i1> [#uses=1]
+	br i1 %13, label %bb.nph9.split, label %bb10.bb18.loopexit_crit_edge.split
+
+bb.nph9.split:		; preds = %bb.nph9
+	br label %bb7.preheader
+
+bb7.preheader:		; preds = %bb.nph9.split, %bb10
+	%y.08 = phi i32 [ 0, %bb.nph9.split ], [ %indvar.next11, %bb10 ]		; <i32> [#uses=3]
+	br i1 true, label %bb.nph7, label %bb9
+
+bb.nph5:		; preds = %bb18.loopexit
+	%14 = udiv i32 %w, 2		; <i32> [#uses=1]
+	%15 = icmp ult i32 %w, 2		; <i1> [#uses=1]
+	%16 = udiv i32 %x, 2		; <i32> [#uses=2]
+	br i1 %15, label %bb18.bb20_crit_edge.split, label %bb.nph5.split
+
+bb.nph5.split:		; preds = %bb.nph5
+	%tmp2 = icmp ugt i32 1, %16		; <i1> [#uses=1]
+	%smax3 = select i1 %tmp2, i32 1, i32 %16		; <i32> [#uses=1]
+	br label %bb13
+
+bb13:		; preds = %bb18, %bb.nph5.split
+	%y.14 = phi i32 [ 0, %bb.nph5.split ], [ %indvar.next1, %bb18 ]		; <i32> [#uses=4]
+	%17 = mul i32 %14, %y.14		; <i32> [#uses=2]
+	%18 = shl i32 %y.14, 1		; <i32> [#uses=1]
+	%19 = urem i32 %y.14, 2		; <i32> [#uses=1]
+	%20 = add i32 %19, %18		; <i32> [#uses=1]
+	%21 = mul i32 %20, %s		; <i32> [#uses=2]
+	br i1 true, label %bb.nph3, label %bb17
+
+bb.nph3:		; preds = %bb13
+	%22 = add i32 %17, %0		; <i32> [#uses=1]
+	%23 = add i32 %17, %.sum2		; <i32> [#uses=1]
+	%24 = udiv i32 %w, 2		; <i32> [#uses=2]
+	%tmp = icmp ugt i32 1, %24		; <i1> [#uses=1]
+	%smax = select i1 %tmp, i32 1, i32 %24		; <i32> [#uses=1]
+	br label %bb14
+
+bb14:		; preds = %bb15, %bb.nph3
+	%x.12 = phi i32 [ 0, %bb.nph3 ], [ %indvar.next, %bb15 ]		; <i32> [#uses=5]
+	%25 = shl i32 %x.12, 2		; <i32> [#uses=1]
+	%26 = add i32 %25, %21		; <i32> [#uses=1]
+	%27 = getelementptr i8* %r, i32 %26		; <i8*> [#uses=1]
+	%28 = load i8* %27, align 1		; <i8> [#uses=1]
+	%.sum = add i32 %22, %x.12		; <i32> [#uses=1]
+	%29 = getelementptr i8* %j, i32 %.sum		; <i8*> [#uses=1]
+	store i8 %28, i8* %29, align 1
+	%30 = shl i32 %x.12, 2		; <i32> [#uses=1]
+	%31 = or i32 %30, 2		; <i32> [#uses=1]
+	%32 = add i32 %31, %21		; <i32> [#uses=1]
+	%33 = getelementptr i8* %r, i32 %32		; <i8*> [#uses=1]
+	%34 = load i8* %33, align 1		; <i8> [#uses=1]
+	%.sum6 = add i32 %23, %x.12		; <i32> [#uses=1]
+	%35 = getelementptr i8* %j, i32 %.sum6		; <i8*> [#uses=1]
+	store i8 %34, i8* %35, align 1
+	br label %bb15
+
+bb15:		; preds = %bb14
+	%indvar.next = add i32 %x.12, 1		; <i32> [#uses=2]
+	%exitcond = icmp ne i32 %indvar.next, %smax		; <i1> [#uses=1]
+	br i1 %exitcond, label %bb14, label %bb15.bb17_crit_edge
+
+bb15.bb17_crit_edge:		; preds = %bb15
+	br label %bb17
+
+bb17:		; preds = %bb15.bb17_crit_edge, %bb13
+	br label %bb18
+
+bb18.loopexit:		; preds = %bb10.bb18.loopexit_crit_edge.split, %bb10.preheader
+	%36 = icmp ult i32 %x, 2		; <i1> [#uses=1]
+	br i1 %36, label %bb20, label %bb.nph5
+
+bb18:		; preds = %bb17
+	%indvar.next1 = add i32 %y.14, 1		; <i32> [#uses=2]
+	%exitcond4 = icmp ne i32 %indvar.next1, %smax3		; <i1> [#uses=1]
+	br i1 %exitcond4, label %bb13, label %bb18.bb20_crit_edge
+
+bb18.bb20_crit_edge:		; preds = %bb18
+	br label %bb18.bb20_crit_edge.split
+
+bb18.bb20_crit_edge.split:		; preds = %bb18.bb20_crit_edge, %bb.nph5
+	br label %bb20
+
+bb20:		; preds = %bb18.bb20_crit_edge.split, %bb18.loopexit
+	switch i32 %d, label %return [
+		i32 3, label %bb22
+		i32 1, label %bb29
+	]
+
+bb22:		; preds = %bb20
+	%37 = mul i32 %x, %w		; <i32> [#uses=1]
+	%38 = udiv i32 %37, 4		; <i32> [#uses=1]
+	%.sum3 = add i32 %38, %.sum2		; <i32> [#uses=2]
+	%39 = add i32 %x, 15		; <i32> [#uses=1]
+	%40 = and i32 %39, -16		; <i32> [#uses=1]
+	%41 = add i32 %w, 15		; <i32> [#uses=1]
+	%42 = and i32 %41, -16		; <i32> [#uses=1]
+	%43 = mul i32 %40, %s		; <i32> [#uses=1]
+	%44 = icmp ugt i32 %x, 0		; <i1> [#uses=1]
+	br i1 %44, label %bb.nph, label %bb26
+
+bb.nph:		; preds = %bb22
+	br label %bb23
+
+bb23:		; preds = %bb24, %bb.nph
+	%y.21 = phi i32 [ 0, %bb.nph ], [ %indvar.next5, %bb24 ]		; <i32> [#uses=3]
+	%45 = mul i32 %y.21, %42		; <i32> [#uses=1]
+	%.sum1 = add i32 %45, %43		; <i32> [#uses=1]
+	%46 = getelementptr i8* %r, i32 %.sum1		; <i8*> [#uses=1]
+	%47 = mul i32 %y.21, %w		; <i32> [#uses=1]
+	%.sum5 = add i32 %47, %.sum3		; <i32> [#uses=1]
+	%48 = getelementptr i8* %j, i32 %.sum5		; <i8*> [#uses=1]
+	tail call void @llvm.memcpy.i32(i8* %48, i8* %46, i32 %w, i32 1)
+	br label %bb24
+
+bb24:		; preds = %bb23
+	%indvar.next5 = add i32 %y.21, 1		; <i32> [#uses=2]
+	%exitcond6 = icmp ne i32 %indvar.next5, %x		; <i1> [#uses=1]
+	br i1 %exitcond6, label %bb23, label %bb24.bb26_crit_edge
+
+bb24.bb26_crit_edge:		; preds = %bb24
+	br label %bb26
+
+bb26:		; preds = %bb24.bb26_crit_edge, %bb22
+	%49 = mul i32 %x, %w		; <i32> [#uses=1]
+	%.sum4 = add i32 %.sum3, %49		; <i32> [#uses=1]
+	%50 = getelementptr i8* %j, i32 %.sum4		; <i8*> [#uses=1]
+	%51 = mul i32 %x, %w		; <i32> [#uses=1]
+	%52 = udiv i32 %51, 2		; <i32> [#uses=1]
+	tail call void @llvm.memset.i32(i8* %50, i8 -128, i32 %52, i32 1)
+	ret void
+
+bb29:		; preds = %bb20, %entry
+	%53 = add i32 %w, 15		; <i32> [#uses=1]
+	%54 = and i32 %53, -16		; <i32> [#uses=1]
+	%55 = icmp ugt i32 %x, 0		; <i1> [#uses=1]
+	br i1 %55, label %bb.nph11, label %bb33
+
+bb.nph11:		; preds = %bb29
+	br label %bb30
+
+bb30:		; preds = %bb31, %bb.nph11
+	%y.310 = phi i32 [ 0, %bb.nph11 ], [ %indvar.next13, %bb31 ]		; <i32> [#uses=3]
+	%56 = mul i32 %y.310, %54		; <i32> [#uses=1]
+	%57 = getelementptr i8* %r, i32 %56		; <i8*> [#uses=1]
+	%58 = mul i32 %y.310, %w		; <i32> [#uses=1]
+	%59 = getelementptr i8* %j, i32 %58		; <i8*> [#uses=1]
+	tail call void @llvm.memcpy.i32(i8* %59, i8* %57, i32 %w, i32 1)
+	br label %bb31
+
+bb31:		; preds = %bb30
+	%indvar.next13 = add i32 %y.310, 1		; <i32> [#uses=2]
+	%exitcond14 = icmp ne i32 %indvar.next13, %x		; <i1> [#uses=1]
+	br i1 %exitcond14, label %bb30, label %bb31.bb33_crit_edge
+
+bb31.bb33_crit_edge:		; preds = %bb31
+	br label %bb33
+
+bb33:		; preds = %bb31.bb33_crit_edge, %bb29
+	%60 = mul i32 %x, %w		; <i32> [#uses=1]
+	%61 = getelementptr i8* %j, i32 %60		; <i8*> [#uses=1]
+	%62 = mul i32 %x, %w		; <i32> [#uses=1]
+	%63 = udiv i32 %62, 2		; <i32> [#uses=1]
+	tail call void @llvm.memset.i32(i8* %61, i8 -128, i32 %63, i32 1)
+	ret void
+
+return:		; preds = %bb20
+	ret void
+}
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
+
+declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
diff --git a/test/CodeGen/X86/optimize-max-1.ll b/test/CodeGen/X86/optimize-max-1.ll
new file mode 100644
index 0000000..ad6c24d
--- /dev/null
+++ b/test/CodeGen/X86/optimize-max-1.ll
@@ -0,0 +1,78 @@
+; RUN: llc < %s -march=x86-64 | not grep cmov
+
+; LSR should be able to eliminate both smax and umax expressions
+; in loop trip counts.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define void @fs(double* nocapture %p, i64 %n) nounwind {
+entry:
+	%tmp = icmp slt i64 %n, 1		; <i1> [#uses=1]
+	%smax = select i1 %tmp, i64 1, i64 %n		; <i64> [#uses=1]
+	br label %bb
+
+bb:		; preds = %bb, %entry
+	%i.0 = phi i64 [ 0, %entry ], [ %0, %bb ]		; <i64> [#uses=2]
+	%scevgep = getelementptr double* %p, i64 %i.0		; <double*> [#uses=1]
+	store double 0.000000e+00, double* %scevgep, align 8
+	%0 = add i64 %i.0, 1		; <i64> [#uses=2]
+	%exitcond = icmp eq i64 %0, %smax		; <i1> [#uses=1]
+	br i1 %exitcond, label %return, label %bb
+
+return:		; preds = %bb
+	ret void
+}
+
+define void @bs(double* nocapture %p, i64 %n) nounwind {
+entry:
+	%tmp = icmp sge i64 %n, 1		; <i1> [#uses=1]
+	%smax = select i1 %tmp, i64 %n, i64 1		; <i64> [#uses=1]
+	br label %bb
+
+bb:		; preds = %bb, %entry
+	%i.0 = phi i64 [ 0, %entry ], [ %0, %bb ]		; <i64> [#uses=2]
+	%scevgep = getelementptr double* %p, i64 %i.0		; <double*> [#uses=1]
+	store double 0.000000e+00, double* %scevgep, align 8
+	%0 = add i64 %i.0, 1		; <i64> [#uses=2]
+	%exitcond = icmp eq i64 %0, %smax		; <i1> [#uses=1]
+	br i1 %exitcond, label %return, label %bb
+
+return:		; preds = %bb
+	ret void
+}
+
+define void @fu(double* nocapture %p, i64 %n) nounwind {
+entry:
+	%tmp = icmp eq i64 %n, 0		; <i1> [#uses=1]
+	%umax = select i1 %tmp, i64 1, i64 %n		; <i64> [#uses=1]
+	br label %bb
+
+bb:		; preds = %bb, %entry
+	%i.0 = phi i64 [ 0, %entry ], [ %0, %bb ]		; <i64> [#uses=2]
+	%scevgep = getelementptr double* %p, i64 %i.0		; <double*> [#uses=1]
+	store double 0.000000e+00, double* %scevgep, align 8
+	%0 = add i64 %i.0, 1		; <i64> [#uses=2]
+	%exitcond = icmp eq i64 %0, %umax		; <i1> [#uses=1]
+	br i1 %exitcond, label %return, label %bb
+
+return:		; preds = %bb
+	ret void
+}
+
+define void @bu(double* nocapture %p, i64 %n) nounwind {
+entry:
+	%tmp = icmp ne i64 %n, 0		; <i1> [#uses=1]
+	%umax = select i1 %tmp, i64 %n, i64 1		; <i64> [#uses=1]
+	br label %bb
+
+bb:		; preds = %bb, %entry
+	%i.0 = phi i64 [ 0, %entry ], [ %0, %bb ]		; <i64> [#uses=2]
+	%scevgep = getelementptr double* %p, i64 %i.0		; <double*> [#uses=1]
+	store double 0.000000e+00, double* %scevgep, align 8
+	%0 = add i64 %i.0, 1		; <i64> [#uses=2]
+	%exitcond = icmp eq i64 %0, %umax		; <i1> [#uses=1]
+	br i1 %exitcond, label %return, label %bb
+
+return:		; preds = %bb
+	ret void
+}
diff --git a/test/CodeGen/X86/optimize-max-2.ll b/test/CodeGen/X86/optimize-max-2.ll
new file mode 100644
index 0000000..8851c5b
--- /dev/null
+++ b/test/CodeGen/X86/optimize-max-2.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -march=x86-64 > %t
+; RUN: grep cmov %t | count 2
+; RUN: grep jne %t | count 1
+
+; LSR's OptimizeMax function shouldn't try to eliminate this max, because
+; it has three operands.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define void @foo(double* nocapture %p, i64 %x, i64 %y) nounwind {
+entry:
+	%tmp = icmp eq i64 %y, 0		; <i1> [#uses=1]
+	%umax = select i1 %tmp, i64 1, i64 %y		; <i64> [#uses=2]
+	%tmp8 = icmp ugt i64 %umax, %x		; <i1> [#uses=1]
+	%umax9 = select i1 %tmp8, i64 %umax, i64 %x		; <i64> [#uses=1]
+	br label %bb4
+
+bb4:		; preds = %bb4, %entry
+	%i.07 = phi i64 [ 0, %entry ], [ %2, %bb4 ]		; <i64> [#uses=2]
+	%scevgep = getelementptr double* %p, i64 %i.07		; <double*> [#uses=2]
+	%0 = load double* %scevgep, align 8		; <double> [#uses=1]
+	%1 = fmul double %0, 2.000000e+00		; <double> [#uses=1]
+	store double %1, double* %scevgep, align 8
+	%2 = add i64 %i.07, 1		; <i64> [#uses=2]
+	%exitcond = icmp eq i64 %2, %umax9		; <i1> [#uses=1]
+	br i1 %exitcond, label %return, label %bb4
+
+return:		; preds = %bb4
+	ret void
+}
diff --git a/test/CodeGen/X86/or-branch.ll b/test/CodeGen/X86/or-branch.ll
new file mode 100644
index 0000000..9ebf890
--- /dev/null
+++ b/test/CodeGen/X86/or-branch.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86  | not grep set
+
+define void @foo(i32 %X, i32 %Y, i32 %Z) nounwind {
+entry:
+	%tmp = tail call i32 (...)* @bar( )		; <i32> [#uses=0]
+	%tmp.upgrd.1 = icmp eq i32 %X, 0		; <i1> [#uses=1]
+	%tmp3 = icmp slt i32 %Y, 5		; <i1> [#uses=1]
+	%tmp4 = or i1 %tmp3, %tmp.upgrd.1		; <i1> [#uses=1]
+	br i1 %tmp4, label %cond_true, label %UnifiedReturnBlock
+
+cond_true:		; preds = %entry
+	%tmp5 = tail call i32 (...)* @bar( )		; <i32> [#uses=0]
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+declare i32 @bar(...)
diff --git a/test/CodeGen/X86/overlap-shift.ll b/test/CodeGen/X86/overlap-shift.ll
new file mode 100644
index 0000000..c1fc041
--- /dev/null
+++ b/test/CodeGen/X86/overlap-shift.ll
@@ -0,0 +1,19 @@
+;; X's live range extends beyond the shift, so the register allocator
+;; cannot coalesce it with Y.  Because of this, a copy needs to be
+;; emitted before the shift to save the register value before it is
+;; clobbered.  However, this copy is not needed if the register
+;; allocator turns the shift into an LEA.  This also occurs for ADD.
+
+; Check that the shift gets turned into an LEA.
+
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
+; RUN:   not grep {mov E.X, E.X}
+
+@G = external global i32                ; <i32*> [#uses=1]
+
+define i32 @test1(i32 %X) {
+        %Z = shl i32 %X, 2              ; <i32> [#uses=1]
+        volatile store i32 %Z, i32* @G
+        ret i32 %X
+}
+
diff --git a/test/CodeGen/X86/packed_struct.ll b/test/CodeGen/X86/packed_struct.ll
new file mode 100644
index 0000000..da6e8f8
--- /dev/null
+++ b/test/CodeGen/X86/packed_struct.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=x86 > %t
+; RUN: grep foos+5 %t
+; RUN: grep foos+1 %t
+; RUN: grep foos+9 %t
+; RUN: grep bara+19 %t
+; RUN: grep bara+4 %t
+
+; make sure we compute the correct offset for a packed structure
+
+;Note: codegen for this could change rendering the above checks wrong
+
+target datalayout = "e-p:32:32"
+target triple = "i686-pc-linux-gnu"
+	%struct.anon = type <{ i8, i32, i32, i32 }>
+@foos = external global %struct.anon		; <%struct.anon*> [#uses=3]
+@bara = weak global [4 x <{ i32, i8 }>] zeroinitializer		; <[4 x <{ i32, i8 }>]*> [#uses=2]
+
+define i32 @foo() nounwind {
+entry:
+	%tmp = load i32* getelementptr (%struct.anon* @foos, i32 0, i32 1)		; <i32> [#uses=1]
+	%tmp3 = load i32* getelementptr (%struct.anon* @foos, i32 0, i32 2)		; <i32> [#uses=1]
+	%tmp6 = load i32* getelementptr (%struct.anon* @foos, i32 0, i32 3)		; <i32> [#uses=1]
+	%tmp4 = add i32 %tmp3, %tmp		; <i32> [#uses=1]
+	%tmp7 = add i32 %tmp4, %tmp6		; <i32> [#uses=1]
+	ret i32 %tmp7
+}
+
+define i8 @bar() nounwind {
+entry:
+	%tmp = load i8* getelementptr ([4 x <{ i32, i8 }>]* @bara, i32 0, i32 0, i32 1)		; <i8> [#uses=1]
+	%tmp4 = load i8* getelementptr ([4 x <{ i32, i8 }>]* @bara, i32 0, i32 3, i32 1)		; <i8> [#uses=1]
+	%tmp5 = add i8 %tmp4, %tmp		; <i8> [#uses=1]
+	ret i8 %tmp5
+}
diff --git a/test/CodeGen/X86/palignr-2.ll b/test/CodeGen/X86/palignr-2.ll
new file mode 100644
index 0000000..116d4c7
--- /dev/null
+++ b/test/CodeGen/X86/palignr-2.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=x86 -mattr=+ssse3 | FileCheck %s
+; rdar://7341330
+
+@a = global [4 x i32] [i32 4, i32 5, i32 6, i32 7], align 16 ; <[4 x i32]*> [#uses=1]
+@c = common global [4 x i32] zeroinitializer, align 16 ; <[4 x i32]*> [#uses=1]
+@b = global [4 x i32] [i32 0, i32 1, i32 2, i32 3], align 16 ; <[4 x i32]*> [#uses=1]
+
+define void @t1(<2 x i64> %a, <2 x i64> %b) nounwind ssp {
+entry:
+; CHECK: t1:
+; palignr $3, %xmm1, %xmm0
+  %0 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %a, <2 x i64> %b, i8 24) nounwind readnone
+  store <2 x i64> %0, <2 x i64>* bitcast ([4 x i32]* @c to <2 x i64>*), align 16
+  ret void
+}
+
+declare <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64>, <2 x i64>, i8) nounwind readnone
+
+define void @t2() nounwind ssp {
+entry:
+; CHECK: t2:
+; palignr $4, _b, %xmm0
+  %0 = load <2 x i64>* bitcast ([4 x i32]* @b to <2 x i64>*), align 16 ; <<2 x i64>> [#uses=1]
+  %1 = load <2 x i64>* bitcast ([4 x i32]* @a to <2 x i64>*), align 16 ; <<2 x i64>> [#uses=1]
+  %2 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %1, <2 x i64> %0, i8 32) nounwind readnone
+  store <2 x i64> %2, <2 x i64>* bitcast ([4 x i32]* @c to <2 x i64>*), align 16
+  ret void
+}
diff --git a/test/CodeGen/X86/palignr.ll b/test/CodeGen/X86/palignr.ll
new file mode 100644
index 0000000..3812c72
--- /dev/null
+++ b/test/CodeGen/X86/palignr.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -march=x86 -mcpu=core2 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck --check-prefix=YONAH %s
+
+define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
+; CHECK: pshufd
+; CHECK-YONAH: pshufd
+  %C = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> < i32 1, i32 2, i32 3, i32 0 >
+	ret <4 x i32> %C
+}
+
+define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {
+; CHECK: palignr
+; CHECK-YONAH: shufps
+  %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 1, i32 2, i32 3, i32 4 >
+	ret <4 x i32> %C
+}
+
+define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind {
+; CHECK: palignr
+  %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 1, i32 2, i32 undef, i32 4 >
+	ret <4 x i32> %C
+}
+
+define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
+; CHECK: palignr
+  %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 6, i32 7, i32 undef, i32 1 >
+	ret <4 x i32> %C
+}
+
+define <4 x float> @test5(<4 x float> %A, <4 x float> %B) nounwind {
+; CHECK: palignr
+  %C = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> < i32 6, i32 7, i32 undef, i32 1 >
+	ret <4 x float> %C
+}
+
+define <8 x i16> @test6(<8 x i16> %A, <8 x i16> %B) nounwind {
+; CHECK: palignr
+  %C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 3, i32 4, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10 >
+	ret <8 x i16> %C
+}
+
+define <8 x i16> @test7(<8 x i16> %A, <8 x i16> %B) nounwind {
+; CHECK: palignr
+  %C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 undef, i32 6, i32 undef, i32 8, i32 9, i32 10, i32 11, i32 12 >
+	ret <8 x i16> %C
+}
+
+define <8 x i16> @test8(<8 x i16> %A, <8 x i16> %B) nounwind {
+; CHECK: palignr
+  %C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 undef, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0 >
+	ret <8 x i16> %C
+}
+
+define <16 x i8> @test9(<16 x i8> %A, <16 x i8> %B) nounwind {
+; CHECK: palignr
+  %C = shufflevector <16 x i8> %A, <16 x i8> %B, <16 x i32> < i32 5, i32 6, i32 7, i32 undef, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20 >
+	ret <16 x i8> %C
+}
diff --git a/test/CodeGen/X86/peep-test-0.ll b/test/CodeGen/X86/peep-test-0.ll
new file mode 100644
index 0000000..e521d8e
--- /dev/null
+++ b/test/CodeGen/X86/peep-test-0.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86-64 > %t
+; RUN: not grep cmp %t
+; RUN: not grep test %t
+
+define void @loop(i64 %n, double* nocapture %d) nounwind {
+entry:
+	br label %bb
+
+bb:
+	%indvar = phi i64 [ %n, %entry ], [ %indvar.next, %bb ]
+	%i.03 = add i64 %indvar, %n
+	%0 = getelementptr double* %d, i64 %i.03
+	%1 = load double* %0, align 8
+	%2 = fmul double %1, 3.000000e+00
+	store double %2, double* %0, align 8
+	%indvar.next = add i64 %indvar, 1
+	%exitcond = icmp eq i64 %indvar.next, 0
+	br i1 %exitcond, label %return, label %bb
+
+return:
+	ret void
+}
diff --git a/test/CodeGen/X86/peep-test-1.ll b/test/CodeGen/X86/peep-test-1.ll
new file mode 100644
index 0000000..f83f0f6
--- /dev/null
+++ b/test/CodeGen/X86/peep-test-1.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=x86 > %t
+; RUN: grep dec %t | count 1
+; RUN: not grep test %t
+; RUN: not grep cmp %t
+
+define void @foo(i32 %n, double* nocapture %p) nounwind {
+	br label %bb
+
+bb:
+	%indvar = phi i32 [ 0, %0 ], [ %indvar.next, %bb ]
+	%i.03 = sub i32 %n, %indvar
+	%1 = getelementptr double* %p, i32 %i.03
+	%2 = load double* %1, align 4
+	%3 = fmul double %2, 2.930000e+00
+	store double %3, double* %1, align 4
+	%4 = add i32 %i.03, -1
+	%phitmp = icmp slt i32 %4, 0
+	%indvar.next = add i32 %indvar, 1
+	br i1 %phitmp, label %bb, label %return
+
+return:
+	ret void
+}
diff --git a/test/CodeGen/X86/peep-test-2.ll b/test/CodeGen/X86/peep-test-2.ll
new file mode 100644
index 0000000..2745172
--- /dev/null
+++ b/test/CodeGen/X86/peep-test-2.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86 | grep testl
+
+; It's tempting to eliminate the testl instruction here and just use the
+; EFLAGS value from the incl, however it can't be known whether the add
+; will overflow, and if it does the incl would set OF, and the
+; subsequent setg would return true.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.6"
+
+define i32 @f(i32 %j) nounwind readnone {
+entry:
+	%0 = add i32 %j, 1		; <i32> [#uses=1]
+	%1 = icmp sgt i32 %0, 0		; <i1> [#uses=1]
+	%2 = zext i1 %1 to i32		; <i32> [#uses=1]
+	ret i32 %2
+}
diff --git a/test/CodeGen/X86/peep-test-3.ll b/test/CodeGen/X86/peep-test-3.ll
new file mode 100644
index 0000000..a34a978
--- /dev/null
+++ b/test/CodeGen/X86/peep-test-3.ll
@@ -0,0 +1,89 @@
+; RUN: llc < %s -march=x86 -post-RA-scheduler=false | FileCheck %s
+; rdar://7226797
+
+; LLVM should omit the testl and use the flags result from the orl.
+
+; CHECK: or:
+define void @or(float* %A, i32 %IA, i32 %N) nounwind {
+entry:
+  %0 = ptrtoint float* %A to i32                  ; <i32> [#uses=1]
+  %1 = and i32 %0, 3                              ; <i32> [#uses=1]
+  %2 = xor i32 %IA, 1                             ; <i32> [#uses=1]
+; CHECK:      orl %ecx, %edx
+; CHECK-NEXT: je
+  %3 = or i32 %2, %1                              ; <i32> [#uses=1]
+  %4 = icmp eq i32 %3, 0                          ; <i1> [#uses=1]
+  br i1 %4, label %return, label %bb
+
+bb:                                               ; preds = %entry
+  store float 0.000000e+00, float* %A, align 4
+  ret void
+
+return:                                           ; preds = %entry
+  ret void
+}
+; CHECK: xor:
+define void @xor(float* %A, i32 %IA, i32 %N) nounwind {
+entry:
+  %0 = ptrtoint float* %A to i32                  ; <i32> [#uses=1]
+  %1 = and i32 %0, 3                              ; <i32> [#uses=1]
+; CHECK:      xorl $1, %e
+; CHECK-NEXT: je
+  %2 = xor i32 %IA, 1                             ; <i32> [#uses=1]
+  %3 = xor i32 %2, %1                              ; <i32> [#uses=1]
+  %4 = icmp eq i32 %3, 0                          ; <i1> [#uses=1]
+  br i1 %4, label %return, label %bb
+
+bb:                                               ; preds = %entry
+  store float 0.000000e+00, float* %A, align 4
+  ret void
+
+return:                                           ; preds = %entry
+  ret void
+}
+; CHECK: and:
+define void @and(float* %A, i32 %IA, i32 %N, i8* %p) nounwind {
+entry:
+  store i8 0, i8* %p
+  %0 = ptrtoint float* %A to i32                  ; <i32> [#uses=1]
+  %1 = and i32 %0, 3                              ; <i32> [#uses=1]
+  %2 = xor i32 %IA, 1                             ; <i32> [#uses=1]
+; CHECK:      andl  $3, %
+; CHECK-NEXT: movb  %
+; CHECK-NEXT: je
+  %3 = and i32 %2, %1                              ; <i32> [#uses=1]
+  %t = trunc i32 %3 to i8
+  store i8 %t, i8* %p
+  %4 = icmp eq i32 %3, 0                          ; <i1> [#uses=1]
+  br i1 %4, label %return, label %bb
+
+bb:                                               ; preds = %entry
+  store float 0.000000e+00, float* null, align 4
+  ret void
+
+return:                                           ; preds = %entry
+  ret void
+}
+
+; Just like @and, but without the trunc+store. This should use a testb
+; instead of an andl.
+; CHECK: test:
+define void @test(float* %A, i32 %IA, i32 %N, i8* %p) nounwind {
+entry:
+  store i8 0, i8* %p
+  %0 = ptrtoint float* %A to i32                  ; <i32> [#uses=1]
+  %1 = and i32 %0, 3                              ; <i32> [#uses=1]
+  %2 = xor i32 %IA, 1                             ; <i32> [#uses=1]
+; CHECK:      testb $3, %
+; CHECK-NEXT: je
+  %3 = and i32 %2, %1                              ; <i32> [#uses=1]
+  %4 = icmp eq i32 %3, 0                          ; <i1> [#uses=1]
+  br i1 %4, label %return, label %bb
+
+bb:                                               ; preds = %entry
+  store float 0.000000e+00, float* null, align 4
+  ret void
+
+return:                                           ; preds = %entry
+  ret void
+}
diff --git a/test/CodeGen/X86/peep-vector-extract-concat.ll b/test/CodeGen/X86/peep-vector-extract-concat.ll
new file mode 100644
index 0000000..e4ab2b5
--- /dev/null
+++ b/test/CodeGen/X86/peep-vector-extract-concat.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse41 | grep {pshufd	\$3, %xmm0, %xmm0}
+
+define float @foo(<8 x float> %a) nounwind {
+  %c = extractelement <8 x float> %a, i32 3
+  ret float %c
+}
diff --git a/test/CodeGen/X86/peep-vector-extract-insert.ll b/test/CodeGen/X86/peep-vector-extract-insert.ll
new file mode 100644
index 0000000..5e18044
--- /dev/null
+++ b/test/CodeGen/X86/peep-vector-extract-insert.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86-64 | grep {pxor	%xmm0, %xmm0} | count 2
+
+define float @foo(<4 x float> %a) {
+  %b = insertelement <4 x float> %a, float 0.0, i32 3
+  %c = extractelement <4 x float> %b, i32 3
+  ret float %c
+}
+define float @bar(float %a) {
+  %b = insertelement <4 x float> <float 0x400B333340000000, float 4.5, float 0.0, float 0x4022666660000000>, float %a, i32 3
+  %c = extractelement <4 x float> %b, i32 2
+  ret float %c
+}
diff --git a/test/CodeGen/X86/personality.ll b/test/CodeGen/X86/personality.ll
new file mode 100644
index 0000000..ce57e8f
--- /dev/null
+++ b/test/CodeGen/X86/personality.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | FileCheck %s -check-prefix=X32
+; PR1632
+
+define void @_Z1fv() {
+entry:
+	invoke void @_Z1gv( )
+			to label %return unwind label %unwind
+
+unwind:		; preds = %entry
+	br i1 false, label %eh_then, label %cleanup20
+
+eh_then:		; preds = %unwind
+	invoke void @__cxa_end_catch( )
+			to label %return unwind label %unwind10
+
+unwind10:		; preds = %eh_then
+	%eh_select13 = tail call i64 (i8*, i8*, ...)* @llvm.eh.selector.i64( i8* null, i8* bitcast (void ()* @__gxx_personality_v0 to i8*), i32 1 )		; <i32> [#uses=2]
+	%tmp18 = icmp slt i64 %eh_select13, 0		; <i1> [#uses=1]
+	br i1 %tmp18, label %filter, label %cleanup20
+
+filter:		; preds = %unwind10
+	unreachable
+
+cleanup20:		; preds = %unwind10, %unwind
+	%eh_selector.0 = phi i64 [ 0, %unwind ], [ %eh_select13, %unwind10 ]		; <i32> [#uses=0]
+	ret void
+
+return:		; preds = %eh_then, %entry
+	ret void
+}
+
+declare void @_Z1gv()
+
+declare i64 @llvm.eh.selector.i64(i8*, i8*, ...)
+
+declare void @__gxx_personality_v0()
+
+declare void @__cxa_end_catch()
+
+; X64: Leh_frame_common_begin:
+; X64: .long	(___gxx_personality_v0@GOTPCREL)+4
+
+; X32: Leh_frame_common_begin:
+; X32: .long	L___gxx_personality_v0$non_lazy_ptr-
+; ....
+
+; X32: .section	__IMPORT,__pointers,non_lazy_symbol_pointers
+; X32: L___gxx_personality_v0$non_lazy_ptr:
+; X32:   .indirect_symbol ___gxx_personality_v0
diff --git a/test/CodeGen/X86/phi-immediate-factoring.ll b/test/CodeGen/X86/phi-immediate-factoring.ll
new file mode 100644
index 0000000..9f9f921
--- /dev/null
+++ b/test/CodeGen/X86/phi-immediate-factoring.ll
@@ -0,0 +1,54 @@
+; PR1296
+; RUN: llc < %s -march=x86 | grep {movl	\$1} | count 1
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-apple-darwin8"
+
+define i32 @foo(i32 %A, i32 %B, i32 %C) {
+entry:
+	switch i32 %A, label %out [
+		 i32 1, label %bb
+		 i32 0, label %bb13
+		 i32 2, label %bb35
+	]
+
+bb:		; preds = %cond_next, %entry
+	%i.144.1 = phi i32 [ 0, %entry ], [ %tmp7, %cond_next ]		; <i32> [#uses=2]
+	%tmp4 = and i32 %i.144.1, %B		; <i32> [#uses=1]
+	icmp eq i32 %tmp4, 0		; <i1>:0 [#uses=1]
+	br i1 %0, label %cond_next, label %out
+
+cond_next:		; preds = %bb
+	%tmp7 = add i32 %i.144.1, 1		; <i32> [#uses=2]
+	icmp slt i32 %tmp7, 1000		; <i1>:1 [#uses=1]
+	br i1 %1, label %bb, label %out
+
+bb13:		; preds = %cond_next18, %entry
+	%i.248.1 = phi i32 [ 0, %entry ], [ %tmp20, %cond_next18 ]		; <i32> [#uses=2]
+	%tmp16 = and i32 %i.248.1, %C		; <i32> [#uses=1]
+	icmp eq i32 %tmp16, 0		; <i1>:2 [#uses=1]
+	br i1 %2, label %cond_next18, label %out
+
+cond_next18:		; preds = %bb13
+	%tmp20 = add i32 %i.248.1, 1		; <i32> [#uses=2]
+	icmp slt i32 %tmp20, 1000		; <i1>:3 [#uses=1]
+	br i1 %3, label %bb13, label %out
+
+bb27:		; preds = %bb35
+	%tmp30 = and i32 %i.3, %C		; <i32> [#uses=1]
+	icmp eq i32 %tmp30, 0		; <i1>:4 [#uses=1]
+	br i1 %4, label %cond_next32, label %out
+
+cond_next32:		; preds = %bb27
+	%indvar.next = add i32 %i.3, 1		; <i32> [#uses=1]
+	br label %bb35
+
+bb35:		; preds = %entry, %cond_next32
+	%i.3 = phi i32 [ %indvar.next, %cond_next32 ], [ 0, %entry ]		; <i32> [#uses=3]
+	icmp slt i32 %i.3, 1000		; <i1>:5 [#uses=1]
+	br i1 %5, label %bb27, label %out
+
+out:		; preds = %bb27, %bb35, %bb13, %cond_next18, %bb, %cond_next, %entry
+	%result.0 = phi i32 [ 0, %entry ], [ 1, %bb ], [ 0, %cond_next ], [ 1, %bb13 ], [ 0, %cond_next18 ], [ 1, %bb27 ], [ 0, %bb35 ]		; <i32> [#uses=1]
+	ret i32 %result.0
+}
diff --git a/test/CodeGen/X86/phys-reg-local-regalloc.ll b/test/CodeGen/X86/phys-reg-local-regalloc.ll
new file mode 100644
index 0000000..045841e
--- /dev/null
+++ b/test/CodeGen/X86/phys-reg-local-regalloc.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin9 -regalloc=local | FileCheck %s
+; RUN: llc -O0 < %s -march=x86 -mtriple=i386-apple-darwin9 -regalloc=local | FileCheck %s
+; CHECKed instructions should be the same with or without -O0.
+
+@.str = private constant [12 x i8] c"x + y = %i\0A\00", align 1 ; <[12 x i8]*> [#uses=1]
+
+define i32 @main() nounwind {
+entry:
+; CHECK: movl 24(%esp), %eax
+; CHECK-NOT: movl
+; CHECK: movl	%eax, 36(%esp)
+; CHECK-NOT: movl
+; CHECK: movl 28(%esp), %ebx
+; CHECK-NOT: movl
+; CHECK: movl	%ebx, 40(%esp)
+; CHECK-NOT: movl
+; CHECK: addl %ebx, %eax
+  %retval = alloca i32                            ; <i32*> [#uses=2]
+  %"%ebx" = alloca i32                            ; <i32*> [#uses=1]
+  %"%eax" = alloca i32                            ; <i32*> [#uses=2]
+  %result = alloca i32                            ; <i32*> [#uses=2]
+  %y = alloca i32                                 ; <i32*> [#uses=2]
+  %x = alloca i32                                 ; <i32*> [#uses=2]
+  %0 = alloca i32                                 ; <i32*> [#uses=2]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  store i32 1, i32* %x, align 4
+  store i32 2, i32* %y, align 4
+  call void asm sideeffect alignstack "# top of block", "~{dirflag},~{fpsr},~{flags},~{edi},~{esi},~{edx},~{ecx},~{eax}"() nounwind
+  %asmtmp = call i32 asm sideeffect alignstack "movl $1, $0", "=={eax},*m,~{dirflag},~{fpsr},~{flags},~{memory}"(i32* %x) nounwind ; <i32> [#uses=1]
+  store i32 %asmtmp, i32* %"%eax"
+  %asmtmp1 = call i32 asm sideeffect alignstack "movl $1, $0", "=={ebx},*m,~{dirflag},~{fpsr},~{flags},~{memory}"(i32* %y) nounwind ; <i32> [#uses=1]
+  store i32 %asmtmp1, i32* %"%ebx"
+  %1 = call i32 asm "", "={bx}"() nounwind        ; <i32> [#uses=1]
+  %2 = call i32 asm "", "={ax}"() nounwind        ; <i32> [#uses=1]
+  %asmtmp2 = call i32 asm sideeffect alignstack "addl $1, $0", "=={eax},{ebx},{eax},~{dirflag},~{fpsr},~{flags},~{memory}"(i32 %1, i32 %2) nounwind ; <i32> [#uses=1]
+  store i32 %asmtmp2, i32* %"%eax"
+  %3 = call i32 asm "", "={ax}"() nounwind        ; <i32> [#uses=1]
+  call void asm sideeffect alignstack "movl $0, $1", "{eax},*m,~{dirflag},~{fpsr},~{flags},~{memory}"(i32 %3, i32* %result) nounwind
+  %4 = load i32* %result, align 4                 ; <i32> [#uses=1]
+  %5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), i32 %4) nounwind ; <i32> [#uses=0]
+  store i32 0, i32* %0, align 4
+  %6 = load i32* %0, align 4                      ; <i32> [#uses=1]
+  store i32 %6, i32* %retval, align 4
+  br label %return
+
+return:                                           ; preds = %entry
+  %retval3 = load i32* %retval                    ; <i32> [#uses=1]
+  ret i32 %retval3
+}
+
+declare i32 @printf(i8*, ...) nounwind
diff --git a/test/CodeGen/X86/phys_subreg_coalesce-2.ll b/test/CodeGen/X86/phys_subreg_coalesce-2.ll
new file mode 100644
index 0000000..23c509c
--- /dev/null
+++ b/test/CodeGen/X86/phys_subreg_coalesce-2.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -march=x86 | grep mov | count 5
+; PR2659
+
+define i32 @binomial(i32 %n, i32 %k) nounwind {
+entry:
+	%cmp = icmp ugt i32 %k, %n		; <i1> [#uses=1]
+	br i1 %cmp, label %ifthen, label %forcond.preheader
+
+forcond.preheader:		; preds = %entry
+	%cmp44 = icmp eq i32 %k, 0		; <i1> [#uses=1]
+	br i1 %cmp44, label %afterfor, label %forbody
+
+ifthen:		; preds = %entry
+	ret i32 0
+
+forbody:		; preds = %forbody, %forcond.preheader
+	%indvar = phi i32 [ 0, %forcond.preheader ], [ %divisor.02, %forbody ]		; <i32> [#uses=3]
+	%accumulator.01 = phi i32 [ 1, %forcond.preheader ], [ %div, %forbody ]		; <i32> [#uses=1]
+	%divisor.02 = add i32 %indvar, 1		; <i32> [#uses=2]
+	%n.addr.03 = sub i32 %n, %indvar		; <i32> [#uses=1]
+	%mul = mul i32 %n.addr.03, %accumulator.01		; <i32> [#uses=1]
+	%div = udiv i32 %mul, %divisor.02		; <i32> [#uses=2]
+	%inc = add i32 %indvar, 2		; <i32> [#uses=1]
+	%cmp4 = icmp ugt i32 %inc, %k		; <i1> [#uses=1]
+	br i1 %cmp4, label %afterfor, label %forbody
+
+afterfor:		; preds = %forbody, %forcond.preheader
+	%accumulator.0.lcssa = phi i32 [ 1, %forcond.preheader ], [ %div, %forbody ]		; <i32> [#uses=1]
+	ret i32 %accumulator.0.lcssa
+}
diff --git a/test/CodeGen/X86/phys_subreg_coalesce.ll b/test/CodeGen/X86/phys_subreg_coalesce.ll
new file mode 100644
index 0000000..2c855ce
--- /dev/null
+++ b/test/CodeGen/X86/phys_subreg_coalesce.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=+sse2 | not grep movl
+
+	%struct.dpoint = type { double, double }
+
+define %struct.dpoint @midpoint(i64 %p1.0, i64 %p2.0) nounwind readnone {
+entry:
+	%0 = trunc i64 %p1.0 to i32		; <i32> [#uses=1]
+	%1 = sitofp i32 %0 to double		; <double> [#uses=1]
+	%2 = trunc i64 %p2.0 to i32		; <i32> [#uses=1]
+	%3 = sitofp i32 %2 to double		; <double> [#uses=1]
+	%4 = fadd double %1, %3		; <double> [#uses=1]
+	%5 = fmul double %4, 5.000000e-01		; <double> [#uses=1]
+	%6 = lshr i64 %p1.0, 32		; <i64> [#uses=1]
+	%7 = trunc i64 %6 to i32		; <i32> [#uses=1]
+	%8 = sitofp i32 %7 to double		; <double> [#uses=1]
+	%9 = lshr i64 %p2.0, 32		; <i64> [#uses=1]
+	%10 = trunc i64 %9 to i32		; <i32> [#uses=1]
+	%11 = sitofp i32 %10 to double		; <double> [#uses=1]
+	%12 = fadd double %8, %11		; <double> [#uses=1]
+	%13 = fmul double %12, 5.000000e-01		; <double> [#uses=1]
+	%mrv3 = insertvalue %struct.dpoint undef, double %5, 0		; <%struct.dpoint> [#uses=1]
+	%mrv4 = insertvalue %struct.dpoint %mrv3, double %13, 1		; <%struct.dpoint> [#uses=1]
+	ret %struct.dpoint %mrv4
+}
diff --git a/test/CodeGen/X86/pic-load-remat.ll b/test/CodeGen/X86/pic-load-remat.ll
new file mode 100644
index 0000000..7729752
--- /dev/null
+++ b/test/CodeGen/X86/pic-load-remat.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 -relocation-model=pic | grep psllw | grep pb
+
+define void @f() nounwind  {
+entry:
+	br label %bb
+
+bb:		; preds = %bb, %entry
+	%tmp4403 = tail call <8 x i16> @llvm.x86.sse2.psubs.w( <8 x i16> zeroinitializer, <8 x i16> zeroinitializer ) nounwind readnone 		; <<8 x i16>> [#uses=2]
+	%tmp4443 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> zeroinitializer, <8 x i16> zeroinitializer ) nounwind readnone 		; <<8 x i16>> [#uses=1]
+	%tmp4609 = tail call <8 x i16> @llvm.x86.sse2.psll.w( <8 x i16> zeroinitializer, <8 x i16> bitcast (<4 x i32> < i32 3, i32 5, i32 6, i32 9 > to <8 x i16>) )		; <<8 x i16>> [#uses=1]
+	%tmp4651 = add <8 x i16> %tmp4609, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1 >		; <<8 x i16>> [#uses=1]
+	%tmp4658 = tail call <8 x i16> @llvm.x86.sse2.psll.w( <8 x i16> %tmp4651, <8 x i16> bitcast (<4 x i32> < i32 4, i32 1, i32 2, i32 3 > to <8 x i16>) )		; <<8 x i16>> [#uses=1]
+	%tmp4669 = tail call <8 x i16> @llvm.x86.sse2.pavg.w( <8 x i16> < i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170 >, <8 x i16> %tmp4443 ) nounwind readnone 		; <<8 x i16>> [#uses=2]
+	%tmp4679 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4669, <8 x i16> %tmp4669 ) nounwind readnone 		; <<8 x i16>> [#uses=1]
+	%tmp4689 = add <8 x i16> %tmp4679, %tmp4658		; <<8 x i16>> [#uses=1]
+	%tmp4700 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4689, <8 x i16> zeroinitializer ) nounwind readnone 		; <<8 x i16>> [#uses=1]
+	%tmp4708 = bitcast <8 x i16> %tmp4700 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%tmp4772 = add <8 x i16> zeroinitializer, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1 >		; <<8 x i16>> [#uses=1]
+	%tmp4779 = tail call <8 x i16> @llvm.x86.sse2.psll.w( <8 x i16> %tmp4772, <8 x i16> bitcast (<4 x i32> < i32 3, i32 5, i32 undef, i32 7 > to <8 x i16>) )		; <<8 x i16>> [#uses=1]
+	%tmp4810 = add <8 x i16> zeroinitializer, %tmp4779		; <<8 x i16>> [#uses=1]
+	%tmp4821 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4810, <8 x i16> zeroinitializer ) nounwind readnone 		; <<8 x i16>> [#uses=1]
+	%tmp4829 = bitcast <8 x i16> %tmp4821 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%tmp4900 = tail call <8 x i16> @llvm.x86.sse2.psll.w( <8 x i16> zeroinitializer, <8 x i16> bitcast (<4 x i32> < i32 1, i32 1, i32 2, i32 2 > to <8 x i16>) )		; <<8 x i16>> [#uses=1]
+	%tmp4911 = tail call <8 x i16> @llvm.x86.sse2.pavg.w( <8 x i16> < i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170, i16 -23170 >, <8 x i16> zeroinitializer ) nounwind readnone 		; <<8 x i16>> [#uses=2]
+	%tmp4921 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4911, <8 x i16> %tmp4911 ) nounwind readnone 		; <<8 x i16>> [#uses=1]
+	%tmp4931 = add <8 x i16> %tmp4921, %tmp4900		; <<8 x i16>> [#uses=1]
+	%tmp4942 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4931, <8 x i16> zeroinitializer ) nounwind readnone 		; <<8 x i16>> [#uses=1]
+	%tmp4950 = bitcast <8 x i16> %tmp4942 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%tmp4957 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4403, <8 x i16> zeroinitializer ) nounwind readnone 		; <<8 x i16>> [#uses=1]
+	%tmp4958 = bitcast <8 x i16> %tmp4957 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%tmp4967 = tail call <8 x i16> @llvm.x86.sse2.psubs.w( <8 x i16> %tmp4403, <8 x i16> zeroinitializer ) nounwind readnone 		; <<8 x i16>> [#uses=1]
+	%tmp4968 = bitcast <8 x i16> %tmp4967 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	store <2 x i64> %tmp4829, <2 x i64>* null, align 16
+	store <2 x i64> %tmp4958, <2 x i64>* null, align 16
+	store <2 x i64> %tmp4968, <2 x i64>* null, align 16
+	store <2 x i64> %tmp4950, <2 x i64>* null, align 16
+	store <2 x i64> %tmp4708, <2 x i64>* null, align 16
+	br label %bb
+}
+
+declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone 
+
+declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone 
+
+declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone 
+
+declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone 
diff --git a/test/CodeGen/X86/pic.ll b/test/CodeGen/X86/pic.ll
new file mode 100644
index 0000000..d3c28a0
--- /dev/null
+++ b/test/CodeGen/X86/pic.ll
@@ -0,0 +1,208 @@
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -relocation-model=pic -asm-verbose=false -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX
+
+@ptr = external global i32* 
+@dst = external global i32 
+@src = external global i32 
+
+define void @test1() nounwind {
+entry:
+    store i32* @dst, i32** @ptr
+    %tmp.s = load i32* @src
+    store i32 %tmp.s, i32* @dst
+    ret void
+    
+; LINUX:    test1:
+; LINUX:	call	.L1$pb
+; LINUX-NEXT: .L1$pb:
+; LINUX-NEXT:	popl
+; LINUX:	addl	$_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref1-.L1$pb),
+; LINUX:	movl	dst@GOT(%eax),
+; LINUX:	movl	ptr@GOT(%eax),
+; LINUX:	movl	src@GOT(%eax),
+; LINUX:	ret
+}
+
+@ptr2 = global i32* null
+@dst2 = global i32 0
+@src2 = global i32 0
+
+define void @test2() nounwind {
+entry:
+    store i32* @dst2, i32** @ptr2
+    %tmp.s = load i32* @src2
+    store i32 %tmp.s, i32* @dst2
+    ret void
+    
+; LINUX: test2:
+; LINUX:	call	.L2$pb
+; LINUX-NEXT: .L2$pb:
+; LINUX-NEXT:	popl
+; LINUX:	addl	$_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref2-.L2$pb), %eax
+; LINUX:	movl	dst2@GOT(%eax),
+; LINUX:	movl	ptr2@GOT(%eax),
+; LINUX:	movl	src2@GOT(%eax),
+; LINUX:	ret
+
+}
+
+declare i8* @malloc(i32)
+
+define void @test3() nounwind {
+entry:
+    %ptr = call i8* @malloc(i32 40)
+    ret void
+; LINUX: test3:
+; LINUX: 	pushl	%ebx
+; LINUX-NEXT: 	subl	$8, %esp
+; LINUX-NEXT: 	call	.L3$pb
+; LINUX-NEXT: .L3$pb:
+; LINUX-NEXT: 	popl	%ebx
+; LINUX: 	addl	$_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref3-.L3$pb), %ebx
+; LINUX: 	movl	$40, (%esp)
+; LINUX: 	call	malloc@PLT
+; LINUX: 	addl	$8, %esp
+; LINUX: 	popl	%ebx
+; LINUX: 	ret
+}
+
+@pfoo = external global void(...)* 
+
+define void @test4() nounwind {
+entry:
+    %tmp = call void(...)*(...)* @afoo()
+    store void(...)* %tmp, void(...)** @pfoo
+    %tmp1 = load void(...)** @pfoo
+    call void(...)* %tmp1()
+    ret void
+; LINUX: test4:
+; LINUX: 	call	.L4$pb
+; LINUX-NEXT: .L4$pb:
+; LINUX: 	popl
+; LINUX: 	addl	$_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref4-.L4$pb),
+; LINUX: 	movl	pfoo@GOT(%esi),
+; LINUX: 	call	afoo@PLT
+; LINUX: 	call	*
+}
+
+declare void(...)* @afoo(...)
+
+define void @test5() nounwind {
+entry:
+    call void(...)* @foo()
+    ret void
+; LINUX: test5:
+; LINUX: call	.L5$pb
+; LINUX: popl	%ebx
+; LINUX: addl	$_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref5-.L5$pb), %ebx
+; LINUX: call	foo@PLT
+}
+
+declare void @foo(...)
+
+
+@ptr6 = internal global i32* null
+@dst6 = internal global i32 0
+@src6 = internal global i32 0
+
+define void @test6() nounwind {
+entry:
+    store i32* @dst6, i32** @ptr6
+    %tmp.s = load i32* @src6
+    store i32 %tmp.s, i32* @dst6
+    ret void
+    
+; LINUX: test6:
+; LINUX: 	call	.L6$pb
+; LINUX-NEXT: .L6$pb:
+; LINUX-NEXT: 	popl	%eax
+; LINUX: 	addl	$_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref6-.L6$pb), %eax
+; LINUX: 	leal	dst6@GOTOFF(%eax), %ecx
+; LINUX: 	movl	%ecx, ptr6@GOTOFF(%eax)
+; LINUX: 	movl	src6@GOTOFF(%eax), %ecx
+; LINUX: 	movl	%ecx, dst6@GOTOFF(%eax)
+; LINUX: 	ret
+}
+
+
+;; Test constant pool references.
+define double @test7(i32 %a.u) nounwind {
+entry:
+    %tmp = icmp eq i32 %a.u,0
+    %retval = select i1 %tmp, double 4.561230e+02, double 1.234560e+02
+    ret double %retval
+
+; LINUX: .LCPI7_0:
+
+; LINUX: test7:
+; LINUX:    call .L7$pb
+; LINUX: .L7$pb:
+; LINUX:    addl	$_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref7-.L7$pb), 
+; LINUX:    fldl	.LCPI7_0@GOTOFF(
+}
+
+
+;; Test jump table references.
+define void @test8(i32 %n.u) nounwind {
+entry:
+    switch i32 %n.u, label %bb12 [i32 1, label %bb i32 2, label %bb6 i32 4, label %bb7 i32 5, label %bb8 i32 6, label %bb10 i32 7, label %bb1 i32 8, label %bb3 i32 9, label %bb4 i32 10, label %bb9 i32 11, label %bb2 i32 12, label %bb5 i32 13, label %bb11 ]
+bb:
+    tail call void(...)* @foo1()
+    ret void
+bb1:
+    tail call void(...)* @foo2()
+    ret void
+bb2:
+    tail call void(...)* @foo6()
+    ret void
+bb3:
+    tail call void(...)* @foo3()
+    ret void
+bb4:
+    tail call void(...)* @foo4()
+    ret void
+bb5:
+    tail call void(...)* @foo5()
+    ret void
+bb6:
+    tail call void(...)* @foo1()
+    ret void
+bb7:
+    tail call void(...)* @foo2()
+    ret void
+bb8:
+    tail call void(...)* @foo6()
+    ret void
+bb9:
+    tail call void(...)* @foo3()
+    ret void
+bb10:
+    tail call void(...)* @foo4()
+    ret void
+bb11:
+    tail call void(...)* @foo5()
+    ret void
+bb12:
+    tail call void(...)* @foo6()
+    ret void
+    
+; LINUX: test8:
+; LINUX:   call	.L8$pb
+; LINUX: .L8$pb:
+; LINUX:   addl	$_GLOBAL_OFFSET_TABLE_+(.Lpicbaseref8-.L8$pb),
+; LINUX:   addl	.LJTI8_0@GOTOFF(
+; LINUX:   jmpl	*
+
+; LINUX: .LJTI8_0:
+; LINUX:   .long	 .LBB8_2@GOTOFF
+; LINUX:   .long	 .LBB8_2@GOTOFF
+; LINUX:   .long	 .LBB8_7@GOTOFF
+; LINUX:   .long	 .LBB8_3@GOTOFF
+; LINUX:   .long	 .LBB8_7@GOTOFF
+}
+
+declare void @foo1(...)
+declare void @foo2(...)
+declare void @foo6(...)
+declare void @foo3(...)
+declare void @foo4(...)
+declare void @foo5(...)
diff --git a/test/CodeGen/X86/pic_jumptable.ll b/test/CodeGen/X86/pic_jumptable.ll
new file mode 100644
index 0000000..b3750c1
--- /dev/null
+++ b/test/CodeGen/X86/pic_jumptable.ll
@@ -0,0 +1,78 @@
+; RUN: llc < %s -relocation-model=pic -mtriple=i386-linux-gnu -asm-verbose=false | not grep -F .text
+; RUN: llc < %s -relocation-model=pic -mtriple=i686-apple-darwin -asm-verbose=false | not grep lea
+; RUN: llc < %s -relocation-model=pic -mtriple=i686-apple-darwin -asm-verbose=false | grep add | count 2
+; RUN: llc < %s                       -mtriple=x86_64-apple-darwin | not grep 'lJTI'
+; rdar://6971437
+
+declare void @_Z3bari(i32)
+
+define linkonce void @_Z3fooILi1EEvi(i32 %Y) nounwind {
+entry:
+	%Y_addr = alloca i32		; <i32*> [#uses=2]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	store i32 %Y, i32* %Y_addr
+	%tmp = load i32* %Y_addr		; <i32> [#uses=1]
+	switch i32 %tmp, label %bb10 [
+		 i32 0, label %bb3
+		 i32 1, label %bb
+		 i32 2, label %bb
+		 i32 3, label %bb
+		 i32 4, label %bb
+		 i32 5, label %bb
+		 i32 6, label %bb
+		 i32 7, label %bb
+		 i32 8, label %bb
+		 i32 9, label %bb
+		 i32 10, label %bb
+		 i32 12, label %bb1
+		 i32 13, label %bb5
+		 i32 14, label %bb6
+		 i32 16, label %bb2
+		 i32 17, label %bb4
+		 i32 23, label %bb8
+		 i32 27, label %bb7
+		 i32 34, label %bb9
+	]
+
+bb:		; preds = %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry
+	br label %bb1
+
+bb1:		; preds = %bb, %entry
+	br label %bb2
+
+bb2:		; preds = %bb1, %entry
+	call void @_Z3bari( i32 1 )
+	br label %bb11
+
+bb3:		; preds = %entry
+	br label %bb4
+
+bb4:		; preds = %bb3, %entry
+	br label %bb5
+
+bb5:		; preds = %bb4, %entry
+	br label %bb6
+
+bb6:		; preds = %bb5, %entry
+	call void @_Z3bari( i32 2 )
+	br label %bb11
+
+bb7:		; preds = %entry
+	br label %bb8
+
+bb8:		; preds = %bb7, %entry
+	br label %bb9
+
+bb9:		; preds = %bb8, %entry
+	call void @_Z3bari( i32 3 )
+	br label %bb11
+
+bb10:		; preds = %entry
+	br label %bb11
+
+bb11:		; preds = %bb10, %bb9, %bb6, %bb2
+	br label %return
+
+return:		; preds = %bb11
+	ret void
+}
diff --git a/test/CodeGen/X86/pmul.ll b/test/CodeGen/X86/pmul.ll
new file mode 100644
index 0000000..e2746a8
--- /dev/null
+++ b/test/CodeGen/X86/pmul.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=x86 -mattr=sse41 -stack-alignment=16 > %t
+; RUN: grep pmul %t | count 12
+; RUN: grep mov %t | count 12
+
+define <4 x i32> @a(<4 x i32> %i) nounwind  {
+        %A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 >
+        ret <4 x i32> %A
+}
+define <2 x i64> @b(<2 x i64> %i) nounwind  {
+        %A = mul <2 x i64> %i, < i64 117, i64 117 >
+        ret <2 x i64> %A
+}
+define <4 x i32> @c(<4 x i32> %i, <4 x i32> %j) nounwind  {
+        %A = mul <4 x i32> %i, %j
+        ret <4 x i32> %A
+}
+define <2 x i64> @d(<2 x i64> %i, <2 x i64> %j) nounwind  {
+        %A = mul <2 x i64> %i, %j
+        ret <2 x i64> %A
+}
+; Use a call to force spills.
+declare void @foo()
+define <4 x i32> @e(<4 x i32> %i, <4 x i32> %j) nounwind  {
+        call void @foo()
+        %A = mul <4 x i32> %i, %j
+        ret <4 x i32> %A
+}
+define <2 x i64> @f(<2 x i64> %i, <2 x i64> %j) nounwind  {
+        call void @foo()
+        %A = mul <2 x i64> %i, %j
+        ret <2 x i64> %A
+}
diff --git a/test/CodeGen/X86/postalloc-coalescing.ll b/test/CodeGen/X86/postalloc-coalescing.ll
new file mode 100644
index 0000000..a171436
--- /dev/null
+++ b/test/CodeGen/X86/postalloc-coalescing.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -march=x86 | grep mov | count 3
+
+define fastcc i32 @_Z18yy_get_next_bufferv() {
+entry:
+	br label %bb131
+
+bb116:		; preds = %bb131
+	%tmp125126 = trunc i32 %c.1 to i8		; <i8> [#uses=1]
+	store i8 %tmp125126, i8* null, align 1
+	br label %bb131
+
+bb131:		; preds = %bb116, %entry
+	%c.2 = phi i32 [ %c.1, %bb116 ], [ 42, %entry ]		; <i32> [#uses=1]
+	%c.1 = select i1 false, i32 0, i32 %c.2		; <i32> [#uses=4]
+	%tmp181 = icmp eq i32 %c.1, -1		; <i1> [#uses=1]
+	br i1 %tmp181, label %bb158, label %bb116
+
+bb158:		; preds = %bb131
+	br i1 true, label %cond_true163, label %cond_next178
+
+cond_true163:		; preds = %bb158
+	%tmp172173 = trunc i32 %c.1 to i8		; <i8> [#uses=1]
+	store i8 %tmp172173, i8* null, align 1
+	br label %cond_next178
+
+cond_next178:		; preds = %cond_true163, %bb158
+	%tmp180 = icmp eq i32 %c.1, -1		; <i1> [#uses=1]
+	br i1 %tmp180, label %cond_next184, label %cond_next199
+
+cond_next184:		; preds = %cond_next178
+	ret i32 0
+
+cond_next199:		; preds = %cond_next178
+	ret i32 0
+}
diff --git a/test/CodeGen/X86/powi.ll b/test/CodeGen/X86/powi.ll
new file mode 100644
index 0000000..c3d6831
--- /dev/null
+++ b/test/CodeGen/X86/powi.ll
@@ -0,0 +1,11 @@
+; RUN: llc %s -march=x86 -mcpu=yonah -o - | grep mulsd | count 6
+; Ideally this would compile to 5 multiplies.
+
+define double @_Z3f10d(double %a) nounwind readonly ssp noredzone {
+entry:
+  %0 = tail call double @llvm.powi.f64(double %a, i32 15) nounwind ; <double> [#uses=1]
+  ret double %0
+}
+
+declare double @llvm.powi.f64(double, i32) nounwind readonly
+
diff --git a/test/CodeGen/X86/pr1462.ll b/test/CodeGen/X86/pr1462.ll
new file mode 100644
index 0000000..62549a5
--- /dev/null
+++ b/test/CodeGen/X86/pr1462.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s
+; PR1462
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-
+v64:64:64-v128:128:128-a0:0:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+define hidden i128 @__addvti3(i128 %a1, i128 %b2) {
+entry:
+        %tmp8 = add i128 %b2, %a1               ; <i128> [#uses=3]
+        %tmp10 = icmp sgt i128 %b2, -1          ; <i1> [#uses=1]
+        %tmp18 = icmp sgt i128 %tmp8, %a1               ; <i1> [#uses=1]
+        %tmp14 = icmp slt i128 %tmp8, %a1               ; <i1> [#uses=1]
+        %iftmp.0.0.in = select i1 %tmp10, i1 %tmp14, i1 %tmp18          ; <i1> [#uses=1]
+        br i1 %iftmp.0.0.in, label %cond_true22, label %cond_next23
+
+cond_true22:            ; preds = %entry
+        tail call void @abort( )
+        unreachable
+
+cond_next23:            ; preds = %entry
+        ret i128 %tmp8
+}
+
+declare void @abort()
diff --git a/test/CodeGen/X86/pr1489.ll b/test/CodeGen/X86/pr1489.ll
new file mode 100644
index 0000000..c9e24bf
--- /dev/null
+++ b/test/CodeGen/X86/pr1489.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -disable-fp-elim -O0 -mcpu=i486 | grep 1082126238 | count 3
+; RUN: llc < %s -disable-fp-elim -O0 -mcpu=i486 | grep -- -1236950581 | count 1
+;; magic constants are 3.999f and half of 3.999
+; ModuleID = '1489.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-apple-darwin8"
+@.str = internal constant [13 x i8] c"%d %d %d %d\0A\00"		; <[13 x i8]*> [#uses=1]
+
+define i32 @quux() nounwind {
+entry:
+	%tmp1 = tail call i32 @lrintf( float 0x400FFDF3C0000000 )		; <i32> [#uses=1]
+	%tmp2 = icmp slt i32 %tmp1, 1		; <i1> [#uses=1]
+	%tmp23 = zext i1 %tmp2 to i32		; <i32> [#uses=1]
+	ret i32 %tmp23
+}
+
+declare i32 @lrintf(float)
+
+define i32 @foo() nounwind {
+entry:
+	%tmp1 = tail call i32 @lrint( double 3.999000e+00 )		; <i32> [#uses=1]
+	%tmp2 = icmp slt i32 %tmp1, 1		; <i1> [#uses=1]
+	%tmp23 = zext i1 %tmp2 to i32		; <i32> [#uses=1]
+	ret i32 %tmp23
+}
+
+declare i32 @lrint(double)
+
+define i32 @bar() nounwind {
+entry:
+	%tmp1 = tail call i32 @lrintf( float 0x400FFDF3C0000000 )		; <i32> [#uses=1]
+	%tmp2 = icmp slt i32 %tmp1, 1		; <i1> [#uses=1]
+	%tmp23 = zext i1 %tmp2 to i32		; <i32> [#uses=1]
+	ret i32 %tmp23
+}
+
+define i32 @baz() nounwind {
+entry:
+	%tmp1 = tail call i32 @lrintf( float 0x400FFDF3C0000000 )		; <i32> [#uses=1]
+	%tmp2 = icmp slt i32 %tmp1, 1		; <i1> [#uses=1]
+	%tmp23 = zext i1 %tmp2 to i32		; <i32> [#uses=1]
+	ret i32 %tmp23
+}
+
+define i32 @main() nounwind {
+entry:
+	%tmp = tail call i32 @baz( )		; <i32> [#uses=1]
+	%tmp1 = tail call i32 @bar( )		; <i32> [#uses=1]
+	%tmp2 = tail call i32 @foo( )		; <i32> [#uses=1]
+	%tmp3 = tail call i32 @quux( )		; <i32> [#uses=1]
+	%tmp5 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([13 x i8]* @.str, i32 0, i32 0), i32 %tmp3, i32 %tmp2, i32 %tmp1, i32 %tmp )		; <i32> [#uses=0]
+	ret i32 undef
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/X86/pr1505.ll b/test/CodeGen/X86/pr1505.ll
new file mode 100644
index 0000000..883a806
--- /dev/null
+++ b/test/CodeGen/X86/pr1505.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mcpu=i486 | not grep fldl
+; PR1505
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-apple-darwin8"
+@G = weak global float 0.000000e+00		; <float*> [#uses=1]
+
+define void @t1(float %F) {
+entry:
+	store float %F, float* @G
+	ret void
+}
diff --git a/test/CodeGen/X86/pr1505b.ll b/test/CodeGen/X86/pr1505b.ll
new file mode 100644
index 0000000..12736cd
--- /dev/null
+++ b/test/CodeGen/X86/pr1505b.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -mcpu=i486 | grep fstpl | count 4
+; RUN: llc < %s -mcpu=i486 | grep fstps | count 3
+; PR1505
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-apple-darwin8"
+	%"struct.std::basic_ios<char,std::char_traits<char> >" = type { %"struct.std::ios_base", %"struct.std::basic_ostream<char,std::char_traits<char> >"*, i8, i8, %"struct.std::basic_streambuf<char,std::char_traits<char> >"*, %"struct.std::ctype<char>"*, %"struct.std::num_get<char,std::istreambuf_iterator<char, std::char_traits<char> > >"*, %"struct.std::num_get<char,std::istreambuf_iterator<char, std::char_traits<char> > >"* }
+	%"struct.std::basic_ostream<char,std::char_traits<char> >" = type { i32 (...)**, %"struct.std::basic_ios<char,std::char_traits<char> >" }
+	%"struct.std::basic_streambuf<char,std::char_traits<char> >" = type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %"struct.std::locale" }
+	%"struct.std::ctype<char>" = type { %"struct.std::locale::facet", i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }
+	%"struct.std::ctype_base" = type <{ i8 }>
+	%"struct.std::ios_base" = type { i32 (...)**, i32, i32, i32, i32, i32, %"struct.std::ios_base::_Callback_list"*, %"struct.std::ios_base::_Words", [8 x %"struct.std::ios_base::_Words"], i32, %"struct.std::ios_base::_Words"*, %"struct.std::locale" }
+	%"struct.std::ios_base::_Callback_list" = type { %"struct.std::ios_base::_Callback_list"*, void (i32, %"struct.std::ios_base"*, i32)*, i32, i32 }
+	%"struct.std::ios_base::_Words" = type { i8*, i32 }
+	%"struct.std::locale" = type { %"struct.std::locale::_Impl"* }
+	%"struct.std::locale::_Impl" = type { i32, %"struct.std::locale::facet"**, i32, %"struct.std::locale::facet"**, i8** }
+	%"struct.std::locale::facet" = type { i32 (...)**, i32 }
+	%"struct.std::num_get<char,std::istreambuf_iterator<char, std::char_traits<char> > >" = type { %"struct.std::locale::facet" }
+@a = global float 0x3FD3333340000000		; <float*> [#uses=1]
+@b = global double 6.000000e-01, align 8		; <double*> [#uses=1]
+@_ZSt8__ioinit = internal global %"struct.std::ctype_base" zeroinitializer		; <%"struct.std::ctype_base"*> [#uses=2]
+@__dso_handle = external global i8*		; <i8**> [#uses=1]
+@_ZSt4cout = external global %"struct.std::basic_ostream<char,std::char_traits<char> >"		; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=2]
+@.str = internal constant [12 x i8] c"tan float: \00"		; <[12 x i8]*> [#uses=1]
+@.str1 = internal constant [13 x i8] c"tan double: \00"		; <[13 x i8]*> [#uses=1]
+
+declare void @_ZNSt8ios_base4InitD1Ev(%"struct.std::ctype_base"*)
+
+declare void @_ZNSt8ios_base4InitC1Ev(%"struct.std::ctype_base"*)
+
+declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*)
+
+define i32 @main() {
+entry:
+	%tmp6 = volatile load float* @a		; <float> [#uses=1]
+	%tmp9 = tail call float @tanf( float %tmp6 )		; <float> [#uses=1]
+	%tmp12 = volatile load double* @b		; <double> [#uses=1]
+	%tmp13 = tail call double @tan( double %tmp12 )		; <double> [#uses=1]
+	%tmp1314 = fptrunc double %tmp13 to float		; <float> [#uses=1]
+	%tmp16 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc( %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4cout, i8* getelementptr ([12 x i8]* @.str, i32 0, i32 0) )		; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1]
+	%tmp1920 = fpext float %tmp9 to double		; <double> [#uses=1]
+	%tmp22 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEd( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp16, double %tmp1920 )		; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1]
+	%tmp30 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp22 )		; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=0]
+	%tmp34 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc( %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4cout, i8* getelementptr ([13 x i8]* @.str1, i32 0, i32 0) )		; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1]
+	%tmp3940 = fpext float %tmp1314 to double		; <double> [#uses=1]
+	%tmp42 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEd( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp34, double %tmp3940 )		; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1]
+	%tmp51 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp42 )		; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=0]
+	ret i32 0
+}
+
+declare float @tanf(float)
+
+declare double @tan(double)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, i8*)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEd(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, double)
+
+declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_(%"struct.std::basic_ostream<char,std::char_traits<char> >"*)
diff --git a/test/CodeGen/X86/pr2177.ll b/test/CodeGen/X86/pr2177.ll
new file mode 100644
index 0000000..e941bf7
--- /dev/null
+++ b/test/CodeGen/X86/pr2177.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s
+; PR2177
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin9.1.0"
+	%struct.S2259 = type { <4 x i16>, i8, i64 }
+
+define void @check2259va(i32 %z, ...) {
+entry:
+	br i1 false, label %bb5, label %return
+bb5:		; preds = %entry
+	switch i32 0, label %bb155 [
+		 i32 16, label %bb10
+		 i32 17, label %bb118
+		 i32 18, label %bb54
+		 i32 32, label %bb118
+		 i32 33, label %bb118
+		 i32 36, label %bb118
+	]
+bb10:		; preds = %bb5
+	ret void
+bb54:		; preds = %bb5
+	ret void
+bb118:		; preds = %bb5, %bb5, %bb5, %bb5
+	%tmp125 = load i8** null, align 8		; <i8*> [#uses=1]
+	%tmp125126 = bitcast i8* %tmp125 to %struct.S2259*		; <%struct.S2259*> [#uses=1]
+	%tmp128 = getelementptr %struct.S2259* %tmp125126, i32 0, i32 0		; <<4 x i16>*> [#uses=1]
+	%tmp129 = load <4 x i16>* %tmp128, align 8		; <<4 x i16>> [#uses=1]
+	store <4 x i16> %tmp129, <4 x i16>* null, align 8
+	ret void
+bb155:		; preds = %bb5
+	ret void
+return:		; preds = %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/pr2182.ll b/test/CodeGen/X86/pr2182.ll
new file mode 100644
index 0000000..f97663c
--- /dev/null
+++ b/test/CodeGen/X86/pr2182.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s | grep {addl	\$3, (%eax)} | count 4
+; PR2182
+
+target datalayout =
+"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+@x = weak global i32 0          ; <i32*> [#uses=8]
+
+define void @loop_2() nounwind  {
+entry:
+        %tmp = volatile load i32* @x, align 4           ; <i32> [#uses=1]
+        %tmp1 = add i32 %tmp, 3         ; <i32> [#uses=1]
+        volatile store i32 %tmp1, i32* @x, align 4
+        %tmp.1 = volatile load i32* @x, align 4         ; <i32> [#uses=1]
+        %tmp1.1 = add i32 %tmp.1, 3             ; <i32> [#uses=1]
+        volatile store i32 %tmp1.1, i32* @x, align 4
+        %tmp.2 = volatile load i32* @x, align 4         ; <i32> [#uses=1]
+        %tmp1.2 = add i32 %tmp.2, 3             ; <i32> [#uses=1]
+        volatile store i32 %tmp1.2, i32* @x, align 4
+        %tmp.3 = volatile load i32* @x, align 4         ; <i32> [#uses=1]
+        %tmp1.3 = add i32 %tmp.3, 3             ; <i32> [#uses=1]
+        volatile store i32 %tmp1.3, i32* @x, align 4
+        ret void
+}
diff --git a/test/CodeGen/X86/pr2326.ll b/test/CodeGen/X86/pr2326.ll
new file mode 100644
index 0000000..f82dcb5
--- /dev/null
+++ b/test/CodeGen/X86/pr2326.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86 | grep sete
+; PR2326
+
+define i32 @func_59(i32 %p_60) nounwind  {
+entry:
+	%l_108 = alloca i32		; <i32*> [#uses=2]
+	%tmp15 = load i32* null, align 4		; <i32> [#uses=1]
+	%tmp16 = load i32* %l_108, align 4		; <i32> [#uses=1]
+	%tmp17 = icmp eq i32 %tmp15, %tmp16		; <i1> [#uses=1]
+	%tmp1718 = zext i1 %tmp17 to i8		; <i8> [#uses=1]
+	%tmp19 = load i32* null, align 4		; <i32> [#uses=1]
+	%tmp20 = load i32* %l_108, align 4		; <i32> [#uses=1]
+	%tmp21 = icmp ule i32 %tmp19, %tmp20		; <i1> [#uses=1]
+	%tmp2122 = zext i1 %tmp21 to i8		; <i8> [#uses=1]
+	%toBool23 = icmp ne i8 %tmp1718, 0		; <i1> [#uses=1]
+	%toBool24 = icmp ne i8 %tmp2122, 0		; <i1> [#uses=1]
+	%tmp25 = and i1 %toBool23, %toBool24		; <i1> [#uses=1]
+	%tmp2526 = zext i1 %tmp25 to i8		; <i8> [#uses=1]
+	%tmp252627 = zext i8 %tmp2526 to i32		; <i32> [#uses=1]
+	%tmp29 = call i32 (...)* @func_15( i32 %tmp252627, i32 0 ) nounwind 		; <i32> [#uses=0]
+	unreachable
+}
+
+declare i32 @func_15(...)
diff --git a/test/CodeGen/X86/pr2623.ll b/test/CodeGen/X86/pr2623.ll
new file mode 100644
index 0000000..5d0eb5d
--- /dev/null
+++ b/test/CodeGen/X86/pr2623.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s
+; PR2623
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-unknown-freebsd7.0"
+	%.objc_id = type { %.objc_id }*
+	%.objc_selector = type { i8*, i8* }*
+@.objc_sel_ptr = external constant %.objc_selector		; <%.objc_selector*> [#uses=1]
+@.objc_sel_ptr13 = external constant %.objc_selector		; <%.objc_selector*> [#uses=1]
+@.objc_sel_ptr14 = external constant %.objc_selector		; <%.objc_selector*> [#uses=1]
+@.objc_sel_ptr15 = external constant %.objc_selector		; <%.objc_selector*> [#uses=1]
+@.objc_sel_ptr16 = external constant %.objc_selector		; <%.objc_selector*> [#uses=1]
+@.objc_sel_ptr17 = external constant %.objc_selector		; <%.objc_selector*> [#uses=1]
+@.objc_sel_ptr18 = external constant %.objc_selector		; <%.objc_selector*> [#uses=1]
+@.objc_sel_ptr19 = external constant %.objc_selector		; <%.objc_selector*> [#uses=1]
+@.objc_sel_ptr20 = external constant %.objc_selector		; <%.objc_selector*> [#uses=1]
+@.objc_sel_ptr21 = external constant %.objc_selector		; <%.objc_selector*> [#uses=1]
+
+@.objc_untyped_selector_alias = alias internal %.objc_selector* @.objc_sel_ptr15		; <%.objc_selector*> [#uses=0]
+@.objc_untyped_selector_alias1 = alias internal %.objc_selector* @.objc_sel_ptr		; <%.objc_selector*> [#uses=0]
+@.objc_untyped_selector_alias2 = alias internal %.objc_selector* @.objc_sel_ptr17		; <%.objc_selector*> [#uses=0]
+@.objc_untyped_selector_alias3 = alias internal %.objc_selector* @.objc_sel_ptr16		; <%.objc_selector*> [#uses=0]
+@.objc_untyped_selector_alias4 = alias internal %.objc_selector* @.objc_sel_ptr13		; <%.objc_selector*> [#uses=0]
+@.objc_untyped_selector_alias7 = alias internal %.objc_selector* @.objc_sel_ptr14		; <%.objc_selector*> [#uses=0]
+@getRange = alias internal %.objc_selector* @.objc_sel_ptr18		; <%.objc_selector*> [#uses=0]
+@"valueWithRange:" = alias internal %.objc_selector* @.objc_sel_ptr21		; <%.objc_selector*> [#uses=0]
+@rangeValue = alias internal %.objc_selector* @.objc_sel_ptr20		; <%.objc_selector*> [#uses=0]
+@"printRange:" = alias internal %.objc_selector* @.objc_sel_ptr19		; <%.objc_selector*> [#uses=0]
+
+define void @"._objc_method_SmalltalkTool()-run"(i8* %self, %.objc_selector %_cmd) {
+entry:
+	br i1 false, label %small_int_messagerangeValue, label %real_object_messagerangeValue
+
+small_int_messagerangeValue:		; preds = %entry
+	br label %Continue
+
+real_object_messagerangeValue:		; preds = %entry
+	br label %Continue
+
+Continue:		; preds = %real_object_messagerangeValue, %small_int_messagerangeValue
+	%rangeValue = phi { i32, i32 } [ undef, %small_int_messagerangeValue ], [ undef, %real_object_messagerangeValue ]		; <{ i32, i32 }> [#uses=1]
+	call void (%.objc_id, %.objc_selector, ...)* null( %.objc_id null, %.objc_selector null, { i32, i32 } %rangeValue )
+	ret void
+}
diff --git a/test/CodeGen/X86/pr2656.ll b/test/CodeGen/X86/pr2656.ll
new file mode 100644
index 0000000..afd7114
--- /dev/null
+++ b/test/CodeGen/X86/pr2656.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {xorps.\*sp} | count 1
+; PR2656
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin9.4.0"
+	%struct.anon = type <{ float, float }>
+@.str = internal constant [17 x i8] c"pt: %.0f, %.0f\0A\00\00"		; <[17 x i8]*> [#uses=1]
+
+define void @foo(%struct.anon* byval %p) nounwind {
+entry:
+	%tmp = getelementptr %struct.anon* %p, i32 0, i32 0		; <float*> [#uses=1]
+	%tmp1 = load float* %tmp		; <float> [#uses=1]
+	%tmp2 = getelementptr %struct.anon* %p, i32 0, i32 1		; <float*> [#uses=1]
+	%tmp3 = load float* %tmp2		; <float> [#uses=1]
+	%neg = fsub float -0.000000e+00, %tmp1		; <float> [#uses=1]
+	%conv = fpext float %neg to double		; <double> [#uses=1]
+	%neg4 = fsub float -0.000000e+00, %tmp3		; <float> [#uses=1]
+	%conv5 = fpext float %neg4 to double		; <double> [#uses=1]
+	%call = call i32 (...)* @printf( i8* getelementptr ([17 x i8]* @.str, i32 0, i32 0), double %conv, double %conv5 )		; <i32> [#uses=0]
+	ret void
+}
+
+declare i32 @printf(...)
diff --git a/test/CodeGen/X86/pr2659.ll b/test/CodeGen/X86/pr2659.ll
new file mode 100644
index 0000000..0760e4c
--- /dev/null
+++ b/test/CodeGen/X86/pr2659.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin9.4.0 | grep movl | count 5
+; PR2659
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin9.4.0"
+
+define i32 @binomial(i32 %n, i32 %k) nounwind  {
+entry:
+  %cmp = icmp ugt i32 %k, %n            ; <i1> [#uses=1]
+  br i1 %cmp, label %ifthen, label %forcond.preheader
+
+forcond.preheader:              ; preds = %entry
+  %cmp44 = icmp eq i32 %k, 0            ; <i1> [#uses=1]
+  br i1 %cmp44, label %afterfor, label %forbody
+
+ifthen:         ; preds = %entry
+  ret i32 0
+
+forbody:                ; preds = %forbody, %forcond.preheader
+  %indvar = phi i32 [ 0, %forcond.preheader ], [ %divisor.02, %forbody ]                ; <i32> [#uses=3]
+  %accumulator.01 = phi i32 [ 1, %forcond.preheader ], [ %div, %forbody ]               ; <i32> [#uses=1]
+  %divisor.02 = add i32 %indvar, 1              ; <i32> [#uses=2]
+  %n.addr.03 = sub i32 %n, %indvar              ; <i32> [#uses=1]
+  %mul = mul i32 %n.addr.03, %accumulator.01            ; <i32> [#uses=1]
+  %div = udiv i32 %mul, %divisor.02             ; <i32> [#uses=2]
+  %inc = add i32 %indvar, 2             ; <i32> [#uses=1]
+  %cmp4 = icmp ugt i32 %inc, %k         ; <i1> [#uses=1]
+  br i1 %cmp4, label %afterfor, label %forbody
+
+afterfor:               ; preds = %forbody, %forcond.preheader
+  %accumulator.0.lcssa = phi i32 [ 1, %forcond.preheader ], [ %div, %forbody ]          ; <i32> [#uses=1]
+  ret i32 %accumulator.0.lcssa
+}
diff --git a/test/CodeGen/X86/pr2849.ll b/test/CodeGen/X86/pr2849.ll
new file mode 100644
index 0000000..0fec481
--- /dev/null
+++ b/test/CodeGen/X86/pr2849.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s
+; PR2849
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+	%struct.BaseBoundPtrs = type { i8*, i8* }
+	%struct.HashEntry = type { %struct.BaseBoundPtrs }
+	%struct.NODE = type { i8, i8, %struct.anon }
+	%struct.anon = type { %struct.xlist }
+	%struct.xlist = type { %struct.NODE*, %struct.NODE* }
+	%struct.xvect = type { %struct.NODE** }
+@hash_table_begin = external global %struct.HashEntry*
+
+define void @obshow() {
+entry:
+	%tmp = load %struct.HashEntry** @hash_table_begin, align 8
+	br i1 false, label %xlygetvalue.exit, label %xlygetvalue.exit
+
+xlygetvalue.exit:
+	%storemerge.in.i = phi %struct.NODE** [ null, %entry ], [ null, %entry ]
+	%storemerge.i = load %struct.NODE** %storemerge.in.i
+	%tmp1 = ptrtoint %struct.NODE** %storemerge.in.i to i64
+	%tmp2 = lshr i64 %tmp1, 3
+	%tmp3 = and i64 %tmp2, 2147483647
+	%tmp4 = getelementptr %struct.HashEntry* %tmp, i64 %tmp3, i32 0, i32 1
+	%tmp7 = load i8** %tmp4, align 8
+	%tmp8 = getelementptr %struct.NODE* %storemerge.i, i64 0, i32 2
+	%tmp9 = bitcast %struct.anon* %tmp8 to %struct.NODE***
+	%tmp11 = load %struct.NODE*** %tmp9, align 8
+	%tmp12 = ptrtoint %struct.NODE** %tmp11 to i64
+	%tmp13 = lshr i64 %tmp12, 3
+	%tmp14 = and i64 %tmp13, 2147483647
+	%tmp15 = getelementptr %struct.HashEntry* %tmp, i64 %tmp14, i32 0, i32 1
+	call fastcc void @xlprint(i8** %tmp4, i8* %tmp7, i8** %tmp15)
+	ret void
+}
+
+declare fastcc void @xlprint(i8**, i8*, i8**)
diff --git a/test/CodeGen/X86/pr2924.ll b/test/CodeGen/X86/pr2924.ll
new file mode 100644
index 0000000..b9e8dc1
--- /dev/null
+++ b/test/CodeGen/X86/pr2924.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s
+; PR2924
+
+target datalayout =
+"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i686-pc-linux-gnu"
+
+define x86_stdcallcc { i32, i8* } @_D3std6string7toupperFAaZAa({ i32, i8* } %s) {
+entry_std.string.toupper:
+        %tmp58 = load i32* null
+        %tmp59 = icmp eq i32 %tmp58, 0
+        %r.val = load { i32, i8* }* null, align 8
+        %condtmp.0 = select i1 %tmp59, { i32, i8* } undef, { i32, i8* } %r.val 
+
+        ret { i32, i8* } %condtmp.0
+}
+define { } @empty({ } %s) {
+entry_std.string.toupper:
+        %tmp58 = load i32* null
+        %tmp59 = icmp eq i32 %tmp58, 0
+        %r.val = load { }* null, align 8
+        %condtmp.0 = select i1 %tmp59, { } undef, { } %r.val
+        ret { } %condtmp.0
+}
diff --git a/test/CodeGen/X86/pr2982.ll b/test/CodeGen/X86/pr2982.ll
new file mode 100644
index 0000000..3f9a595
--- /dev/null
+++ b/test/CodeGen/X86/pr2982.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=x86
+; PR2982
+
+target datalayout =
+"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.5"
+@g_279 = external global i32            ; <i32*> [#uses=1]
+@g_265 = external global i32            ; <i32*> [#uses=1]
+@g_3 = external global i8               ; <i8*> [#uses=1]
+
+declare i32 @rshift_u_u(...)
+
+define void @bar() nounwind {
+entry:
+        %0 = load i32* @g_279, align 4          ; <i32> [#uses=1]
+        %1 = shl i32 %0, 1              ; <i32> [#uses=1]
+        %2 = and i32 %1, 2              ; <i32> [#uses=1]
+        %3 = load i32* @g_265, align 4          ; <i32> [#uses=1]
+        %4 = load i8* @g_3, align 1             ; <i8> [#uses=1]
+        %5 = sext i8 %4 to i32          ; <i32> [#uses=1]
+        %6 = add i32 %2, %3             ; <i32> [#uses=1]
+        %7 = add i32 %6, %5             ; <i32> [#uses=1]
+        %8 = tail call i32 (...)* @rshift_u_u(i32 %7, i32 0) nounwind          
+; <i32> [#uses=0]
+        ret void
+}
diff --git a/test/CodeGen/X86/pr3154.ll b/test/CodeGen/X86/pr3154.ll
new file mode 100644
index 0000000..18df97c
--- /dev/null
+++ b/test/CodeGen/X86/pr3154.ll
@@ -0,0 +1,104 @@
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mattr=+sse2
+; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mattr=+sse2 -relocation-model=pic -disable-fp-elim
+; PR3154
+
+define void @ff_flac_compute_autocorr_sse2(i32* %data, i32 %len, i32 %lag, double* %autoc) nounwind {
+entry:
+	%c = alloca double, align 8		; <double*> [#uses=2]
+	%0 = add i32 %len, 2		; <i32> [#uses=1]
+	%1 = add i32 %0, %lag		; <i32> [#uses=1]
+	%2 = alloca double, i32 %1		; <double*> [#uses=2]
+	%3 = getelementptr double* %2, i32 %lag		; <double*> [#uses=2]
+	%4 = ptrtoint double* %3 to i32		; <i32> [#uses=1]
+	%5 = and i32 %4, 8		; <i32> [#uses=1]
+	%6 = icmp eq i32 %5, 0		; <i1> [#uses=1]
+	br i1 %6, label %bb19, label %bb
+
+bb:		; preds = %entry
+	%.sum = add i32 %lag, 1		; <i32> [#uses=1]
+	%7 = getelementptr double* %2, i32 %.sum		; <double*> [#uses=1]
+	br label %bb19
+
+bb19:		; preds = %bb, %entry
+	%data15.0 = phi double* [ %7, %bb ], [ %3, %entry ]		; <double*> [#uses=5]
+	%8 = sitofp i32 %len to double		; <double> [#uses=1]
+	%9 = fsub double %8, 1.000000e+00		; <double> [#uses=1]
+	%10 = fdiv double 2.000000e+00, %9		; <double> [#uses=1]
+	store double %10, double* %c, align 8
+	%11 = ashr i32 %len, 1		; <i32> [#uses=3]
+	%12 = mul i32 %11, -4		; <i32> [#uses=2]
+	%13 = shl i32 %len, 1		; <i32> [#uses=1]
+	%14 = and i32 %13, -4		; <i32> [#uses=2]
+	call void asm sideeffect "movsd   $0,     %xmm7                \0A\09movapd  ff_pd_1, %xmm6     \0A\09movapd  ff_pd_2, %xmm5     \0A\09movlhps %xmm7, %xmm7                \0A\09subpd   %xmm5, %xmm7                \0A\09addsd   %xmm6, %xmm7                \0A\09", "*m,~{dirflag},~{fpsr},~{flags}"(double* %c) nounwind
+	%15 = and i32 %len, 1		; <i32> [#uses=1]
+	%toBool = icmp eq i32 %15, 0		; <i1> [#uses=1]
+	%16 = getelementptr double* %data15.0, i32 %11		; <double*> [#uses=2]
+	%17 = getelementptr i32* %data, i32 %11		; <i32*> [#uses=2]
+	br i1 %toBool, label %bb22, label %bb20
+
+bb20:		; preds = %bb19
+	%asmtmp = call { i32, i32 } asm sideeffect "1:                                    \0A\09movapd   %xmm7,  %xmm1              \0A\09mulpd    %xmm1,  %xmm1              \0A\09movapd   %xmm6,  %xmm0              \0A\09subpd    %xmm1,  %xmm0              \0A\09pshufd   $$0x4e,   %xmm0, %xmm1      \0A\09cvtpi2pd ($3,$0), %xmm2              \0A\09cvtpi2pd -1*4($3,$1), %xmm3   \0A\09mulpd    %xmm0,  %xmm2              \0A\09mulpd    %xmm1,  %xmm3              \0A\09movapd   %xmm2, ($2,$0,2)            \0A\09movupd    %xmm3, -1*8($2,$1,2) \0A\09subpd    %xmm5,  %xmm7              \0A\09sub      $$8,      $1                  \0A\09add      $$8,      $0                  \0A\09jl 1b                                 \0A\09", "=&r,=&r,r,r,0,1,~{dirflag},~{fpsr},~{flags}"(double* %16, i32* %17, i32 %12, i32 %14) nounwind		; <{ i32, i32 }> [#uses=0]
+	br label %bb28.preheader
+
+bb22:		; preds = %bb19
+	%asmtmp23 = call { i32, i32 } asm sideeffect "1:                                    \0A\09movapd   %xmm7,  %xmm1              \0A\09mulpd    %xmm1,  %xmm1              \0A\09movapd   %xmm6,  %xmm0              \0A\09subpd    %xmm1,  %xmm0              \0A\09pshufd   $$0x4e,   %xmm0, %xmm1      \0A\09cvtpi2pd ($3,$0), %xmm2              \0A\09cvtpi2pd -2*4($3,$1), %xmm3   \0A\09mulpd    %xmm0,  %xmm2              \0A\09mulpd    %xmm1,  %xmm3              \0A\09movapd   %xmm2, ($2,$0,2)            \0A\09movapd    %xmm3, -2*8($2,$1,2) \0A\09subpd    %xmm5,  %xmm7              \0A\09sub      $$8,      $1                  \0A\09add      $$8,      $0                  \0A\09jl 1b                                 \0A\09", "=&r,=&r,r,r,0,1,~{dirflag},~{fpsr},~{flags}"(double* %16, i32* %17, i32 %12, i32 %14) nounwind		; <{ i32, i32 }> [#uses=0]
+	br label %bb28.preheader
+
+bb28.preheader:		; preds = %bb22, %bb20
+	%18 = icmp sgt i32 %lag, 0		; <i1> [#uses=2]
+	br i1 %18, label %bb27, label %bb29
+
+bb27:		; preds = %bb27, %bb28.preheader
+	%j4.042 = phi i32 [ 0, %bb28.preheader ], [ %indvar.next45, %bb27 ]		; <i32> [#uses=2]
+	%19 = sub i32 %j4.042, %lag		; <i32> [#uses=1]
+	%20 = getelementptr double* %data15.0, i32 %19		; <double*> [#uses=1]
+	store double 0.000000e+00, double* %20, align 8
+	%indvar.next45 = add i32 %j4.042, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next45, %lag		; <i1> [#uses=1]
+	br i1 %exitcond, label %bb29, label %bb27
+
+bb29:		; preds = %bb27, %bb28.preheader
+	%21 = getelementptr double* %data15.0, i32 %len		; <double*> [#uses=3]
+	store double 0.000000e+00, double* %21, align 8
+	br i1 %18, label %bb.nph, label %bb37
+
+bb.nph:		; preds = %bb29
+	%22 = mul i32 %len, -8		; <i32> [#uses=2]
+	%23 = add i32 %lag, -2		; <i32> [#uses=1]
+	br label %bb30
+
+bb30:		; preds = %bb35, %bb.nph
+	%indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb35 ]		; <i32> [#uses=2]
+	%j4.141 = shl i32 %indvar, 1		; <i32> [#uses=8]
+	%24 = icmp eq i32 %23, %j4.141		; <i1> [#uses=1]
+	%25 = or i32 %j4.141, 1		; <i32> [#uses=2]
+	br i1 %24, label %bb31, label %bb33
+
+bb31:		; preds = %bb30
+	%26 = add i32 %j4.141, 2		; <i32> [#uses=2]
+	%.sum38 = sub i32 %len, %j4.141		; <i32> [#uses=1]
+	%27 = getelementptr double* %data15.0, i32 %.sum38		; <double*> [#uses=1]
+	%28 = getelementptr double* %autoc, i32 %j4.141		; <double*> [#uses=1]
+	%29 = getelementptr double* %autoc, i32 %25		; <double*> [#uses=1]
+	%30 = getelementptr double* %autoc, i32 %26		; <double*> [#uses=1]
+	%asmtmp32 = call i32 asm sideeffect "movsd    ff_pd_1, %xmm0 \0A\09movsd    ff_pd_1, %xmm1 \0A\09movsd    ff_pd_1, %xmm2 \0A\091:                                 \0A\09movapd   ($4,$0), %xmm3           \0A\09movupd -8($5,$0), %xmm4           \0A\09movapd   ($5,$0), %xmm5           \0A\09mulpd     %xmm3, %xmm4           \0A\09mulpd     %xmm3, %xmm5           \0A\09mulpd -16($5,$0), %xmm3           \0A\09addpd     %xmm4, %xmm1           \0A\09addpd     %xmm5, %xmm0           \0A\09addpd     %xmm3, %xmm2           \0A\09add       $$16,    $0               \0A\09jl 1b                              \0A\09movhlps   %xmm0, %xmm3           \0A\09movhlps   %xmm1, %xmm4           \0A\09movhlps   %xmm2, %xmm5           \0A\09addsd     %xmm3, %xmm0           \0A\09addsd     %xmm4, %xmm1           \0A\09addsd     %xmm5, %xmm2           \0A\09movsd     %xmm0, $1               \0A\09movsd     %xmm1, $2               \0A\09movsd     %xmm2, $3               \0A\09", "=&r,=*m,=*m,=*m,r,r,0,~{dirflag},~{fpsr},~{flags}"(double* %28, double* %29, double* %30, double* %21, double* %27, i32 %22) nounwind		; <i32> [#uses=0]
+	br label %bb35
+
+bb33:		; preds = %bb30
+	%.sum39 = sub i32 %len, %j4.141		; <i32> [#uses=1]
+	%31 = getelementptr double* %data15.0, i32 %.sum39		; <double*> [#uses=1]
+	%32 = getelementptr double* %autoc, i32 %j4.141		; <double*> [#uses=1]
+	%33 = getelementptr double* %autoc, i32 %25		; <double*> [#uses=1]
+	%asmtmp34 = call i32 asm sideeffect "movsd    ff_pd_1, %xmm0 \0A\09movsd    ff_pd_1, %xmm1 \0A\091:                                 \0A\09movapd   ($3,$0), %xmm3           \0A\09movupd -8($4,$0), %xmm4           \0A\09mulpd     %xmm3, %xmm4           \0A\09mulpd    ($4,$0), %xmm3           \0A\09addpd     %xmm4, %xmm1           \0A\09addpd     %xmm3, %xmm0           \0A\09add       $$16,    $0               \0A\09jl 1b                              \0A\09movhlps   %xmm0, %xmm3           \0A\09movhlps   %xmm1, %xmm4           \0A\09addsd     %xmm3, %xmm0           \0A\09addsd     %xmm4, %xmm1           \0A\09movsd     %xmm0, $1               \0A\09movsd     %xmm1, $2               \0A\09", "=&r,=*m,=*m,r,r,0,~{dirflag},~{fpsr},~{flags}"(double* %32, double* %33, double* %21, double* %31, i32 %22) nounwind		; <i32> [#uses=0]
+	%.pre = add i32 %j4.141, 2		; <i32> [#uses=1]
+	br label %bb35
+
+bb35:		; preds = %bb33, %bb31
+	%.pre-phi = phi i32 [ %.pre, %bb33 ], [ %26, %bb31 ]		; <i32> [#uses=1]
+	%34 = icmp slt i32 %.pre-phi, %lag		; <i1> [#uses=1]
+	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=1]
+	br i1 %34, label %bb30, label %bb37
+
+bb37:		; preds = %bb35, %bb29
+	ret void
+}
diff --git a/test/CodeGen/X86/pr3216.ll b/test/CodeGen/X86/pr3216.ll
new file mode 100644
index 0000000..38c9f32
--- /dev/null
+++ b/test/CodeGen/X86/pr3216.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 | grep {sar.	\$5}
+
+@foo = global i8 127
+
+define i32 @main() nounwind {
+entry:
+        %tmp = load i8* @foo
+        %bf.lo = lshr i8 %tmp, 5
+        %bf.lo.cleared = and i8 %bf.lo, 7
+        %0 = shl i8 %bf.lo.cleared, 5
+        %bf.val.sext = ashr i8 %0, 5
+        %conv = sext i8 %bf.val.sext to i32
+        ret i32 %conv
+}
diff --git a/test/CodeGen/X86/pr3241.ll b/test/CodeGen/X86/pr3241.ll
new file mode 100644
index 0000000..2f7917b
--- /dev/null
+++ b/test/CodeGen/X86/pr3241.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=x86
+; PR3241
+
+@g_620 = external global i32
+
+define void @func_18(i32 %p_21) nounwind {
+entry:
+	%t0 = call i32 @func_31(i32 %p_21) nounwind
+	%t1 = call i32 @safe_add_macro_uint32_t_u_u() nounwind
+	%t2 = icmp sgt i32 %t1, 0
+	%t3 = zext i1 %t2 to i32
+	%t4 = load i32* @g_620, align 4
+	%t5 = icmp eq i32 %t3, %t4
+	%t6 = xor i32 %p_21, 1
+	%t7 = call i32 @func_55(i32 %t6) nounwind
+	br i1 %t5, label %return, label %bb
+
+bb:
+	unreachable
+
+return:
+	unreachable
+}
+
+declare i32 @func_31(i32)
+
+declare i32 @safe_add_macro_uint32_t_u_u()
+
+declare i32 @func_55(i32)
diff --git a/test/CodeGen/X86/pr3243.ll b/test/CodeGen/X86/pr3243.ll
new file mode 100644
index 0000000..483b5bf
--- /dev/null
+++ b/test/CodeGen/X86/pr3243.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86
+; PR3243
+
+declare signext i16 @safe_mul_func_int16_t_s_s(i16 signext, i32) nounwind readnone optsize
+
+define i32 @func_120(i32 %p_121) nounwind optsize {
+entry:
+	%0 = trunc i32 %p_121 to i16		; <i16> [#uses=1]
+	%1 = urem i16 %0, -15461		; <i16> [#uses=1]
+	%phitmp1 = trunc i16 %1 to i8		; <i8> [#uses=1]
+	%phitmp2 = urem i8 %phitmp1, -1		; <i8> [#uses=1]
+	%phitmp3 = zext i8 %phitmp2 to i16		; <i16> [#uses=1]
+	%2 = tail call signext i16 @safe_mul_func_int16_t_s_s(i16 signext %phitmp3, i32 1) nounwind		; <i16> [#uses=0]
+	unreachable
+}
diff --git a/test/CodeGen/X86/pr3244.ll b/test/CodeGen/X86/pr3244.ll
new file mode 100644
index 0000000..2598c2f
--- /dev/null
+++ b/test/CodeGen/X86/pr3244.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=x86
+; PR3244
+
+@g_62 = external global i16             ; <i16*> [#uses=1]
+@g_487 = external global i32            ; <i32*> [#uses=1]
+
+define i32 @func_42(i32 %p_43, i32 %p_44, i32 %p_45, i32 %p_46) nounwind {
+entry:
+        %0 = load i16* @g_62, align 2           ; <i16> [#uses=1]
+        %1 = load i32* @g_487, align 4          ; <i32> [#uses=1]
+        %2 = trunc i16 %0 to i8         ; <i8> [#uses=1]
+        %3 = trunc i32 %1 to i8         ; <i8> [#uses=1]
+        %4 = tail call i32 (...)* @func_7(i64 -4455561449541442965, i32 1)
+nounwind             ; <i32> [#uses=1]
+        %5 = trunc i32 %4 to i8         ; <i8> [#uses=1]
+        %6 = mul i8 %3, %2              ; <i8> [#uses=1]
+        %7 = mul i8 %6, %5              ; <i8> [#uses=1]
+        %8 = sext i8 %7 to i16          ; <i16> [#uses=1]
+        %9 = tail call i32 @func_85(i16 signext %8, i32 1, i32 1) nounwind     
+        ; <i32> [#uses=0]
+        ret i32 undef
+}
+
+declare i32 @func_7(...)
+
+declare i32 @func_85(i16 signext, i32, i32)
diff --git a/test/CodeGen/X86/pr3250.ll b/test/CodeGen/X86/pr3250.ll
new file mode 100644
index 0000000..cccbf54
--- /dev/null
+++ b/test/CodeGen/X86/pr3250.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86
+; PR3250
+
+declare i32 @safe_sub_func_short_u_u(i16 signext, i16 signext) nounwind
+
+define i32 @func_106(i32 %p_107) nounwind {
+entry:
+        %0 = tail call i32 (...)* @safe_div_(i32 %p_107, i32 1) nounwind       
+        ; <i32> [#uses=1]
+        %1 = lshr i32 %0, -9            ; <i32> [#uses=1]
+        %2 = trunc i32 %1 to i16                ; <i16> [#uses=1]
+        %3 = tail call i32 @safe_sub_func_short_u_u(i16 signext 1, i16 signext
+%2) nounwind             ; <i32> [#uses=0]
+        ret i32 undef
+}
+
+declare i32 @safe_div_(...)
diff --git a/test/CodeGen/X86/pr3317.ll b/test/CodeGen/X86/pr3317.ll
new file mode 100644
index 0000000..9d6626b
--- /dev/null
+++ b/test/CodeGen/X86/pr3317.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -march=x86
+; PR3317
+
+        %ArraySInt16 = type { %JavaObject, i8*, [0 x i16] }
+        %ArraySInt8 = type { %JavaObject, i8*, [0 x i8] }
+        %Attribut = type { %ArraySInt16*, i32, i32 }
+        %CacheNode = type { i8*, %JavaCommonClass*, %CacheNode*, %Enveloppe* }
+        %Enveloppe = type { %CacheNode*, %ArraySInt16*, %ArraySInt16*, i8, %JavaClass*, %CacheNode }
+        %JavaArray = type { %JavaObject, i8* }
+        %JavaClass = type { %JavaCommonClass, i32, %VT*, [1 x %TaskClassMirror], i8*, %JavaField*, i16, %JavaField*, i16, %JavaMethod*, i16, %JavaMethod*, i16, i8*, %ArraySInt8*, i8*, %Attribut*, i16, %JavaClass**, i16, %JavaClass*, i16, i8, i32, i32, i8*, void (i8*)* }
+        %JavaCommonClass = type { %JavaCommonClass**, i32, [1 x %JavaObject*], i16, %JavaClass**, i16, %ArraySInt16*, %JavaClass*, i8* }
+        %JavaField = type { i8*, i16, %ArraySInt16*, %ArraySInt16*, %Attribut*, i16, %JavaClass*, i32, i16, i8* }
+        %JavaMethod = type { i8*, i16, %Attribut*, i16, %Enveloppe*, i16, %JavaClass*, %ArraySInt16*, %ArraySInt16*, i8, i8*, i32, i8* }
+        %JavaObject = type { %VT*, %JavaCommonClass*, i8* }
+        %TaskClassMirror = type { i32, i8* }
+        %UTF8 = type { %JavaObject, i8*, [0 x i16] }
+        %VT = type [0 x i32 (...)*]
+
+declare void @jnjvmNullPointerException()
+
+define i32 @JnJVM_java_rmi_activation_ActivationGroupID_hashCode__(%JavaObject* nocapture) nounwind {
+start:
+        %1 = getelementptr %JavaObject* %0, i64 1, i32 1                ; <%JavaCommonClass**> [#uses=1]
+        %2 = load %JavaCommonClass** %1         ; <%JavaCommonClass*> [#uses=4]
+        %3 = icmp eq %JavaCommonClass* %2, null         ; <i1> [#uses=1]
+        br i1 %3, label %verifyNullExit1, label %verifyNullCont2
+
+verifyNullExit1:                ; preds = %start
+        tail call void @jnjvmNullPointerException()
+        unreachable
+
+verifyNullCont2:                ; preds = %start
+        %4 = bitcast %JavaCommonClass* %2 to { %JavaObject, i16, i32, i64 }*            ; <{ %JavaObject, i16, i32, i64 }*> [#uses=1]
+        %5 = getelementptr { %JavaObject, i16, i32, i64 }* %4, i64 0, i32 2             ; <i32*> [#uses=1]
+        %6 = load i32* %5               ; <i32> [#uses=1]
+        %7 = getelementptr %JavaCommonClass* %2, i64 0, i32 4           ; <%JavaClass***> [#uses=1]
+        %8 = bitcast %JavaClass*** %7 to i64*           ; <i64*> [#uses=1]
+        %9 = load i64* %8               ; <i64> [#uses=1]
+        %10 = trunc i64 %9 to i32               ; <i32> [#uses=1]
+        %11 = getelementptr %JavaCommonClass* %2, i64 0, i32 3          ; <i16*> [#uses=1]
+        %12 = load i16* %11             ; <i16> [#uses=1]
+        %13 = sext i16 %12 to i32               ; <i32> [#uses=1]
+        %14 = xor i32 %10, %6           ; <i32> [#uses=1]
+        %15 = xor i32 %14, %13          ; <i32> [#uses=1]
+        ret i32 %15 
+}
diff --git a/test/CodeGen/X86/pr3366.ll b/test/CodeGen/X86/pr3366.ll
new file mode 100644
index 0000000..f813e2e
--- /dev/null
+++ b/test/CodeGen/X86/pr3366.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=x86 | grep movzbl
+; PR3366
+
+define void @_ada_c34002a() nounwind {
+entry:
+  %0 = load i8* null, align 1
+  %1 = sdiv i8 90, %0
+  %2 = icmp ne i8 %1, 3
+  %3 = zext i1 %2 to i8
+  %toBool449 = icmp ne i8 %3, 0
+  %4 = or i1 false, %toBool449
+  %5 = zext i1 %4 to i8
+  %toBool450 = icmp ne i8 %5, 0
+  br i1 %toBool450, label %bb451, label %bb457
+
+bb451:
+  br label %bb457
+
+bb457:
+  unreachable
+}
diff --git a/test/CodeGen/X86/pr3457.ll b/test/CodeGen/X86/pr3457.ll
new file mode 100644
index 0000000..f7af927
--- /dev/null
+++ b/test/CodeGen/X86/pr3457.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin | not grep fstpt
+; PR3457
+; rdar://6548010
+
+define void @foo(double* nocapture %P) nounwind {
+entry:
+	%0 = tail call double (...)* @test() nounwind		; <double> [#uses=2]
+	%1 = tail call double (...)* @test() nounwind		; <double> [#uses=2]
+	%2 = fmul double %0, %0		; <double> [#uses=1]
+	%3 = fmul double %1, %1		; <double> [#uses=1]
+	%4 = fadd double %2, %3		; <double> [#uses=1]
+	store double %4, double* %P, align 8
+	ret void
+}
+
+declare double @test(...)
diff --git a/test/CodeGen/X86/pr3495-2.ll b/test/CodeGen/X86/pr3495-2.ll
new file mode 100644
index 0000000..71aa5a0
--- /dev/null
+++ b/test/CodeGen/X86/pr3495-2.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -march=x86 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of reloads omited}
+
+target datalayout = "e-p:32:32:32"
+target triple = "i386-apple-darwin9.6"
+	%struct.constraintVCGType = type { i32, i32, i32, i32 }
+	%struct.nodeVCGType = type { %struct.constraintVCGType*, i32, i32, i32, %struct.constraintVCGType*, i32, i32, i32 }
+
+define fastcc void @SCC_DFSBelowVCG(%struct.nodeVCGType* %VCG, i32 %net, i32 %label) nounwind {
+entry:
+	%0 = getelementptr %struct.nodeVCGType* %VCG, i32 %net, i32 5		; <i32*> [#uses=2]
+	%1 = load i32* %0, align 4		; <i32> [#uses=1]
+	%2 = icmp eq i32 %1, 0		; <i1> [#uses=1]
+	br i1 %2, label %bb5, label %bb.nph3
+
+bb.nph3:		; preds = %entry
+	%3 = getelementptr %struct.nodeVCGType* %VCG, i32 %net, i32 4		; <%struct.constraintVCGType**> [#uses=1]
+	br label %bb
+
+bb:		; preds = %bb3, %bb.nph3
+	%s.02 = phi i32 [ 0, %bb.nph3 ], [ %12, %bb3 ]		; <i32> [#uses=2]
+	%4 = load %struct.constraintVCGType** %3, align 4		; <%struct.constraintVCGType*> [#uses=1]
+	%5 = icmp eq i32 0, 0		; <i1> [#uses=1]
+	br i1 %5, label %bb1, label %bb3
+
+bb1:		; preds = %bb
+	%6 = getelementptr %struct.constraintVCGType* %4, i32 %s.02, i32 0		; <i32*> [#uses=1]
+	%7 = load i32* %6, align 4		; <i32> [#uses=2]
+	%8 = getelementptr %struct.nodeVCGType* %VCG, i32 %7, i32 7		; <i32*> [#uses=1]
+	%9 = load i32* %8, align 4		; <i32> [#uses=1]
+	%10 = icmp eq i32 %9, 0		; <i1> [#uses=1]
+	br i1 %10, label %bb2, label %bb3
+
+bb2:		; preds = %bb1
+	%11 = getelementptr %struct.nodeVCGType* %VCG, i32 %7, i32 4		; <%struct.constraintVCGType**> [#uses=0]
+	br label %bb.i
+
+bb.i:		; preds = %bb.i, %bb2
+	br label %bb.i
+
+bb3:		; preds = %bb1, %bb
+	%12 = add i32 %s.02, 1		; <i32> [#uses=2]
+	%13 = load i32* %0, align 4		; <i32> [#uses=1]
+	%14 = icmp ugt i32 %13, %12		; <i1> [#uses=1]
+	br i1 %14, label %bb, label %bb5
+
+bb5:		; preds = %bb3, %entry
+	%15 = getelementptr %struct.nodeVCGType* %VCG, i32 %net, i32 6		; <i32*> [#uses=1]
+	store i32 %label, i32* %15, align 4
+	ret void
+}
diff --git a/test/CodeGen/X86/pr3495.ll b/test/CodeGen/X86/pr3495.ll
new file mode 100644
index 0000000..1795970
--- /dev/null
+++ b/test/CodeGen/X86/pr3495.ll
@@ -0,0 +1,80 @@
+; RUN: llc < %s -march=x86 -stats |& grep {Number of loads added} | grep 2
+; RUN: llc < %s -march=x86 -stats |& grep {Number of register spills} | grep 1
+; RUN: llc < %s -march=x86 -stats |& grep {Number of machine instrs printed} | grep 38
+; PR3495
+; The loop reversal kicks in once here, resulting in one fewer instruction.
+
+target triple = "i386-pc-linux-gnu"
+@x = external global [8 x i32], align 32		; <[8 x i32]*> [#uses=1]
+@rows = external global [8 x i32], align 32		; <[8 x i32]*> [#uses=2]
+@up = external global [15 x i32], align 32		; <[15 x i32]*> [#uses=2]
+@down = external global [15 x i32], align 32		; <[15 x i32]*> [#uses=1]
+
+define i32 @queens(i32 %c) nounwind {
+entry:
+	%tmp91 = add i32 %c, 1		; <i32> [#uses=3]
+	%tmp135 = getelementptr [8 x i32]* @x, i32 0, i32 %tmp91		; <i32*> [#uses=1]
+	br label %bb
+
+bb:		; preds = %bb569, %entry
+	%r25.0.reg2mem.0 = phi i32 [ 0, %entry ], [ %indvar.next715, %bb569 ]		; <i32> [#uses=4]
+	%tmp27 = getelementptr [8 x i32]* @rows, i32 0, i32 %r25.0.reg2mem.0		; <i32*> [#uses=1]
+	%tmp28 = load i32* %tmp27, align 4		; <i32> [#uses=1]
+	%tmp29 = icmp eq i32 %tmp28, 0		; <i1> [#uses=1]
+	br i1 %tmp29, label %bb569, label %bb31
+
+bb31:		; preds = %bb
+	%tmp35 = sub i32 %r25.0.reg2mem.0, 0		; <i32> [#uses=1]
+	%tmp36 = getelementptr [15 x i32]* @up, i32 0, i32 %tmp35		; <i32*> [#uses=1]
+	%tmp37 = load i32* %tmp36, align 4		; <i32> [#uses=1]
+	%tmp38 = icmp eq i32 %tmp37, 0		; <i1> [#uses=1]
+	br i1 %tmp38, label %bb569, label %bb41
+
+bb41:		; preds = %bb31
+	%tmp54 = sub i32 %r25.0.reg2mem.0, %c		; <i32> [#uses=1]
+	%tmp55 = add i32 %tmp54, 7		; <i32> [#uses=1]
+	%tmp62 = getelementptr [15 x i32]* @up, i32 0, i32 %tmp55		; <i32*> [#uses=2]
+	store i32 0, i32* %tmp62, align 4
+	br label %bb92
+
+bb92:		; preds = %bb545, %bb41
+	%r20.0.reg2mem.0 = phi i32 [ 0, %bb41 ], [ %indvar.next711, %bb545 ]		; <i32> [#uses=5]
+	%tmp94 = getelementptr [8 x i32]* @rows, i32 0, i32 %r20.0.reg2mem.0		; <i32*> [#uses=1]
+	%tmp95 = load i32* %tmp94, align 4		; <i32> [#uses=0]
+	%tmp112 = add i32 %r20.0.reg2mem.0, %tmp91		; <i32> [#uses=1]
+	%tmp113 = getelementptr [15 x i32]* @down, i32 0, i32 %tmp112		; <i32*> [#uses=2]
+	%tmp114 = load i32* %tmp113, align 4		; <i32> [#uses=1]
+	%tmp115 = icmp eq i32 %tmp114, 0		; <i1> [#uses=1]
+	br i1 %tmp115, label %bb545, label %bb118
+
+bb118:		; preds = %bb92
+	%tmp122 = sub i32 %r20.0.reg2mem.0, %tmp91		; <i32> [#uses=0]
+	store i32 0, i32* %tmp113, align 4
+	store i32 %r20.0.reg2mem.0, i32* %tmp135, align 4
+	br label %bb142
+
+bb142:		; preds = %bb142, %bb118
+	%k18.0.reg2mem.0 = phi i32 [ 0, %bb118 ], [ %indvar.next709, %bb142 ]		; <i32> [#uses=1]
+	%indvar.next709 = add i32 %k18.0.reg2mem.0, 1		; <i32> [#uses=2]
+	%exitcond710 = icmp eq i32 %indvar.next709, 8		; <i1> [#uses=1]
+	br i1 %exitcond710, label %bb155, label %bb142
+
+bb155:		; preds = %bb142
+	%tmp156 = tail call i32 @putchar(i32 10) nounwind		; <i32> [#uses=0]
+	br label %bb545
+
+bb545:		; preds = %bb155, %bb92
+	%indvar.next711 = add i32 %r20.0.reg2mem.0, 1		; <i32> [#uses=2]
+	%exitcond712 = icmp eq i32 %indvar.next711, 8		; <i1> [#uses=1]
+	br i1 %exitcond712, label %bb553, label %bb92
+
+bb553:		; preds = %bb545
+	store i32 1, i32* %tmp62, align 4
+	br label %bb569
+
+bb569:		; preds = %bb553, %bb31, %bb
+	%indvar.next715 = add i32 %r25.0.reg2mem.0, 1		; <i32> [#uses=1]
+	br label %bb
+}
+
+declare i32 @putchar(i32)
diff --git a/test/CodeGen/X86/pr3522.ll b/test/CodeGen/X86/pr3522.ll
new file mode 100644
index 0000000..7cdeaa0
--- /dev/null
+++ b/test/CodeGen/X86/pr3522.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -march=x86 -stats |& not grep machine-sink
+; PR3522
+
+target triple = "i386-pc-linux-gnu"
+@.str = external constant [13 x i8]		; <[13 x i8]*> [#uses=1]
+
+define void @_ada_c34018a() {
+entry:
+	%0 = tail call i32 @report__ident_int(i32 90)		; <i32> [#uses=1]
+	%1 = trunc i32 %0 to i8		; <i8> [#uses=1]
+	invoke void @__gnat_rcheck_12(i8* getelementptr ([13 x i8]* @.str, i32 0, i32 0), i32 32) noreturn
+			to label %invcont unwind label %lpad
+
+invcont:		; preds = %entry
+	unreachable
+
+bb22:		; preds = %lpad
+	ret void
+
+return:		; preds = %lpad
+	ret void
+
+lpad:		; preds = %entry
+	%2 = icmp eq i8 %1, 90		; <i1> [#uses=1]
+	br i1 %2, label %return, label %bb22
+}
+
+declare void @__gnat_rcheck_12(i8*, i32) noreturn
+
+declare i32 @report__ident_int(i32)
diff --git a/test/CodeGen/X86/pre-split1.ll b/test/CodeGen/X86/pre-split1.ll
new file mode 100644
index 0000000..e89b507
--- /dev/null
+++ b/test/CodeGen/X86/pre-split1.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN:   grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
+; XFAIL: *
+
+define void @test(double* %P, i32 %cond) nounwind {
+entry:
+	%0 = load double* %P, align 8		; <double> [#uses=1]
+	%1 = fadd double %0, 4.000000e+00		; <double> [#uses=2]
+	%2 = icmp eq i32 %cond, 0		; <i1> [#uses=1]
+	br i1 %2, label %bb1, label %bb
+
+bb:		; preds = %entry
+	%3 = fadd double %1, 4.000000e+00		; <double> [#uses=1]
+	br label %bb1
+
+bb1:		; preds = %bb, %entry
+	%A.0 = phi double [ %3, %bb ], [ %1, %entry ]		; <double> [#uses=1]
+	%4 = fmul double %A.0, 4.000000e+00		; <double> [#uses=1]
+	%5 = tail call i32 (...)* @bar() nounwind		; <i32> [#uses=0]
+	store double %4, double* %P, align 8
+	ret void
+}
+
+declare i32 @bar(...)
diff --git a/test/CodeGen/X86/pre-split10.ll b/test/CodeGen/X86/pre-split10.ll
new file mode 100644
index 0000000..db039bd
--- /dev/null
+++ b/test/CodeGen/X86/pre-split10.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split
+
+define i32 @main(i32 %argc, i8** %argv) nounwind {
+entry:
+	br label %bb14.i
+
+bb14.i:		; preds = %bb14.i, %entry
+	%i8.0.reg2mem.0.i = phi i32 [ 0, %entry ], [ %0, %bb14.i ]		; <i32> [#uses=1]
+	%0 = add i32 %i8.0.reg2mem.0.i, 1		; <i32> [#uses=2]
+	%1 = fadd double 0.000000e+00, 0.000000e+00		; <double> [#uses=1]
+	%2 = fadd double 0.000000e+00, 0.000000e+00		; <double> [#uses=1]
+	%3 = fadd double 0.000000e+00, 0.000000e+00		; <double> [#uses=1]
+	%exitcond75.i = icmp eq i32 %0, 32		; <i1> [#uses=1]
+	br i1 %exitcond75.i, label %bb24.i, label %bb14.i
+
+bb24.i:		; preds = %bb14.i
+	%4 = fdiv double 0.000000e+00, 0.000000e+00		; <double> [#uses=1]
+	%5 = fdiv double %1, 0.000000e+00		; <double> [#uses=1]
+	%6 = fdiv double %2, 0.000000e+00		; <double> [#uses=1]
+	%7 = fdiv double %3, 0.000000e+00		; <double> [#uses=1]
+	br label %bb31.i
+
+bb31.i:		; preds = %bb31.i, %bb24.i
+	%tmp.0.reg2mem.0.i = phi i32 [ 0, %bb24.i ], [ %indvar.next64.i, %bb31.i ]		; <i32> [#uses=1]
+	%indvar.next64.i = add i32 %tmp.0.reg2mem.0.i, 1		; <i32> [#uses=2]
+	%exitcond65.i = icmp eq i32 %indvar.next64.i, 64		; <i1> [#uses=1]
+	br i1 %exitcond65.i, label %bb33.i, label %bb31.i
+
+bb33.i:		; preds = %bb31.i
+	br label %bb35.preheader.i
+
+bb5.i.i:		; preds = %bb35.preheader.i
+	%8 = call double @floor(double 0.000000e+00) nounwind readnone		; <double> [#uses=0]
+	br label %bb7.i.i
+
+bb7.i.i:		; preds = %bb35.preheader.i, %bb5.i.i
+	br label %bb35.preheader.i
+
+bb35.preheader.i:		; preds = %bb7.i.i, %bb33.i
+	%9 = fsub double 0.000000e+00, %4		; <double> [#uses=1]
+	store double %9, double* null, align 8
+	%10 = fsub double 0.000000e+00, %5		; <double> [#uses=1]
+	store double %10, double* null, align 8
+	%11 = fsub double 0.000000e+00, %6		; <double> [#uses=1]
+	store double %11, double* null, align 8
+	%12 = fsub double 0.000000e+00, %7		; <double> [#uses=1]
+	store double %12, double* null, align 8
+	br i1 false, label %bb7.i.i, label %bb5.i.i
+}
+
+declare double @floor(double) nounwind readnone
diff --git a/test/CodeGen/X86/pre-split11.ll b/test/CodeGen/X86/pre-split11.ll
new file mode 100644
index 0000000..0a9f4e3
--- /dev/null
+++ b/test/CodeGen/X86/pre-split11.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 -pre-alloc-split | FileCheck %s
+
+@.str = private constant [28 x i8] c"\0A\0ADOUBLE            D = %f\0A\00", align 1 ; <[28 x i8]*> [#uses=1]
+@.str1 = private constant [37 x i8] c"double to long    l1 = %ld\09\09(0x%lx)\0A\00", align 8 ; <[37 x i8]*> [#uses=1]
+@.str2 = private constant [35 x i8] c"double to uint   ui1 = %u\09\09(0x%x)\0A\00", align 8 ; <[35 x i8]*> [#uses=1]
+@.str3 = private constant [37 x i8] c"double to ulong  ul1 = %lu\09\09(0x%lx)\0A\00", align 8 ; <[37 x i8]*> [#uses=1]
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
+; CHECK: movsd %xmm0, (%rsp)
+entry:
+  %0 = icmp sgt i32 %argc, 4                      ; <i1> [#uses=1]
+  br i1 %0, label %bb, label %bb2
+
+bb:                                               ; preds = %entry
+  %1 = getelementptr inbounds i8** %argv, i64 4   ; <i8**> [#uses=1]
+  %2 = load i8** %1, align 8                      ; <i8*> [#uses=1]
+  %3 = tail call double @atof(i8* %2) nounwind    ; <double> [#uses=1]
+  br label %bb2
+
+bb2:                                              ; preds = %bb, %entry
+  %storemerge = phi double [ %3, %bb ], [ 2.000000e+00, %entry ] ; <double> [#uses=4]
+  %4 = fptoui double %storemerge to i32           ; <i32> [#uses=2]
+  %5 = fptoui double %storemerge to i64           ; <i64> [#uses=2]
+  %6 = fptosi double %storemerge to i64           ; <i64> [#uses=2]
+  %7 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([28 x i8]* @.str, i64 0, i64 0), double %storemerge) nounwind ; <i32> [#uses=0]
+  %8 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([37 x i8]* @.str1, i64 0, i64 0), i64 %6, i64 %6) nounwind ; <i32> [#uses=0]
+  %9 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([35 x i8]* @.str2, i64 0, i64 0), i32 %4, i32 %4) nounwind ; <i32> [#uses=0]
+  %10 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([37 x i8]* @.str3, i64 0, i64 0), i64 %5, i64 %5) nounwind ; <i32> [#uses=0]
+  ret i32 0
+}
+
+declare double @atof(i8* nocapture) nounwind readonly
+
+declare i32 @printf(i8* nocapture, ...) nounwind
diff --git a/test/CodeGen/X86/pre-split2.ll b/test/CodeGen/X86/pre-split2.ll
new file mode 100644
index 0000000..ba902f9
--- /dev/null
+++ b/test/CodeGen/X86/pre-split2.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN:   grep {pre-alloc-split} | count 2
+
+define i32 @t(i32 %arg) {
+entry:
+	br label %bb6
+
+.noexc6:		; preds = %bb6
+	%0 = and i32 %2, -8		; <i32> [#uses=1]
+	tail call void @llvm.memmove.i32(i8* %3, i8* null, i32 %0, i32 1) nounwind
+	store double %1, double* null, align 8
+	br label %bb6
+
+bb6:		; preds = %.noexc6, %entry
+	%1 = uitofp i32 %arg to double		; <double> [#uses=1]
+	%2 = sub i32 0, 0		; <i32> [#uses=1]
+	%3 = invoke i8* @_Znwm(i32 0)
+			to label %.noexc6 unwind label %lpad32		; <i8*> [#uses=1]
+
+lpad32:		; preds = %bb6
+	unreachable
+}
+
+declare void @llvm.memmove.i32(i8*, i8*, i32, i32) nounwind
+
+declare i8* @_Znwm(i32)
diff --git a/test/CodeGen/X86/pre-split3.ll b/test/CodeGen/X86/pre-split3.ll
new file mode 100644
index 0000000..2e31420
--- /dev/null
+++ b/test/CodeGen/X86/pre-split3.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN:   grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
+; Exactly one live interval should be split across the invoke below.
+define i32 @t(i32 %arg) {
+entry:
+	br label %bb6
+
+.noexc6:		; preds = %bb6
+	%0 = and i32 %2, -8		; <i32> [#uses=1]
+	tail call void @llvm.memmove.i32(i8* %3, i8* null, i32 %0, i32 1) nounwind
+	store double %1, double* null, align 8
+	br label %bb6
+
+bb6:		; preds = %.noexc6, %entry
+	%1 = uitofp i32 %arg to double		; <double> [#uses=1]
+	%2 = sub i32 0, 0		; <i32> [#uses=1]
+	%3 = invoke i8* @_Znwm(i32 0)
+			to label %.noexc6 unwind label %lpad32		; <i8*> [#uses=1]
+
+lpad32:		; preds = %bb6
+	unreachable
+}
+
+declare void @llvm.memmove.i32(i8*, i8*, i32, i32) nounwind
+
+declare i8* @_Znwm(i32)
diff --git a/test/CodeGen/X86/pre-split4.ll b/test/CodeGen/X86/pre-split4.ll
new file mode 100644
index 0000000..10cef27
--- /dev/null
+++ b/test/CodeGen/X86/pre-split4.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN:   grep {pre-alloc-split} | grep {Number of intervals split} | grep 2
+; Exactly two live intervals should be split in this reduction loop.
+define i32 @main(i32 %argc, i8** %argv) nounwind {
+entry:
+	br label %bb
+
+bb:		; preds = %bb, %entry
+	%k.0.reg2mem.0 = phi double [ 1.000000e+00, %entry ], [ %6, %bb ]		; <double> [#uses=2]
+	%Flint.0.reg2mem.0 = phi double [ 0.000000e+00, %entry ], [ %5, %bb ]		; <double> [#uses=1]
+	%twoThrd.0.reg2mem.0 = phi double [ 0.000000e+00, %entry ], [ %1, %bb ]		; <double> [#uses=1]
+	%0 = tail call double @llvm.pow.f64(double 0x3FE5555555555555, double 0.000000e+00)		; <double> [#uses=1]
+	%1 = fadd double %0, %twoThrd.0.reg2mem.0		; <double> [#uses=1]
+	%2 = tail call double @sin(double %k.0.reg2mem.0) nounwind readonly		; <double> [#uses=1]
+	%3 = fmul double 0.000000e+00, %2		; <double> [#uses=1]
+	%4 = fdiv double 1.000000e+00, %3		; <double> [#uses=1]
+        store double %Flint.0.reg2mem.0, double* null
+        store double %twoThrd.0.reg2mem.0, double* null
+	%5 = fadd double %4, %Flint.0.reg2mem.0		; <double> [#uses=1]
+	%6 = fadd double %k.0.reg2mem.0, 1.000000e+00		; <double> [#uses=1]
+	br label %bb
+}
+
+declare double @llvm.pow.f64(double, double) nounwind readonly
+
+declare double @sin(double) nounwind readonly
diff --git a/test/CodeGen/X86/pre-split5.ll b/test/CodeGen/X86/pre-split5.ll
new file mode 100644
index 0000000..8def460
--- /dev/null
+++ b/test/CodeGen/X86/pre-split5.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split
+; Compile-only regression test: llc with -pre-alloc-split must not crash here.
+target triple = "i386-apple-darwin9.5"
+	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+	%struct.__sFILEX = type opaque
+	%struct.__sbuf = type { i8*, i32 }
+@"\01LC1" = external constant [48 x i8]		; <[48 x i8]*> [#uses=1]
+
+define i32 @main() nounwind {
+entry:
+	br label %bb5.us
+
+bb5.us:		; preds = %bb8.split, %bb5.us, %entry
+	%i.0.reg2mem.0.ph = phi i32 [ 0, %entry ], [ %indvar.next53, %bb8.split ], [ %i.0.reg2mem.0.ph, %bb5.us ]		; <i32> [#uses=2]
+	%j.0.reg2mem.0.us = phi i32 [ %indvar.next47, %bb5.us ], [ 0, %bb8.split ], [ 0, %entry ]		; <i32> [#uses=1]
+	%indvar.next47 = add i32 %j.0.reg2mem.0.us, 1		; <i32> [#uses=2]
+	%exitcond48 = icmp eq i32 %indvar.next47, 256		; <i1> [#uses=1]
+	br i1 %exitcond48, label %bb8.split, label %bb5.us
+
+bb8.split:		; preds = %bb5.us
+	%indvar.next53 = add i32 %i.0.reg2mem.0.ph, 1		; <i32> [#uses=2]
+	%exitcond54 = icmp eq i32 %indvar.next53, 256		; <i1> [#uses=1]
+	br i1 %exitcond54, label %bb11, label %bb5.us
+
+bb11:		; preds = %bb11, %bb8.split
+	%i.1.reg2mem.0 = phi i32 [ %indvar.next44, %bb11 ], [ 0, %bb8.split ]		; <i32> [#uses=1]
+	%indvar.next44 = add i32 %i.1.reg2mem.0, 1		; <i32> [#uses=2]
+	%exitcond45 = icmp eq i32 %indvar.next44, 63		; <i1> [#uses=1]
+	br i1 %exitcond45, label %bb14, label %bb11
+
+bb14:		; preds = %bb14, %bb11
+	%indvar = phi i32 [ %indvar.next40, %bb14 ], [ 0, %bb11 ]		; <i32> [#uses=1]
+	%indvar.next40 = add i32 %indvar, 1		; <i32> [#uses=2]
+	%exitcond41 = icmp eq i32 %indvar.next40, 32768		; <i1> [#uses=1]
+	br i1 %exitcond41, label %bb28, label %bb14
+
+bb28:		; preds = %bb14
+	%0 = fdiv double 2.550000e+02, 0.000000e+00		; <double> [#uses=1]
+	br label %bb30
+
+bb30:		; preds = %bb36, %bb28
+	%m.1.reg2mem.0 = phi i32 [ %m.0, %bb36 ], [ 0, %bb28 ]		; <i32> [#uses=1]
+	%1 = fmul double 0.000000e+00, %0		; <double> [#uses=1]
+	%2 = fptosi double %1 to i32		; <i32> [#uses=1]
+	br i1 false, label %bb36, label %bb35
+
+bb35:		; preds = %bb30
+	%3 = tail call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* null, i8* getelementptr ([48 x i8]* @"\01LC1", i32 0, i32 0), i32 0, i32 0, i32 0, i32 %2) nounwind		; <i32> [#uses=0]
+	br label %bb36
+
+bb36:		; preds = %bb35, %bb30
+	%m.0 = phi i32 [ 0, %bb35 ], [ %m.1.reg2mem.0, %bb30 ]		; <i32> [#uses=1]
+	br label %bb30
+}
+
+declare i32 @fprintf(%struct.FILE*, i8*, ...) nounwind
diff --git a/test/CodeGen/X86/pre-split6.ll b/test/CodeGen/X86/pre-split6.ll
new file mode 100644
index 0000000..d38e630
--- /dev/null
+++ b/test/CodeGen/X86/pre-split6.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split | grep {divsd	8} | count 1
+; Exactly one divsd with a memory operand at offset 8 is expected in the output.
+@current_surfaces.b = external global i1		; <i1*> [#uses=1]
+
+declare double @sin(double) nounwind readonly
+
+declare double @asin(double) nounwind readonly
+
+define fastcc void @trace_line(i32 %line) nounwind {
+entry:
+	%.b3 = load i1* @current_surfaces.b		; <i1> [#uses=1]
+	br i1 %.b3, label %bb.nph, label %return
+
+bb.nph:		; preds = %entry
+	%0 = load double* null, align 8		; <double> [#uses=1]
+	%1 = load double* null, align 8		; <double> [#uses=2]
+	%2 = fcmp une double %0, 0.000000e+00		; <i1> [#uses=1]
+	br i1 %2, label %bb9.i, label %bb13.i
+
+bb9.i:		; preds = %bb.nph
+	%3 = tail call double @asin(double 0.000000e+00) nounwind readonly		; <double> [#uses=0]
+	%4 = fdiv double 1.000000e+00, %1		; <double> [#uses=1]
+	%5 = fmul double %4, 0.000000e+00		; <double> [#uses=1]
+	%6 = tail call double @asin(double %5) nounwind readonly		; <double> [#uses=0]
+	unreachable
+
+bb13.i:		; preds = %bb.nph
+	%7 = fdiv double 1.000000e+00, %1		; <double> [#uses=1]
+	%8 = tail call double @sin(double 0.000000e+00) nounwind readonly		; <double> [#uses=1]
+	%9 = fmul double %7, %8		; <double> [#uses=1]
+	%10 = tail call double @asin(double %9) nounwind readonly		; <double> [#uses=0]
+	unreachable
+
+return:		; preds = %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/pre-split7.ll b/test/CodeGen/X86/pre-split7.ll
new file mode 100644
index 0000000..0b81c0b
--- /dev/null
+++ b/test/CodeGen/X86/pre-split7.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split
+; Compile-only regression test for -pre-alloc-split; llc just needs to succeed.
+@object_distance = external global double, align 8		; <double*> [#uses=1]
+@axis_slope_angle = external global double, align 8		; <double*> [#uses=1]
+@current_surfaces.b = external global i1		; <i1*> [#uses=1]
+
+declare double @sin(double) nounwind readonly
+
+declare double @asin(double) nounwind readonly
+
+declare double @tan(double) nounwind readonly
+
+define fastcc void @trace_line(i32 %line) nounwind {
+entry:
+	%.b3 = load i1* @current_surfaces.b		; <i1> [#uses=1]
+	br i1 %.b3, label %bb, label %return
+
+bb:		; preds = %bb, %entry
+	%0 = tail call double @asin(double 0.000000e+00) nounwind readonly		; <double> [#uses=1]
+	%1 = fadd double 0.000000e+00, %0		; <double> [#uses=2]
+	%2 = tail call double @asin(double 0.000000e+00) nounwind readonly		; <double> [#uses=1]
+	%3 = fsub double %1, %2		; <double> [#uses=2]
+	store double %3, double* @axis_slope_angle, align 8
+	%4 = fdiv double %1, 2.000000e+00		; <double> [#uses=1]
+	%5 = tail call double @sin(double %4) nounwind readonly		; <double> [#uses=1]
+	%6 = fmul double 0.000000e+00, %5		; <double> [#uses=1]
+	%7 = tail call double @tan(double %3) nounwind readonly		; <double> [#uses=0]
+	%8 = fadd double 0.000000e+00, %6		; <double> [#uses=1]
+	store double %8, double* @object_distance, align 8
+	br label %bb
+
+return:		; preds = %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/pre-split8.ll b/test/CodeGen/X86/pre-split8.ll
new file mode 100644
index 0000000..ea4b949
--- /dev/null
+++ b/test/CodeGen/X86/pre-split8.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN:   grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
+; Exactly one live interval should be split in this loop.
+@current_surfaces.b = external global i1		; <i1*> [#uses=1]
+
+declare double @asin(double) nounwind readonly
+
+declare double @tan(double) nounwind readonly
+
+define fastcc void @trace_line(i32 %line) nounwind {
+entry:
+	%.b3 = load i1* @current_surfaces.b		; <i1> [#uses=1]
+	br i1 %.b3, label %bb, label %return
+
+bb:		; preds = %bb9.i, %entry
+	%.rle4 = phi double [ %7, %bb9.i ], [ 0.000000e+00, %entry ]		; <double> [#uses=1]
+	%0 = load double* null, align 8		; <double> [#uses=3]
+	%1 = fcmp une double %0, 0.000000e+00		; <i1> [#uses=1]
+	br i1 %1, label %bb9.i, label %bb13.i
+
+bb9.i:		; preds = %bb
+	%2 = fsub double %.rle4, %0		; <double> [#uses=0]
+	%3 = tail call double @asin(double 0.000000e+00) nounwind readonly		; <double> [#uses=0]
+	%4 = fmul double 0.000000e+00, %0		; <double> [#uses=1]
+	%5 = tail call double @tan(double 0.000000e+00) nounwind readonly		; <double> [#uses=0]
+	%6 = fmul double %4, 0.000000e+00		; <double> [#uses=1]
+	%7 = fadd double %6, 0.000000e+00		; <double> [#uses=1]
+	br i1 false, label %return, label %bb
+
+bb13.i:		; preds = %bb
+	unreachable
+
+return:		; preds = %bb9.i, %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/pre-split9.ll b/test/CodeGen/X86/pre-split9.ll
new file mode 100644
index 0000000..c27d925
--- /dev/null
+++ b/test/CodeGen/X86/pre-split9.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
+; RUN:   grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
+; Exactly one live interval should be split in this loop (variant of pre-split8).
+@current_surfaces.b = external global i1		; <i1*> [#uses=1]
+
+declare double @sin(double) nounwind readonly
+
+declare double @asin(double) nounwind readonly
+
+declare double @tan(double) nounwind readonly
+
+define fastcc void @trace_line(i32 %line) nounwind {
+entry:
+	%.b3 = load i1* @current_surfaces.b		; <i1> [#uses=1]
+	br i1 %.b3, label %bb, label %return
+
+bb:		; preds = %bb9.i, %entry
+	%.rle4 = phi double [ %8, %bb9.i ], [ 0.000000e+00, %entry ]		; <double> [#uses=1]
+	%0 = load double* null, align 8		; <double> [#uses=3]
+	%1 = fcmp une double %0, 0.000000e+00		; <i1> [#uses=1]
+	br i1 %1, label %bb9.i, label %bb13.i
+
+bb9.i:		; preds = %bb
+	%2 = fsub double %.rle4, %0		; <double> [#uses=0]
+	%3 = tail call double @asin(double 0.000000e+00) nounwind readonly		; <double> [#uses=0]
+	%4 = tail call double @sin(double 0.000000e+00) nounwind readonly		; <double> [#uses=1]
+	%5 = fmul double %4, %0		; <double> [#uses=1]
+	%6 = tail call double @tan(double 0.000000e+00) nounwind readonly		; <double> [#uses=0]
+	%7 = fmul double %5, 0.000000e+00		; <double> [#uses=1]
+	%8 = fadd double %7, 0.000000e+00		; <double> [#uses=1]
+	br i1 false, label %return, label %bb
+
+bb13.i:		; preds = %bb
+	unreachable
+
+return:		; preds = %bb9.i, %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/prefetch.ll b/test/CodeGen/X86/prefetch.ll
new file mode 100644
index 0000000..fac5915
--- /dev/null
+++ b/test/CodeGen/X86/prefetch.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=x86 -mattr=+sse > %t
+; RUN: grep prefetchnta %t
+; RUN: grep prefetcht0 %t
+; RUN: grep prefetcht1 %t
+; RUN: grep prefetcht2 %t
+; The four llvm.prefetch locality levels should each select a distinct prefetch variant.
+define void @t(i8* %ptr) nounwind  {
+entry:
+	tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1 )
+	tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2 )
+	tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3 )
+	tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 0 )
+	ret void
+}
+
+declare void @llvm.prefetch(i8*, i32, i32) nounwind 
diff --git a/test/CodeGen/X86/private-2.ll b/test/CodeGen/X86/private-2.ll
new file mode 100644
index 0000000..8aa744e
--- /dev/null
+++ b/test/CodeGen/X86/private-2.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | grep L__ZZ20
+; Quote should be outside of private prefix.
+; rdar://6855766
+; The emitted private label must begin L__ZZ20..., with any quoting outside the prefix.
+	%struct.A = type { i32*, i32 }
+@"_ZZ20-[Example1 whatever]E4C.91" = private constant %struct.A { i32* null, i32 1 }		; <%struct.A*> [#uses=1]
+
+define internal i32* @"\01-[Example1 whatever]"() nounwind optsize ssp {
+entry:
+	%0 = getelementptr %struct.A* @"_ZZ20-[Example1 whatever]E4C.91", i64 0, i32 0		; <i32**> [#uses=1]
+	%1 = load i32** %0, align 8		; <i32*> [#uses=1]
+	ret i32* %1
+}
diff --git a/test/CodeGen/X86/private.ll b/test/CodeGen/X86/private.ll
new file mode 100644
index 0000000..f52f8c7
--- /dev/null
+++ b/test/CodeGen/X86/private.ll
@@ -0,0 +1,20 @@
+; Test to make sure that the 'private' is used correctly.
+;
+; RUN: llc < %s -mtriple=x86_64-pc-linux | grep .Lfoo:
+; RUN: llc < %s -mtriple=x86_64-pc-linux | grep call.*\.Lfoo
+; RUN: llc < %s -mtriple=x86_64-pc-linux | grep .Lbaz:
+; RUN: llc < %s -mtriple=x86_64-pc-linux | grep movl.*\.Lbaz
+; Private symbols get the ELF local '.L' prefix and must be referenced through it.
+declare void @foo()
+; NOTE(review): @foo is both declared and defined 'private' below — confirm the IR parser accepts this duplicate symbol.
+define private void @foo() {
+        ret void
+}
+
+@baz = private global i32 4
+
+define i32 @bar() {
+        call void @foo()
+	%1 = load i32* @baz, align 4
+        ret i32 %1
+}
diff --git a/test/CodeGen/X86/ptrtoint-constexpr.ll b/test/CodeGen/X86/ptrtoint-constexpr.ll
new file mode 100644
index 0000000..dd97905
--- /dev/null
+++ b/test/CodeGen/X86/ptrtoint-constexpr.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=i386-linux | FileCheck %s
+	%union.x = type { i64 }
+; ptrtoint constant expressions must fold into the static initializers emitted below.
+; CHECK:	.globl r
+; CHECK: r:
+; CHECK: .quad	r&4294967295
+
+@r = global %union.x { i64 ptrtoint (%union.x* @r to i64) }, align 4
+
+; CHECK:	.globl x
+; CHECK: x:
+; CHECK: .quad	3
+
+@x = global i64 mul (i64 3, i64 ptrtoint (i2* getelementptr (i2* null, i64 1) to i64))
diff --git a/test/CodeGen/X86/rdtsc.ll b/test/CodeGen/X86/rdtsc.ll
new file mode 100644
index 0000000..f21a44c
--- /dev/null
+++ b/test/CodeGen/X86/rdtsc.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 | grep rdtsc
+; RUN: llc < %s -march=x86-64 | grep rdtsc
+declare i64 @llvm.readcyclecounter()
+; llvm.readcyclecounter should select the rdtsc instruction on both targets.
+define i64 @foo() {
+	%tmp.1 = call i64 @llvm.readcyclecounter( )		; <i64> [#uses=1]
+	ret i64 %tmp.1
+}
diff --git a/test/CodeGen/X86/red-zone.ll b/test/CodeGen/X86/red-zone.ll
new file mode 100644
index 0000000..1ffb4e3
--- /dev/null
+++ b/test/CodeGen/X86/red-zone.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; The leaf function f0 may use the red zone below %rsp; f1 (noredzone) must adjust %rsp instead.
+; First without noredzone.
+; CHECK: f0:
+; CHECK: -4(%rsp)
+; CHECK: -4(%rsp)
+; CHECK: ret
+define x86_fp80 @f0(float %f) nounwind readnone {
+entry:
+	%0 = fpext float %f to x86_fp80		; <x86_fp80> [#uses=1]
+	ret x86_fp80 %0
+}
+
+; Then with noredzone.
+; CHECK: f1:
+; CHECK: subq $4, %rsp
+; CHECK: (%rsp)
+; CHECK: (%rsp)
+; CHECK: addq $4, %rsp
+; CHECK: ret
+define x86_fp80 @f1(float %f) nounwind readnone noredzone {
+entry:
+	%0 = fpext float %f to x86_fp80		; <x86_fp80> [#uses=1]
+	ret x86_fp80 %0
+}
diff --git a/test/CodeGen/X86/red-zone2.ll b/test/CodeGen/X86/red-zone2.ll
new file mode 100644
index 0000000..9557d17
--- /dev/null
+++ b/test/CodeGen/X86/red-zone2.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86-64 > %t
+; RUN: grep subq %t | count 1
+; RUN: grep addq %t | count 1
+; With noredzone the frame must be allocated/freed explicitly: exactly one subq and one addq.
+define x86_fp80 @f0(float %f) nounwind readnone noredzone {
+entry:
+	%0 = fpext float %f to x86_fp80		; <x86_fp80> [#uses=1]
+	ret x86_fp80 %0
+}
diff --git a/test/CodeGen/X86/regpressure.ll b/test/CodeGen/X86/regpressure.ll
new file mode 100644
index 0000000..e0b5f7a
--- /dev/null
+++ b/test/CodeGen/X86/regpressure.ll
@@ -0,0 +1,114 @@
+;; Both functions in this testcase should codegen to the same function, and
+;; neither of them should require spilling anything to the stack.
+
+; RUN: llc < %s -march=x86 -stats |& \
+; RUN:   not grep {Number of register spills}
+
+;; This can be compiled to use three registers if the loads are not
+;; folded into the multiplies, 2 registers otherwise.
+;; The three functions below interleave loads with arithmetic in different orders.
+define i32 @regpressure1(i32* %P) {
+	%A = load i32* %P		; <i32> [#uses=1]
+	%Bp = getelementptr i32* %P, i32 1		; <i32*> [#uses=1]
+	%B = load i32* %Bp		; <i32> [#uses=1]
+	%s1 = mul i32 %A, %B		; <i32> [#uses=1]
+	%Cp = getelementptr i32* %P, i32 2		; <i32*> [#uses=1]
+	%C = load i32* %Cp		; <i32> [#uses=1]
+	%s2 = mul i32 %s1, %C		; <i32> [#uses=1]
+	%Dp = getelementptr i32* %P, i32 3		; <i32*> [#uses=1]
+	%D = load i32* %Dp		; <i32> [#uses=1]
+	%s3 = mul i32 %s2, %D		; <i32> [#uses=1]
+	%Ep = getelementptr i32* %P, i32 4		; <i32*> [#uses=1]
+	%E = load i32* %Ep		; <i32> [#uses=1]
+	%s4 = mul i32 %s3, %E		; <i32> [#uses=1]
+	%Fp = getelementptr i32* %P, i32 5		; <i32*> [#uses=1]
+	%F = load i32* %Fp		; <i32> [#uses=1]
+	%s5 = mul i32 %s4, %F		; <i32> [#uses=1]
+	%Gp = getelementptr i32* %P, i32 6		; <i32*> [#uses=1]
+	%G = load i32* %Gp		; <i32> [#uses=1]
+	%s6 = mul i32 %s5, %G		; <i32> [#uses=1]
+	%Hp = getelementptr i32* %P, i32 7		; <i32*> [#uses=1]
+	%H = load i32* %Hp		; <i32> [#uses=1]
+	%s7 = mul i32 %s6, %H		; <i32> [#uses=1]
+	%Ip = getelementptr i32* %P, i32 8		; <i32*> [#uses=1]
+	%I = load i32* %Ip		; <i32> [#uses=1]
+	%s8 = mul i32 %s7, %I		; <i32> [#uses=1]
+	%Jp = getelementptr i32* %P, i32 9		; <i32*> [#uses=1]
+	%J = load i32* %Jp		; <i32> [#uses=1]
+	%s9 = mul i32 %s8, %J		; <i32> [#uses=1]
+	ret i32 %s9
+}
+
+define i32 @regpressure2(i32* %P) {
+	%A = load i32* %P		; <i32> [#uses=1]
+	%Bp = getelementptr i32* %P, i32 1		; <i32*> [#uses=1]
+	%B = load i32* %Bp		; <i32> [#uses=1]
+	%Cp = getelementptr i32* %P, i32 2		; <i32*> [#uses=1]
+	%C = load i32* %Cp		; <i32> [#uses=1]
+	%Dp = getelementptr i32* %P, i32 3		; <i32*> [#uses=1]
+	%D = load i32* %Dp		; <i32> [#uses=1]
+	%Ep = getelementptr i32* %P, i32 4		; <i32*> [#uses=1]
+	%E = load i32* %Ep		; <i32> [#uses=1]
+	%Fp = getelementptr i32* %P, i32 5		; <i32*> [#uses=1]
+	%F = load i32* %Fp		; <i32> [#uses=1]
+	%Gp = getelementptr i32* %P, i32 6		; <i32*> [#uses=1]
+	%G = load i32* %Gp		; <i32> [#uses=1]
+	%Hp = getelementptr i32* %P, i32 7		; <i32*> [#uses=1]
+	%H = load i32* %Hp		; <i32> [#uses=1]
+	%Ip = getelementptr i32* %P, i32 8		; <i32*> [#uses=1]
+	%I = load i32* %Ip		; <i32> [#uses=1]
+	%Jp = getelementptr i32* %P, i32 9		; <i32*> [#uses=1]
+	%J = load i32* %Jp		; <i32> [#uses=1]
+	%s1 = mul i32 %A, %B		; <i32> [#uses=1]
+	%s2 = mul i32 %s1, %C		; <i32> [#uses=1]
+	%s3 = mul i32 %s2, %D		; <i32> [#uses=1]
+	%s4 = mul i32 %s3, %E		; <i32> [#uses=1]
+	%s5 = mul i32 %s4, %F		; <i32> [#uses=1]
+	%s6 = mul i32 %s5, %G		; <i32> [#uses=1]
+	%s7 = mul i32 %s6, %H		; <i32> [#uses=1]
+	%s8 = mul i32 %s7, %I		; <i32> [#uses=1]
+	%s9 = mul i32 %s8, %J		; <i32> [#uses=1]
+	ret i32 %s9
+}
+
+define i32 @regpressure3(i16* %P, i1 %Cond, i32* %Other) {
+	%A = load i16* %P		; <i16> [#uses=1]
+	%Bp = getelementptr i16* %P, i32 1		; <i16*> [#uses=1]
+	%B = load i16* %Bp		; <i16> [#uses=1]
+	%Cp = getelementptr i16* %P, i32 2		; <i16*> [#uses=1]
+	%C = load i16* %Cp		; <i16> [#uses=1]
+	%Dp = getelementptr i16* %P, i32 3		; <i16*> [#uses=1]
+	%D = load i16* %Dp		; <i16> [#uses=1]
+	%Ep = getelementptr i16* %P, i32 4		; <i16*> [#uses=1]
+	%E = load i16* %Ep		; <i16> [#uses=1]
+	%Fp = getelementptr i16* %P, i32 5		; <i16*> [#uses=1]
+	%F = load i16* %Fp		; <i16> [#uses=1]
+	%Gp = getelementptr i16* %P, i32 6		; <i16*> [#uses=1]
+	%G = load i16* %Gp		; <i16> [#uses=1]
+	%Hp = getelementptr i16* %P, i32 7		; <i16*> [#uses=1]
+	%H = load i16* %Hp		; <i16> [#uses=1]
+	%Ip = getelementptr i16* %P, i32 8		; <i16*> [#uses=1]
+	%I = load i16* %Ip		; <i16> [#uses=1]
+	%Jp = getelementptr i16* %P, i32 9		; <i16*> [#uses=1]
+	%J = load i16* %Jp		; <i16> [#uses=1]
+	%A.upgrd.1 = sext i16 %A to i32		; <i32> [#uses=1]
+	%B.upgrd.2 = sext i16 %B to i32		; <i32> [#uses=1]
+	%D.upgrd.3 = sext i16 %D to i32		; <i32> [#uses=1]
+	%C.upgrd.4 = sext i16 %C to i32		; <i32> [#uses=1]
+	%E.upgrd.5 = sext i16 %E to i32		; <i32> [#uses=1]
+	%F.upgrd.6 = sext i16 %F to i32		; <i32> [#uses=1]
+	%G.upgrd.7 = sext i16 %G to i32		; <i32> [#uses=1]
+	%H.upgrd.8 = sext i16 %H to i32		; <i32> [#uses=1]
+	%I.upgrd.9 = sext i16 %I to i32		; <i32> [#uses=1]
+	%J.upgrd.10 = sext i16 %J to i32		; <i32> [#uses=1]
+	%s1 = add i32 %A.upgrd.1, %B.upgrd.2		; <i32> [#uses=1]
+	%s2 = add i32 %C.upgrd.4, %s1		; <i32> [#uses=1]
+	%s3 = add i32 %D.upgrd.3, %s2		; <i32> [#uses=1]
+	%s4 = add i32 %E.upgrd.5, %s3		; <i32> [#uses=1]
+	%s5 = add i32 %F.upgrd.6, %s4		; <i32> [#uses=1]
+	%s6 = add i32 %G.upgrd.7, %s5		; <i32> [#uses=1]
+	%s7 = add i32 %H.upgrd.8, %s6		; <i32> [#uses=1]
+	%s8 = add i32 %I.upgrd.9, %s7		; <i32> [#uses=1]
+	%s9 = add i32 %J.upgrd.10, %s8		; <i32> [#uses=1]
+	ret i32 %s9
+}
diff --git a/test/CodeGen/X86/rem-2.ll b/test/CodeGen/X86/rem-2.ll
new file mode 100644
index 0000000..1b2af4b
--- /dev/null
+++ b/test/CodeGen/X86/rem-2.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=x86 | not grep cltd
+; srem with a constant dividend must not emit the cltd sign-extension instruction.
+define i32 @test(i32 %X) nounwind readnone {
+entry:
+	%0 = srem i32 41, %X
+	ret i32 %0
+}
diff --git a/test/CodeGen/X86/rem.ll b/test/CodeGen/X86/rem.ll
new file mode 100644
index 0000000..394070e
--- /dev/null
+++ b/test/CodeGen/X86/rem.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86 | not grep div
+; srem/urem by the constants 255 and 256 should be lowered without a hardware div.
+define i32 @test1(i32 %X) {
+        %tmp1 = srem i32 %X, 255                ; <i32> [#uses=1]
+        ret i32 %tmp1
+}
+
+define i32 @test2(i32 %X) {
+        %tmp1 = srem i32 %X, 256                ; <i32> [#uses=1]
+        ret i32 %tmp1
+}
+
+define i32 @test3(i32 %X) {
+        %tmp1 = urem i32 %X, 255                ; <i32> [#uses=1]
+        ret i32 %tmp1
+}
+
+define i32 @test4(i32 %X) {
+        %tmp1 = urem i32 %X, 256                ; <i32> [#uses=1]
+        ret i32 %tmp1
+}
+
diff --git a/test/CodeGen/X86/remat-constant.ll b/test/CodeGen/X86/remat-constant.ll
new file mode 100644
index 0000000..3e81320
--- /dev/null
+++ b/test/CodeGen/X86/remat-constant.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static | grep xmm | count 2
+; The float load from @a should be rematerialized around the call to @bar: exactly two xmm lines.
+declare void @bar() nounwind
+
+@a = external constant float
+
+declare void @qux(float %f) nounwind 
+
+define void @foo() nounwind  {
+  %f = load float* @a
+  call void @bar()
+  call void @qux(float %f)
+  call void @qux(float %f)
+  ret void
+}
diff --git a/test/CodeGen/X86/remat-mov-0.ll b/test/CodeGen/X86/remat-mov-0.ll
new file mode 100644
index 0000000..5fb445c
--- /dev/null
+++ b/test/CodeGen/X86/remat-mov-0.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; CodeGen should remat the zero instead of spilling it.
+; (xor for 0, movq for -1, movl for 1 — see the CHECK lines below.)
+declare void @foo(i64 %p)
+
+; CHECK: bar:
+; CHECK: xorl %edi, %edi
+; CHECK: xorl %edi, %edi
+define void @bar() nounwind {
+  call void @foo(i64 0)
+  call void @foo(i64 0)
+  ret void
+}
+
+; CHECK: bat:
+; CHECK: movq $-1, %rdi
+; CHECK: movq $-1, %rdi
+define void @bat() nounwind {
+  call void @foo(i64 -1)
+  call void @foo(i64 -1)
+  ret void
+}
+
+; CHECK: bau:
+; CHECK: movl $1, %edi
+; CHECK: movl $1, %edi
+define void @bau() nounwind {
+  call void @foo(i64 1)
+  call void @foo(i64 1)
+  ret void
+}
+
diff --git a/test/CodeGen/X86/remat-scalar-zero.ll b/test/CodeGen/X86/remat-scalar-zero.ll
new file mode 100644
index 0000000..2da96ab
--- /dev/null
+++ b/test/CodeGen/X86/remat-scalar-zero.ll
@@ -0,0 +1,95 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu > %t
+; RUN: not grep xor %t
+; RUN: not grep movap %t
+; RUN: grep {\\.quad.*0} %t
+
+; Remat should be able to fold the zero constant into the div instructions
+; as a constant-pool load.
+; The 0.0 divisors in %div and %div130 should become folded constant-pool operands.
+define void @foo(double* nocapture %x, double* nocapture %y) nounwind {
+entry:
+  %tmp1 = load double* %x                         ; <double> [#uses=1]
+  %arrayidx4 = getelementptr inbounds double* %x, i64 1 ; <double*> [#uses=1]
+  %tmp5 = load double* %arrayidx4                 ; <double> [#uses=1]
+  %arrayidx8 = getelementptr inbounds double* %x, i64 2 ; <double*> [#uses=1]
+  %tmp9 = load double* %arrayidx8                 ; <double> [#uses=1]
+  %arrayidx12 = getelementptr inbounds double* %x, i64 3 ; <double*> [#uses=1]
+  %tmp13 = load double* %arrayidx12               ; <double> [#uses=1]
+  %arrayidx16 = getelementptr inbounds double* %x, i64 4 ; <double*> [#uses=1]
+  %tmp17 = load double* %arrayidx16               ; <double> [#uses=1]
+  %arrayidx20 = getelementptr inbounds double* %x, i64 5 ; <double*> [#uses=1]
+  %tmp21 = load double* %arrayidx20               ; <double> [#uses=1]
+  %arrayidx24 = getelementptr inbounds double* %x, i64 6 ; <double*> [#uses=1]
+  %tmp25 = load double* %arrayidx24               ; <double> [#uses=1]
+  %arrayidx28 = getelementptr inbounds double* %x, i64 7 ; <double*> [#uses=1]
+  %tmp29 = load double* %arrayidx28               ; <double> [#uses=1]
+  %arrayidx32 = getelementptr inbounds double* %x, i64 8 ; <double*> [#uses=1]
+  %tmp33 = load double* %arrayidx32               ; <double> [#uses=1]
+  %arrayidx36 = getelementptr inbounds double* %x, i64 9 ; <double*> [#uses=1]
+  %tmp37 = load double* %arrayidx36               ; <double> [#uses=1]
+  %arrayidx40 = getelementptr inbounds double* %x, i64 10 ; <double*> [#uses=1]
+  %tmp41 = load double* %arrayidx40               ; <double> [#uses=1]
+  %arrayidx44 = getelementptr inbounds double* %x, i64 11 ; <double*> [#uses=1]
+  %tmp45 = load double* %arrayidx44               ; <double> [#uses=1]
+  %arrayidx48 = getelementptr inbounds double* %x, i64 12 ; <double*> [#uses=1]
+  %tmp49 = load double* %arrayidx48               ; <double> [#uses=1]
+  %arrayidx52 = getelementptr inbounds double* %x, i64 13 ; <double*> [#uses=1]
+  %tmp53 = load double* %arrayidx52               ; <double> [#uses=1]
+  %arrayidx56 = getelementptr inbounds double* %x, i64 14 ; <double*> [#uses=1]
+  %tmp57 = load double* %arrayidx56               ; <double> [#uses=1]
+  %arrayidx60 = getelementptr inbounds double* %x, i64 15 ; <double*> [#uses=1]
+  %tmp61 = load double* %arrayidx60               ; <double> [#uses=1]
+  %arrayidx64 = getelementptr inbounds double* %x, i64 16 ; <double*> [#uses=1]
+  %tmp65 = load double* %arrayidx64               ; <double> [#uses=1]
+  %div = fdiv double %tmp1, 0.000000e+00          ; <double> [#uses=1]
+  store double %div, double* %y
+  %div70 = fdiv double %tmp5, 2.000000e-01        ; <double> [#uses=1]
+  %arrayidx72 = getelementptr inbounds double* %y, i64 1 ; <double*> [#uses=1]
+  store double %div70, double* %arrayidx72
+  %div74 = fdiv double %tmp9, 2.000000e-01        ; <double> [#uses=1]
+  %arrayidx76 = getelementptr inbounds double* %y, i64 2 ; <double*> [#uses=1]
+  store double %div74, double* %arrayidx76
+  %div78 = fdiv double %tmp13, 2.000000e-01       ; <double> [#uses=1]
+  %arrayidx80 = getelementptr inbounds double* %y, i64 3 ; <double*> [#uses=1]
+  store double %div78, double* %arrayidx80
+  %div82 = fdiv double %tmp17, 2.000000e-01       ; <double> [#uses=1]
+  %arrayidx84 = getelementptr inbounds double* %y, i64 4 ; <double*> [#uses=1]
+  store double %div82, double* %arrayidx84
+  %div86 = fdiv double %tmp21, 2.000000e-01       ; <double> [#uses=1]
+  %arrayidx88 = getelementptr inbounds double* %y, i64 5 ; <double*> [#uses=1]
+  store double %div86, double* %arrayidx88
+  %div90 = fdiv double %tmp25, 2.000000e-01       ; <double> [#uses=1]
+  %arrayidx92 = getelementptr inbounds double* %y, i64 6 ; <double*> [#uses=1]
+  store double %div90, double* %arrayidx92
+  %div94 = fdiv double %tmp29, 2.000000e-01       ; <double> [#uses=1]
+  %arrayidx96 = getelementptr inbounds double* %y, i64 7 ; <double*> [#uses=1]
+  store double %div94, double* %arrayidx96
+  %div98 = fdiv double %tmp33, 2.000000e-01       ; <double> [#uses=1]
+  %arrayidx100 = getelementptr inbounds double* %y, i64 8 ; <double*> [#uses=1]
+  store double %div98, double* %arrayidx100
+  %div102 = fdiv double %tmp37, 2.000000e-01      ; <double> [#uses=1]
+  %arrayidx104 = getelementptr inbounds double* %y, i64 9 ; <double*> [#uses=1]
+  store double %div102, double* %arrayidx104
+  %div106 = fdiv double %tmp41, 2.000000e-01      ; <double> [#uses=1]
+  %arrayidx108 = getelementptr inbounds double* %y, i64 10 ; <double*> [#uses=1]
+  store double %div106, double* %arrayidx108
+  %div110 = fdiv double %tmp45, 2.000000e-01      ; <double> [#uses=1]
+  %arrayidx112 = getelementptr inbounds double* %y, i64 11 ; <double*> [#uses=1]
+  store double %div110, double* %arrayidx112
+  %div114 = fdiv double %tmp49, 2.000000e-01      ; <double> [#uses=1]
+  %arrayidx116 = getelementptr inbounds double* %y, i64 12 ; <double*> [#uses=1]
+  store double %div114, double* %arrayidx116
+  %div118 = fdiv double %tmp53, 2.000000e-01      ; <double> [#uses=1]
+  %arrayidx120 = getelementptr inbounds double* %y, i64 13 ; <double*> [#uses=1]
+  store double %div118, double* %arrayidx120
+  %div122 = fdiv double %tmp57, 2.000000e-01      ; <double> [#uses=1]
+  %arrayidx124 = getelementptr inbounds double* %y, i64 14 ; <double*> [#uses=1]
+  store double %div122, double* %arrayidx124
+  %div126 = fdiv double %tmp61, 2.000000e-01      ; <double> [#uses=1]
+  %arrayidx128 = getelementptr inbounds double* %y, i64 15 ; <double*> [#uses=1]
+  store double %div126, double* %arrayidx128
+  %div130 = fdiv double %tmp65, 0.000000e+00      ; <double> [#uses=1]
+  %arrayidx132 = getelementptr inbounds double* %y, i64 16 ; <double*> [#uses=1]
+  store double %div130, double* %arrayidx132
+  ret void
+}
diff --git a/test/CodeGen/X86/ret-addr.ll b/test/CodeGen/X86/ret-addr.ll
new file mode 100644
index 0000000..b7b57ab
--- /dev/null
+++ b/test/CodeGen/X86/ret-addr.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -disable-fp-elim -march=x86 | not grep xor
+; RUN: llc < %s -disable-fp-elim -march=x86-64 | not grep xor
+; llvm.returnaddress at depths 0-2 must not be folded to a zero result (no xor).
+define i8* @h() nounwind readnone optsize {
+entry:
+	%0 = tail call i8* @llvm.returnaddress(i32 2)		; <i8*> [#uses=1]
+	ret i8* %0
+}
+
+declare i8* @llvm.returnaddress(i32) nounwind readnone
+
+define i8* @g() nounwind readnone optsize {
+entry:
+	%0 = tail call i8* @llvm.returnaddress(i32 1)		; <i8*> [#uses=1]
+	ret i8* %0
+}
+
+define i8* @f() nounwind readnone optsize {
+entry:
+	%0 = tail call i8* @llvm.returnaddress(i32 0)		; <i8*> [#uses=1]
+	ret i8* %0
+}
diff --git a/test/CodeGen/X86/ret-i64-0.ll b/test/CodeGen/X86/ret-i64-0.ll
new file mode 100644
index 0000000..bca0f05
--- /dev/null
+++ b/test/CodeGen/X86/ret-i64-0.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=x86 | grep xor | count 2
+; An i64 zero on x86-32 is returned in eax:edx, so exactly two xors are expected.
+define i64 @foo() nounwind {
+  ret i64 0
+}
diff --git a/test/CodeGen/X86/ret-mmx.ll b/test/CodeGen/X86/ret-mmx.ll
new file mode 100644
index 0000000..04b57dd
--- /dev/null
+++ b/test/CodeGen/X86/ret-mmx.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2
+; rdar://6602459
+; Compile-only regression test for returning small vector values; llc must not crash.
+@g_v1di = external global <1 x i64>
+
+define void @t1() nounwind {
+entry:
+	%call = call <1 x i64> @return_v1di()		; <<1 x i64>> [#uses=0]
+	store <1 x i64> %call, <1 x i64>* @g_v1di
+        ret void
+}
+
+declare <1 x i64> @return_v1di()
+
+define <1 x i64> @t2() nounwind {
+	ret <1 x i64> <i64 1>
+}
+
+define <2 x i32> @t3() nounwind {
+	ret <2 x i32> <i32 1, i32 0>
+}
+
+define double @t4() nounwind {
+	ret double bitcast (<2 x i32> <i32 1, i32 0> to double)
+}
+
diff --git a/test/CodeGen/X86/rip-rel-address.ll b/test/CodeGen/X86/rip-rel-address.ll
new file mode 100644
index 0000000..24ff07b
--- /dev/null
+++ b/test/CodeGen/X86/rip-rel-address.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86-64 -relocation-model=pic -mtriple=x86_64-apple-darwin10 | FileCheck %s -check-prefix=PIC64
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -relocation-model=static | FileCheck %s -check-prefix=STATIC64
+
+; Use %rip-relative addressing even in static mode on x86-64, because
+; it has a smaller encoding.
+; Both modes should load @a with a RIP-relative movsd (see CHECK lines below).
+@a = internal global double 3.4
+define double @foo() nounwind {
+  %a = load double* @a
+  ret double %a
+  
+; PIC64:    movsd	_a(%rip), %xmm0
+; STATIC64: movsd	a(%rip), %xmm0
+}
diff --git a/test/CodeGen/X86/rodata-relocs.ll b/test/CodeGen/X86/rodata-relocs.ll
new file mode 100644
index 0000000..276f8bb
--- /dev/null
+++ b/test/CodeGen/X86/rodata-relocs.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -relocation-model=static | grep rodata | count 3
+; RUN: llc < %s -relocation-model=static | grep -F "rodata.cst" | count 2
+; RUN: llc < %s -relocation-model=pic | grep rodata | count 2
+; RUN: llc < %s -relocation-model=pic | grep -F ".data.rel.ro" | count 2
+; RUN: llc < %s -relocation-model=pic | grep -F ".data.rel.ro.local" | count 1
+; RUN: llc < %s -relocation-model=pic | grep -F ".data.rel" | count 4
+; RUN: llc < %s -relocation-model=pic | grep -F ".data.rel.local" | count 1
+; Relocation-free constants stay in .rodata; under PIC, reloc-bearing ones move to .data.rel* sections.
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+@a = internal constant [2 x i32] [i32 1, i32 2]
+@a1 = constant [2 x i32] [i32 1, i32 2]
+@e = internal constant [2 x [2 x i32]] [[2 x i32] [i32 1, i32 2], [2 x i32] [i32 3, i32 4]], align 16
+@e1 = constant [2 x [2 x i32]] [[2 x i32] [i32 1, i32 2], [2 x i32] [i32 3, i32 4]], align 16
+@p = constant i8* bitcast ([2 x i32]* @a to i8*)
+@t = constant i8* bitcast ([2 x [2 x i32]]* @e to i8*)
+@p1 = constant i8* bitcast ([2 x i32]* @a1 to i8*)
+@t1 = constant i8* bitcast ([2 x [2 x i32]]* @e1 to i8*)
+@p2 = internal global i8* bitcast([2 x i32]* @a1 to i8*)
+@t2 = internal global i8* bitcast([2 x [2 x i32]]* @e1 to i8*)
+@p3 = internal global i8* bitcast([2 x i32]* @a to i8*)
+@t3 = internal global i8* bitcast([2 x [2 x i32]]* @e to i8*)
+
diff --git a/test/CodeGen/X86/rot16.ll b/test/CodeGen/X86/rot16.ll
new file mode 100644
index 0000000..42ece47
--- /dev/null
+++ b/test/CodeGen/X86/rot16.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -march=x86 > %t
+; RUN: grep rol %t | count 3
+; RUN: grep ror %t | count 1
+; RUN: grep shld %t | count 2
+; RUN: grep shrd %t | count 2
+
+define i16 @foo(i16 %x, i16 %y, i16 %z) nounwind readnone {
+entry:
+	%0 = shl i16 %x, %z
+	%1 = sub i16 16, %z
+	%2 = lshr i16 %x, %1
+	%3 = or i16 %2, %0
+	ret i16 %3
+}
+
+define i16 @bar(i16 %x, i16 %y, i16 %z) nounwind readnone {
+entry:
+	%0 = shl i16 %y, %z
+	%1 = sub i16 16, %z
+	%2 = lshr i16 %x, %1
+	%3 = or i16 %2, %0
+	ret i16 %3
+}
+
+define i16 @un(i16 %x, i16 %y, i16 %z) nounwind readnone {
+entry:
+	%0 = lshr i16 %x, %z
+	%1 = sub i16 16, %z
+	%2 = shl i16 %x, %1
+	%3 = or i16 %2, %0
+	ret i16 %3
+}
+
+define i16 @bu(i16 %x, i16 %y, i16 %z) nounwind readnone {
+entry:
+	%0 = lshr i16 %y, %z
+	%1 = sub i16 16, %z
+	%2 = shl i16 %x, %1
+	%3 = or i16 %2, %0
+	ret i16 %3
+}
+
+define i16 @xfoo(i16 %x, i16 %y, i16 %z) nounwind readnone {
+entry:
+	%0 = lshr i16 %x, 11
+	%1 = shl i16 %x, 5
+	%2 = or i16 %0, %1
+	ret i16 %2
+}
+
+define i16 @xbar(i16 %x, i16 %y, i16 %z) nounwind readnone {
+entry:
+	%0 = shl i16 %y, 5
+	%1 = lshr i16 %x, 11
+	%2 = or i16 %0, %1
+	ret i16 %2
+}
+
+define i16 @xun(i16 %x, i16 %y, i16 %z) nounwind readnone {
+entry:
+	%0 = lshr i16 %x, 5
+	%1 = shl i16 %x, 11
+	%2 = or i16 %0, %1
+	ret i16 %2
+}
+
+define i16 @xbu(i16 %x, i16 %y, i16 %z) nounwind readnone {
+entry:
+	%0 = lshr i16 %y, 5
+	%1 = shl i16 %x, 11
+	%2 = or i16 %0, %1
+	ret i16 %2
+}
diff --git a/test/CodeGen/X86/rot32.ll b/test/CodeGen/X86/rot32.ll
new file mode 100644
index 0000000..655ed27
--- /dev/null
+++ b/test/CodeGen/X86/rot32.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -march=x86 > %t
+; RUN: grep rol %t | count 3
+; RUN: grep ror %t | count 1
+; RUN: grep shld %t | count 2
+; RUN: grep shrd %t | count 2
+
+define i32 @foo(i32 %x, i32 %y, i32 %z) nounwind readnone {
+entry:
+	%0 = shl i32 %x, %z
+	%1 = sub i32 32, %z
+	%2 = lshr i32 %x, %1
+	%3 = or i32 %2, %0
+	ret i32 %3
+}
+
+define i32 @bar(i32 %x, i32 %y, i32 %z) nounwind readnone {
+entry:
+	%0 = shl i32 %y, %z
+	%1 = sub i32 32, %z
+	%2 = lshr i32 %x, %1
+	%3 = or i32 %2, %0
+	ret i32 %3
+}
+
+define i32 @un(i32 %x, i32 %y, i32 %z) nounwind readnone {
+entry:
+	%0 = lshr i32 %x, %z
+	%1 = sub i32 32, %z
+	%2 = shl i32 %x, %1
+	%3 = or i32 %2, %0
+	ret i32 %3
+}
+
+define i32 @bu(i32 %x, i32 %y, i32 %z) nounwind readnone {
+entry:
+	%0 = lshr i32 %y, %z
+	%1 = sub i32 32, %z
+	%2 = shl i32 %x, %1
+	%3 = or i32 %2, %0
+	ret i32 %3
+}
+
+define i32 @xfoo(i32 %x, i32 %y, i32 %z) nounwind readnone {
+entry:
+	%0 = lshr i32 %x, 25
+	%1 = shl i32 %x, 7
+	%2 = or i32 %0, %1
+	ret i32 %2
+}
+
+define i32 @xbar(i32 %x, i32 %y, i32 %z) nounwind readnone {
+entry:
+	%0 = shl i32 %y, 7
+	%1 = lshr i32 %x, 25
+	%2 = or i32 %0, %1
+	ret i32 %2
+}
+
+define i32 @xun(i32 %x, i32 %y, i32 %z) nounwind readnone {
+entry:
+	%0 = lshr i32 %x, 7
+	%1 = shl i32 %x, 25
+	%2 = or i32 %0, %1
+	ret i32 %2
+}
+
+define i32 @xbu(i32 %x, i32 %y, i32 %z) nounwind readnone {
+entry:
+	%0 = lshr i32 %y, 7
+	%1 = shl i32 %x, 25
+	%2 = or i32 %0, %1
+	ret i32 %2
+}
diff --git a/test/CodeGen/X86/rot64.ll b/test/CodeGen/X86/rot64.ll
new file mode 100644
index 0000000..4e082bb
--- /dev/null
+++ b/test/CodeGen/X86/rot64.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -march=x86-64 > %t
+; RUN: grep rol %t | count 3
+; RUN: grep ror %t | count 1
+; RUN: grep shld %t | count 2
+; RUN: grep shrd %t | count 2
+
+define i64 @foo(i64 %x, i64 %y, i64 %z) nounwind readnone {
+entry:
+	%0 = shl i64 %x, %z
+	%1 = sub i64 64, %z
+	%2 = lshr i64 %x, %1
+	%3 = or i64 %2, %0
+	ret i64 %3
+}
+
+define i64 @bar(i64 %x, i64 %y, i64 %z) nounwind readnone {
+entry:
+	%0 = shl i64 %y, %z
+	%1 = sub i64 64, %z
+	%2 = lshr i64 %x, %1
+	%3 = or i64 %2, %0
+	ret i64 %3
+}
+
+define i64 @un(i64 %x, i64 %y, i64 %z) nounwind readnone {
+entry:
+	%0 = lshr i64 %x, %z
+	%1 = sub i64 64, %z
+	%2 = shl i64 %x, %1
+	%3 = or i64 %2, %0
+	ret i64 %3
+}
+
+define i64 @bu(i64 %x, i64 %y, i64 %z) nounwind readnone {
+entry:
+	%0 = lshr i64 %y, %z
+	%1 = sub i64 64, %z
+	%2 = shl i64 %x, %1
+	%3 = or i64 %2, %0
+	ret i64 %3
+}
+
+define i64 @xfoo(i64 %x, i64 %y, i64 %z) nounwind readnone {
+entry:
+	%0 = lshr i64 %x, 57
+	%1 = shl i64 %x, 7
+	%2 = or i64 %0, %1
+	ret i64 %2
+}
+
+define i64 @xbar(i64 %x, i64 %y, i64 %z) nounwind readnone {
+entry:
+	%0 = shl i64 %y, 7
+	%1 = lshr i64 %x, 57
+	%2 = or i64 %0, %1
+	ret i64 %2
+}
+
+define i64 @xun(i64 %x, i64 %y, i64 %z) nounwind readnone {
+entry:
+	%0 = lshr i64 %x, 7
+	%1 = shl i64 %x, 57
+	%2 = or i64 %0, %1
+	ret i64 %2
+}
+
+define i64 @xbu(i64 %x, i64 %y, i64 %z) nounwind readnone {
+entry:
+	%0 = lshr i64 %y, 7
+	%1 = shl i64 %x, 57
+	%2 = or i64 %0, %1
+	ret i64 %2
+}
diff --git a/test/CodeGen/X86/rotate.ll b/test/CodeGen/X86/rotate.ll
new file mode 100644
index 0000000..1e20273
--- /dev/null
+++ b/test/CodeGen/X86/rotate.ll
@@ -0,0 +1,100 @@
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
+; RUN:   grep {ro\[rl\]} | count 12
+
+define i32 @rotl32(i32 %A, i8 %Amt) {
+	%shift.upgrd.1 = zext i8 %Amt to i32		; <i32> [#uses=1]
+	%B = shl i32 %A, %shift.upgrd.1		; <i32> [#uses=1]
+	%Amt2 = sub i8 32, %Amt		; <i8> [#uses=1]
+	%shift.upgrd.2 = zext i8 %Amt2 to i32		; <i32> [#uses=1]
+	%C = lshr i32 %A, %shift.upgrd.2		; <i32> [#uses=1]
+	%D = or i32 %B, %C		; <i32> [#uses=1]
+	ret i32 %D
+}
+
+define i32 @rotr32(i32 %A, i8 %Amt) {
+	%shift.upgrd.3 = zext i8 %Amt to i32		; <i32> [#uses=1]
+	%B = lshr i32 %A, %shift.upgrd.3		; <i32> [#uses=1]
+	%Amt2 = sub i8 32, %Amt		; <i8> [#uses=1]
+	%shift.upgrd.4 = zext i8 %Amt2 to i32		; <i32> [#uses=1]
+	%C = shl i32 %A, %shift.upgrd.4		; <i32> [#uses=1]
+	%D = or i32 %B, %C		; <i32> [#uses=1]
+	ret i32 %D
+}
+
+define i32 @rotli32(i32 %A) {
+	%B = shl i32 %A, 5		; <i32> [#uses=1]
+	%C = lshr i32 %A, 27		; <i32> [#uses=1]
+	%D = or i32 %B, %C		; <i32> [#uses=1]
+	ret i32 %D
+}
+
+define i32 @rotri32(i32 %A) {
+	%B = lshr i32 %A, 5		; <i32> [#uses=1]
+	%C = shl i32 %A, 27		; <i32> [#uses=1]
+	%D = or i32 %B, %C		; <i32> [#uses=1]
+	ret i32 %D
+}
+
+define i16 @rotl16(i16 %A, i8 %Amt) {
+	%shift.upgrd.5 = zext i8 %Amt to i16		; <i16> [#uses=1]
+	%B = shl i16 %A, %shift.upgrd.5		; <i16> [#uses=1]
+	%Amt2 = sub i8 16, %Amt		; <i8> [#uses=1]
+	%shift.upgrd.6 = zext i8 %Amt2 to i16		; <i16> [#uses=1]
+	%C = lshr i16 %A, %shift.upgrd.6		; <i16> [#uses=1]
+	%D = or i16 %B, %C		; <i16> [#uses=1]
+	ret i16 %D
+}
+
+define i16 @rotr16(i16 %A, i8 %Amt) {
+	%shift.upgrd.7 = zext i8 %Amt to i16		; <i16> [#uses=1]
+	%B = lshr i16 %A, %shift.upgrd.7		; <i16> [#uses=1]
+	%Amt2 = sub i8 16, %Amt		; <i8> [#uses=1]
+	%shift.upgrd.8 = zext i8 %Amt2 to i16		; <i16> [#uses=1]
+	%C = shl i16 %A, %shift.upgrd.8		; <i16> [#uses=1]
+	%D = or i16 %B, %C		; <i16> [#uses=1]
+	ret i16 %D
+}
+
+define i16 @rotli16(i16 %A) {
+	%B = shl i16 %A, 5		; <i16> [#uses=1]
+	%C = lshr i16 %A, 11		; <i16> [#uses=1]
+	%D = or i16 %B, %C		; <i16> [#uses=1]
+	ret i16 %D
+}
+
+define i16 @rotri16(i16 %A) {
+	%B = lshr i16 %A, 5		; <i16> [#uses=1]
+	%C = shl i16 %A, 11		; <i16> [#uses=1]
+	%D = or i16 %B, %C		; <i16> [#uses=1]
+	ret i16 %D
+}
+
+define i8 @rotl8(i8 %A, i8 %Amt) {
+	%B = shl i8 %A, %Amt		; <i8> [#uses=1]
+	%Amt2 = sub i8 8, %Amt		; <i8> [#uses=1]
+	%C = lshr i8 %A, %Amt2		; <i8> [#uses=1]
+	%D = or i8 %B, %C		; <i8> [#uses=1]
+	ret i8 %D
+}
+
+define i8 @rotr8(i8 %A, i8 %Amt) {
+	%B = lshr i8 %A, %Amt		; <i8> [#uses=1]
+	%Amt2 = sub i8 8, %Amt		; <i8> [#uses=1]
+	%C = shl i8 %A, %Amt2		; <i8> [#uses=1]
+	%D = or i8 %B, %C		; <i8> [#uses=1]
+	ret i8 %D
+}
+
+define i8 @rotli8(i8 %A) {
+	%B = shl i8 %A, 5		; <i8> [#uses=1]
+	%C = lshr i8 %A, 3		; <i8> [#uses=1]
+	%D = or i8 %B, %C		; <i8> [#uses=1]
+	ret i8 %D
+}
+
+define i8 @rotri8(i8 %A) {
+	%B = lshr i8 %A, 5		; <i8> [#uses=1]
+	%C = shl i8 %A, 3		; <i8> [#uses=1]
+	%D = or i8 %B, %C		; <i8> [#uses=1]
+	ret i8 %D
+}
diff --git a/test/CodeGen/X86/rotate2.ll b/test/CodeGen/X86/rotate2.ll
new file mode 100644
index 0000000..2eea399
--- /dev/null
+++ b/test/CodeGen/X86/rotate2.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86-64 | grep rol | count 2
+
+define i64 @test1(i64 %x) nounwind  {
+entry:
+	%tmp2 = lshr i64 %x, 55		; <i64> [#uses=1]
+	%tmp4 = shl i64 %x, 9		; <i64> [#uses=1]
+	%tmp5 = or i64 %tmp2, %tmp4		; <i64> [#uses=1]
+	ret i64 %tmp5
+}
+
+define i64 @test2(i32 %x) nounwind  {
+entry:
+	%tmp2 = lshr i32 %x, 22		; <i32> [#uses=1]
+	%tmp4 = shl i32 %x, 10		; <i32> [#uses=1]
+	%tmp5 = or i32 %tmp2, %tmp4		; <i32> [#uses=1]
+	%tmp56 = zext i32 %tmp5 to i64		; <i64> [#uses=1]
+	ret i64 %tmp56
+}
+
diff --git a/test/CodeGen/X86/scalar-extract.ll b/test/CodeGen/X86/scalar-extract.ll
new file mode 100644
index 0000000..2845838
--- /dev/null
+++ b/test/CodeGen/X86/scalar-extract.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86 -mattr=+mmx -o %t
+; RUN: not grep movq  %t
+
+; Check that widening doesn't introduce a mmx register in this case when
+; a simple load/store would suffice.
+
+define void @foo(<2 x i16>* %A, <2 x i16>* %B) {
+entry:
+	%tmp1 = load <2 x i16>* %A		; <<2 x i16>> [#uses=1]
+	store <2 x i16> %tmp1, <2 x i16>* %B
+	ret void
+}
+
diff --git a/test/CodeGen/X86/scalar-min-max-fill-operand.ll b/test/CodeGen/X86/scalar-min-max-fill-operand.ll
new file mode 100644
index 0000000..fe40758
--- /dev/null
+++ b/test/CodeGen/X86/scalar-min-max-fill-operand.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86-64 | grep min | count 1
+; RUN: llc < %s -march=x86-64 | grep max | count 1
+; RUN: llc < %s -march=x86-64 | grep mov | count 2
+
+declare float @bar()
+
+define float @foo(float %a) nounwind
+{
+  %s = call float @bar()
+  %t = fcmp olt float %s, %a
+  %u = select i1 %t, float %s, float %a
+  ret float %u
+}
+define float @hem(float %a) nounwind
+{
+  %s = call float @bar()
+  %t = fcmp ogt float %s, %a
+  %u = select i1 %t, float %s, float %a
+  ret float %u
+}
diff --git a/test/CodeGen/X86/scalar_sse_minmax.ll b/test/CodeGen/X86/scalar_sse_minmax.ll
new file mode 100644
index 0000000..bc4ab5d
--- /dev/null
+++ b/test/CodeGen/X86/scalar_sse_minmax.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -march=x86 -mattr=+sse,+sse2 | \
+; RUN:   grep mins | count 3
+; RUN: llc < %s -march=x86 -mattr=+sse,+sse2 | \
+; RUN:   grep maxs | count 2
+
+declare i1 @llvm.isunordered.f64(double, double)
+
+declare i1 @llvm.isunordered.f32(float, float)
+
+define float @min1(float %x, float %y) {
+	%tmp = fcmp olt float %x, %y		; <i1> [#uses=1]
+	%retval = select i1 %tmp, float %x, float %y		; <float> [#uses=1]
+	ret float %retval
+}
+
+define double @min2(double %x, double %y) {
+	%tmp = fcmp olt double %x, %y		; <i1> [#uses=1]
+	%retval = select i1 %tmp, double %x, double %y		; <double> [#uses=1]
+	ret double %retval
+}
+
+define float @max1(float %x, float %y) {
+	%tmp = fcmp oge float %x, %y		; <i1> [#uses=1]
+	%tmp2 = fcmp uno float %x, %y		; <i1> [#uses=1]
+	%tmp3 = or i1 %tmp2, %tmp		; <i1> [#uses=1]
+	%retval = select i1 %tmp3, float %x, float %y		; <float> [#uses=1]
+	ret float %retval
+}
+
+define double @max2(double %x, double %y) {
+	%tmp = fcmp oge double %x, %y		; <i1> [#uses=1]
+	%tmp2 = fcmp uno double %x, %y		; <i1> [#uses=1]
+	%tmp3 = or i1 %tmp2, %tmp		; <i1> [#uses=1]
+	%retval = select i1 %tmp3, double %x, double %y		; <double> [#uses=1]
+	ret double %retval
+}
+
+define <4 x float> @min3(float %tmp37) {
+	%tmp375 = insertelement <4 x float> undef, float %tmp37, i32 0		; <<4 x float>> [#uses=1]
+	%tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp375, <4 x float> < float 6.553500e+04, float undef, float undef, float undef > )		; <<4 x float>> [#uses=1]
+	ret <4 x float> %tmp48
+}
+
+declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
diff --git a/test/CodeGen/X86/scalar_widen_div.ll b/test/CodeGen/X86/scalar_widen_div.ll
new file mode 100644
index 0000000..fc67e44
--- /dev/null
+++ b/test/CodeGen/X86/scalar_widen_div.ll
@@ -0,0 +1,154 @@
+; RUN: llc < %s -disable-mmx -march=x86-64 -mattr=+sse42 |  FileCheck %s
+
+; Verify when widening a divide/remainder operation, we only generate a
+; divide/rem per element since divide/remainder can trap.
+
+define void @vectorDiv (<2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)* %qdest) nounwind {
+; CHECK: idivl
+; CHECK: idivl
+; CHECK-NOT: idivl
+; CHECK: ret
+entry:
+  %nsource.addr = alloca <2 x i32> addrspace(1)*, align 4
+  %dsource.addr = alloca <2 x i32> addrspace(1)*, align 4
+  %qdest.addr = alloca <2 x i32> addrspace(1)*, align 4
+  %index = alloca i32, align 4
+  store <2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)** %nsource.addr
+  store <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)** %dsource.addr
+  store <2 x i32> addrspace(1)* %qdest, <2 x i32> addrspace(1)** %qdest.addr
+  %tmp = load <2 x i32> addrspace(1)** %qdest.addr
+  %tmp1 = load i32* %index
+  %arrayidx = getelementptr <2 x i32> addrspace(1)* %tmp, i32 %tmp1
+  %tmp2 = load <2 x i32> addrspace(1)** %nsource.addr
+  %tmp3 = load i32* %index
+  %arrayidx4 = getelementptr <2 x i32> addrspace(1)* %tmp2, i32 %tmp3
+  %tmp5 = load <2 x i32> addrspace(1)* %arrayidx4
+  %tmp6 = load <2 x i32> addrspace(1)** %dsource.addr
+  %tmp7 = load i32* %index
+  %arrayidx8 = getelementptr <2 x i32> addrspace(1)* %tmp6, i32 %tmp7
+  %tmp9 = load <2 x i32> addrspace(1)* %arrayidx8
+  %tmp10 = sdiv <2 x i32> %tmp5, %tmp9
+  store <2 x i32> %tmp10, <2 x i32> addrspace(1)* %arrayidx
+  ret void
+}
+
+define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) {
+; CHECK: idivb
+; CHECK: idivb
+; CHECK: idivb
+; CHECK-NOT: idivb
+; CHECK: ret
+  %div.r = sdiv <3 x i8> %num, %div
+  ret <3 x i8>  %div.r
+}
+
+define <3 x i8> @test_uchar_div(<3 x i8> %num, <3 x i8> %div) {
+; CHECK: divb
+; CHECK: divb
+; CHECK: divb
+; CHECK-NOT: divb
+; CHECK: ret
+  %div.r = udiv <3 x i8> %num, %div
+  ret <3 x i8>  %div.r
+}
+
+define <5 x i16> @test_short_div(<5 x i16> %num, <5 x i16> %div) {
+; CHECK: idivw
+; CHECK: idivw
+; CHECK: idivw
+; CHECK: idivw
+; CHECK: idivw
+; CHECK-NOT: idivw
+; CHECK: ret
+  %div.r = sdiv <5 x i16> %num, %div
+  ret <5 x i16>  %div.r
+}
+
+define <4 x i16> @test_ushort_div(<4 x i16> %num, <4 x i16> %div) {
+; CHECK: divw
+; CHECK: divw
+; CHECK: divw
+; CHECK: divw
+; CHECK-NOT: divw
+; CHECK: ret
+  %div.r = udiv <4 x i16> %num, %div
+  ret <4 x i16>  %div.r
+}
+
+define <3 x i32> @test_uint_div(<3 x i32> %num, <3 x i32> %div) {
+; CHECK: divl
+; CHECK: divl
+; CHECK: divl
+; CHECK-NOT: divl
+; CHECK: ret
+  %div.r = udiv <3 x i32> %num, %div
+  ret <3 x i32>  %div.r
+}
+
+define <3 x i64> @test_long_div(<3 x i64> %num, <3 x i64> %div) {
+; CHECK: idivq
+; CHECK: idivq
+; CHECK: idivq
+; CHECK-NOT: idivq
+; CHECK: ret
+  %div.r = sdiv <3 x i64> %num, %div
+  ret <3 x i64>  %div.r
+}
+
+define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) {
+; CHECK: divq
+; CHECK: divq
+; CHECK: divq
+; CHECK-NOT: divq
+; CHECK: ret
+  %div.r = udiv <3 x i64> %num, %div
+  ret <3 x i64>  %div.r
+}
+
+
+define <4 x i8> @test_char_rem(<4 x i8> %num, <4 x i8> %rem) {
+; CHECK: idivb
+; CHECK: idivb
+; CHECK: idivb
+; CHECK: idivb
+; CHECK-NOT: idivb
+; CHECK: ret
+  %rem.r = srem <4 x i8> %num, %rem
+  ret <4 x i8>  %rem.r
+}
+
+define <5 x i16> @test_short_rem(<5 x i16> %num, <5 x i16> %rem) {
+; CHECK: idivw
+; CHECK: idivw
+; CHECK: idivw
+; CHECK: idivw
+; CHECK: idivw
+; CHECK-NOT: idivw
+; CHECK: ret
+  %rem.r = srem <5 x i16> %num, %rem
+  ret <5 x i16>  %rem.r
+}
+
+define <4 x i32> @test_uint_rem(<4 x i32> %num, <4 x i32> %rem) {
+; CHECK: idivl
+; CHECK: idivl
+; CHECK: idivl
+; CHECK: idivl
+; CHECK-NOT: idivl
+; CHECK: ret
+  %rem.r = srem <4 x i32> %num, %rem
+  ret <4 x i32>  %rem.r
+}
+
+
+define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) {
+; CHECK: divq
+; CHECK: divq
+; CHECK: divq
+; CHECK: divq
+; CHECK: divq
+; CHECK-NOT: divq
+; CHECK: ret
+  %rem.r = urem <5 x i64> %num, %rem
+  ret <5 x i64>  %rem.r
+}
diff --git a/test/CodeGen/X86/scalarize-bitcast.ll b/test/CodeGen/X86/scalarize-bitcast.ll
new file mode 100644
index 0000000..f6b29ec
--- /dev/null
+++ b/test/CodeGen/X86/scalarize-bitcast.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=x86-64
+; PR3886
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "x86_64-pc-linux-gnu"
+
+define void @mmxCombineMaskU(i32* nocapture %src, i32* nocapture %mask) nounwind {
+entry:
+	%tmp1 = load i32* %src		; <i32> [#uses=1]
+	%0 = insertelement <2 x i32> undef, i32 %tmp1, i32 0		; <<2 x i32>> [#uses=1]
+	%1 = insertelement <2 x i32> %0, i32 0, i32 1		; <<2 x i32>> [#uses=1]
+	%conv.i.i = bitcast <2 x i32> %1 to <1 x i64>		; <<1 x i64>> [#uses=1]
+	%tmp2.i.i = extractelement <1 x i64> %conv.i.i, i32 0		; <i64> [#uses=1]
+	%tmp22.i = bitcast i64 %tmp2.i.i to <1 x i64>		; <<1 x i64>> [#uses=1]
+	%tmp15.i = extractelement <1 x i64> %tmp22.i, i32 0		; <i64> [#uses=1]
+	%conv.i26.i = bitcast i64 %tmp15.i to <8 x i8>		; <<8 x i8>> [#uses=1]
+	%shuffle.i.i = shufflevector <8 x i8> %conv.i26.i, <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef>, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>		; <<8 x i8>> [#uses=1]
+	%conv6.i.i = bitcast <8 x i8> %shuffle.i.i to <1 x i64>		; <<1 x i64>> [#uses=1]
+	%tmp12.i.i = extractelement <1 x i64> %conv6.i.i, i32 0		; <i64> [#uses=1]
+	%tmp10.i = bitcast i64 %tmp12.i.i to <1 x i64>		; <<1 x i64>> [#uses=1]
+	%tmp24.i = extractelement <1 x i64> %tmp10.i, i32 0		; <i64> [#uses=1]
+	%tmp10 = bitcast i64 %tmp24.i to <1 x i64>		; <<1 x i64>> [#uses=1]
+	%tmp7 = extractelement <1 x i64> %tmp10, i32 0		; <i64> [#uses=1]
+	%call6 = tail call i32 (...)* @store8888(i64 %tmp7)		; <i32> [#uses=1]
+	store i32 %call6, i32* %src
+	ret void
+}
+
+declare i32 @store8888(...)
diff --git a/test/CodeGen/X86/scev-interchange.ll b/test/CodeGen/X86/scev-interchange.ll
new file mode 100644
index 0000000..81c919f
--- /dev/null
+++ b/test/CodeGen/X86/scev-interchange.ll
@@ -0,0 +1,338 @@
+; RUN: llc < %s -march=x86-64
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+	%"struct.DataOutBase::GmvFlags" = type { i32 }
+	%"struct.FE_DGPNonparametric<3>" = type { [1156 x i8], i32, %"struct.PolynomialSpace<1>" }
+	%"struct.FiniteElementData<1>" = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+	%struct.Line = type { [2 x i32] }
+	%"struct.PolynomialSpace<1>" = type { %"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >", i32, %"struct.std::vector<int,std::allocator<int> >", %"struct.std::vector<int,std::allocator<int> >" }
+	%"struct.Polynomials::Polynomial<double>" = type { %struct.Subscriptor, %"struct.std::vector<double,std::allocator<double> >" }
+	%struct.Subscriptor = type { i32 (...)**, i32, %"struct.std::type_info"* }
+	%"struct.TableBase<2,double>" = type { %struct.Subscriptor, double*, i32, %"struct.TableIndices<2>" }
+	%"struct.TableIndices<2>" = type { %struct.Line }
+	%"struct.std::_Bit_const_iterator" = type { %"struct.std::_Bit_iterator_base" }
+	%"struct.std::_Bit_iterator_base" = type { i64*, i32 }
+	%"struct.std::_Bvector_base<std::allocator<bool> >" = type { %"struct.std::_Bvector_base<std::allocator<bool> >::_Bvector_impl" }
+	%"struct.std::_Bvector_base<std::allocator<bool> >::_Bvector_impl" = type { %"struct.std::_Bit_const_iterator", %"struct.std::_Bit_const_iterator", i64* }
+	%"struct.std::_Vector_base<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >" = type { %"struct.std::_Vector_base<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >::_Vector_impl" }
+	%"struct.std::_Vector_base<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >::_Vector_impl" = type { %"struct.Polynomials::Polynomial<double>"*, %"struct.Polynomials::Polynomial<double>"*, %"struct.Polynomials::Polynomial<double>"* }
+	%"struct.std::_Vector_base<double,std::allocator<double> >" = type { %"struct.std::_Vector_base<double,std::allocator<double> >::_Vector_impl" }
+	%"struct.std::_Vector_base<double,std::allocator<double> >::_Vector_impl" = type { double*, double*, double* }
+	%"struct.std::_Vector_base<int,std::allocator<int> >" = type { %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl" }
+	%"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl" = type { i32*, i32*, i32* }
+	%"struct.std::_Vector_base<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >" = type { %"struct.std::_Vector_base<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >::_Vector_impl" }
+	%"struct.std::_Vector_base<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >::_Vector_impl" = type { %"struct.std::vector<bool,std::allocator<bool> >"*, %"struct.std::vector<bool,std::allocator<bool> >"*, %"struct.std::vector<bool,std::allocator<bool> >"* }
+	%"struct.std::type_info" = type { i32 (...)**, i8* }
+	%"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >" = type { %"struct.std::_Vector_base<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >" }
+	%"struct.std::vector<bool,std::allocator<bool> >" = type { %"struct.std::_Bvector_base<std::allocator<bool> >" }
+	%"struct.std::vector<double,std::allocator<double> >" = type { %"struct.std::_Vector_base<double,std::allocator<double> >" }
+	%"struct.std::vector<int,std::allocator<int> >" = type { %"struct.std::_Vector_base<int,std::allocator<int> >" }
+	%"struct.std::vector<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >" = type { %"struct.std::_Vector_base<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >" }
+
+declare void @_Unwind_Resume(i8*)
+
+declare i8* @_Znwm(i64)
+
+declare fastcc void @_ZNSt6vectorIjSaIjEEaSERKS1_(%"struct.std::vector<int,std::allocator<int> >"*, %"struct.std::vector<int,std::allocator<int> >"*)
+
+declare fastcc void @_ZN9TableBaseILi2EdE6reinitERK12TableIndicesILi2EE(%"struct.TableBase<2,double>"* nocapture, i32, i32)
+
+declare fastcc void @_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_(%"struct.std::vector<bool,std::allocator<bool> >"* nocapture, i64, i8* nocapture)
+
+declare fastcc void @_ZNSt6vectorIS_IbSaIbEESaIS1_EEC2EmRKS1_RKS2_(%"struct.std::vector<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >"* nocapture, i64, %"struct.std::vector<bool,std::allocator<bool> >"* nocapture)
+
+declare fastcc void @_ZNSt6vectorIN11Polynomials10PolynomialIdEESaIS2_EED1Ev(%"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >"* nocapture)
+
+declare fastcc void @_ZN24TensorProductPolynomialsILi3EEC2IN11Polynomials10PolynomialIdEEEERKSt6vectorIT_SaIS6_EE(%"struct.PolynomialSpace<1>"* nocapture, %"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >"* nocapture)
+
+declare fastcc void @_ZN7FE_PolyI24TensorProductPolynomialsILi3EELi3EEC2EjRKS1_RK17FiniteElementDataILi3EERKSt6vectorIbSaIbEERKS9_ISB_SaISB_EE(%"struct.FE_DGPNonparametric<3>"*, i32, %"struct.PolynomialSpace<1>"* nocapture, %"struct.FiniteElementData<1>"* nocapture, %"struct.std::vector<bool,std::allocator<bool> >"* nocapture, %"struct.std::vector<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >"* nocapture)
+
+declare fastcc void @_ZN11FE_Q_Helper12_GLOBAL__N_116invert_numberingERKSt6vectorIjSaIjEE(%"struct.std::vector<int,std::allocator<int> >"* noalias nocapture sret, %"struct.std::vector<int,std::allocator<int> >"* nocapture)
+
+declare fastcc void @_ZN4FE_QILi3EE14get_dpo_vectorEj(%"struct.std::vector<int,std::allocator<int> >"* noalias nocapture sret, i32)
+
+define fastcc void @_ZN4FE_QILi3EEC1Ej(i32 %degree) {
+entry:
+	invoke fastcc void @_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_(%"struct.std::vector<bool,std::allocator<bool> >"* undef, i64 1, i8* undef)
+			to label %invcont.i unwind label %lpad.i
+
+invcont.i:		; preds = %entry
+	invoke fastcc void @_ZN4FE_QILi3EE14get_dpo_vectorEj(%"struct.std::vector<int,std::allocator<int> >"* noalias sret undef, i32 %degree)
+			to label %invcont1.i unwind label %lpad120.i
+
+invcont1.i:		; preds = %invcont.i
+	invoke fastcc void @_ZNSt6vectorIS_IbSaIbEESaIS1_EEC2EmRKS1_RKS2_(%"struct.std::vector<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >"* undef, i64 undef, %"struct.std::vector<bool,std::allocator<bool> >"* undef)
+			to label %invcont3.i unwind label %lpad124.i
+
+invcont3.i:		; preds = %invcont1.i
+	invoke fastcc void @_ZN4FE_QILi3EE14get_dpo_vectorEj(%"struct.std::vector<int,std::allocator<int> >"* noalias sret undef, i32 %degree)
+			to label %invcont4.i unwind label %lpad128.i
+
+invcont4.i:		; preds = %invcont3.i
+	invoke fastcc void @_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_(%"struct.std::vector<bool,std::allocator<bool> >"* undef, i64 undef, i8* undef)
+			to label %invcont6.i unwind label %lpad132.i
+
+invcont6.i:		; preds = %invcont4.i
+	invoke fastcc void @_ZN4FE_QILi3EE14get_dpo_vectorEj(%"struct.std::vector<int,std::allocator<int> >"* noalias sret undef, i32 %degree)
+			to label %invcont7.i unwind label %lpad136.i
+
+invcont7.i:		; preds = %invcont6.i
+	invoke fastcc void @_ZN11Polynomials19LagrangeEquidistant23generate_complete_basisEj(%"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >"* noalias sret undef, i32 %degree)
+			to label %invcont9.i unwind label %lpad140.i
+
+invcont9.i:		; preds = %invcont7.i
+	invoke fastcc void @_ZN24TensorProductPolynomialsILi3EEC2IN11Polynomials10PolynomialIdEEEERKSt6vectorIT_SaIS6_EE(%"struct.PolynomialSpace<1>"* undef, %"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >"* undef)
+			to label %invcont10.i unwind label %lpad144.i
+
+invcont10.i:		; preds = %invcont9.i
+	invoke fastcc void @_ZN7FE_PolyI24TensorProductPolynomialsILi3EELi3EEC2EjRKS1_RK17FiniteElementDataILi3EERKSt6vectorIbSaIbEERKS9_ISB_SaISB_EE(%"struct.FE_DGPNonparametric<3>"* undef, i32 %degree, %"struct.PolynomialSpace<1>"* undef, %"struct.FiniteElementData<1>"* undef, %"struct.std::vector<bool,std::allocator<bool> >"* undef, %"struct.std::vector<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >"* undef)
+			to label %bb14.i unwind label %lpad148.i
+
+bb14.i:		; preds = %invcont10.i
+	br i1 false, label %bb3.i164.i, label %bb.i.i.i.i160.i
+
+bb.i.i.i.i160.i:		; preds = %bb14.i
+	unreachable
+
+bb3.i164.i:		; preds = %bb14.i
+	br i1 undef, label %bb10.i168.i, label %bb.i.i.i20.i166.i
+
+bb.i.i.i20.i166.i:		; preds = %bb3.i164.i
+	unreachable
+
+bb10.i168.i:		; preds = %bb3.i164.i
+	invoke fastcc void @_ZNSt6vectorIN11Polynomials10PolynomialIdEESaIS2_EED1Ev(%"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >"* undef)
+			to label %bb21.i unwind label %lpad144.i
+
+bb21.i:		; preds = %bb10.i168.i
+	invoke fastcc void @_ZNSt6vectorIN11Polynomials10PolynomialIdEESaIS2_EED1Ev(%"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >"* undef)
+			to label %bb28.i unwind label %lpad140.i
+
+bb28.i:		; preds = %bb21.i
+	br i1 undef, label %bb35.i, label %bb.i.i.i175.i
+
+bb.i.i.i175.i:		; preds = %bb28.i
+	br label %bb35.i
+
+bb35.i:		; preds = %bb.i.i.i175.i, %bb28.i
+	br i1 undef, label %bb42.i, label %bb.i.i.i205.i
+
+bb.i.i.i205.i:		; preds = %bb35.i
+	unreachable
+
+bb42.i:		; preds = %bb35.i
+	br i1 undef, label %bb47.i, label %bb.i.i.i213.i
+
+bb.i.i.i213.i:		; preds = %bb42.i
+	unreachable
+
+bb47.i:		; preds = %bb42.i
+	br i1 undef, label %bb59.i, label %bb.i.i.i247.i
+
+bb.i.i.i247.i:		; preds = %bb47.i
+	unreachable
+
+bb59.i:		; preds = %bb47.i
+	br i1 undef, label %bb66.i, label %bb.i.i.i255.i
+
+bb.i.i.i255.i:		; preds = %bb59.i
+	unreachable
+
+bb66.i:		; preds = %bb59.i
+	br i1 undef, label %bb71.i, label %bb.i.i.i262.i
+
+bb.i.i.i262.i:		; preds = %bb66.i
+	br label %bb71.i
+
+bb71.i:		; preds = %bb.i.i.i262.i, %bb66.i
+	%tmp11.i.i29.i.i.i.i.i.i = invoke i8* @_Znwm(i64 12)
+			to label %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i.i.i unwind label %lpad.i.i.i.i.i.i		; <i8*> [#uses=0]
+
+lpad.i.i.i.i.i.i:		; preds = %bb71.i
+	unreachable
+
+_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i.i.i:		; preds = %bb71.i
+	br i1 undef, label %_ZNSt6vectorIjSaIjEED1Ev.exit.i.i, label %bb.i.i.i.i94.i
+
+bb.i.i.i.i94.i:		; preds = %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i.i.i
+	unreachable
+
+_ZNSt6vectorIjSaIjEED1Ev.exit.i.i:		; preds = %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i.i.i
+	%tmp11.i.i29.i.i.i.i5.i.i = invoke i8* @_Znwm(i64 undef)
+			to label %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i12.i.i unwind label %lpad.i.i.i.i8.i.i		; <i8*> [#uses=0]
+
+lpad.i.i.i.i8.i.i:		; preds = %_ZNSt6vectorIjSaIjEED1Ev.exit.i.i
+	invoke void @_Unwind_Resume(i8* undef)
+			to label %.noexc.i9.i.i unwind label %lpad.i19.i.i
+
+.noexc.i9.i.i:		; preds = %lpad.i.i.i.i8.i.i
+	unreachable
+
+_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i12.i.i:		; preds = %_ZNSt6vectorIjSaIjEED1Ev.exit.i.i
+	br i1 undef, label %bb50.i.i.i, label %bb.i.i.i.i.i.i.i.i.i.i
+
+bb.i.i.i.i.i.i.i.i.i.i:		; preds = %bb.i.i.i.i.i.i.i.i.i.i, %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i12.i.i
+	br i1 undef, label %bb50.i.i.i, label %bb.i.i.i.i.i.i.i.i.i.i
+
+bb50.i.i.i:		; preds = %bb.i.i.i.i.i.i.i.i.i.i, %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i12.i.i
+	invoke fastcc void @_ZN11FE_Q_Helper12_GLOBAL__N_116invert_numberingERKSt6vectorIjSaIjEE(%"struct.std::vector<int,std::allocator<int> >"* noalias sret undef, %"struct.std::vector<int,std::allocator<int> >"* undef)
+			to label %bb83.i unwind label %lpad188.i
+
+lpad.i19.i.i:		; preds = %lpad.i.i.i.i8.i.i
+	unreachable
+
+bb83.i:		; preds = %bb50.i.i.i
+	br i1 undef, label %invcont84.i, label %bb.i.i.i221.i
+
+bb.i.i.i221.i:		; preds = %bb83.i
+	unreachable
+
+invcont84.i:		; preds = %bb83.i
+	%tmp11.i.i29.i.i.i.i.i = invoke i8* @_Znwm(i64 undef)
+			to label %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i.i unwind label %lpad.i.i.i.i315.i		; <i8*> [#uses=0]
+
+lpad.i.i.i.i315.i:		; preds = %invcont84.i
+	invoke void @_Unwind_Resume(i8* undef)
+			to label %.noexc.i316.i unwind label %lpad.i352.i
+
+.noexc.i316.i:		; preds = %lpad.i.i.i.i315.i
+	unreachable
+
+_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i.i:		; preds = %invcont84.i
+	br i1 undef, label %bb50.i.i, label %bb.i.i.i.i.i.i.i.i320.i
+
+bb.i.i.i.i.i.i.i.i320.i:		; preds = %bb.i.i.i.i.i.i.i.i320.i, %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i.i
+	br i1 undef, label %bb50.i.i, label %bb.i.i.i.i.i.i.i.i320.i
+
+bb50.i.i:		; preds = %bb.i.i.i.i.i.i.i.i320.i, %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i.i
+	invoke fastcc void @_ZN11FE_Q_Helper12_GLOBAL__N_116invert_numberingERKSt6vectorIjSaIjEE(%"struct.std::vector<int,std::allocator<int> >"* noalias sret undef, %"struct.std::vector<int,std::allocator<int> >"* undef)
+			to label %invcont86.i unwind label %lpad200.i
+
+lpad.i352.i:		; preds = %lpad.i.i.i.i315.i
+	unreachable
+
+invcont86.i:		; preds = %bb50.i.i
+	invoke fastcc void @_ZNSt6vectorIjSaIjEEaSERKS1_(%"struct.std::vector<int,std::allocator<int> >"* undef, %"struct.std::vector<int,std::allocator<int> >"* undef)
+			to label %.noexc380.i unwind label %lpad204.i
+
+.noexc380.i:		; preds = %invcont86.i
+	br i1 undef, label %bb100.i, label %bb.i.i.i198.i
+
+bb.i.i.i198.i:		; preds = %.noexc380.i
+	unreachable
+
+bb100.i:		; preds = %.noexc380.i
+	br i1 undef, label %invcont101.i, label %bb.i.i.i190.i
+
+bb.i.i.i190.i:		; preds = %bb100.i
+	unreachable
+
+invcont101.i:		; preds = %bb100.i
+	invoke fastcc void @_ZN9TableBaseILi2EdE6reinitERK12TableIndicesILi2EE(%"struct.TableBase<2,double>"* undef, i32 undef, i32 undef)
+			to label %_ZN10FullMatrixIdEC1Ejj.exit.i.i unwind label %lpad.i.i.i.i.i
+
+lpad.i.i.i.i.i:		; preds = %invcont101.i
+	unreachable
+
+_ZN10FullMatrixIdEC1Ejj.exit.i.i:		; preds = %invcont101.i
+	invoke fastcc void @_ZN9TableBaseILi2EdE6reinitERK12TableIndicesILi2EE(%"struct.TableBase<2,double>"* undef, i32 undef, i32 undef)
+			to label %_ZN10FullMatrixIdEC1Ejj.exit28.i.i unwind label %lpad.i.i.i27.i.i
+
+lpad.i.i.i27.i.i:		; preds = %_ZN10FullMatrixIdEC1Ejj.exit.i.i
+	invoke void @_Unwind_Resume(i8* undef)
+			to label %.noexc.i.i unwind label %lpad.i.i
+
+.noexc.i.i:		; preds = %lpad.i.i.i27.i.i
+	unreachable
+
+_ZN10FullMatrixIdEC1Ejj.exit28.i.i:		; preds = %_ZN10FullMatrixIdEC1Ejj.exit.i.i
+	br i1 undef, label %bb58.i.i, label %bb.i.i.i304.i.i
+
+bb.i.i.i304.i.i:		; preds = %_ZN10FullMatrixIdEC1Ejj.exit28.i.i
+	unreachable
+
+bb58.i.i:		; preds = %_ZN10FullMatrixIdEC1Ejj.exit28.i.i
+	br i1 false, label %bb.i191.i, label %bb.i.i.i297.i.i
+
+bb.i.i.i297.i.i:		; preds = %bb58.i.i
+	unreachable
+
+lpad.i.i:		; preds = %lpad.i.i.i27.i.i
+	unreachable
+
+bb.i191.i:		; preds = %.noexc232.i, %bb58.i.i
+	invoke fastcc void @_ZN9TableBaseILi2EdE6reinitERK12TableIndicesILi2EE(%"struct.TableBase<2,double>"* undef, i32 undef, i32 undef)
+			to label %.noexc232.i unwind label %lpad196.i
+
+.noexc232.i:		; preds = %bb.i191.i
+	br i1 undef, label %bb29.loopexit.i.i, label %bb.i191.i
+
+bb7.i215.i:		; preds = %bb9.i216.i
+	br i1 undef, label %bb16.preheader.i.i, label %bb8.i.i
+
+bb8.i.i:		; preds = %bb7.i215.i
+	%tmp60.i.i = add i32 %0, 1		; <i32> [#uses=1]
+	br label %bb9.i216.i
+
+bb9.i216.i:		; preds = %bb29.loopexit.i.i, %bb8.i.i
+	%0 = phi i32 [ 0, %bb29.loopexit.i.i ], [ %tmp60.i.i, %bb8.i.i ]		; <i32> [#uses=2]
+	br i1 undef, label %bb7.i215.i, label %bb16.preheader.i.i
+
+bb15.i.i:		; preds = %bb16.preheader.i.i, %bb15.i.i
+	%j1.0212.i.i = phi i32 [ %1, %bb15.i.i ], [ 0, %bb16.preheader.i.i ]		; <i32> [#uses=2]
+	%tmp6.i.i195.i.i = load i32* undef, align 4		; <i32> [#uses=1]
+	%tmp231.i.i = mul i32 %0, %tmp6.i.i195.i.i		; <i32> [#uses=1]
+	%tmp13.i197.i.i = add i32 %j1.0212.i.i, %tmp231.i.i		; <i32> [#uses=0]
+	%1 = add i32 %j1.0212.i.i, 1		; <i32> [#uses=1]
+	br i1 undef, label %bb15.i.i, label %bb17.i.i
+
+bb17.i.i:		; preds = %bb16.preheader.i.i, %bb15.i.i
+	br label %bb16.preheader.i.i
+
+bb16.preheader.i.i:		; preds = %bb17.i.i, %bb9.i216.i, %bb7.i215.i
+	br i1 undef, label %bb17.i.i, label %bb15.i.i
+
+bb29.loopexit.i.i:		; preds = %.noexc232.i
+	br label %bb9.i216.i
+
+lpad.i:		; preds = %entry
+	unreachable
+
+lpad120.i:		; preds = %invcont.i
+	unreachable
+
+lpad124.i:		; preds = %invcont1.i
+	unreachable
+
+lpad128.i:		; preds = %invcont3.i
+	unreachable
+
+lpad132.i:		; preds = %invcont4.i
+	unreachable
+
+lpad136.i:		; preds = %invcont6.i
+	unreachable
+
+lpad140.i:		; preds = %bb21.i, %invcont7.i
+	unreachable
+
+lpad144.i:		; preds = %bb10.i168.i, %invcont9.i
+	unreachable
+
+lpad148.i:		; preds = %invcont10.i
+	unreachable
+
+lpad188.i:		; preds = %bb50.i.i.i
+	unreachable
+
+lpad196.i:		; preds = %bb.i191.i
+	unreachable
+
+lpad200.i:		; preds = %bb50.i.i
+	unreachable
+
+lpad204.i:		; preds = %invcont86.i
+	unreachable
+}
+
+declare fastcc void @_ZN11Polynomials19LagrangeEquidistant23generate_complete_basisEj(%"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >"* noalias nocapture sret, i32)
diff --git a/test/CodeGen/X86/select-aggregate.ll b/test/CodeGen/X86/select-aggregate.ll
new file mode 100644
index 0000000..44cafe2
--- /dev/null
+++ b/test/CodeGen/X86/select-aggregate.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; PR5757
+
+; CHECK: cmovneq %rdi, %rsi
+; CHECK: movl (%rsi), %eax
+
+%0 = type { i64, i32 }
+
+define i32 @foo(%0* %p, %0* %q, i1 %r) nounwind {
+  %t0 = load %0* %p
+  %t1 = load %0* %q
+  %t4 = select i1 %r, %0 %t0, %0 %t1
+  %t5 = extractvalue %0 %t4, 1
+  ret i32 %t5
+}
diff --git a/test/CodeGen/X86/select-zero-one.ll b/test/CodeGen/X86/select-zero-one.ll
new file mode 100644
index 0000000..c38a020
--- /dev/null
+++ b/test/CodeGen/X86/select-zero-one.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep cmov
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep xor
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movzbl | count 1
+
+@r1 = weak global i32 0
+
+define void @t1(i32 %a, double %b) {
+  %tmp114 = fcmp ugt double %b, 1.000000e-09
+  %tmp120 = icmp eq i32 %a, 0		; <i1> [#uses=1]
+  %bothcond = or i1 %tmp114, %tmp120		; <i1> [#uses=1]
+  %storemerge = select i1 %bothcond, i32 0, i32 1		; <i32> [#uses=2]
+  store i32 %storemerge, i32* @r1, align 4
+  ret void
+}
+
+@r2 = weak global i8 0
+
+define void @t2(i32 %a, double %b) {
+  %tmp114 = fcmp ugt double %b, 1.000000e-09
+  %tmp120 = icmp eq i32 %a, 0		; <i1> [#uses=1]
+  %bothcond = or i1 %tmp114, %tmp120		; <i1> [#uses=1]
+  %storemerge = select i1 %bothcond, i8 0, i8 1		; <i32> [#uses=2]
+  store i8 %storemerge, i8* @r2, align 4
+  ret void
+}
diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll
new file mode 100644
index 0000000..95ed9e9
--- /dev/null
+++ b/test/CodeGen/X86/select.ll
@@ -0,0 +1,63 @@
+; RUN: llc < %s -march=x86 -mcpu=pentium 
+; RUN: llc < %s -march=x86 -mcpu=yonah 
+; RUN: llc < %s -march=x86 -mcpu=yonah  | not grep set
+
+define i1 @boolSel(i1 %A, i1 %B, i1 %C) nounwind {
+	%X = select i1 %A, i1 %B, i1 %C		; <i1> [#uses=1]
+	ret i1 %X
+}
+
+define i8 @byteSel(i1 %A, i8 %B, i8 %C) nounwind {
+	%X = select i1 %A, i8 %B, i8 %C		; <i8> [#uses=1]
+	ret i8 %X
+}
+
+define i16 @shortSel(i1 %A, i16 %B, i16 %C) nounwind {
+	%X = select i1 %A, i16 %B, i16 %C		; <i16> [#uses=1]
+	ret i16 %X
+}
+
+define i32 @intSel(i1 %A, i32 %B, i32 %C) nounwind {
+	%X = select i1 %A, i32 %B, i32 %C		; <i32> [#uses=1]
+	ret i32 %X
+}
+
+define i64 @longSel(i1 %A, i64 %B, i64 %C) nounwind {
+	%X = select i1 %A, i64 %B, i64 %C		; <i64> [#uses=1]
+	ret i64 %X
+}
+
+define double @doubleSel(i1 %A, double %B, double %C) nounwind {
+	%X = select i1 %A, double %B, double %C		; <double> [#uses=1]
+	ret double %X
+}
+
+define i8 @foldSel(i1 %A, i8 %B, i8 %C) nounwind {
+	%Cond = icmp slt i8 %B, %C		; <i1> [#uses=1]
+	%X = select i1 %Cond, i8 %B, i8 %C		; <i8> [#uses=1]
+	ret i8 %X
+}
+
+define i32 @foldSel2(i1 %A, i32 %B, i32 %C) nounwind {
+	%Cond = icmp eq i32 %B, %C		; <i1> [#uses=1]
+	%X = select i1 %Cond, i32 %B, i32 %C		; <i32> [#uses=1]
+	ret i32 %X
+}
+
+define i32 @foldSel2a(i1 %A, i32 %B, i32 %C, double %X, double %Y) nounwind {
+	%Cond = fcmp olt double %X, %Y		; <i1> [#uses=1]
+	%X.upgrd.1 = select i1 %Cond, i32 %B, i32 %C		; <i32> [#uses=1]
+	ret i32 %X.upgrd.1
+}
+
+define float @foldSel3(i1 %A, float %B, float %C, i32 %X, i32 %Y) nounwind {
+	%Cond = icmp ult i32 %X, %Y		; <i1> [#uses=1]
+	%X.upgrd.2 = select i1 %Cond, float %B, float %C		; <float> [#uses=1]
+	ret float %X.upgrd.2
+}
+
+define float @nofoldSel4(i1 %A, float %B, float %C, i32 %X, i32 %Y) nounwind {
+	%Cond = icmp slt i32 %X, %Y		; <i1> [#uses=1]
+	%X.upgrd.3 = select i1 %Cond, float %B, float %C		; <float> [#uses=1]
+	ret float %X.upgrd.3
+}
diff --git a/test/CodeGen/X86/setcc.ll b/test/CodeGen/X86/setcc.ll
new file mode 100644
index 0000000..c37e15d
--- /dev/null
+++ b/test/CodeGen/X86/setcc.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; rdar://7329206
+
+; Use sbb x, x to materialize carry bit in a GPR. The value is either
+; all 1's or all 0's.
+
+define zeroext i16 @t1(i16 zeroext %x) nounwind readnone ssp {
+entry:
+; CHECK: t1:
+; CHECK: seta %al
+; CHECK: movzbl %al, %eax
+; CHECK: shll $5, %eax
+  %0 = icmp ugt i16 %x, 26                        ; <i1> [#uses=1]
+  %iftmp.1.0 = select i1 %0, i16 32, i16 0        ; <i16> [#uses=1]
+  ret i16 %iftmp.1.0
+}
+
+define zeroext i16 @t2(i16 zeroext %x) nounwind readnone ssp {
+entry:
+; CHECK: t2:
+; CHECK: sbbl %eax, %eax
+; CHECK: andl $32, %eax
+  %0 = icmp ult i16 %x, 26                        ; <i1> [#uses=1]
+  %iftmp.0.0 = select i1 %0, i16 32, i16 0        ; <i16> [#uses=1]
+  ret i16 %iftmp.0.0
+}
+
+define i64 @t3(i64 %x) nounwind readnone ssp {
+entry:
+; CHECK: t3:
+; CHECK: sbbq %rax, %rax
+; CHECK: andq $64, %rax
+  %0 = icmp ult i64 %x, 18                        ; <i1> [#uses=1]
+  %iftmp.2.0 = select i1 %0, i64 64, i64 0        ; <i64> [#uses=1]
+  ret i64 %iftmp.2.0
+}
diff --git a/test/CodeGen/X86/setoeq.ll b/test/CodeGen/X86/setoeq.ll
new file mode 100644
index 0000000..4a9c1ba
--- /dev/null
+++ b/test/CodeGen/X86/setoeq.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86  | grep set | count 2
+; RUN: llc < %s -march=x86  | grep and
+
+define zeroext i8 @t(double %x) nounwind readnone {
+entry:
+	%0 = fptosi double %x to i32		; <i32> [#uses=1]
+	%1 = sitofp i32 %0 to double		; <double> [#uses=1]
+	%2 = fcmp oeq double %1, %x		; <i1> [#uses=1]
+	%retval12 = zext i1 %2 to i8		; <i8> [#uses=1]
+	ret i8 %retval12
+}
diff --git a/test/CodeGen/X86/setuge.ll b/test/CodeGen/X86/setuge.ll
new file mode 100644
index 0000000..4ca2f18
--- /dev/null
+++ b/test/CodeGen/X86/setuge.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86  | not grep set
+
+declare i1 @llvm.isunordered.f32(float, float)
+
+define float @cmp(float %A, float %B, float %C, float %D) nounwind {
+entry:
+        %tmp.1 = fcmp uno float %A, %B          ; <i1> [#uses=1]
+        %tmp.2 = fcmp oge float %A, %B          ; <i1> [#uses=1]
+        %tmp.3 = or i1 %tmp.1, %tmp.2           ; <i1> [#uses=1]
+        %tmp.4 = select i1 %tmp.3, float %C, float %D           ; <float> [#uses=1]
+        ret float %tmp.4
+}
+
diff --git a/test/CodeGen/X86/sext-i1.ll b/test/CodeGen/X86/sext-i1.ll
new file mode 100644
index 0000000..21c418d
--- /dev/null
+++ b/test/CodeGen/X86/sext-i1.ll
@@ -0,0 +1,63 @@
+; RUN: llc < %s -march=x86    | FileCheck %s -check-prefix=32
+; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=64
+; rdar://7573216
+; PR6146
+
+define i32 @t1(i32 %x) nounwind readnone ssp {
+entry:
+; 32: t1:
+; 32: cmpl $1
+; 32: sbbl
+
+; 64: t1:
+; 64: cmpl $1
+; 64: sbbl
+  %0 = icmp eq i32 %x, 0
+  %iftmp.0.0 = select i1 %0, i32 -1, i32 0
+  ret i32 %iftmp.0.0
+}
+
+define i32 @t2(i32 %x) nounwind readnone ssp {
+entry:
+; 32: t2:
+; 32: cmpl $1
+; 32: sbbl
+
+; 64: t2:
+; 64: cmpl $1
+; 64: sbbl
+  %0 = icmp eq i32 %x, 0
+  %iftmp.0.0 = sext i1 %0 to i32
+  ret i32 %iftmp.0.0
+}
+
+%struct.zbookmark = type { i64, i64 }
+%struct.zstream = type { }
+
+define i32 @t3() nounwind readonly {
+entry:
+; 32: t3:
+; 32: cmpl $1
+; 32: sbbl
+; 32: cmpl
+; 32: xorl
+
+; 64: t3:
+; 64: cmpl $1
+; 64: sbbq
+; 64: cmpq
+; 64: xorl
+  %not.tobool = icmp eq i32 undef, 0              ; <i1> [#uses=2]
+  %cond = sext i1 %not.tobool to i32              ; <i32> [#uses=1]
+  %conv = sext i1 %not.tobool to i64              ; <i64> [#uses=1]
+  %add13 = add i64 0, %conv                       ; <i64> [#uses=1]
+  %cmp = icmp ult i64 undef, %add13               ; <i1> [#uses=1]
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  %xor27 = xor i32 undef, %cond                   ; <i32> [#uses=0]
+  ret i32 0
+}
diff --git a/test/CodeGen/X86/sext-load.ll b/test/CodeGen/X86/sext-load.ll
new file mode 100644
index 0000000..c9b39d3
--- /dev/null
+++ b/test/CodeGen/X86/sext-load.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86 | grep movsbl
+
+define i32 @foo(i32 %X) nounwind  {
+entry:
+	%tmp12 = trunc i32 %X to i8		; <i8> [#uses=1]
+	%tmp123 = sext i8 %tmp12 to i32		; <i32> [#uses=1]
+	ret i32 %tmp123
+}
+
diff --git a/test/CodeGen/X86/sext-ret-val.ll b/test/CodeGen/X86/sext-ret-val.ll
new file mode 100644
index 0000000..da1a187
--- /dev/null
+++ b/test/CodeGen/X86/sext-ret-val.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=x86 | grep movzbl | count 1
+; rdar://6699246
+
+define signext i8 @t1(i8* %A) nounwind readnone ssp {
+entry:
+        %0 = icmp ne i8* %A, null
+        %1 = zext i1 %0 to i8
+        ret i8 %1
+}
+
+define i8 @t2(i8* %A) nounwind readnone ssp {
+entry:
+        %0 = icmp ne i8* %A, null
+        %1 = zext i1 %0 to i8
+        ret i8 %1
+}
diff --git a/test/CodeGen/X86/sext-select.ll b/test/CodeGen/X86/sext-select.ll
new file mode 100644
index 0000000..4aca040
--- /dev/null
+++ b/test/CodeGen/X86/sext-select.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=x86 | grep movsw
+; PR2139
+
+declare void @abort()
+
+define i32 @main() {
+entry:
+	%tmp73 = tail call i1 @return_false()		; <i8> [#uses=1]
+	%g.0 = select i1 %tmp73, i16 0, i16 -480		; <i16> [#uses=2]
+	%tmp7778 = sext i16 %g.0 to i32		; <i32> [#uses=1]
+	%tmp80 = shl i32 %tmp7778, 3		; <i32> [#uses=2]
+	%tmp87 = icmp sgt i32 %tmp80, 32767		; <i1> [#uses=1]
+	br i1 %tmp87, label %bb90, label %bb91
+bb90:		; preds = %bb84, %bb72
+	tail call void @abort()
+	unreachable
+bb91:		; preds = %bb84
+	ret i32 0
+}
+
+define i1 @return_false() {
+	ret i1 0
+}
diff --git a/test/CodeGen/X86/sext-subreg.ll b/test/CodeGen/X86/sext-subreg.ll
new file mode 100644
index 0000000..b2b9f81
--- /dev/null
+++ b/test/CodeGen/X86/sext-subreg.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; rdar://7529457
+
+define i64 @t(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind {
+; CHECK: t:
+; CHECK: movslq %e{{.*}}, %rax
+; CHECK: movq %rax
+; CHECK: movl %eax
+  %C = add i64 %A, %B
+  %D = trunc i64 %C to i32
+  volatile store i32 %D, i32* %P
+  %E = shl i64 %C, 32
+  %F = ashr i64 %E, 32  
+  volatile store i64 %F, i64 *%P2
+  volatile store i32 %D, i32* %P
+  ret i64 undef
+}
diff --git a/test/CodeGen/X86/sext-trunc.ll b/test/CodeGen/X86/sext-trunc.ll
new file mode 100644
index 0000000..2eaf425
--- /dev/null
+++ b/test/CodeGen/X86/sext-trunc.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86 > %t
+; RUN: grep movsbl %t
+; RUN: not grep movz %t
+; RUN: not grep and %t
+
+define i8 @foo(i16 signext  %x) signext nounwind  {
+	%retval56 = trunc i16 %x to i8
+	ret i8 %retval56
+}
diff --git a/test/CodeGen/X86/sfence.ll b/test/CodeGen/X86/sfence.ll
new file mode 100644
index 0000000..4782879
--- /dev/null
+++ b/test/CodeGen/X86/sfence.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep sfence
+
+declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
+
+define void @test() {
+	call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 true)
+	ret void
+}
diff --git a/test/CodeGen/X86/shift-and.ll b/test/CodeGen/X86/shift-and.ll
new file mode 100644
index 0000000..fd278c2
--- /dev/null
+++ b/test/CodeGen/X86/shift-and.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86    | grep and | count 1
+; RUN: llc < %s -march=x86-64 | not grep and 
+
+define i32 @t1(i32 %t, i32 %val) nounwind {
+       %shamt = and i32 %t, 31
+       %res = shl i32 %val, %shamt
+       ret i32 %res
+}
+
+@X = internal global i16 0
+
+define void @t2(i16 %t) nounwind {
+       %shamt = and i16 %t, 31
+       %tmp = load i16* @X
+       %tmp1 = ashr i16 %tmp, %shamt
+       store i16 %tmp1, i16* @X
+       ret void
+}
+
+define i64 @t3(i64 %t, i64 %val) nounwind {
+       %shamt = and i64 %t, 63
+       %res = lshr i64 %val, %shamt
+       ret i64 %res
+}
diff --git a/test/CodeGen/X86/shift-coalesce.ll b/test/CodeGen/X86/shift-coalesce.ll
new file mode 100644
index 0000000..d38f9a8
--- /dev/null
+++ b/test/CodeGen/X86/shift-coalesce.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
+; RUN:   grep {shld.*CL}
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
+; RUN:   not grep {mov CL, BL}
+
+; PR687
+
+define i64 @foo(i64 %x, i64* %X) {
+        %tmp.1 = load i64* %X           ; <i64> [#uses=1]
+        %tmp.3 = trunc i64 %tmp.1 to i8         ; <i8> [#uses=1]
+        %shift.upgrd.1 = zext i8 %tmp.3 to i64          ; <i64> [#uses=1]
+        %tmp.4 = shl i64 %x, %shift.upgrd.1             ; <i64> [#uses=1]
+        ret i64 %tmp.4
+}
+
diff --git a/test/CodeGen/X86/shift-codegen.ll b/test/CodeGen/X86/shift-codegen.ll
new file mode 100644
index 0000000..4cba183
--- /dev/null
+++ b/test/CodeGen/X86/shift-codegen.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -relocation-model=static -march=x86 | \
+; RUN:   grep {shll	\$3} | count 2
+
+; This should produce two shll instructions, not any lea's.
+
+target triple = "i686-apple-darwin8"
+@Y = weak global i32 0          ; <i32*> [#uses=1]
+@X = weak global i32 0          ; <i32*> [#uses=2]
+
+
+define void @fn1() {
+entry:
+        %tmp = load i32* @Y             ; <i32> [#uses=1]
+        %tmp1 = shl i32 %tmp, 3         ; <i32> [#uses=1]
+        %tmp2 = load i32* @X            ; <i32> [#uses=1]
+        %tmp3 = or i32 %tmp1, %tmp2             ; <i32> [#uses=1]
+        store i32 %tmp3, i32* @X
+        ret void
+}
+
+define i32 @fn2(i32 %X, i32 %Y) {
+entry:
+        %tmp2 = shl i32 %Y, 3           ; <i32> [#uses=1]
+        %tmp4 = or i32 %tmp2, %X                ; <i32> [#uses=1]
+        ret i32 %tmp4
+}
+
diff --git a/test/CodeGen/X86/shift-combine.ll b/test/CodeGen/X86/shift-combine.ll
new file mode 100644
index 0000000..e443ac1
--- /dev/null
+++ b/test/CodeGen/X86/shift-combine.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s | not grep shrl
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin8"
+@array = weak global [4 x i32] zeroinitializer		; <[4 x i32]*> [#uses=1]
+
+define i32 @foo(i32 %x) {
+entry:
+	%tmp2 = lshr i32 %x, 2		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp2, 3		; <i32> [#uses=1]
+	%tmp4 = getelementptr [4 x i32]* @array, i32 0, i32 %tmp3		; <i32*> [#uses=1]
+	%tmp5 = load i32* %tmp4, align 4		; <i32> [#uses=1]
+	ret i32 %tmp5
+}
+
diff --git a/test/CodeGen/X86/shift-double.ll b/test/CodeGen/X86/shift-double.ll
new file mode 100644
index 0000000..5adee7c
--- /dev/null
+++ b/test/CodeGen/X86/shift-double.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
+; RUN:   grep {sh\[lr\]d} | count 5
+
+define i64 @test1(i64 %X, i8 %C) {
+        %shift.upgrd.1 = zext i8 %C to i64              ; <i64> [#uses=1]
+        %Y = shl i64 %X, %shift.upgrd.1         ; <i64> [#uses=1]
+        ret i64 %Y
+}
+
+define i64 @test2(i64 %X, i8 %C) {
+        %shift.upgrd.2 = zext i8 %C to i64              ; <i64> [#uses=1]
+        %Y = ashr i64 %X, %shift.upgrd.2                ; <i64> [#uses=1]
+        ret i64 %Y
+}
+
+define i64 @test3(i64 %X, i8 %C) {
+        %shift.upgrd.3 = zext i8 %C to i64              ; <i64> [#uses=1]
+        %Y = lshr i64 %X, %shift.upgrd.3                ; <i64> [#uses=1]
+        ret i64 %Y
+}
+
+define i32 @test4(i32 %A, i32 %B, i8 %C) {
+        %shift.upgrd.4 = zext i8 %C to i32              ; <i32> [#uses=1]
+        %X = shl i32 %A, %shift.upgrd.4         ; <i32> [#uses=1]
+        %Cv = sub i8 32, %C             ; <i8> [#uses=1]
+        %shift.upgrd.5 = zext i8 %Cv to i32             ; <i32> [#uses=1]
+        %Y = lshr i32 %B, %shift.upgrd.5                ; <i32> [#uses=1]
+        %Z = or i32 %Y, %X              ; <i32> [#uses=1]
+        ret i32 %Z
+}
+
+define i16 @test5(i16 %A, i16 %B, i8 %C) {
+        %shift.upgrd.6 = zext i8 %C to i16              ; <i16> [#uses=1]
+        %X = shl i16 %A, %shift.upgrd.6         ; <i16> [#uses=1]
+        %Cv = sub i8 16, %C             ; <i8> [#uses=1]
+        %shift.upgrd.7 = zext i8 %Cv to i16             ; <i16> [#uses=1]
+        %Y = lshr i16 %B, %shift.upgrd.7                ; <i16> [#uses=1]
+        %Z = or i16 %Y, %X              ; <i16> [#uses=1]
+        ret i16 %Z
+}
+
diff --git a/test/CodeGen/X86/shift-folding.ll b/test/CodeGen/X86/shift-folding.ll
new file mode 100644
index 0000000..872817f
--- /dev/null
+++ b/test/CodeGen/X86/shift-folding.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=x86 | \
+; RUN:   grep {s\[ah\]\[rl\]l} | count 1
+
+define i32* @test1(i32* %P, i32 %X) {
+        %Y = lshr i32 %X, 2             ; <i32> [#uses=1]
+        %gep.upgrd.1 = zext i32 %Y to i64               ; <i64> [#uses=1]
+        %P2 = getelementptr i32* %P, i64 %gep.upgrd.1           ; <i32*> [#uses=1]
+        ret i32* %P2
+}
+
+define i32* @test2(i32* %P, i32 %X) {
+        %Y = shl i32 %X, 2              ; <i32> [#uses=1]
+        %gep.upgrd.2 = zext i32 %Y to i64               ; <i64> [#uses=1]
+        %P2 = getelementptr i32* %P, i64 %gep.upgrd.2           ; <i32*> [#uses=1]
+        ret i32* %P2
+}
+
+define i32* @test3(i32* %P, i32 %X) {
+        %Y = ashr i32 %X, 2             ; <i32> [#uses=1]
+        %P2 = getelementptr i32* %P, i32 %Y             ; <i32*> [#uses=1]
+        ret i32* %P2
+}
+
diff --git a/test/CodeGen/X86/shift-i128.ll b/test/CodeGen/X86/shift-i128.ll
new file mode 100644
index 0000000..c4d15ae
--- /dev/null
+++ b/test/CodeGen/X86/shift-i128.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -march=x86-64
+
+define void @t(i128 %x, i128 %a, i128* nocapture %r) nounwind {
+entry:
+	%0 = lshr i128 %x, %a
+	store i128 %0, i128* %r, align 16
+	ret void
+}
diff --git a/test/CodeGen/X86/shift-i256.ll b/test/CodeGen/X86/shift-i256.ll
new file mode 100644
index 0000000..d5f65a6
--- /dev/null
+++ b/test/CodeGen/X86/shift-i256.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86
+; RUN: llc < %s -march=x86-64
+
+define void @t(i256 %x, i256 %a, i256* nocapture %r) nounwind readnone {
+entry:
+	%0 = ashr i256 %x, %a
+	store i256 %0, i256* %r
+        ret void
+}
diff --git a/test/CodeGen/X86/shift-one.ll b/test/CodeGen/X86/shift-one.ll
new file mode 100644
index 0000000..0f80f90
--- /dev/null
+++ b/test/CodeGen/X86/shift-one.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86 | not grep leal
+
+@x = external global i32                ; <i32*> [#uses=1]
+
+define i32 @test() {
+        %tmp.0 = load i32* @x           ; <i32> [#uses=1]
+        %tmp.1 = shl i32 %tmp.0, 1              ; <i32> [#uses=1]
+        ret i32 %tmp.1
+}
+
diff --git a/test/CodeGen/X86/shift-parts.ll b/test/CodeGen/X86/shift-parts.ll
new file mode 100644
index 0000000..ce4f538
--- /dev/null
+++ b/test/CodeGen/X86/shift-parts.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86-64 | grep shrdq
+; PR4736
+
+%0 = type { i32, i8, [35 x i8] }
+
+@g_144 = external global %0, align 8              ; <%0*> [#uses=1]
+
+define i32 @int87(i32 %uint64p_8) nounwind {
+entry:
+  %srcval4 = load i320* bitcast (%0* @g_144 to i320*), align 8 ; <i320> [#uses=1]
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond, %entry
+  %call3.in.in.in.v = select i1 undef, i320 192, i320 128 ; <i320> [#uses=1]
+  %call3.in.in.in = lshr i320 %srcval4, %call3.in.in.in.v ; <i320> [#uses=1]
+  %call3.in = trunc i320 %call3.in.in.in to i32   ; <i32> [#uses=1]
+  %tobool = icmp eq i32 %call3.in, 0              ; <i1> [#uses=1]
+  br i1 %tobool, label %for.cond, label %if.then
+
+if.then:                                          ; preds = %for.cond
+  ret i32 1
+}
diff --git a/test/CodeGen/X86/shl_elim.ll b/test/CodeGen/X86/shl_elim.ll
new file mode 100644
index 0000000..4458891
--- /dev/null
+++ b/test/CodeGen/X86/shl_elim.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86 | grep {movl	8(.esp), %eax}
+; RUN: llc < %s -march=x86 | grep {shrl	.eax}
+; RUN: llc < %s -march=x86 | grep {movswl	.ax, .eax}
+
+define i32 @test1(i64 %a) {
+        %tmp29 = lshr i64 %a, 24                ; <i64> [#uses=1]
+        %tmp23 = trunc i64 %tmp29 to i32                ; <i32> [#uses=1]
+        %tmp410 = lshr i32 %tmp23, 9            ; <i32> [#uses=1]
+        %tmp45 = trunc i32 %tmp410 to i16               ; <i16> [#uses=1]
+        %tmp456 = sext i16 %tmp45 to i32                ; <i32> [#uses=1]
+        ret i32 %tmp456
+}
+
diff --git a/test/CodeGen/X86/shrink-fp-const1.ll b/test/CodeGen/X86/shrink-fp-const1.ll
new file mode 100644
index 0000000..49b9fa3
--- /dev/null
+++ b/test/CodeGen/X86/shrink-fp-const1.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 | not grep cvtss2sd
+; PR1264
+
+define double @foo(double %x) {
+        %y = fmul double %x, 5.000000e-01
+        ret double %y
+}
diff --git a/test/CodeGen/X86/shrink-fp-const2.ll b/test/CodeGen/X86/shrink-fp-const2.ll
new file mode 100644
index 0000000..3d5203b
--- /dev/null
+++ b/test/CodeGen/X86/shrink-fp-const2.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=x86 | grep flds
+; This should be a flds, not fldt.
+define x86_fp80 @test2() nounwind  {
+entry:
+	ret x86_fp80 0xK3FFFC000000000000000
+}
+
diff --git a/test/CodeGen/X86/sincos.ll b/test/CodeGen/X86/sincos.ll
new file mode 100644
index 0000000..13f9329
--- /dev/null
+++ b/test/CodeGen/X86/sincos.ll
@@ -0,0 +1,48 @@
+; Make sure this testcase codegens to the sin and cos instructions, not calls
+; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math  | \
+; RUN:   grep sin\$ | count 3
+; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math  | \
+; RUN:   grep cos\$ | count 3
+
+declare float  @sinf(float) readonly
+
+declare double @sin(double) readonly
+
+declare x86_fp80 @sinl(x86_fp80) readonly
+
+define float @test1(float %X) {
+        %Y = call float @sinf(float %X) readonly
+        ret float %Y
+}
+
+define double @test2(double %X) {
+        %Y = call double @sin(double %X) readonly
+        ret double %Y
+}
+
+define x86_fp80 @test3(x86_fp80 %X) {
+        %Y = call x86_fp80 @sinl(x86_fp80 %X) readonly
+        ret x86_fp80 %Y
+}
+
+declare float @cosf(float) readonly
+
+declare double @cos(double) readonly
+
+declare x86_fp80 @cosl(x86_fp80) readonly
+
+define float @test4(float %X) {
+        %Y = call float @cosf(float %X) readonly
+        ret float %Y
+}
+
+define double @test5(double %X) {
+        %Y = call double @cos(double %X) readonly
+        ret double %Y
+}
+
+define x86_fp80 @test6(x86_fp80 %X) {
+        %Y = call x86_fp80 @cosl(x86_fp80 %X) readonly
+        ret x86_fp80 %Y
+}
+
diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll
new file mode 100644
index 0000000..01d7373
--- /dev/null
+++ b/test/CodeGen/X86/sink-hoist.ll
@@ -0,0 +1,148 @@
+; RUN: llc < %s -march=x86-64 -asm-verbose=false -mtriple=x86_64-unknown-linux-gnu -post-RA-scheduler=true | FileCheck %s
+
+; Currently, floating-point selects are lowered to CFG triangles.
+; This means that one side of the select is always unconditionally
+; evaluated, however with MachineSink we can sink the other side so
+; that it's conditionally evaluated.
+
+; CHECK: foo:
+; CHECK:      divsd
+; CHECK-NEXT: testb $1, %dil
+; CHECK-NEXT: jne
+; CHECK-NEXT: divsd
+
+define double @foo(double %x, double %y, i1 %c) nounwind {
+  %a = fdiv double %x, 3.2
+  %b = fdiv double %y, 3.3
+  %z = select i1 %c, double %a, double %b
+  ret double %z
+}
+
+; Hoist floating-point constant-pool loads out of loops.
+
+; CHECK: bar:
+; CHECK: movsd
+; CHECK: align
+define void @bar(double* nocapture %p, i64 %n) nounwind {
+entry:
+  %0 = icmp sgt i64 %n, 0
+  br i1 %0, label %bb, label %return
+
+bb:
+  %i.03 = phi i64 [ 0, %entry ], [ %3, %bb ]
+  %scevgep = getelementptr double* %p, i64 %i.03
+  %1 = load double* %scevgep, align 8
+  %2 = fdiv double 3.200000e+00, %1
+  store double %2, double* %scevgep, align 8
+  %3 = add nsw i64 %i.03, 1
+  %exitcond = icmp eq i64 %3, %n
+  br i1 %exitcond, label %return, label %bb
+
+return:
+  ret void
+}
+
+; Sink instructions with dead EFLAGS defs.
+
+; CHECK: zzz:
+; CHECK:      je
+; CHECK-NEXT: orb
+
+define zeroext i8 @zzz(i8 zeroext %a, i8 zeroext %b) nounwind readnone {
+entry:
+  %tmp = zext i8 %a to i32                        ; <i32> [#uses=1]
+  %tmp2 = icmp eq i8 %a, 0                    ; <i1> [#uses=1]
+  %tmp3 = or i8 %b, -128                          ; <i8> [#uses=1]
+  %tmp4 = and i8 %b, 127                          ; <i8> [#uses=1]
+  %b_addr.0 = select i1 %tmp2, i8 %tmp4, i8 %tmp3 ; <i8> [#uses=1]
+  ret i8 %b_addr.0
+}
+
+; Codegen should hoist and CSE these constants.
+
+; CHECK: vv:
+; CHECK: LCPI4_0(%rip), %xmm0
+; CHECK: LCPI4_1(%rip), %xmm1
+; CHECK: LCPI4_2(%rip), %xmm2
+; CHECK: align
+; CHECK-NOT: LCPI
+; CHECK: ret
+
+@_minusZero.6007 = internal constant <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00> ; <<4 x float>*> [#uses=0]
+@twoTo23.6008 = internal constant <4 x float> <float 8.388608e+06, float 8.388608e+06, float 8.388608e+06, float 8.388608e+06> ; <<4 x float>*> [#uses=0]
+
+define void @vv(float* %y, float* %x, i32* %n) nounwind ssp {
+entry:
+  br label %bb60
+
+bb:                                               ; preds = %bb60
+  %0 = bitcast float* %x_addr.0 to <4 x float>*   ; <<4 x float>*> [#uses=1]
+  %1 = load <4 x float>* %0, align 16             ; <<4 x float>> [#uses=4]
+  %tmp20 = bitcast <4 x float> %1 to <4 x i32>    ; <<4 x i32>> [#uses=1]
+  %tmp22 = and <4 x i32> %tmp20, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647> ; <<4 x i32>> [#uses=1]
+  %tmp23 = bitcast <4 x i32> %tmp22 to <4 x float> ; <<4 x float>> [#uses=1]
+  %tmp25 = bitcast <4 x float> %1 to <4 x i32>    ; <<4 x i32>> [#uses=1]
+  %tmp27 = and <4 x i32> %tmp25, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> ; <<4 x i32>> [#uses=2]
+  %tmp30 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %tmp23, <4 x float> <float 8.388608e+06, float 8.388608e+06, float 8.388608e+06, float 8.388608e+06>, i8 5) ; <<4 x float>> [#uses=1]
+  %tmp34 = bitcast <4 x float> %tmp30 to <4 x i32> ; <<4 x i32>> [#uses=1]
+  %tmp36 = xor <4 x i32> %tmp34, <i32 -1, i32 -1, i32 -1, i32 -1> ; <<4 x i32>> [#uses=1]
+  %tmp37 = and <4 x i32> %tmp36, <i32 1258291200, i32 1258291200, i32 1258291200, i32 1258291200> ; <<4 x i32>> [#uses=1]
+  %tmp42 = or <4 x i32> %tmp37, %tmp27            ; <<4 x i32>> [#uses=1]
+  %tmp43 = bitcast <4 x i32> %tmp42 to <4 x float> ; <<4 x float>> [#uses=2]
+  %tmp45 = fadd <4 x float> %1, %tmp43            ; <<4 x float>> [#uses=1]
+  %tmp47 = fsub <4 x float> %tmp45, %tmp43        ; <<4 x float>> [#uses=2]
+  %tmp49 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %1, <4 x float> %tmp47, i8 1) ; <<4 x float>> [#uses=1]
+  %2 = bitcast <4 x float> %tmp49 to <4 x i32>    ; <<4 x i32>> [#uses=1]
+  %3 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %2) nounwind readnone ; <<4 x float>> [#uses=1]
+  %tmp53 = fadd <4 x float> %tmp47, %3            ; <<4 x float>> [#uses=1]
+  %tmp55 = bitcast <4 x float> %tmp53 to <4 x i32> ; <<4 x i32>> [#uses=1]
+  %tmp57 = or <4 x i32> %tmp55, %tmp27            ; <<4 x i32>> [#uses=1]
+  %tmp58 = bitcast <4 x i32> %tmp57 to <4 x float> ; <<4 x float>> [#uses=1]
+  %4 = bitcast float* %y_addr.0 to <4 x float>*   ; <<4 x float>*> [#uses=1]
+  store <4 x float> %tmp58, <4 x float>* %4, align 16
+  %5 = getelementptr float* %x_addr.0, i64 4      ; <float*> [#uses=1]
+  %6 = getelementptr float* %y_addr.0, i64 4      ; <float*> [#uses=1]
+  %7 = add i32 %i.0, 4                            ; <i32> [#uses=1]
+  br label %bb60
+
+bb60:                                             ; preds = %bb, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %7, %bb ]       ; <i32> [#uses=2]
+  %x_addr.0 = phi float* [ %x, %entry ], [ %5, %bb ] ; <float*> [#uses=2]
+  %y_addr.0 = phi float* [ %y, %entry ], [ %6, %bb ] ; <float*> [#uses=2]
+  %8 = load i32* %n, align 4                      ; <i32> [#uses=1]
+  %9 = icmp sgt i32 %8, %i.0                      ; <i1> [#uses=1]
+  br i1 %9, label %bb, label %return
+
+return:                                           ; preds = %bb60
+  ret void
+}
+
+declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
+
+declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
+
+; CodeGen should use the correct register class when extracting
+; a load from a zero-extending load for hoisting.
+
+; CHECK: default_get_pch_validity:
+; CHECK: movl cl_options_count(%rip), %ecx
+
+@cl_options_count = external constant i32         ; <i32*> [#uses=2]
+
+define void @default_get_pch_validity() nounwind {
+entry:
+  %tmp4 = load i32* @cl_options_count, align 4    ; <i32> [#uses=1]
+  %tmp5 = icmp eq i32 %tmp4, 0                    ; <i1> [#uses=1]
+  br i1 %tmp5, label %bb6, label %bb2
+
+bb2:                                              ; preds = %bb2, %entry
+  %i.019 = phi i64 [ 0, %entry ], [ %tmp25, %bb2 ] ; <i64> [#uses=1]
+  %tmp25 = add i64 %i.019, 1                      ; <i64> [#uses=2]
+  %tmp11 = load i32* @cl_options_count, align 4   ; <i32> [#uses=1]
+  %tmp12 = zext i32 %tmp11 to i64                 ; <i64> [#uses=1]
+  %tmp13 = icmp ugt i64 %tmp12, %tmp25            ; <i1> [#uses=1]
+  br i1 %tmp13, label %bb2, label %bb6
+
+bb6:                                              ; preds = %bb2, %entry
+  ret void
+}
diff --git a/test/CodeGen/X86/small-byval-memcpy.ll b/test/CodeGen/X86/small-byval-memcpy.ll
new file mode 100644
index 0000000..9ec9182e
--- /dev/null
+++ b/test/CodeGen/X86/small-byval-memcpy.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s | not grep movs
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+
+define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret  %agg.result, { x86_fp80, x86_fp80 }* byval align 4  %z) nounwind  {
+entry:
+	%iz = alloca { x86_fp80, x86_fp80 }		; <{ x86_fp80, x86_fp80 }*> [#uses=3]
+	%tmp1 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 1		; <x86_fp80*> [#uses=1]
+	%tmp2 = load x86_fp80* %tmp1, align 16		; <x86_fp80> [#uses=1]
+	%tmp3 = fsub x86_fp80 0xK80000000000000000000, %tmp2		; <x86_fp80> [#uses=1]
+	%tmp4 = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 1		; <x86_fp80*> [#uses=1]
+	%real = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 0		; <x86_fp80*> [#uses=1]
+	%tmp6 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 0		; <x86_fp80*> [#uses=1]
+	%tmp7 = load x86_fp80* %tmp6, align 16		; <x86_fp80> [#uses=1]
+	store x86_fp80 %tmp3, x86_fp80* %real, align 16
+	store x86_fp80 %tmp7, x86_fp80* %tmp4, align 16
+	call void @ccoshl( { x86_fp80, x86_fp80 }* noalias sret  %agg.result, { x86_fp80, x86_fp80 }* byval align 4  %iz ) nounwind 
+	ret void
+}
+
+declare void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret , { x86_fp80, x86_fp80 }* byval align 4 ) nounwind 
diff --git a/test/CodeGen/X86/smul-with-overflow-2.ll b/test/CodeGen/X86/smul-with-overflow-2.ll
new file mode 100644
index 0000000..7c23adb
--- /dev/null
+++ b/test/CodeGen/X86/smul-with-overflow-2.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86 | grep mul | count 1
+; RUN: llc < %s -march=x86 | grep add | count 3
+
+define i32 @t1(i32 %a, i32 %b) nounwind readnone {
+entry:
+        %tmp0 = add i32 %b, %a
+	%tmp1 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %tmp0, i32 2)
+	%tmp2 = extractvalue { i32, i1 } %tmp1, 0
+	ret i32 %tmp2
+}
+
+define i32 @t2(i32 %a, i32 %b) nounwind readnone {
+entry:
+        %tmp0 = add i32 %b, %a
+	%tmp1 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %tmp0, i32 4)
+	%tmp2 = extractvalue { i32, i1 } %tmp1, 0
+	ret i32 %tmp2
+}
+
+declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) nounwind
diff --git a/test/CodeGen/X86/smul-with-overflow-3.ll b/test/CodeGen/X86/smul-with-overflow-3.ll
new file mode 100644
index 0000000..49c31f5
--- /dev/null
+++ b/test/CodeGen/X86/smul-with-overflow-3.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=x86 | grep {jno} | count 1
+
+@ok = internal constant [4 x i8] c"%d\0A\00"
+@no = internal constant [4 x i8] c"no\0A\00"
+
+define i1 @func1(i32 %v1, i32 %v2) nounwind {
+entry:
+  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
+  %sum = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %normal
+
+overflow:
+  %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
+  ret i1 false
+
+normal:
+  %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+  ret i1 true
+}
+
+declare i32 @printf(i8*, ...) nounwind
+declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32)
diff --git a/test/CodeGen/X86/smul-with-overflow.ll b/test/CodeGen/X86/smul-with-overflow.ll
new file mode 100644
index 0000000..6d125e4
--- /dev/null
+++ b/test/CodeGen/X86/smul-with-overflow.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=x86 | grep {jo} | count 1
+
+@ok = internal constant [4 x i8] c"%d\0A\00"
+@no = internal constant [4 x i8] c"no\0A\00"
+
+define i1 @func1(i32 %v1, i32 %v2) nounwind {
+entry:
+  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
+  %sum = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %normal
+
+normal:
+  %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+  ret i1 true
+
+overflow:
+  %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
+  ret i1 false
+}
+
+declare i32 @printf(i8*, ...) nounwind
+declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32)
diff --git a/test/CodeGen/X86/soft-fp.ll b/test/CodeGen/X86/soft-fp.ll
new file mode 100644
index 0000000..a52135d
--- /dev/null
+++ b/test/CodeGen/X86/soft-fp.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=x86    -mattr=+sse2 -soft-float | not grep xmm
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 -soft-float | not grep xmm
+
+	%struct.__va_list_tag = type { i32, i32, i8*, i8* }
+
+define i32 @t1(i32 %a, ...) nounwind {
+entry:
+	%va = alloca [1 x %struct.__va_list_tag], align 8		; <[1 x %struct.__va_list_tag]*> [#uses=2]
+	%va12 = bitcast [1 x %struct.__va_list_tag]* %va to i8*		; <i8*> [#uses=2]
+	call void @llvm.va_start(i8* %va12)
+	%va3 = getelementptr [1 x %struct.__va_list_tag]* %va, i64 0, i64 0		; <%struct.__va_list_tag*> [#uses=1]
+	call void @bar(%struct.__va_list_tag* %va3) nounwind
+	call void @llvm.va_end(i8* %va12)
+	ret i32 undef
+}
+
+declare void @llvm.va_start(i8*) nounwind
+
+declare void @bar(%struct.__va_list_tag*)
+
+declare void @llvm.va_end(i8*) nounwind
+
+define float @t2(float %a, float %b) nounwind readnone {
+entry:
+	%0 = fadd float %a, %b		; <float> [#uses=1]
+	ret float %0
+}
diff --git a/test/CodeGen/X86/splat-scalar-load.ll b/test/CodeGen/X86/splat-scalar-load.ll
new file mode 100644
index 0000000..2b13029
--- /dev/null
+++ b/test/CodeGen/X86/splat-scalar-load.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | FileCheck %s
+; rdar://7434544
+
+define <2 x i64> @t2() nounwind ssp {
+entry:
+; CHECK: t2:
+; CHECK: pshufd	$85, (%esp), %xmm0
+  %array = alloca [8 x float], align 4
+  %arrayidx = getelementptr inbounds [8 x float]* %array, i32 0, i32 1
+  %tmp2 = load float* %arrayidx
+  %vecinit = insertelement <4 x float> undef, float %tmp2, i32 0
+  %vecinit5 = insertelement <4 x float> %vecinit, float %tmp2, i32 1
+  %vecinit7 = insertelement <4 x float> %vecinit5, float %tmp2, i32 2
+  %vecinit9 = insertelement <4 x float> %vecinit7, float %tmp2, i32 3
+  %0 = bitcast <4 x float> %vecinit9 to <2 x i64>
+  ret <2 x i64> %0
+}
diff --git a/test/CodeGen/X86/split-eh-lpad-edges.ll b/test/CodeGen/X86/split-eh-lpad-edges.ll
new file mode 100644
index 0000000..fd40a7f
--- /dev/null
+++ b/test/CodeGen/X86/split-eh-lpad-edges.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep jmp
+; rdar://6647639
+
+	%struct.FetchPlanHeader = type { i8*, i8*, i32, i8*, i8*, i8*, i8*, i8*, %struct.NSObject* (%struct.NSObject*, %struct.objc_selector*, ...)*, %struct.__attributeDescriptionFlags }
+	%struct.NSArray = type { %struct.NSObject }
+	%struct.NSAutoreleasePool = type { %struct.NSObject, i8*, i8*, i8*, i8* }
+	%struct.NSObject = type { %struct.NSObject* }
+	%struct.__attributeDescriptionFlags = type <{ i32 }>
+	%struct._message_ref_t = type { %struct.NSObject* (%struct.NSObject*, %struct._message_ref_t*, ...)*, %struct.objc_selector* }
+	%struct.objc_selector = type opaque
+@"\01l_objc_msgSend_fixup_alloc" = external global %struct._message_ref_t, align 16		; <%struct._message_ref_t*> [#uses=2]
+
+define %struct.NSArray* @newFetchedRowsForFetchPlan_MT(%struct.FetchPlanHeader* %fetchPlan, %struct.objc_selector* %selectionMethod, %struct.NSObject* %selectionParameter) ssp {
+entry:
+	%0 = invoke %struct.NSObject* null(%struct.NSObject* null, %struct._message_ref_t* @"\01l_objc_msgSend_fixup_alloc")
+			to label %invcont unwind label %lpad		; <%struct.NSObject*> [#uses=1]
+
+invcont:		; preds = %entry
+	%1 = invoke %struct.NSObject* (%struct.NSObject*, %struct.objc_selector*, ...)* @objc_msgSend(%struct.NSObject* %0, %struct.objc_selector* null)
+			to label %invcont26 unwind label %lpad		; <%struct.NSObject*> [#uses=0]
+
+invcont26:		; preds = %invcont
+	%2 = invoke %struct.NSObject* null(%struct.NSObject* null, %struct._message_ref_t* @"\01l_objc_msgSend_fixup_alloc")
+			to label %invcont27 unwind label %lpad		; <%struct.NSObject*> [#uses=0]
+
+invcont27:		; preds = %invcont26
+	unreachable
+
+lpad:		; preds = %invcont26, %invcont, %entry
+	%pool.1 = phi %struct.NSAutoreleasePool* [ null, %entry ], [ null, %invcont ], [ null, %invcont26 ]		; <%struct.NSAutoreleasePool*> [#uses=0]
+	unreachable
+}
+
+declare %struct.NSObject* @objc_msgSend(%struct.NSObject*, %struct.objc_selector*, ...)
diff --git a/test/CodeGen/X86/split-select.ll b/test/CodeGen/X86/split-select.ll
new file mode 100644
index 0000000..07d4d52
--- /dev/null
+++ b/test/CodeGen/X86/split-select.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=x86-64 | grep test | count 1
+
+define void @foo(i1 %c, <2 x i16> %a, <2 x i16> %b, <2 x i16>* %p) {
+  %x = select i1 %c, <2 x i16> %a, <2 x i16> %b
+  store <2 x i16> %x, <2 x i16>* %p
+  ret void
+}
diff --git a/test/CodeGen/X86/split-vector-rem.ll b/test/CodeGen/X86/split-vector-rem.ll
new file mode 100644
index 0000000..681c6b0
--- /dev/null
+++ b/test/CodeGen/X86/split-vector-rem.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86-64 | grep div | count 16
+; RUN: llc < %s -march=x86-64 | grep fmodf | count 8
+
+define <8 x i32> @foo(<8 x i32> %t, <8 x i32> %u) {
+	%m = srem <8 x i32> %t, %u
+	ret <8 x i32> %m
+}
+define <8 x i32> @bar(<8 x i32> %t, <8 x i32> %u) {
+	%m = urem <8 x i32> %t, %u
+	ret <8 x i32> %m
+}
+define <8 x float> @qux(<8 x float> %t, <8 x float> %u) {
+	%m = frem <8 x float> %t, %u
+	ret <8 x float> %m
+}
diff --git a/test/CodeGen/X86/sret.ll b/test/CodeGen/X86/sret.ll
new file mode 100644
index 0000000..b945530
--- /dev/null
+++ b/test/CodeGen/X86/sret.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=x86 | grep ret | grep 4
+
+	%struct.foo = type { [4 x i32] }
+
+define void @bar(%struct.foo* noalias sret %agg.result) nounwind  {
+entry:
+	%tmp1 = getelementptr %struct.foo* %agg.result, i32 0, i32 0
+	%tmp3 = getelementptr [4 x i32]* %tmp1, i32 0, i32 0
+	store i32 1, i32* %tmp3, align 8
+        ret void
+}
+
+@dst = external global i32
+
+define void @foo() nounwind {
+	%memtmp = alloca %struct.foo, align 4
+        call void @bar( %struct.foo* sret %memtmp ) nounwind
+        %tmp4 = getelementptr %struct.foo* %memtmp, i32 0, i32 0
+	%tmp5 = getelementptr [4 x i32]* %tmp4, i32 0, i32 0
+        %tmp6 = load i32* %tmp5
+        store i32 %tmp6, i32* @dst
+        ret void
+}
diff --git a/test/CodeGen/X86/sse-align-0.ll b/test/CodeGen/X86/sse-align-0.ll
new file mode 100644
index 0000000..b12a87d
--- /dev/null
+++ b/test/CodeGen/X86/sse-align-0.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86-64 | not grep mov
+
+define <4 x float> @foo(<4 x float>* %p, <4 x float> %x) nounwind {
+  %t = load <4 x float>* %p
+  %z = fmul <4 x float> %t, %x
+  ret <4 x float> %z
+}
+define <2 x double> @bar(<2 x double>* %p, <2 x double> %x) nounwind {
+  %t = load <2 x double>* %p
+  %z = fmul <2 x double> %t, %x
+  ret <2 x double> %z
+}
diff --git a/test/CodeGen/X86/sse-align-1.ll b/test/CodeGen/X86/sse-align-1.ll
new file mode 100644
index 0000000..c7a5cd5
--- /dev/null
+++ b/test/CodeGen/X86/sse-align-1.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86-64 | grep movap | count 2
+
+define <4 x float> @foo(<4 x float>* %p) nounwind {
+  %t = load <4 x float>* %p
+  ret <4 x float> %t
+}
+define <2 x double> @bar(<2 x double>* %p) nounwind {
+  %t = load <2 x double>* %p
+  ret <2 x double> %t
+}
diff --git a/test/CodeGen/X86/sse-align-10.ll b/test/CodeGen/X86/sse-align-10.ll
new file mode 100644
index 0000000..0f91697
--- /dev/null
+++ b/test/CodeGen/X86/sse-align-10.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=x86-64 | grep movups | count 1
+
+define <2 x i64> @bar(<2 x i64>* %p) nounwind {
+  %t = load <2 x i64>* %p, align 8
+  ret <2 x i64> %t
+}
diff --git a/test/CodeGen/X86/sse-align-11.ll b/test/CodeGen/X86/sse-align-11.ll
new file mode 100644
index 0000000..aa1b437
--- /dev/null
+++ b/test/CodeGen/X86/sse-align-11.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i686-apple-darwin8 | grep movaps
+; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=linux | grep movups
+
+define <4 x float> @foo(float %a, float %b, float %c, float %d) nounwind {
+entry:
+        %tmp6 = insertelement <4 x float> undef, float %a, i32 0               
+        %tmp7 = insertelement <4 x float> %tmp6, float %b, i32 1               
+        %tmp8 = insertelement <4 x float> %tmp7, float %c, i32 2               
+        %tmp9 = insertelement <4 x float> %tmp8, float %d, i32 3               
+        ret <4 x float> %tmp9
+}
+
diff --git a/test/CodeGen/X86/sse-align-12.ll b/test/CodeGen/X86/sse-align-12.ll
new file mode 100644
index 0000000..4f025b9
--- /dev/null
+++ b/test/CodeGen/X86/sse-align-12.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -march=x86-64 > %t
+; RUN: grep unpck %t | count 2
+; RUN: grep shuf %t | count 2
+; RUN: grep ps %t | count 4
+; RUN: grep pd %t | count 4
+; RUN: grep movup %t | count 4
+
+define <4 x float> @a(<4 x float>* %y) nounwind {
+  %x = load <4 x float>* %y, align 4
+  %a = extractelement <4 x float> %x, i32 0
+  %b = extractelement <4 x float> %x, i32 1
+  %c = extractelement <4 x float> %x, i32 2
+  %d = extractelement <4 x float> %x, i32 3
+  %p = insertelement <4 x float> undef, float %d, i32 0
+  %q = insertelement <4 x float> %p, float %c, i32 1
+  %r = insertelement <4 x float> %q, float %b, i32 2
+  %s = insertelement <4 x float> %r, float %a, i32 3
+  ret <4 x float> %s
+}
+define <4 x float> @b(<4 x float>* %y, <4 x float> %z) nounwind {
+  %x = load <4 x float>* %y, align 4
+  %a = extractelement <4 x float> %x, i32 2
+  %b = extractelement <4 x float> %x, i32 3
+  %c = extractelement <4 x float> %z, i32 2
+  %d = extractelement <4 x float> %z, i32 3
+  %p = insertelement <4 x float> undef, float %c, i32 0
+  %q = insertelement <4 x float> %p, float %a, i32 1
+  %r = insertelement <4 x float> %q, float %d, i32 2
+  %s = insertelement <4 x float> %r, float %b, i32 3
+  ret <4 x float> %s
+}
+define <2 x double> @c(<2 x double>* %y) nounwind {
+  %x = load <2 x double>* %y, align 8
+  %a = extractelement <2 x double> %x, i32 0
+  %c = extractelement <2 x double> %x, i32 1
+  %p = insertelement <2 x double> undef, double %c, i32 0
+  %r = insertelement <2 x double> %p, double %a, i32 1
+  ret <2 x double> %r
+}
+define <2 x double> @d(<2 x double>* %y, <2 x double> %z) nounwind {
+  %x = load <2 x double>* %y, align 8
+  %a = extractelement <2 x double> %x, i32 1
+  %c = extractelement <2 x double> %z, i32 1
+  %p = insertelement <2 x double> undef, double %c, i32 0
+  %r = insertelement <2 x double> %p, double %a, i32 1
+  ret <2 x double> %r
+}
diff --git a/test/CodeGen/X86/sse-align-2.ll b/test/CodeGen/X86/sse-align-2.ll
new file mode 100644
index 0000000..102c3fb
--- /dev/null
+++ b/test/CodeGen/X86/sse-align-2.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86-64 | grep movup | count 2
+
+define <4 x float> @foo(<4 x float>* %p, <4 x float> %x) nounwind {
+  %t = load <4 x float>* %p, align 4
+  %z = fmul <4 x float> %t, %x
+  ret <4 x float> %z
+}
+define <2 x double> @bar(<2 x double>* %p, <2 x double> %x) nounwind {
+  %t = load <2 x double>* %p, align 8
+  %z = fmul <2 x double> %t, %x
+  ret <2 x double> %z
+}
diff --git a/test/CodeGen/X86/sse-align-3.ll b/test/CodeGen/X86/sse-align-3.ll
new file mode 100644
index 0000000..c42f7f0
--- /dev/null
+++ b/test/CodeGen/X86/sse-align-3.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86-64 | grep movap | count 2
+
+define void @foo(<4 x float>* %p, <4 x float> %x) nounwind {
+  store <4 x float> %x, <4 x float>* %p
+  ret void
+}
+define void @bar(<2 x double>* %p, <2 x double> %x) nounwind {
+  store <2 x double> %x, <2 x double>* %p
+  ret void
+}
diff --git a/test/CodeGen/X86/sse-align-4.ll b/test/CodeGen/X86/sse-align-4.ll
new file mode 100644
index 0000000..4c59934
--- /dev/null
+++ b/test/CodeGen/X86/sse-align-4.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86-64 | grep movup | count 2
+
+define void @foo(<4 x float>* %p, <4 x float> %x) nounwind {
+  store <4 x float> %x, <4 x float>* %p, align 4
+  ret void
+}
+define void @bar(<2 x double>* %p, <2 x double> %x) nounwind {
+  store <2 x double> %x, <2 x double>* %p, align 8
+  ret void
+}
diff --git a/test/CodeGen/X86/sse-align-5.ll b/test/CodeGen/X86/sse-align-5.ll
new file mode 100644
index 0000000..21cd231
--- /dev/null
+++ b/test/CodeGen/X86/sse-align-5.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=x86-64 | grep movaps | count 1
+
+define <2 x i64> @bar(<2 x i64>* %p) nounwind {
+  %t = load <2 x i64>* %p
+  ret <2 x i64> %t
+}
diff --git a/test/CodeGen/X86/sse-align-6.ll b/test/CodeGen/X86/sse-align-6.ll
new file mode 100644
index 0000000..0bbf422
--- /dev/null
+++ b/test/CodeGen/X86/sse-align-6.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=x86-64 | grep movups | count 1
+
+define <2 x i64> @bar(<2 x i64>* %p, <2 x i64> %x) nounwind {
+  %t = load <2 x i64>* %p, align 8
+  %z = mul <2 x i64> %t, %x
+  ret <2 x i64> %z
+}
diff --git a/test/CodeGen/X86/sse-align-7.ll b/test/CodeGen/X86/sse-align-7.ll
new file mode 100644
index 0000000..5784481
--- /dev/null
+++ b/test/CodeGen/X86/sse-align-7.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=x86-64 | grep movaps | count 1
+
+define void @bar(<2 x i64>* %p, <2 x i64> %x) nounwind {
+  store <2 x i64> %x, <2 x i64>* %p
+  ret void
+}
diff --git a/test/CodeGen/X86/sse-align-8.ll b/test/CodeGen/X86/sse-align-8.ll
new file mode 100644
index 0000000..cfeff81
--- /dev/null
+++ b/test/CodeGen/X86/sse-align-8.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=x86-64 | grep movups | count 1
+
+define void @bar(<2 x i64>* %p, <2 x i64> %x) nounwind {
+  store <2 x i64> %x, <2 x i64>* %p, align 8
+  ret void
+}
diff --git a/test/CodeGen/X86/sse-align-9.ll b/test/CodeGen/X86/sse-align-9.ll
new file mode 100644
index 0000000..cb26b95
--- /dev/null
+++ b/test/CodeGen/X86/sse-align-9.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86-64 | grep movup | count 2
+
+define <4 x float> @foo(<4 x float>* %p) nounwind {
+  %t = load <4 x float>* %p, align 4
+  ret <4 x float> %t
+}
+define <2 x double> @bar(<2 x double>* %p) nounwind {
+  %t = load <2 x double>* %p, align 8
+  ret <2 x double> %t
+}
diff --git a/test/CodeGen/X86/sse-fcopysign.ll b/test/CodeGen/X86/sse-fcopysign.ll
new file mode 100644
index 0000000..0e0e4a9
--- /dev/null
+++ b/test/CodeGen/X86/sse-fcopysign.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep test
+
+define float @tst1(float %a, float %b) {
+	%tmp = tail call float @copysignf( float %b, float %a )
+	ret float %tmp
+}
+
+define double @tst2(double %a, float %b, float %c) {
+	%tmp1 = fadd float %b, %c
+	%tmp2 = fpext float %tmp1 to double
+	%tmp = tail call double @copysign( double %a, double %tmp2 )
+	ret double %tmp
+}
+
+declare float @copysignf(float, float)
+declare double @copysign(double, double)
diff --git a/test/CodeGen/X86/sse-load-ret.ll b/test/CodeGen/X86/sse-load-ret.ll
new file mode 100644
index 0000000..1ebcb1a
--- /dev/null
+++ b/test/CodeGen/X86/sse-load-ret.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep movss
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep xmm
+
+define double @test1(double* %P) {
+        %X = load double* %P            ; <double> [#uses=1]
+        ret double %X
+}
+
+define double @test2() {
+        ret double 1.234560e+03
+}
+
+
+; FIXME: Todo
+;double %test3(bool %B) {
+;	%C = select bool %B, double 123.412, double 523.01123123
+;	ret double %C
+;}
+
diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll
new file mode 100644
index 0000000..17ffb5e
--- /dev/null
+++ b/test/CodeGen/X86/sse-minmax.ll
@@ -0,0 +1,392 @@
+; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s
+
+; Some of these patterns can be matched as SSE min or max. Some of
+; then can be matched provided that the operands are swapped.
+; Some of them can't be matched at all and require a comparison
+; and a conditional branch.
+
+; The naming convention is {,x_}{o,u}{gt,lt,ge,le}{,_inverse}
+; x_ : use 0.0 instead of %y
+; _inverse : swap the arms of the select.
+
+; CHECK:      ogt:
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @ogt(double %x, double %y) nounwind {
+  %c = fcmp ogt double %x, %y
+  %d = select i1 %c, double %x, double %y
+  ret double %d
+}
+
+; CHECK:      olt:
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @olt(double %x, double %y) nounwind {
+  %c = fcmp olt double %x, %y
+  %d = select i1 %c, double %x, double %y
+  ret double %d
+}
+
+; CHECK:      ogt_inverse:
+; CHECK-NEXT: minsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @ogt_inverse(double %x, double %y) nounwind {
+  %c = fcmp ogt double %x, %y
+  %d = select i1 %c, double %y, double %x
+  ret double %d
+}
+
+; CHECK:      olt_inverse:
+; CHECK-NEXT: maxsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @olt_inverse(double %x, double %y) nounwind {
+  %c = fcmp olt double %x, %y
+  %d = select i1 %c, double %y, double %x
+  ret double %d
+}
+
+; CHECK:      oge:
+; CHECK-NEXT: ucomisd %xmm1, %xmm0
+define double @oge(double %x, double %y) nounwind {
+  %c = fcmp oge double %x, %y
+  %d = select i1 %c, double %x, double %y
+  ret double %d
+}
+
+; CHECK:      ole:
+; CHECK-NEXT: ucomisd %xmm0, %xmm1
+define double @ole(double %x, double %y) nounwind {
+  %c = fcmp ole double %x, %y
+  %d = select i1 %c, double %x, double %y
+  ret double %d
+}
+
+; CHECK:      oge_inverse:
+; CHECK-NEXT: ucomisd %xmm1, %xmm0
+define double @oge_inverse(double %x, double %y) nounwind {
+  %c = fcmp oge double %x, %y
+  %d = select i1 %c, double %y, double %x
+  ret double %d
+}
+
+; CHECK:      ole_inverse:
+; CHECK-NEXT: ucomisd %xmm0, %xmm1
+define double @ole_inverse(double %x, double %y) nounwind {
+  %c = fcmp ole double %x, %y
+  %d = select i1 %c, double %y, double %x
+  ret double %d
+}
+
+; CHECK:      x_ogt:
+; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ogt(double %x) nounwind {
+  %c = fcmp ogt double %x, 0.000000e+00
+  %d = select i1 %c, double %x, double 0.000000e+00
+  ret double %d
+}
+
+; CHECK:      x_olt:
+; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_olt(double %x) nounwind {
+  %c = fcmp olt double %x, 0.000000e+00
+  %d = select i1 %c, double %x, double 0.000000e+00
+  ret double %d
+}
+
+; CHECK:      x_ogt_inverse:
+; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: minsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ogt_inverse(double %x) nounwind {
+  %c = fcmp ogt double %x, 0.000000e+00
+  %d = select i1 %c, double 0.000000e+00, double %x
+  ret double %d
+}
+
+; CHECK:      x_olt_inverse:
+; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: maxsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_olt_inverse(double %x) nounwind {
+  %c = fcmp olt double %x, 0.000000e+00
+  %d = select i1 %c, double 0.000000e+00, double %x
+  ret double %d
+}
+
+; CHECK:      x_oge:
+; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_oge(double %x) nounwind {
+  %c = fcmp oge double %x, 0.000000e+00
+  %d = select i1 %c, double %x, double 0.000000e+00
+  ret double %d
+}
+
+; CHECK:      x_ole:
+; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ole(double %x) nounwind {
+  %c = fcmp ole double %x, 0.000000e+00
+  %d = select i1 %c, double %x, double 0.000000e+00
+  ret double %d
+}
+
+; CHECK:      x_oge_inverse:
+; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: minsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_oge_inverse(double %x) nounwind {
+  %c = fcmp oge double %x, 0.000000e+00
+  %d = select i1 %c, double 0.000000e+00, double %x
+  ret double %d
+}
+
+; CHECK:      x_ole_inverse:
+; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: maxsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ole_inverse(double %x) nounwind {
+  %c = fcmp ole double %x, 0.000000e+00
+  %d = select i1 %c, double 0.000000e+00, double %x
+  ret double %d
+}
+
+; CHECK:      ugt:
+; CHECK-NEXT: ucomisd %xmm0, %xmm1
+define double @ugt(double %x, double %y) nounwind {
+  %c = fcmp ugt double %x, %y
+  %d = select i1 %c, double %x, double %y
+  ret double %d
+}
+
+; CHECK:      ult:
+; CHECK-NEXT: ucomisd %xmm1, %xmm0
+define double @ult(double %x, double %y) nounwind {
+  %c = fcmp ult double %x, %y
+  %d = select i1 %c, double %x, double %y
+  ret double %d
+}
+
+; CHECK:      ugt_inverse:
+; CHECK-NEXT: ucomisd %xmm0, %xmm1
+define double @ugt_inverse(double %x, double %y) nounwind {
+  %c = fcmp ugt double %x, %y
+  %d = select i1 %c, double %y, double %x
+  ret double %d
+}
+
+; CHECK:      ult_inverse:
+; CHECK-NEXT: ucomisd %xmm1, %xmm0
+define double @ult_inverse(double %x, double %y) nounwind {
+  %c = fcmp ult double %x, %y
+  %d = select i1 %c, double %y, double %x
+  ret double %d
+}
+
+; CHECK:      uge:
+; CHECK-NEXT: maxsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @uge(double %x, double %y) nounwind {
+  %c = fcmp uge double %x, %y
+  %d = select i1 %c, double %x, double %y
+  ret double %d
+}
+
+; CHECK:      ule:
+; CHECK-NEXT: minsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @ule(double %x, double %y) nounwind {
+  %c = fcmp ule double %x, %y
+  %d = select i1 %c, double %x, double %y
+  ret double %d
+}
+
+; CHECK:      uge_inverse:
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @uge_inverse(double %x, double %y) nounwind {
+  %c = fcmp uge double %x, %y
+  %d = select i1 %c, double %y, double %x
+  ret double %d
+}
+
+; CHECK:      ule_inverse:
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @ule_inverse(double %x, double %y) nounwind {
+  %c = fcmp ule double %x, %y
+  %d = select i1 %c, double %y, double %x
+  ret double %d
+}
+
+; CHECK:      x_ugt:
+; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: maxsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ugt(double %x) nounwind {
+  %c = fcmp ugt double %x, 0.000000e+00
+  %d = select i1 %c, double %x, double 0.000000e+00
+  ret double %d
+}
+
+; CHECK:      x_ult:
+; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: minsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ult(double %x) nounwind {
+  %c = fcmp ult double %x, 0.000000e+00
+  %d = select i1 %c, double %x, double 0.000000e+00
+  ret double %d
+}
+
+; CHECK:      x_ugt_inverse:
+; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ugt_inverse(double %x) nounwind {
+  %c = fcmp ugt double %x, 0.000000e+00
+  %d = select i1 %c, double 0.000000e+00, double %x
+  ret double %d
+}
+
+; CHECK:      x_ult_inverse:
+; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ult_inverse(double %x) nounwind {
+  %c = fcmp ult double %x, 0.000000e+00
+  %d = select i1 %c, double 0.000000e+00, double %x
+  ret double %d
+}
+
+; CHECK:      x_uge:
+; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: maxsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_uge(double %x) nounwind {
+  %c = fcmp uge double %x, 0.000000e+00
+  %d = select i1 %c, double %x, double 0.000000e+00
+  ret double %d
+}
+
+; CHECK:      x_ule:
+; CHECK-NEXT: pxor   %xmm1, %xmm1
+; CHECK-NEXT: minsd  %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ule(double %x) nounwind {
+  %c = fcmp ule double %x, 0.000000e+00
+  %d = select i1 %c, double %x, double 0.000000e+00
+  ret double %d
+}
+
+; CHECK:      x_uge_inverse:
+; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_uge_inverse(double %x) nounwind {
+  %c = fcmp uge double %x, 0.000000e+00
+  %d = select i1 %c, double 0.000000e+00, double %x
+  ret double %d
+}
+
+; CHECK:      x_ule_inverse:
+; CHECK-NEXT: pxor  %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: ret
+define double @x_ule_inverse(double %x) nounwind {
+  %c = fcmp ule double %x, 0.000000e+00
+  %d = select i1 %c, double 0.000000e+00, double %x
+  ret double %d
+}
+
+; Test a few more misc. cases.
+
+; CHECK: clampTo3k_a:
+; CHECK: minsd
+define double @clampTo3k_a(double %x) nounwind readnone {
+entry:
+  %0 = fcmp ogt double %x, 3.000000e+03           ; <i1> [#uses=1]
+  %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+  ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_b:
+; CHECK: minsd
+define double @clampTo3k_b(double %x) nounwind readnone {
+entry:
+  %0 = fcmp uge double %x, 3.000000e+03           ; <i1> [#uses=1]
+  %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+  ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_c:
+; CHECK: maxsd
+define double @clampTo3k_c(double %x) nounwind readnone {
+entry:
+  %0 = fcmp olt double %x, 3.000000e+03           ; <i1> [#uses=1]
+  %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+  ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_d:
+; CHECK: maxsd
+define double @clampTo3k_d(double %x) nounwind readnone {
+entry:
+  %0 = fcmp ule double %x, 3.000000e+03           ; <i1> [#uses=1]
+  %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+  ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_e:
+; CHECK: maxsd
+define double @clampTo3k_e(double %x) nounwind readnone {
+entry:
+  %0 = fcmp olt double %x, 3.000000e+03           ; <i1> [#uses=1]
+  %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+  ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_f:
+; CHECK: maxsd
+define double @clampTo3k_f(double %x) nounwind readnone {
+entry:
+  %0 = fcmp ule double %x, 3.000000e+03           ; <i1> [#uses=1]
+  %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+  ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_g:
+; CHECK: minsd
+define double @clampTo3k_g(double %x) nounwind readnone {
+entry:
+  %0 = fcmp ogt double %x, 3.000000e+03           ; <i1> [#uses=1]
+  %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+  ret double %x_addr.0
+}
+
+; CHECK: clampTo3k_h:
+; CHECK: minsd
+define double @clampTo3k_h(double %x) nounwind readnone {
+entry:
+  %0 = fcmp uge double %x, 3.000000e+03           ; <i1> [#uses=1]
+  %x_addr.0 = select i1 %0, double 3.000000e+03, double %x ; <double> [#uses=1]
+  ret double %x_addr.0
+}
diff --git a/test/CodeGen/X86/sse-varargs.ll b/test/CodeGen/X86/sse-varargs.ll
new file mode 100644
index 0000000..da38f0e
--- /dev/null
+++ b/test/CodeGen/X86/sse-varargs.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep xmm | grep esp
+
+define i32 @t() nounwind  {
+entry:
+	tail call void (i32, ...)* @foo( i32 1, <4 x i32> < i32 10, i32 11, i32 12, i32 13 > ) nounwind 
+	ret i32 0
+}
+
+declare void @foo(i32, ...)
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
new file mode 100644
index 0000000..f2b8010
--- /dev/null
+++ b/test/CodeGen/X86/sse2.ll
@@ -0,0 +1,34 @@
+; Tests for SSE2 and below, without SSE3+.
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=pentium4 -O3 | FileCheck %s
+
+define void @t1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind  {
+	%tmp3 = load <2 x double>* %A, align 16
+	%tmp7 = insertelement <2 x double> undef, double %B, i32 0
+	%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 >
+	store <2 x double> %tmp9, <2 x double>* %r, align 16
+	ret void
+        
+; CHECK: t1:
+; CHECK: 	movl	8(%esp), %eax
+; CHECK-NEXT: 	movl	4(%esp), %ecx
+; CHECK-NEXT: 	movapd	(%eax), %xmm0
+; CHECK-NEXT: 	movlpd	12(%esp), %xmm0
+; CHECK-NEXT: 	movapd	%xmm0, (%ecx)
+; CHECK-NEXT: 	ret
+}
+
+define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind  {
+	%tmp3 = load <2 x double>* %A, align 16
+	%tmp7 = insertelement <2 x double> undef, double %B, i32 0
+	%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 >
+	store <2 x double> %tmp9, <2 x double>* %r, align 16
+	ret void
+        
+; CHECK: t2:
+; CHECK: 	movl	8(%esp), %eax
+; CHECK-NEXT: 	movl	4(%esp), %ecx
+; CHECK-NEXT: 	movapd	(%eax), %xmm0
+; CHECK-NEXT: 	movhpd	12(%esp), %xmm0
+; CHECK-NEXT: 	movapd	%xmm0, (%ecx)
+; CHECK-NEXT: 	ret
+}
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
new file mode 100644
index 0000000..b2af7c9
--- /dev/null
+++ b/test/CodeGen/X86/sse3.ll
@@ -0,0 +1,263 @@
+; These are tests for SSE3 codegen.  Yonah has SSE3 and earlier but not SSSE3+.
+
+; RUN: llc < %s -march=x86-64 -mcpu=yonah -mtriple=i686-apple-darwin9 -O3 \
+; RUN:              | FileCheck %s --check-prefix=X64
+
+; Test for v8i16 lowering where we extract the first element of the vector and
+; place it in the second element of the result.
+
+define void @t0(<8 x i16>* %dest, <8 x i16>* %old) nounwind {
+entry:
+	%tmp3 = load <8 x i16>* %old
+	%tmp6 = shufflevector <8 x i16> %tmp3,
+                <8 x i16> < i16 0, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef >,
+                <8 x i32> < i32 8, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef  >
+	store <8 x i16> %tmp6, <8 x i16>* %dest
+	ret void
+        
+; X64: t0:
+; X64: 	movddup	(%rsi), %xmm0
+; X64:	xorl	%eax, %eax
+; X64:  pshuflw	$0, %xmm0, %xmm0
+; X64:	pinsrw	$0, %eax, %xmm0
+; X64:	movaps	%xmm0, (%rdi)
+; X64:	ret
+}
+
+define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = load <8 x i16>* %B
+	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
+	ret <8 x i16> %tmp3
+        
+; X64: t1:
+; X64: 	movl	(%rsi), %eax
+; X64: 	movaps	(%rdi), %xmm0
+; X64: 	pinsrw	$0, %eax, %xmm0
+; X64: 	ret
+}
+
+define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) nounwind {
+	%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7 >
+	ret <8 x i16> %tmp
+; X64: t2:
+; X64:	pextrw	$1, %xmm1, %eax
+; X64:	pinsrw	$0, %eax, %xmm0
+; X64:	pinsrw	$3, %eax, %xmm0
+; X64:	ret
+}
+
+define <8 x i16> @t3(<8 x i16> %A, <8 x i16> %B) nounwind {
+	%tmp = shufflevector <8 x i16> %A, <8 x i16> %A, <8 x i32> < i32 8, i32 3, i32 2, i32 13, i32 7, i32 6, i32 5, i32 4 >
+	ret <8 x i16> %tmp
+; X64: t3:
+; X64: 	pextrw	$5, %xmm0, %eax
+; X64: 	pshuflw	$44, %xmm0, %xmm0
+; X64: 	pshufhw	$27, %xmm0, %xmm0
+; X64: 	pinsrw	$3, %eax, %xmm0
+; X64: 	ret
+}
+
+define <8 x i16> @t4(<8 x i16> %A, <8 x i16> %B) nounwind {
+	%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 7, i32 2, i32 3, i32 1, i32 5, i32 6, i32 5 >
+	ret <8 x i16> %tmp
+; X64: t4:
+; X64: 	pextrw	$7, %xmm0, %eax
+; X64: 	pshufhw	$100, %xmm0, %xmm2
+; X64: 	pinsrw	$1, %eax, %xmm2
+; X64: 	pextrw	$1, %xmm0, %eax
+; X64: 	movaps	%xmm2, %xmm0
+; X64: 	pinsrw	$4, %eax, %xmm0
+; X64: 	ret
+}
+
+define <8 x i16> @t5(<8 x i16> %A, <8 x i16> %B) nounwind {
+	%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 0, i32 1, i32 10, i32 11, i32 2, i32 3 >
+	ret <8 x i16> %tmp
+; X64: 	t5:
+; X64: 		movlhps	%xmm1, %xmm0
+; X64: 		pshufd	$114, %xmm0, %xmm0
+; X64: 		ret
+}
+
+define <8 x i16> @t6(<8 x i16> %A, <8 x i16> %B) nounwind {
+	%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
+	ret <8 x i16> %tmp
+; X64: 	t6:
+; X64: 		movss	%xmm1, %xmm0
+; X64: 		ret
+}
+
+define <8 x i16> @t7(<8 x i16> %A, <8 x i16> %B) nounwind {
+	%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 0, i32 3, i32 2, i32 4, i32 6, i32 4, i32 7 >
+	ret <8 x i16> %tmp
+; X64: 	t7:
+; X64: 		pshuflw	$-80, %xmm0, %xmm0
+; X64: 		pshufhw	$-56, %xmm0, %xmm0
+; X64: 		ret
+}
+
+define void @t8(<2 x i64>* %res, <2 x i64>* %A) nounwind {
+	%tmp = load <2 x i64>* %A
+	%tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16>
+	%tmp0 = extractelement <8 x i16> %tmp.upgrd.1, i32 0
+	%tmp1 = extractelement <8 x i16> %tmp.upgrd.1, i32 1
+	%tmp2 = extractelement <8 x i16> %tmp.upgrd.1, i32 2
+	%tmp3 = extractelement <8 x i16> %tmp.upgrd.1, i32 3
+	%tmp4 = extractelement <8 x i16> %tmp.upgrd.1, i32 4
+	%tmp5 = extractelement <8 x i16> %tmp.upgrd.1, i32 5
+	%tmp6 = extractelement <8 x i16> %tmp.upgrd.1, i32 6
+	%tmp7 = extractelement <8 x i16> %tmp.upgrd.1, i32 7
+	%tmp8 = insertelement <8 x i16> undef, i16 %tmp2, i32 0
+	%tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 1
+	%tmp10 = insertelement <8 x i16> %tmp9, i16 %tmp0, i32 2
+	%tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 3
+	%tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp6, i32 4
+	%tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 5
+	%tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp4, i32 6
+	%tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 7
+	%tmp15.upgrd.2 = bitcast <8 x i16> %tmp15 to <2 x i64>
+	store <2 x i64> %tmp15.upgrd.2, <2 x i64>* %res
+	ret void
+; X64: 	t8:
+; X64: 		pshuflw	$-58, (%rsi), %xmm0
+; X64: 		pshufhw	$-58, %xmm0, %xmm0
+; X64: 		movaps	%xmm0, (%rdi)
+; X64: 		ret
+}
+
+define void @t9(<4 x float>* %r, <2 x i32>* %A) nounwind {
+	%tmp = load <4 x float>* %r
+	%tmp.upgrd.3 = bitcast <2 x i32>* %A to double*
+	%tmp.upgrd.4 = load double* %tmp.upgrd.3
+	%tmp.upgrd.5 = insertelement <2 x double> undef, double %tmp.upgrd.4, i32 0
+	%tmp5 = insertelement <2 x double> %tmp.upgrd.5, double undef, i32 1	
+	%tmp6 = bitcast <2 x double> %tmp5 to <4 x float>	
+	%tmp.upgrd.6 = extractelement <4 x float> %tmp, i32 0	
+	%tmp7 = extractelement <4 x float> %tmp, i32 1		
+	%tmp8 = extractelement <4 x float> %tmp6, i32 0		
+	%tmp9 = extractelement <4 x float> %tmp6, i32 1		
+	%tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.6, i32 0	
+	%tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1
+	%tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2
+	%tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3
+	store <4 x float> %tmp13, <4 x float>* %r
+	ret void
+; X64: 	t9:
+; X64: 		movsd	(%rsi), %xmm0
+; X64:	        movaps  (%rdi), %xmm1
+; X64:	        movlhps %xmm0, %xmm1
+; X64:	        movaps  %xmm1, (%rdi)
+; X64: 		ret
+}
+
+
+
+; FIXME: This testcase produces icky code. It can be made much better!
+; PR2585
+
+@g1 = external constant <4 x i32>
+@g2 = external constant <4 x i16>
+
+define internal void @t10() nounwind {
+        load <4 x i32>* @g1, align 16 
+        bitcast <4 x i32> %1 to <8 x i16>
+        shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> < i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef >
+        bitcast <8 x i16> %3 to <2 x i64>  
+        extractelement <2 x i64> %4, i32 0 
+        bitcast i64 %5 to <4 x i16>        
+        store <4 x i16> %6, <4 x i16>* @g2, align 8
+        ret void
+; X64: 	t10:
+; X64: 		pextrw	$4, %xmm0, %eax
+; X64: 		pextrw	$6, %xmm0, %edx
+; X64: 		movlhps	%xmm1, %xmm1
+; X64: 		pshuflw	$8, %xmm1, %xmm1
+; X64: 		pinsrw	$2, %eax, %xmm1
+; X64: 		pinsrw	$3, %edx, %xmm1
+}
+
+
+; Pack various elements via shuffles.
+define <8 x i16> @t11(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+	%tmp7 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 1, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
+	ret <8 x i16> %tmp7
+
+; X64: t11:
+; X64:	movlhps	%xmm0, %xmm0
+; X64:	movd	%xmm1, %eax
+; X64:	pshuflw	$1, %xmm0, %xmm0
+; X64:	pinsrw	$1, %eax, %xmm0
+; X64:	ret
+}
+
+
+define <8 x i16> @t12(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+	%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 0, i32 1, i32 undef, i32 undef, i32 3, i32 11, i32 undef , i32 undef >
+	ret <8 x i16> %tmp9
+
+; X64: t12:
+; X64: 	movlhps	%xmm0, %xmm0
+; X64: 	pextrw	$3, %xmm1, %eax
+; X64: 	pshufhw	$3, %xmm0, %xmm0
+; X64: 	pinsrw	$5, %eax, %xmm0
+; X64: 	ret
+}
+
+
+define <8 x i16> @t13(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+	%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 11, i32 3, i32 undef , i32 undef >
+	ret <8 x i16> %tmp9
+; X64: t13:
+; X64: 	punpcklqdq	%xmm0, %xmm1
+; X64: 	pextrw	$3, %xmm1, %eax
+; X64: 	pshufd	$52, %xmm1, %xmm0
+; X64: 	pinsrw	$4, %eax, %xmm0
+; X64: 	ret
+}
+
+
+define <8 x i16> @t14(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+	%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 2, i32 undef , i32 undef >
+	ret <8 x i16> %tmp9
+; X64: t14:
+; X64: 	punpcklqdq	%xmm0, %xmm1
+; X64: 	pshufhw	$8, %xmm1, %xmm0
+; X64: 	ret
+}
+
+
+
+define <8 x i16> @t15(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+        %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
+        ret <8 x i16> %tmp8
+; X64: 	t15:
+; X64: 		pextrw	$7, %xmm0, %eax
+; X64: 		punpcklqdq	%xmm1, %xmm0
+; X64: 		pshuflw	$-128, %xmm0, %xmm0
+; X64: 		pinsrw	$2, %eax, %xmm0
+; X64: 		ret
+}
+
+
+; Test Yonah where we convert a shuffle to pextrw and pinsrw
+define <16 x i8> @t16(<16 x i8> %T0) nounwind readnone {
+entry:
+        %tmp8 = shufflevector <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 1, i8 1, i8 1, i8 1, i8 0, i8 0, i8 0, i8 0,  i8 0, i8 0, i8 0, i8 0>, <16 x i8> %T0, <16 x i32> < i32 0, i32 1, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
+        %tmp9 = shufflevector <16 x i8> %tmp8, <16 x i8> %T0,  <16 x i32> < i32 0, i32 1, i32 2, i32 17,  i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
+        ret <16 x i8> %tmp9
+; X64: 	t16:
+; X64: 		pinsrw	$0, %eax, %xmm1
+; X64: 		pextrw	$8, %xmm0, %eax
+; X64: 		pinsrw	$1, %eax, %xmm1
+; X64: 		pextrw	$1, %xmm1, %ecx
+; X64: 		movd	%xmm1, %edx
+; X64: 		pinsrw	$0, %edx, %xmm1
+; X64: 		pinsrw	$1, %eax, %xmm0
+; X64: 		ret
+}
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
new file mode 100644
index 0000000..a734c05
--- /dev/null
+++ b/test/CodeGen/X86/sse41.ll
@@ -0,0 +1,226 @@
+; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse41 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse41 | FileCheck %s -check-prefix=X64
+
+@g16 = external global i16
+
+define <4 x i32> @pinsrd_1(i32 %s, <4 x i32> %tmp) nounwind {
+        %tmp1 = insertelement <4 x i32> %tmp, i32 %s, i32 1
+        ret <4 x i32> %tmp1
+; X32: pinsrd_1:
+; X32:    pinsrd $1, 4(%esp), %xmm0
+
+; X64: pinsrd_1:
+; X64:    pinsrd $1, %edi, %xmm0
+}
+
+define <16 x i8> @pinsrb_1(i8 %s, <16 x i8> %tmp) nounwind {
+        %tmp1 = insertelement <16 x i8> %tmp, i8 %s, i32 1
+        ret <16 x i8> %tmp1
+; X32: pinsrb_1:
+; X32:    pinsrb $1, 4(%esp), %xmm0
+
+; X64: pinsrb_1:
+; X64:    pinsrb $1, %edi, %xmm0
+}
+
+
+define <2 x i64> @pmovsxbd_1(i32* %p) nounwind {
+entry:
+	%0 = load i32* %p, align 4
+	%1 = insertelement <4 x i32> undef, i32 %0, i32 0
+	%2 = insertelement <4 x i32> %1, i32 0, i32 1
+	%3 = insertelement <4 x i32> %2, i32 0, i32 2
+	%4 = insertelement <4 x i32> %3, i32 0, i32 3
+	%5 = bitcast <4 x i32> %4 to <16 x i8>
+	%6 = tail call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %5) nounwind readnone
+	%7 = bitcast <4 x i32> %6 to <2 x i64>
+	ret <2 x i64> %7
+        
+; X32: _pmovsxbd_1:
+; X32:   movl      4(%esp), %eax
+; X32:   pmovsxbd   (%eax), %xmm0
+
+; X64: _pmovsxbd_1:
+; X64:   pmovsxbd   (%rdi), %xmm0
+}
+
+define <2 x i64> @pmovsxwd_1(i64* %p) nounwind readonly {
+entry:
+	%0 = load i64* %p		; <i64> [#uses=1]
+	%tmp2 = insertelement <2 x i64> zeroinitializer, i64 %0, i32 0		; <<2 x i64>> [#uses=1]
+	%1 = bitcast <2 x i64> %tmp2 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%2 = tail call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1) nounwind readnone		; <<4 x i32>> [#uses=1]
+	%3 = bitcast <4 x i32> %2 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %3
+        
+; X32: _pmovsxwd_1:
+; X32:   movl 4(%esp), %eax
+; X32:   pmovsxwd (%eax), %xmm0
+
+; X64: _pmovsxwd_1:
+; X64:   pmovsxwd (%rdi), %xmm0
+}
+
+
+
+
+define <2 x i64> @pmovzxbq_1() nounwind {
+entry:
+	%0 = load i16* @g16, align 2		; <i16> [#uses=1]
+	%1 = insertelement <8 x i16> undef, i16 %0, i32 0		; <<8 x i16>> [#uses=1]
+	%2 = bitcast <8 x i16> %1 to <16 x i8>		; <<16 x i8>> [#uses=1]
+	%3 = tail call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %2) nounwind readnone		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %3
+
+; X32: _pmovzxbq_1:
+; X32:   movl	L_g16$non_lazy_ptr, %eax
+; X32:   pmovzxbq	(%eax), %xmm0
+
+; X64: _pmovzxbq_1:
+; X64:   movq	_g16@GOTPCREL(%rip), %rax
+; X64:   pmovzxbq	(%rax), %xmm0
+}
+
+declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
+
+
+
+
+define i32 @extractps_1(<4 x float> %v) nounwind {
+  %s = extractelement <4 x float> %v, i32 3
+  %i = bitcast float %s to i32
+  ret i32 %i
+
+; X32: _extractps_1:  
+; X32:	  extractps	$3, %xmm0, %eax
+
+; X64: _extractps_1:  
+; X64:	  extractps	$3, %xmm0, %eax
+}
+define i32 @extractps_2(<4 x float> %v) nounwind {
+  %t = bitcast <4 x float> %v to <4 x i32>
+  %s = extractelement <4 x i32> %t, i32 3
+  ret i32 %s
+
+; X32: _extractps_2:
+; X32:	  extractps	$3, %xmm0, %eax
+
+; X64: _extractps_2:
+; X64:	  extractps	$3, %xmm0, %eax
+}
+
+
+; The non-store form of extractps puts its result into a GPR.
+; This makes it suitable for an extract from a <4 x float> that
+; is bitcasted to i32, but unsuitable for much of anything else.
+
+define float @ext_1(<4 x float> %v) nounwind {
+  %s = extractelement <4 x float> %v, i32 3
+  %t = fadd float %s, 1.0
+  ret float %t
+
+; X32: _ext_1:
+; X32:	  pshufd	$3, %xmm0, %xmm0
+; X32:	  addss	LCPI8_0, %xmm0
+
+; X64: _ext_1:
+; X64:	  pshufd	$3, %xmm0, %xmm0
+; X64:	  addss	LCPI8_0(%rip), %xmm0
+}
+define float @ext_2(<4 x float> %v) nounwind {
+  %s = extractelement <4 x float> %v, i32 3
+  ret float %s
+
+; X32: _ext_2:
+; X32:	  pshufd	$3, %xmm0, %xmm0
+
+; X64: _ext_2:
+; X64:	  pshufd	$3, %xmm0, %xmm0
+}
+define i32 @ext_3(<4 x i32> %v) nounwind {
+  %i = extractelement <4 x i32> %v, i32 3
+  ret i32 %i
+
+; X32: _ext_3:
+; X32:	  pextrd	$3, %xmm0, %eax
+
+; X64: _ext_3:
+; X64:	  pextrd	$3, %xmm0, %eax
+}
+
+define <4 x float> @insertps_1(<4 x float> %t1, <4 x float> %t2) nounwind {
+        %tmp1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %t1, <4 x float> %t2, i32 1) nounwind readnone
+        ret <4 x float> %tmp1
+; X32: _insertps_1:
+; X32:    insertps  $1, %xmm1, %xmm0
+
+; X64: _insertps_1:
+; X64:    insertps  $1, %xmm1, %xmm0
+}
+
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
+
+define <4 x float> @insertps_2(<4 x float> %t1, float %t2) nounwind {
+        %tmp1 = insertelement <4 x float> %t1, float %t2, i32 0
+        ret <4 x float> %tmp1
+; X32: _insertps_2:
+; X32:    insertps  $0, 4(%esp), %xmm0
+
+; X64: _insertps_2:
+; X64:    insertps  $0, %xmm1, %xmm0        
+}
+
+define <4 x float> @insertps_3(<4 x float> %t1, <4 x float> %t2) nounwind {
+        %tmp2 = extractelement <4 x float> %t2, i32 0
+        %tmp1 = insertelement <4 x float> %t1, float %tmp2, i32 0
+        ret <4 x float> %tmp1
+; X32: _insertps_3:
+; X32:    insertps  $0, %xmm1, %xmm0        
+
+; X64: _insertps_3:
+; X64:    insertps  $0, %xmm1, %xmm0        
+}
+
+define i32 @ptestz_1(<4 x float> %t1, <4 x float> %t2) nounwind {
+        %tmp1 = call i32 @llvm.x86.sse41.ptestz(<4 x float> %t1, <4 x float> %t2) nounwind readnone
+        ret i32 %tmp1
+; X32: _ptestz_1:
+; X32:    ptest 	%xmm1, %xmm0
+; X32:    sete	%al
+
+; X64: _ptestz_1:
+; X64:    ptest 	%xmm1, %xmm0
+; X64:    sete	%al
+}
+
+define i32 @ptestz_2(<4 x float> %t1, <4 x float> %t2) nounwind {
+        %tmp1 = call i32 @llvm.x86.sse41.ptestc(<4 x float> %t1, <4 x float> %t2) nounwind readnone
+        ret i32 %tmp1
+; X32: _ptestz_2:
+; X32:    ptest 	%xmm1, %xmm0
+; X32:    setb	%al
+
+; X64: _ptestz_2:
+; X64:    ptest 	%xmm1, %xmm0
+; X64:    setb	%al
+}
+
+define i32 @ptestz_3(<4 x float> %t1, <4 x float> %t2) nounwind {
+        %tmp1 = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %t1, <4 x float> %t2) nounwind readnone
+        ret i32 %tmp1
+; X32: _ptestz_3:
+; X32:    ptest 	%xmm1, %xmm0
+; X32:    seta	%al
+
+; X64: _ptestz_3:
+; X64:    ptest 	%xmm1, %xmm0
+; X64:    seta	%al
+}
+
+
+declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
+
diff --git a/test/CodeGen/X86/sse42.ll b/test/CodeGen/X86/sse42.ll
new file mode 100644
index 0000000..c9c4d01
--- /dev/null
+++ b/test/CodeGen/X86/sse42.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse42 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse42 | FileCheck %s -check-prefix=X64
+
+declare i32 @llvm.x86.sse42.crc32.8(i32, i8) nounwind
+declare i32 @llvm.x86.sse42.crc32.16(i32, i16) nounwind
+declare i32 @llvm.x86.sse42.crc32.32(i32, i32) nounwind
+
+define i32 @crc32_8(i32 %a, i8 %b) nounwind {
+  %tmp = call i32 @llvm.x86.sse42.crc32.8(i32 %a, i8 %b)
+  ret i32 %tmp
+; X32: _crc32_8:
+; X32:     crc32   8(%esp), %eax
+
+; X64: _crc32_8:
+; X64:     crc32   %sil, %eax
+}
+
+
+define i32 @crc32_16(i32 %a, i16 %b) nounwind {
+  %tmp = call i32 @llvm.x86.sse42.crc32.16(i32 %a, i16 %b)
+  ret i32 %tmp
+; X32: _crc32_16:
+; X32:     crc32   8(%esp), %eax
+
+; X64: _crc32_16:
+; X64:     crc32   %si, %eax
+}
+
+
+define i32 @crc32_32(i32 %a, i32 %b) nounwind {
+  %tmp = call i32 @llvm.x86.sse42.crc32.32(i32 %a, i32 %b)
+  ret i32 %tmp
+; X32: _crc32_32:
+; X32:     crc32   8(%esp), %eax
+
+; X64: _crc32_32:
+; X64:     crc32   %esi, %eax
+}
diff --git a/test/CodeGen/X86/sse_reload_fold.ll b/test/CodeGen/X86/sse_reload_fold.ll
new file mode 100644
index 0000000..dc3d6fe
--- /dev/null
+++ b/test/CodeGen/X86/sse_reload_fold.ll
@@ -0,0 +1,124 @@
+; RUN: llc < %s -march=x86-64 -mattr=+64bit,+sse3 -print-failed-fuse-candidates |& \
+; RUN:   grep fail | count 1
+
+declare float @test_f(float %f)
+declare double @test_d(double %f)
+declare <4 x float> @test_vf(<4 x float> %f)
+declare <2 x double> @test_vd(<2 x double> %f)
+declare float @llvm.sqrt.f32(float)
+declare double @llvm.sqrt.f64(double)
+
+declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>)
+declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>)
+declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>)
+declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8)
+declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>)
+declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8)
+declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>)
+
+define float @foo(float %f) {
+  %a = call float @test_f(float %f)
+  %t = call float @llvm.sqrt.f32(float %f)
+  ret float %t
+}
+define double @doo(double %f) {
+  %a = call double @test_d(double %f)
+  %t = call double @llvm.sqrt.f64(double %f)
+  ret double %t
+}
+define <4 x float> @a0(<4 x float> %f) {
+  %a = call <4 x float> @test_vf(<4 x float> %f)
+  %t = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %f)
+  ret <4 x float> %t
+}
+define <4 x float> @a1(<4 x float> %f) {
+  %a = call <4 x float> @test_vf(<4 x float> %f)
+  %t = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %f)
+  ret <4 x float> %t
+}
+define <4 x float> @a2(<4 x float> %f) {
+  %a = call <4 x float> @test_vf(<4 x float> %f)
+  %t = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %f)
+  ret <4 x float> %t
+}
+define <4 x float> @b3(<4 x float> %f) {
+  %y = call <4 x float> @test_vf(<4 x float> %f)
+  %t = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %y, <4 x float> %f)
+  ret <4 x float> %t
+}
+define <4 x float> @b4(<4 x float> %f) {
+  %y = call <4 x float> @test_vf(<4 x float> %f)
+  %t = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %y, <4 x float> %f)
+  ret <4 x float> %t
+}
+define <4 x float> @b5(<4 x float> %f) {
+  %y = call <4 x float> @test_vf(<4 x float> %f)
+  %t = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %y, <4 x float> %f, i8 7)
+  ret <4 x float> %t
+}
+define <4 x float> @b6(<4 x float> %f) {
+  %y = call <4 x float> @test_vf(<4 x float> %f)
+  %t = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %y, <4 x float> %f)
+  ret <4 x float> %t
+}
+define <4 x float> @b7(<4 x float> %f) {
+  %y = call <4 x float> @test_vf(<4 x float> %f)
+  %t = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %y, <4 x float> %f)
+  ret <4 x float> %t
+}
+define <4 x float> @b8(<4 x float> %f) {
+  %y = call <4 x float> @test_vf(<4 x float> %f)
+  %t = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %y, <4 x float> %f)
+  ret <4 x float> %t
+}
+define <2 x double> @c1(<2 x double> %f) {
+  %a = call <2 x double> @test_vd(<2 x double> %f)
+  %t = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %f)
+  ret <2 x double> %t
+}
+define <2 x double> @d3(<2 x double> %f) {
+  %y = call <2 x double> @test_vd(<2 x double> %f)
+  %t = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %y, <2 x double> %f)
+  ret <2 x double> %t
+}
+define <2 x double> @d4(<2 x double> %f) {
+  %y = call <2 x double> @test_vd(<2 x double> %f)
+  %t = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %y, <2 x double> %f)
+  ret <2 x double> %t
+}
+define <2 x double> @d5(<2 x double> %f) {
+  %y = call <2 x double> @test_vd(<2 x double> %f)
+  %t = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %y, <2 x double> %f, i8 7)
+  ret <2 x double> %t
+}
+define <2 x double> @d6(<2 x double> %f) {
+  %y = call <2 x double> @test_vd(<2 x double> %f)
+  %t = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %y, <2 x double> %f)
+  ret <2 x double> %t
+}
+define <2 x double> @d7(<2 x double> %f) {
+  %y = call <2 x double> @test_vd(<2 x double> %f)
+  %t = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %y, <2 x double> %f)
+  ret <2 x double> %t
+}
+define <2 x double> @d8(<2 x double> %f) {
+  %y = call <2 x double> @test_vd(<2 x double> %f)
+  %t = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %y, <2 x double> %f)
+  ret <2 x double> %t
+}
+
+; This one should fail to fuse.
+define <2 x double> @z0(<2 x double> %f) {
+  %y = call <2 x double> @test_vd(<2 x double> %f)
+  %t = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %f, <2 x double> %y)
+  ret <2 x double> %t
+}
diff --git a/test/CodeGen/X86/stack-align.ll b/test/CodeGen/X86/stack-align.ll
new file mode 100644
index 0000000..cb65e9b
--- /dev/null
+++ b/test/CodeGen/X86/stack-align.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -relocation-model=static -mcpu=yonah | grep {andpd.*4(%esp), %xmm}
+
+; The double argument is at 4(esp) which is 16-byte aligned, allowing us to
+; fold the load into the andpd.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin8"
+@G = external global double
+
+define void @test({ double, double }* byval  %z, double* %P) {
+entry:
+	%tmp = getelementptr { double, double }* %z, i32 0, i32 0		; <double*> [#uses=1]
+	%tmp1 = load double* %tmp, align 8		; <double> [#uses=1]
+	%tmp2 = tail call double @fabs( double %tmp1 )		; <double> [#uses=1]
+	%tmp3 = load double* @G, align 16		; <double> [#uses=1]
+	%tmp4 = tail call double @fabs( double %tmp3 )		; <double> [#uses=1]
+	%tmp6 = fadd double %tmp4, %tmp2		; <double> [#uses=1]
+	store double %tmp6, double* %P, align 8
+	ret void
+}
+
+declare double @fabs(double)
diff --git a/test/CodeGen/X86/stack-color-with-reg-2.ll b/test/CodeGen/X86/stack-color-with-reg-2.ll
new file mode 100644
index 0000000..c1f2672
--- /dev/null
+++ b/test/CodeGen/X86/stack-color-with-reg-2.ll
@@ -0,0 +1,230 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs | grep {movl\[\[:space:\]\]%eax, %ebx}
+
+	%"struct..0$_67" = type { i32, %"struct.llvm::MachineOperand"**, %"struct.llvm::MachineOperand"* }
+	%"struct..1$_69" = type { i32 }
+	%"struct.llvm::AbstractTypeUser" = type { i32 (...)** }
+	%"struct.llvm::AliasAnalysis" = type opaque
+	%"struct.llvm::AnalysisResolver" = type { %"struct.std::vector<std::pair<const llvm::PassInfo*, llvm::Pass*>,std::allocator<std::pair<const llvm::PassInfo*, llvm::Pass*> > >", %"struct.llvm::PMDataManager"* }
+	%"struct.llvm::Annotable" = type { %"struct.llvm::Annotation"* }
+	%"struct.llvm::Annotation" = type { i32 (...)**, %"struct..1$_69", %"struct.llvm::Annotation"* }
+	%"struct.llvm::Argument" = type { %"struct.llvm::Value", %"struct.llvm::ilist_node<llvm::Argument>", %"struct.llvm::Function"* }
+	%"struct.llvm::AttrListPtr" = type { %"struct.llvm::AttributeListImpl"* }
+	%"struct.llvm::AttributeListImpl" = type opaque
+	%"struct.llvm::BasicBlock" = type { %"struct.llvm::Value", %"struct.llvm::ilist_node<llvm::BasicBlock>", %"struct.llvm::iplist<llvm::Instruction,llvm::ilist_traits<llvm::Instruction> >", %"struct.llvm::Function"* }
+	%"struct.llvm::BitVector" = type { i32*, i32, i32 }
+	%"struct.llvm::BumpPtrAllocator" = type { i8* }
+	%"struct.llvm::CalleeSavedInfo" = type { i32, %"struct.llvm::TargetRegisterClass"*, i32 }
+	%"struct.llvm::Constant" = type { %"struct.llvm::User" }
+	%"struct.llvm::DebugLocTracker" = type { %"struct.std::vector<llvm::DebugLocTuple,std::allocator<llvm::DebugLocTuple> >", %"struct.llvm::DenseMap<llvm::DebugLocTuple,unsigned int,llvm::DenseMapInfo<llvm::DebugLocTuple>,llvm::DenseMapInfo<unsigned int> >" }
+	%"struct.llvm::DebugLocTuple" = type { %"struct.llvm::GlobalVariable"*, i32, i32 }
+	%"struct.llvm::DenseMap<llvm::DebugLocTuple,unsigned int,llvm::DenseMapInfo<llvm::DebugLocTuple>,llvm::DenseMapInfo<unsigned int> >" = type { i32, %"struct.std::pair<llvm::DebugLocTuple,unsigned int>"*, i32, i32 }
+	%"struct.llvm::DenseMap<llvm::MachineInstr*,unsigned int,llvm::DenseMapInfo<llvm::MachineInstr*>,llvm::DenseMapInfo<unsigned int> >" = type { i32, %"struct.std::pair<llvm::MachineInstr*,unsigned int>"*, i32, i32 }
+	%"struct.llvm::DenseMap<unsigned int,char,llvm::DenseMapInfo<unsigned int>,llvm::DenseMapInfo<char> >" = type { i32, %"struct.std::pair<unsigned int,char>"*, i32, i32 }
+	%"struct.llvm::DenseMap<unsigned int,llvm::LiveInterval*,llvm::DenseMapInfo<unsigned int>,llvm::DenseMapInfo<llvm::LiveInterval*> >" = type { i32, %"struct.std::pair<unsigned int,llvm::LiveInterval*>"*, i32, i32 }
+	%"struct.llvm::DenseSet<unsigned int,llvm::DenseMapInfo<unsigned int> >" = type { %"struct.llvm::DenseMap<unsigned int,char,llvm::DenseMapInfo<unsigned int>,llvm::DenseMapInfo<char> >" }
+	%"struct.llvm::Function" = type { %"struct.llvm::GlobalValue", %"struct.llvm::Annotable", %"struct.llvm::ilist_node<llvm::Function>", %"struct.llvm::iplist<llvm::BasicBlock,llvm::ilist_traits<llvm::BasicBlock> >", %"struct.llvm::iplist<llvm::Argument,llvm::ilist_traits<llvm::Argument> >", %"struct.llvm::ValueSymbolTable"*, %"struct.llvm::AttrListPtr" }
+	%"struct.llvm::FunctionPass" = type { %"struct.llvm::Pass" }
+	%"struct.llvm::GlobalValue" = type { %"struct.llvm::Constant", %"struct.llvm::Module"*, i32, %"struct.std::string" }
+	%"struct.llvm::GlobalVariable" = type opaque
+	%"struct.llvm::Instruction" = type { %"struct.llvm::User", %"struct.llvm::ilist_node<llvm::Instruction>", %"struct.llvm::BasicBlock"* }
+	%"struct.llvm::LiveInterval" = type <{ i32, float, i16, [6 x i8], %"struct.llvm::SmallVector<llvm::LiveRange,4u>", %"struct.llvm::SmallVector<llvm::MachineBasicBlock*,4u>" }>
+	%"struct.llvm::LiveIntervals" = type { %"struct.llvm::MachineFunctionPass", %"struct.llvm::MachineFunction"*, %"struct.llvm::MachineRegisterInfo"*, %"struct.llvm::TargetMachine"*, %"struct.llvm::TargetRegisterInfo"*, %"struct.llvm::TargetInstrInfo"*, %"struct.llvm::AliasAnalysis"*, %"struct.llvm::LiveVariables"*, %"struct.llvm::BumpPtrAllocator", %"struct.std::vector<std::pair<unsigned int, unsigned int>,std::allocator<std::pair<unsigned int, unsigned int> > >", %"struct.std::vector<std::pair<unsigned int, llvm::MachineBasicBlock*>,std::allocator<std::pair<unsigned int, llvm::MachineBasicBlock*> > >", i64, %"struct.llvm::DenseMap<llvm::MachineInstr*,unsigned int,llvm::DenseMapInfo<llvm::MachineInstr*>,llvm::DenseMapInfo<unsigned int> >", %"struct.std::vector<llvm::MachineInstr*,std::allocator<llvm::MachineInstr*> >", %"struct.llvm::DenseMap<unsigned int,llvm::LiveInterval*,llvm::DenseMapInfo<unsigned int>,llvm::DenseMapInfo<llvm::LiveInterval*> >", %"struct.llvm::BitVector", %"struct.std::vector<llvm::MachineInstr*,std::allocator<llvm::MachineInstr*> >" }
+	%"struct.llvm::LiveVariables" = type opaque
+	%"struct.llvm::MVT" = type { %"struct..1$_69" }
+	%"struct.llvm::MachineBasicBlock" = type { %"struct.llvm::ilist_node<llvm::MachineBasicBlock>", %"struct.llvm::ilist<llvm::MachineInstr>", %"struct.llvm::BasicBlock"*, i32, %"struct.llvm::MachineFunction"*, %"struct.std::vector<llvm::MachineBasicBlock*,std::allocator<llvm::MachineBasicBlock*> >", %"struct.std::vector<llvm::MachineBasicBlock*,std::allocator<llvm::MachineBasicBlock*> >", %"struct.std::vector<int,std::allocator<int> >", i32, i8 }
+	%"struct.llvm::MachineConstantPool" = type opaque
+	%"struct.llvm::MachineFrameInfo" = type { %"struct.std::vector<llvm::MachineFrameInfo::StackObject,std::allocator<llvm::MachineFrameInfo::StackObject> >", i32, i8, i8, i64, i32, i32, i8, i32, i32, %"struct.std::vector<llvm::CalleeSavedInfo,std::allocator<llvm::CalleeSavedInfo> >", %"struct.llvm::MachineModuleInfo"*, %"struct.llvm::TargetFrameInfo"* }
+	%"struct.llvm::MachineFrameInfo::StackObject" = type { i64, i32, i8, i64 }
+	%"struct.llvm::MachineFunction" = type { %"struct.llvm::Annotation", %"struct.llvm::Function"*, %"struct.llvm::TargetMachine"*, %"struct.llvm::MachineRegisterInfo"*, %"struct.llvm::AbstractTypeUser"*, %"struct.llvm::MachineFrameInfo"*, %"struct.llvm::MachineConstantPool"*, %"struct.llvm::MachineJumpTableInfo"*, %"struct.std::vector<llvm::MachineBasicBlock*,std::allocator<llvm::MachineBasicBlock*> >", %"struct.llvm::BumpPtrAllocator", %"struct.llvm::Recycler<llvm::MachineBasicBlock,80ul,4ul>", %"struct.llvm::Recycler<llvm::MachineBasicBlock,80ul,4ul>", %"struct.llvm::ilist<llvm::MachineBasicBlock>", %"struct..1$_69", %"struct.llvm::DebugLocTracker" }
+	%"struct.llvm::MachineFunctionPass" = type { %"struct.llvm::FunctionPass" }
+	%"struct.llvm::MachineInstr" = type { %"struct.llvm::ilist_node<llvm::MachineInstr>", %"struct.llvm::TargetInstrDesc"*, i16, %"struct.std::vector<llvm::MachineOperand,std::allocator<llvm::MachineOperand> >", %"struct.std::list<llvm::MachineMemOperand,std::allocator<llvm::MachineMemOperand> >", %"struct.llvm::MachineBasicBlock"*, %"struct..1$_69" }
+	%"struct.llvm::MachineJumpTableInfo" = type opaque
+	%"struct.llvm::MachineModuleInfo" = type opaque
+	%"struct.llvm::MachineOperand" = type { i8, i8, i8, %"struct.llvm::MachineInstr"*, %"struct.llvm::MachineOperand::$_66" }
+	%"struct.llvm::MachineOperand::$_66" = type { %"struct..0$_67" }
+	%"struct.llvm::MachineRegisterInfo" = type { %"struct.std::vector<std::pair<const llvm::TargetRegisterClass*, llvm::MachineOperand*>,std::allocator<std::pair<const llvm::TargetRegisterClass*, llvm::MachineOperand*> > >", %"struct.std::vector<std::vector<unsigned int, std::allocator<unsigned int> >,std::allocator<std::vector<unsigned int, std::allocator<unsigned int> > > >", %"struct.llvm::MachineOperand"**, %"struct.llvm::BitVector", %"struct.std::vector<std::pair<unsigned int, unsigned int>,std::allocator<std::pair<unsigned int, unsigned int> > >", %"struct.std::vector<int,std::allocator<int> >" }
+	%"struct.llvm::Module" = type opaque
+	%"struct.llvm::PATypeHandle" = type { %"struct.llvm::Type"*, %"struct.llvm::AbstractTypeUser"* }
+	%"struct.llvm::PATypeHolder" = type { %"struct.llvm::Type"* }
+	%"struct.llvm::PMDataManager" = type opaque
+	%"struct.llvm::Pass" = type { i32 (...)**, %"struct.llvm::AnalysisResolver"*, i32 }
+	%"struct.llvm::PassInfo" = type { i8*, i8*, i32, i8, i8, i8, %"struct.std::vector<const llvm::PassInfo*,std::allocator<const llvm::PassInfo*> >", %"struct.llvm::Pass"* ()* }
+	%"struct.llvm::Recycler<llvm::MachineBasicBlock,80ul,4ul>" = type { %"struct.llvm::iplist<llvm::RecyclerStruct,llvm::ilist_traits<llvm::RecyclerStruct> >" }
+	%"struct.llvm::RecyclerStruct" = type { %"struct.llvm::RecyclerStruct"*, %"struct.llvm::RecyclerStruct"* }
+	%"struct.llvm::SmallVector<llvm::LiveRange,4u>" = type <{ [17 x i8], [47 x i8] }>
+	%"struct.llvm::SmallVector<llvm::MachineBasicBlock*,4u>" = type <{ [17 x i8], [15 x i8] }>
+	%"struct.llvm::TargetAsmInfo" = type opaque
+	%"struct.llvm::TargetFrameInfo" = type opaque
+	%"struct.llvm::TargetInstrDesc" = type { i16, i16, i16, i16, i8*, i32, i32, i32*, i32*, %"struct.llvm::TargetRegisterClass"**, %"struct.llvm::TargetOperandInfo"* }
+	%"struct.llvm::TargetInstrInfo" = type { i32 (...)**, %"struct.llvm::TargetInstrDesc"*, i32 }
+	%"struct.llvm::TargetMachine" = type { i32 (...)**, %"struct.llvm::TargetAsmInfo"* }
+	%"struct.llvm::TargetOperandInfo" = type { i16, i16, i32 }
+	%"struct.llvm::TargetRegisterClass" = type { i32 (...)**, i32, i8*, %"struct.llvm::MVT"*, %"struct.llvm::TargetRegisterClass"**, %"struct.llvm::TargetRegisterClass"**, %"struct.llvm::TargetRegisterClass"**, %"struct.llvm::TargetRegisterClass"**, i32, i32, i32, i32*, i32*, %"struct.llvm::DenseSet<unsigned int,llvm::DenseMapInfo<unsigned int> >" }
+	%"struct.llvm::TargetRegisterDesc" = type { i8*, i8*, i32*, i32*, i32* }
+	%"struct.llvm::TargetRegisterInfo" = type { i32 (...)**, i32*, i32, i32*, i32, i32*, i32, %"struct.llvm::TargetRegisterDesc"*, i32, %"struct.llvm::TargetRegisterClass"**, %"struct.llvm::TargetRegisterClass"**, i32, i32 }
+	%"struct.llvm::Type" = type { %"struct.llvm::AbstractTypeUser", i8, [3 x i8], i32, %"struct.llvm::Type"*, %"struct.std::vector<llvm::AbstractTypeUser*,std::allocator<llvm::AbstractTypeUser*> >", i32, %"struct.llvm::PATypeHandle"* }
+	%"struct.llvm::Use" = type { %"struct.llvm::Value"*, %"struct.llvm::Use"*, %"struct..1$_69" }
+	%"struct.llvm::User" = type { %"struct.llvm::Value", %"struct.llvm::Use"*, i32 }
+	%"struct.llvm::Value" = type { i32 (...)**, i8, i8, i16, %"struct.llvm::PATypeHolder", %"struct.llvm::Use"*, %"struct.llvm::ValueName"* }
+	%"struct.llvm::ValueName" = type opaque
+	%"struct.llvm::ValueSymbolTable" = type opaque
+	%"struct.llvm::ilist<llvm::MachineBasicBlock>" = type { %"struct.llvm::iplist<llvm::MachineBasicBlock,llvm::ilist_traits<llvm::MachineBasicBlock> >" }
+	%"struct.llvm::ilist<llvm::MachineInstr>" = type { %"struct.llvm::iplist<llvm::MachineInstr,llvm::ilist_traits<llvm::MachineInstr> >" }
+	%"struct.llvm::ilist_node<llvm::Argument>" = type { %"struct.llvm::Argument"*, %"struct.llvm::Argument"* }
+	%"struct.llvm::ilist_node<llvm::BasicBlock>" = type { %"struct.llvm::BasicBlock"*, %"struct.llvm::BasicBlock"* }
+	%"struct.llvm::ilist_node<llvm::Function>" = type { %"struct.llvm::Function"*, %"struct.llvm::Function"* }
+	%"struct.llvm::ilist_node<llvm::Instruction>" = type { %"struct.llvm::Instruction"*, %"struct.llvm::Instruction"* }
+	%"struct.llvm::ilist_node<llvm::MachineBasicBlock>" = type { %"struct.llvm::MachineBasicBlock"*, %"struct.llvm::MachineBasicBlock"* }
+	%"struct.llvm::ilist_node<llvm::MachineInstr>" = type { %"struct.llvm::MachineInstr"*, %"struct.llvm::MachineInstr"* }
+	%"struct.llvm::ilist_traits<llvm::Argument>" = type { %"struct.llvm::ilist_node<llvm::Argument>" }
+	%"struct.llvm::ilist_traits<llvm::BasicBlock>" = type { %"struct.llvm::ilist_node<llvm::BasicBlock>" }
+	%"struct.llvm::ilist_traits<llvm::Instruction>" = type { %"struct.llvm::ilist_node<llvm::Instruction>" }
+	%"struct.llvm::ilist_traits<llvm::MachineBasicBlock>" = type { %"struct.llvm::ilist_node<llvm::MachineBasicBlock>" }
+	%"struct.llvm::ilist_traits<llvm::MachineInstr>" = type { %"struct.llvm::ilist_node<llvm::MachineInstr>", %"struct.llvm::MachineBasicBlock"* }
+	%"struct.llvm::ilist_traits<llvm::RecyclerStruct>" = type { %"struct.llvm::RecyclerStruct" }
+	%"struct.llvm::iplist<llvm::Argument,llvm::ilist_traits<llvm::Argument> >" = type { %"struct.llvm::ilist_traits<llvm::Argument>", %"struct.llvm::Argument"* }
+	%"struct.llvm::iplist<llvm::BasicBlock,llvm::ilist_traits<llvm::BasicBlock> >" = type { %"struct.llvm::ilist_traits<llvm::BasicBlock>", %"struct.llvm::BasicBlock"* }
+	%"struct.llvm::iplist<llvm::Instruction,llvm::ilist_traits<llvm::Instruction> >" = type { %"struct.llvm::ilist_traits<llvm::Instruction>", %"struct.llvm::Instruction"* }
+	%"struct.llvm::iplist<llvm::MachineBasicBlock,llvm::ilist_traits<llvm::MachineBasicBlock> >" = type { %"struct.llvm::ilist_traits<llvm::MachineBasicBlock>", %"struct.llvm::MachineBasicBlock"* }
+	%"struct.llvm::iplist<llvm::MachineInstr,llvm::ilist_traits<llvm::MachineInstr> >" = type { %"struct.llvm::ilist_traits<llvm::MachineInstr>", %"struct.llvm::MachineInstr"* }
+	%"struct.llvm::iplist<llvm::RecyclerStruct,llvm::ilist_traits<llvm::RecyclerStruct> >" = type { %"struct.llvm::ilist_traits<llvm::RecyclerStruct>", %"struct.llvm::RecyclerStruct"* }
+	%"struct.std::IdxMBBPair" = type { i32, %"struct.llvm::MachineBasicBlock"* }
+	%"struct.std::_List_base<llvm::MachineMemOperand,std::allocator<llvm::MachineMemOperand> >" = type { %"struct.llvm::ilist_traits<llvm::RecyclerStruct>" }
+	%"struct.std::_Vector_base<const llvm::PassInfo*,std::allocator<const llvm::PassInfo*> >" = type { %"struct.std::_Vector_base<const llvm::PassInfo*,std::allocator<const llvm::PassInfo*> >::_Vector_impl" }
+	%"struct.std::_Vector_base<const llvm::PassInfo*,std::allocator<const llvm::PassInfo*> >::_Vector_impl" = type { %"struct.llvm::PassInfo"**, %"struct.llvm::PassInfo"**, %"struct.llvm::PassInfo"** }
+	%"struct.std::_Vector_base<int,std::allocator<int> >" = type { %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl" }
+	%"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl" = type { i32*, i32*, i32* }
+	%"struct.std::_Vector_base<llvm::AbstractTypeUser*,std::allocator<llvm::AbstractTypeUser*> >" = type { %"struct.std::_Vector_base<llvm::AbstractTypeUser*,std::allocator<llvm::AbstractTypeUser*> >::_Vector_impl" }
+	%"struct.std::_Vector_base<llvm::AbstractTypeUser*,std::allocator<llvm::AbstractTypeUser*> >::_Vector_impl" = type { %"struct.llvm::AbstractTypeUser"**, %"struct.llvm::AbstractTypeUser"**, %"struct.llvm::AbstractTypeUser"** }
+	%"struct.std::_Vector_base<llvm::CalleeSavedInfo,std::allocator<llvm::CalleeSavedInfo> >" = type { %"struct.std::_Vector_base<llvm::CalleeSavedInfo,std::allocator<llvm::CalleeSavedInfo> >::_Vector_impl" }
+	%"struct.std::_Vector_base<llvm::CalleeSavedInfo,std::allocator<llvm::CalleeSavedInfo> >::_Vector_impl" = type { %"struct.llvm::CalleeSavedInfo"*, %"struct.llvm::CalleeSavedInfo"*, %"struct.llvm::CalleeSavedInfo"* }
+	%"struct.std::_Vector_base<llvm::DebugLocTuple,std::allocator<llvm::DebugLocTuple> >" = type { %"struct.std::_Vector_base<llvm::DebugLocTuple,std::allocator<llvm::DebugLocTuple> >::_Vector_impl" }
+	%"struct.std::_Vector_base<llvm::DebugLocTuple,std::allocator<llvm::DebugLocTuple> >::_Vector_impl" = type { %"struct.llvm::DebugLocTuple"*, %"struct.llvm::DebugLocTuple"*, %"struct.llvm::DebugLocTuple"* }
+	%"struct.std::_Vector_base<llvm::MachineBasicBlock*,std::allocator<llvm::MachineBasicBlock*> >" = type { %"struct.std::_Vector_base<llvm::MachineBasicBlock*,std::allocator<llvm::MachineBasicBlock*> >::_Vector_impl" }
+	%"struct.std::_Vector_base<llvm::MachineBasicBlock*,std::allocator<llvm::MachineBasicBlock*> >::_Vector_impl" = type { %"struct.llvm::MachineBasicBlock"**, %"struct.llvm::MachineBasicBlock"**, %"struct.llvm::MachineBasicBlock"** }
+	%"struct.std::_Vector_base<llvm::MachineFrameInfo::StackObject,std::allocator<llvm::MachineFrameInfo::StackObject> >" = type { %"struct.std::_Vector_base<llvm::MachineFrameInfo::StackObject,std::allocator<llvm::MachineFrameInfo::StackObject> >::_Vector_impl" }
+	%"struct.std::_Vector_base<llvm::MachineFrameInfo::StackObject,std::allocator<llvm::MachineFrameInfo::StackObject> >::_Vector_impl" = type { %"struct.llvm::MachineFrameInfo::StackObject"*, %"struct.llvm::MachineFrameInfo::StackObject"*, %"struct.llvm::MachineFrameInfo::StackObject"* }
+	%"struct.std::_Vector_base<llvm::MachineInstr*,std::allocator<llvm::MachineInstr*> >" = type { %"struct.std::_Vector_base<llvm::MachineInstr*,std::allocator<llvm::MachineInstr*> >::_Vector_impl" }
+	%"struct.std::_Vector_base<llvm::MachineInstr*,std::allocator<llvm::MachineInstr*> >::_Vector_impl" = type { %"struct.llvm::MachineInstr"**, %"struct.llvm::MachineInstr"**, %"struct.llvm::MachineInstr"** }
+	%"struct.std::_Vector_base<llvm::MachineOperand,std::allocator<llvm::MachineOperand> >" = type { %"struct.std::_Vector_base<llvm::MachineOperand,std::allocator<llvm::MachineOperand> >::_Vector_impl" }
+	%"struct.std::_Vector_base<llvm::MachineOperand,std::allocator<llvm::MachineOperand> >::_Vector_impl" = type { %"struct.llvm::MachineOperand"*, %"struct.llvm::MachineOperand"*, %"struct.llvm::MachineOperand"* }
+	%"struct.std::_Vector_base<std::pair<const llvm::PassInfo*, llvm::Pass*>,std::allocator<std::pair<const llvm::PassInfo*, llvm::Pass*> > >" = type { %"struct.std::_Vector_base<std::pair<const llvm::PassInfo*, llvm::Pass*>,std::allocator<std::pair<const llvm::PassInfo*, llvm::Pass*> > >::_Vector_impl" }
+	%"struct.std::_Vector_base<std::pair<const llvm::PassInfo*, llvm::Pass*>,std::allocator<std::pair<const llvm::PassInfo*, llvm::Pass*> > >::_Vector_impl" = type { %"struct.std::pair<const llvm::PassInfo*,llvm::Pass*>"*, %"struct.std::pair<const llvm::PassInfo*,llvm::Pass*>"*, %"struct.std::pair<const llvm::PassInfo*,llvm::Pass*>"* }
+	%"struct.std::_Vector_base<std::pair<const llvm::TargetRegisterClass*, llvm::MachineOperand*>,std::allocator<std::pair<const llvm::TargetRegisterClass*, llvm::MachineOperand*> > >" = type { %"struct.std::_Vector_base<std::pair<const llvm::TargetRegisterClass*, llvm::MachineOperand*>,std::allocator<std::pair<const llvm::TargetRegisterClass*, llvm::MachineOperand*> > >::_Vector_impl" }
+	%"struct.std::_Vector_base<std::pair<const llvm::TargetRegisterClass*, llvm::MachineOperand*>,std::allocator<std::pair<const llvm::TargetRegisterClass*, llvm::MachineOperand*> > >::_Vector_impl" = type { %"struct.std::pair<const llvm::TargetRegisterClass*,llvm::MachineOperand*>"*, %"struct.std::pair<const llvm::TargetRegisterClass*,llvm::MachineOperand*>"*, %"struct.std::pair<const llvm::TargetRegisterClass*,llvm::MachineOperand*>"* }
+	%"struct.std::_Vector_base<std::pair<unsigned int, llvm::MachineBasicBlock*>,std::allocator<std::pair<unsigned int, llvm::MachineBasicBlock*> > >" = type { %"struct.std::_Vector_base<std::pair<unsigned int, llvm::MachineBasicBlock*>,std::allocator<std::pair<unsigned int, llvm::MachineBasicBlock*> > >::_Vector_impl" }
+	%"struct.std::_Vector_base<std::pair<unsigned int, llvm::MachineBasicBlock*>,std::allocator<std::pair<unsigned int, llvm::MachineBasicBlock*> > >::_Vector_impl" = type { %"struct.std::IdxMBBPair"*, %"struct.std::IdxMBBPair"*, %"struct.std::IdxMBBPair"* }
+	%"struct.std::_Vector_base<std::pair<unsigned int, unsigned int>,std::allocator<std::pair<unsigned int, unsigned int> > >" = type { %"struct.std::_Vector_base<std::pair<unsigned int, unsigned int>,std::allocator<std::pair<unsigned int, unsigned int> > >::_Vector_impl" }
+	%"struct.std::_Vector_base<std::pair<unsigned int, unsigned int>,std::allocator<std::pair<unsigned int, unsigned int> > >::_Vector_impl" = type { %"struct.std::pair<unsigned int,int>"*, %"struct.std::pair<unsigned int,int>"*, %"struct.std::pair<unsigned int,int>"* }
+	%"struct.std::_Vector_base<std::vector<unsigned int, std::allocator<unsigned int> >,std::allocator<std::vector<unsigned int, std::allocator<unsigned int> > > >" = type { %"struct.std::_Vector_base<std::vector<unsigned int, std::allocator<unsigned int> >,std::allocator<std::vector<unsigned int, std::allocator<unsigned int> > > >::_Vector_impl" }
+	%"struct.std::_Vector_base<std::vector<unsigned int, std::allocator<unsigned int> >,std::allocator<std::vector<unsigned int, std::allocator<unsigned int> > > >::_Vector_impl" = type { %"struct.std::vector<int,std::allocator<int> >"*, %"struct.std::vector<int,std::allocator<int> >"*, %"struct.std::vector<int,std::allocator<int> >"* }
+	%"struct.std::list<llvm::MachineMemOperand,std::allocator<llvm::MachineMemOperand> >" = type { %"struct.std::_List_base<llvm::MachineMemOperand,std::allocator<llvm::MachineMemOperand> >" }
+	%"struct.std::pair<const llvm::PassInfo*,llvm::Pass*>" = type { %"struct.llvm::PassInfo"*, %"struct.llvm::Pass"* }
+	%"struct.std::pair<const llvm::TargetRegisterClass*,llvm::MachineOperand*>" = type { %"struct.llvm::TargetRegisterClass"*, %"struct.llvm::MachineOperand"* }
+	%"struct.std::pair<llvm::DebugLocTuple,unsigned int>" = type { %"struct.llvm::DebugLocTuple", i32 }
+	%"struct.std::pair<llvm::MachineInstr*,unsigned int>" = type { %"struct.llvm::MachineInstr"*, i32 }
+	%"struct.std::pair<unsigned int,char>" = type { i32, i8 }
+	%"struct.std::pair<unsigned int,int>" = type { i32, i32 }
+	%"struct.std::pair<unsigned int,llvm::LiveInterval*>" = type { i32, %"struct.llvm::LiveInterval"* }
+	%"struct.std::string" = type { %"struct.llvm::BumpPtrAllocator" }
+	%"struct.std::vector<const llvm::PassInfo*,std::allocator<const llvm::PassInfo*> >" = type { %"struct.std::_Vector_base<const llvm::PassInfo*,std::allocator<const llvm::PassInfo*> >" }
+	%"struct.std::vector<int,std::allocator<int> >" = type { %"struct.std::_Vector_base<int,std::allocator<int> >" }
+	%"struct.std::vector<llvm::AbstractTypeUser*,std::allocator<llvm::AbstractTypeUser*> >" = type { %"struct.std::_Vector_base<llvm::AbstractTypeUser*,std::allocator<llvm::AbstractTypeUser*> >" }
+	%"struct.std::vector<llvm::CalleeSavedInfo,std::allocator<llvm::CalleeSavedInfo> >" = type { %"struct.std::_Vector_base<llvm::CalleeSavedInfo,std::allocator<llvm::CalleeSavedInfo> >" }
+	%"struct.std::vector<llvm::DebugLocTuple,std::allocator<llvm::DebugLocTuple> >" = type { %"struct.std::_Vector_base<llvm::DebugLocTuple,std::allocator<llvm::DebugLocTuple> >" }
+	%"struct.std::vector<llvm::MachineBasicBlock*,std::allocator<llvm::MachineBasicBlock*> >" = type { %"struct.std::_Vector_base<llvm::MachineBasicBlock*,std::allocator<llvm::MachineBasicBlock*> >" }
+	%"struct.std::vector<llvm::MachineFrameInfo::StackObject,std::allocator<llvm::MachineFrameInfo::StackObject> >" = type { %"struct.std::_Vector_base<llvm::MachineFrameInfo::StackObject,std::allocator<llvm::MachineFrameInfo::StackObject> >" }
+	%"struct.std::vector<llvm::MachineInstr*,std::allocator<llvm::MachineInstr*> >" = type { %"struct.std::_Vector_base<llvm::MachineInstr*,std::allocator<llvm::MachineInstr*> >" }
+	%"struct.std::vector<llvm::MachineOperand,std::allocator<llvm::MachineOperand> >" = type { %"struct.std::_Vector_base<llvm::MachineOperand,std::allocator<llvm::MachineOperand> >" }
+	%"struct.std::vector<std::pair<const llvm::PassInfo*, llvm::Pass*>,std::allocator<std::pair<const llvm::PassInfo*, llvm::Pass*> > >" = type { %"struct.std::_Vector_base<std::pair<const llvm::PassInfo*, llvm::Pass*>,std::allocator<std::pair<const llvm::PassInfo*, llvm::Pass*> > >" }
+	%"struct.std::vector<std::pair<const llvm::TargetRegisterClass*, llvm::MachineOperand*>,std::allocator<std::pair<const llvm::TargetRegisterClass*, llvm::MachineOperand*> > >" = type { %"struct.std::_Vector_base<std::pair<const llvm::TargetRegisterClass*, llvm::MachineOperand*>,std::allocator<std::pair<const llvm::TargetRegisterClass*, llvm::MachineOperand*> > >" }
+	%"struct.std::vector<std::pair<unsigned int, llvm::MachineBasicBlock*>,std::allocator<std::pair<unsigned int, llvm::MachineBasicBlock*> > >" = type { %"struct.std::_Vector_base<std::pair<unsigned int, llvm::MachineBasicBlock*>,std::allocator<std::pair<unsigned int, llvm::MachineBasicBlock*> > >" }
+	%"struct.std::vector<std::pair<unsigned int, unsigned int>,std::allocator<std::pair<unsigned int, unsigned int> > >" = type { %"struct.std::_Vector_base<std::pair<unsigned int, unsigned int>,std::allocator<std::pair<unsigned int, unsigned int> > >" }
+	%"struct.std::vector<std::vector<unsigned int, std::allocator<unsigned int> >,std::allocator<std::vector<unsigned int, std::allocator<unsigned int> > > >" = type { %"struct.std::_Vector_base<std::vector<unsigned int, std::allocator<unsigned int> >,std::allocator<std::vector<unsigned int, std::allocator<unsigned int> > > >" }
+@_ZZNK4llvm8DenseMapIPNS_12MachineInstrEjNS_12DenseMapInfoIS2_EENS3_IjEEE15LookupBucketForERKS2_RPSt4pairIS2_jEE8__func__ = external constant [16 x i8]		; <[16 x i8]*> [#uses=1]
+@"\01LC6" = external constant [56 x i8]		; <[56 x i8]*> [#uses=1]
+@"\01LC7" = external constant [134 x i8]		; <[134 x i8]*> [#uses=1]
+@"\01LC8" = external constant [72 x i8]		; <[72 x i8]*> [#uses=1]
+@_ZZN4llvm13LiveIntervals24InsertMachineInstrInMapsEPNS_12MachineInstrEjE8__func__ = external constant [25 x i8]		; <[25 x i8]*> [#uses=1]
+@"\01LC51" = external constant [42 x i8]		; <[42 x i8]*> [#uses=1]
+
+define void @_ZN4llvm13LiveIntervals24InsertMachineInstrInMapsEPNS_12MachineInstrEj(%"struct.llvm::LiveIntervals"* nocapture %this, %"struct.llvm::MachineInstr"* %MI, i32 %Index) nounwind ssp {
+entry:
+	%0 = call i64 @_ZN4llvm8DenseMapIPNS_12MachineInstrEjNS_12DenseMapInfoIS2_EENS3_IjEEE4findERKS2_(%"struct.llvm::DenseMap<llvm::MachineInstr*,unsigned int,llvm::DenseMapInfo<llvm::MachineInstr*>,llvm::DenseMapInfo<unsigned int> >"* null, %"struct.llvm::MachineInstr"** null) nounwind ssp		; <i64> [#uses=1]
+	%1 = trunc i64 %0 to i32		; <i32> [#uses=1]
+	%tmp11 = inttoptr i32 %1 to %"struct.std::pair<llvm::MachineInstr*,unsigned int>"*		; <%"struct.std::pair<llvm::MachineInstr*,unsigned int>"*> [#uses=1]
+	%2 = load %"struct.std::pair<llvm::MachineInstr*,unsigned int>"** null, align 4		; <%"struct.std::pair<llvm::MachineInstr*,unsigned int>"*> [#uses=3]
+	%3 = getelementptr %"struct.llvm::LiveIntervals"* %this, i32 0, i32 12, i32 0		; <i32*> [#uses=1]
+	%4 = load i32* %3, align 4		; <i32> [#uses=2]
+	%5 = getelementptr %"struct.std::pair<llvm::MachineInstr*,unsigned int>"* %2, i32 %4		; <%"struct.std::pair<llvm::MachineInstr*,unsigned int>"*> [#uses=1]
+	br label %bb1.i.i.i
+
+bb.i.i.i:		; preds = %bb2.i.i.i
+	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=1]
+	br label %bb1.i.i.i
+
+bb1.i.i.i:		; preds = %bb.i.i.i, %entry
+	%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb.i.i.i ]		; <i32> [#uses=2]
+	%tmp32 = shl i32 %indvar, 3		; <i32> [#uses=1]
+	%ctg2.sum = add i32 0, %tmp32		; <i32> [#uses=1]
+	%ctg237 = getelementptr i8* null, i32 %ctg2.sum		; <i8*> [#uses=1]
+	%.0.0.i = bitcast i8* %ctg237 to %"struct.std::pair<llvm::MachineInstr*,unsigned int>"*		; <%"struct.std::pair<llvm::MachineInstr*,unsigned int>"*> [#uses=2]
+	%6 = icmp eq %"struct.std::pair<llvm::MachineInstr*,unsigned int>"* %.0.0.i, %5		; <i1> [#uses=1]
+	br i1 %6, label %_ZN4llvm8DenseMapIPNS_12MachineInstrEjNS_12DenseMapInfoIS2_EENS3_IjEEE3endEv.exit, label %bb2.i.i.i
+
+bb2.i.i.i:		; preds = %bb1.i.i.i
+	%7 = load %"struct.llvm::MachineInstr"** null, align 4		; <%"struct.llvm::MachineInstr"*> [#uses=1]
+	%8 = icmp eq %"struct.llvm::MachineInstr"* %7, inttoptr (i32 -8 to %"struct.llvm::MachineInstr"*)		; <i1> [#uses=1]
+	%or.cond.i.i.i21 = or i1 false, %8		; <i1> [#uses=1]
+	br i1 %or.cond.i.i.i21, label %bb.i.i.i, label %_ZN4llvm8DenseMapIPNS_12MachineInstrEjNS_12DenseMapInfoIS2_EENS3_IjEEE3endEv.exit
+
+_ZN4llvm8DenseMapIPNS_12MachineInstrEjNS_12DenseMapInfoIS2_EENS3_IjEEE3endEv.exit:		; preds = %bb2.i.i.i, %bb1.i.i.i
+	%9 = icmp eq %"struct.std::pair<llvm::MachineInstr*,unsigned int>"* %tmp11, %.0.0.i		; <i1> [#uses=1]
+	br i1 %9, label %bb7, label %bb6
+
+bb6:		; preds = %_ZN4llvm8DenseMapIPNS_12MachineInstrEjNS_12DenseMapInfoIS2_EENS3_IjEEE3endEv.exit
+	call void @__assert_rtn(i8* getelementptr ([25 x i8]* @_ZZN4llvm13LiveIntervals24InsertMachineInstrInMapsEPNS_12MachineInstrEjE8__func__, i32 0, i32 0), i8* getelementptr ([72 x i8]* @"\01LC8", i32 0, i32 0), i32 251, i8* getelementptr ([42 x i8]* @"\01LC51", i32 0, i32 0)) noreturn nounwind
+	unreachable
+
+bb7:		; preds = %_ZN4llvm8DenseMapIPNS_12MachineInstrEjNS_12DenseMapInfoIS2_EENS3_IjEEE3endEv.exit
+	%10 = load %"struct.llvm::MachineInstr"** null, align 4		; <%"struct.llvm::MachineInstr"*> [#uses=2]
+	%11 = icmp eq %"struct.llvm::MachineInstr"* %10, inttoptr (i32 -8 to %"struct.llvm::MachineInstr"*)		; <i1> [#uses=1]
+	%or.cond40.i.i.i = or i1 false, %11		; <i1> [#uses=1]
+	br i1 %or.cond40.i.i.i, label %bb5.i.i.i, label %bb6.preheader.i.i.i
+
+bb6.preheader.i.i.i:		; preds = %bb7
+	%12 = add i32 %4, -1		; <i32> [#uses=1]
+	br label %bb6.i.i.i
+
+bb5.i.i.i:		; preds = %bb7
+	call void @__assert_rtn(i8* getelementptr ([16 x i8]* @_ZZNK4llvm8DenseMapIPNS_12MachineInstrEjNS_12DenseMapInfoIS2_EENS3_IjEEE15LookupBucketForERKS2_RPSt4pairIS2_jEE8__func__, i32 0, i32 0), i8* getelementptr ([56 x i8]* @"\01LC6", i32 0, i32 0), i32 390, i8* getelementptr ([134 x i8]* @"\01LC7", i32 0, i32 0)) noreturn nounwind
+	unreachable
+
+bb6.i.i.i:		; preds = %bb17.i.i.i, %bb6.preheader.i.i.i
+	%FoundTombstone.1.i.i.i = phi %"struct.std::pair<llvm::MachineInstr*,unsigned int>"* [ %FoundTombstone.0.i.i.i, %bb17.i.i.i ], [ null, %bb6.preheader.i.i.i ]		; <%"struct.std::pair<llvm::MachineInstr*,unsigned int>"*> [#uses=2]
+	%ProbeAmt.0.i.i.i = phi i32 [ 0, %bb17.i.i.i ], [ 1, %bb6.preheader.i.i.i ]		; <i32> [#uses=1]
+	%BucketNo.0.i.i.i = phi i32 [ %20, %bb17.i.i.i ], [ 0, %bb6.preheader.i.i.i ]		; <i32> [#uses=2]
+	%13 = and i32 %BucketNo.0.i.i.i, %12		; <i32> [#uses=2]
+	%14 = getelementptr %"struct.std::pair<llvm::MachineInstr*,unsigned int>"* %2, i32 %13		; <%"struct.std::pair<llvm::MachineInstr*,unsigned int>"*> [#uses=2]
+	%15 = getelementptr %"struct.std::pair<llvm::MachineInstr*,unsigned int>"* %2, i32 %13, i32 0		; <%"struct.llvm::MachineInstr"**> [#uses=1]
+	%16 = load %"struct.llvm::MachineInstr"** %15, align 4		; <%"struct.llvm::MachineInstr"*> [#uses=2]
+	%17 = icmp eq %"struct.llvm::MachineInstr"* %16, %10		; <i1> [#uses=1]
+	br i1 %17, label %_ZN4llvm8DenseMapIPNS_12MachineInstrEjNS_12DenseMapInfoIS2_EENS3_IjEEEixERKS2_.exit, label %bb17.i.i.i
+
+bb17.i.i.i:		; preds = %bb6.i.i.i
+	%18 = icmp eq %"struct.llvm::MachineInstr"* %16, inttoptr (i32 -8 to %"struct.llvm::MachineInstr"*)		; <i1> [#uses=1]
+	%19 = icmp eq %"struct.std::pair<llvm::MachineInstr*,unsigned int>"* %FoundTombstone.1.i.i.i, null		; <i1> [#uses=1]
+	%or.cond.i.i.i = and i1 %18, %19		; <i1> [#uses=1]
+	%FoundTombstone.0.i.i.i = select i1 %or.cond.i.i.i, %"struct.std::pair<llvm::MachineInstr*,unsigned int>"* %14, %"struct.std::pair<llvm::MachineInstr*,unsigned int>"* %FoundTombstone.1.i.i.i		; <%"struct.std::pair<llvm::MachineInstr*,unsigned int>"*> [#uses=1]
+	%20 = add i32 %BucketNo.0.i.i.i, %ProbeAmt.0.i.i.i		; <i32> [#uses=1]
+	br label %bb6.i.i.i
+
+_ZN4llvm8DenseMapIPNS_12MachineInstrEjNS_12DenseMapInfoIS2_EENS3_IjEEEixERKS2_.exit:		; preds = %bb6.i.i.i
+	%21 = getelementptr %"struct.std::pair<llvm::MachineInstr*,unsigned int>"* %14, i32 0, i32 1		; <i32*> [#uses=1]
+	store i32 %Index, i32* %21, align 4
+	ret void
+}
+
+declare void @__assert_rtn(i8*, i8*, i32, i8*) noreturn
+
+declare i64 @_ZN4llvm8DenseMapIPNS_12MachineInstrEjNS_12DenseMapInfoIS2_EENS3_IjEEE4findERKS2_(%"struct.llvm::DenseMap<llvm::MachineInstr*,unsigned int,llvm::DenseMapInfo<llvm::MachineInstr*>,llvm::DenseMapInfo<unsigned int> >"* nocapture, %"struct.llvm::MachineInstr"** nocapture) nounwind ssp
diff --git a/test/CodeGen/X86/stack-color-with-reg.ll b/test/CodeGen/X86/stack-color-with-reg.ll
new file mode 100644
index 0000000..7d85818
--- /dev/null
+++ b/test/CodeGen/X86/stack-color-with-reg.ll
@@ -0,0 +1,360 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
+; RUN:   grep stackcoloring %t | grep "stack slot refs replaced with reg refs"  | grep 14
+
+	type { [62 x %struct.Bitvec*] }		; type %0
+	type { i8* }		; type %1
+	type { double }		; type %2
+	%struct..5sPragmaType = type { i8*, i32 }
+	%struct.AggInfo = type { i8, i8, i32, %struct.ExprList*, i32, %struct.AggInfo_col*, i32, i32, i32, %struct.AggInfo_func*, i32, i32 }
+	%struct.AggInfo_col = type { %struct.Table*, i32, i32, i32, i32, %struct.Expr* }
+	%struct.AggInfo_func = type { %struct.Expr*, %struct.FuncDef*, i32, i32 }
+	%struct.AuxData = type { i8*, void (i8*)* }
+	%struct.Bitvec = type { i32, i32, i32, %0 }
+	%struct.BtCursor = type { %struct.Btree*, %struct.BtShared*, %struct.BtCursor*, %struct.BtCursor*, i32 (i8*, i32, i8*, i32, i8*)*, i8*, i32, %struct.MemPage*, i32, %struct.CellInfo, i8, i8, i8*, i64, i32, i8, i32* }
+	%struct.BtLock = type { %struct.Btree*, i32, i8, %struct.BtLock* }
+	%struct.BtShared = type { %struct.Pager*, %struct.sqlite3*, %struct.BtCursor*, %struct.MemPage*, i8, i8, i8, i8, i8, i8, i8, i8, i32, i16, i16, i32, i32, i32, i32, i8, i32, i8*, void (i8*)*, %struct.sqlite3_mutex*, %struct.BusyHandler, i32, %struct.BtShared*, %struct.BtLock*, %struct.Btree* }
+	%struct.Btree = type { %struct.sqlite3*, %struct.BtShared*, i8, i8, i8, i32, %struct.Btree*, %struct.Btree* }
+	%struct.BtreeMutexArray = type { i32, [11 x %struct.Btree*] }
+	%struct.BusyHandler = type { i32 (i8*, i32)*, i8*, i32 }
+	%struct.CellInfo = type { i8*, i64, i32, i32, i16, i16, i16, i16 }
+	%struct.CollSeq = type { i8*, i8, i8, i8*, i32 (i8*, i32, i8*, i32, i8*)*, void (i8*)* }
+	%struct.Column = type { i8*, %struct.Expr*, i8*, i8*, i8, i8, i8, i8 }
+	%struct.Context = type { i64, i32, %struct.Fifo }
+	%struct.CountCtx = type { i64 }
+	%struct.Cursor = type { %struct.BtCursor*, i32, i64, i64, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i64, %struct.Btree*, i32, i8*, i64, i8*, %struct.KeyInfo*, i32, i64, %struct.sqlite3_vtab_cursor*, %struct.sqlite3_module*, i32, i32, i32*, i32*, i8* }
+	%struct.Db = type { i8*, %struct.Btree*, i8, i8, i8*, void (i8*)*, %struct.Schema* }
+	%struct.DbPage = type { %struct.Pager*, i32, %struct.DbPage*, %struct.DbPage*, %struct.PagerLruLink, %struct.DbPage*, i8, i8, i8, i8, i8, i16, %struct.DbPage*, %struct.DbPage*, i8* }
+	%struct.Expr = type { i8, i8, i16, %struct.CollSeq*, %struct.Expr*, %struct.Expr*, %struct.ExprList*, %struct..5sPragmaType, %struct..5sPragmaType, i32, i32, %struct.AggInfo*, i32, i32, %struct.Select*, %struct.Table*, i32 }
+	%struct.ExprList = type { i32, i32, i32, %struct.ExprList_item* }
+	%struct.ExprList_item = type { %struct.Expr*, i8*, i8, i8, i8 }
+	%struct.FKey = type { %struct.Table*, %struct.FKey*, i8*, %struct.FKey*, i32, %struct.sColMap*, i8, i8, i8, i8 }
+	%struct.Fifo = type { i32, %struct.FifoPage*, %struct.FifoPage* }
+	%struct.FifoPage = type { i32, i32, i32, %struct.FifoPage*, [1 x i64] }
+	%struct.FuncDef = type { i16, i8, i8, i8, i8*, %struct.FuncDef*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*)*, [1 x i8] }
+	%struct.Hash = type { i8, i8, i32, i32, %struct.HashElem*, %struct._ht* }
+	%struct.HashElem = type { %struct.HashElem*, %struct.HashElem*, i8*, i8*, i32 }
+	%struct.IdList = type { %struct..5sPragmaType*, i32, i32 }
+	%struct.Index = type { i8*, i32, i32*, i32*, %struct.Table*, i32, i8, i8, i8*, %struct.Index*, %struct.Schema*, i8*, i8** }
+	%struct.KeyInfo = type { %struct.sqlite3*, i8, i8, i8, i32, i8*, [1 x %struct.CollSeq*] }
+	%struct.Mem = type { %struct.CountCtx, double, %struct.sqlite3*, i8*, i32, i16, i8, i8, void (i8*)* }
+	%struct.MemPage = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i16, i16, i16, i16, i16, i16, [5 x %struct._OvflCell], %struct.BtShared*, i8*, %struct.DbPage*, i32, %struct.MemPage* }
+	%struct.Module = type { %struct.sqlite3_module*, i8*, i8*, void (i8*)* }
+	%struct.Op = type { i8, i8, i8, i8, i32, i32, i32, %1 }
+	%struct.Pager = type { %struct.sqlite3_vfs*, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.Bitvec*, %struct.Bitvec*, i8*, i8*, i8*, i8*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.BusyHandler*, %struct.PagerLruList, %struct.DbPage*, %struct.DbPage*, %struct.DbPage*, i64, i64, i64, i64, i64, i32, void (%struct.DbPage*, i32)*, void (%struct.DbPage*, i32)*, i32, %struct.DbPage**, i8*, [16 x i8] }
+	%struct.PagerLruLink = type { %struct.DbPage*, %struct.DbPage* }
+	%struct.PagerLruList = type { %struct.DbPage*, %struct.DbPage*, %struct.DbPage* }
+	%struct.Schema = type { i32, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Table*, i8, i8, i16, i32, %struct.sqlite3* }
+	%struct.Select = type { %struct.ExprList*, i8, i8, i8, i8, i8, i8, i8, %struct.SrcList*, %struct.Expr*, %struct.ExprList*, %struct.Expr*, %struct.ExprList*, %struct.Select*, %struct.Select*, %struct.Select*, %struct.Expr*, %struct.Expr*, i32, i32, [3 x i32] }
+	%struct.SrcList = type { i16, i16, [1 x %struct.SrcList_item] }
+	%struct.SrcList_item = type { i8*, i8*, i8*, %struct.Table*, %struct.Select*, i8, i8, i32, %struct.Expr*, %struct.IdList*, i64 }
+	%struct.Table = type { i8*, i32, %struct.Column*, i32, %struct.Index*, i32, %struct.Select*, i32, %struct.Trigger*, %struct.FKey*, i8*, %struct.Expr*, i32, i8, i8, i8, i8, i8, i8, i8, %struct.Module*, %struct.sqlite3_vtab*, i32, i8**, %struct.Schema* }
+	%struct.Trigger = type { i8*, i8*, i8, i8, %struct.Expr*, %struct.IdList*, %struct..5sPragmaType, %struct.Schema*, %struct.Schema*, %struct.TriggerStep*, %struct.Trigger* }
+	%struct.TriggerStep = type { i32, i32, %struct.Trigger*, %struct.Select*, %struct..5sPragmaType, %struct.Expr*, %struct.ExprList*, %struct.IdList*, %struct.TriggerStep*, %struct.TriggerStep* }
+	%struct.Vdbe = type { %struct.sqlite3*, %struct.Vdbe*, %struct.Vdbe*, i32, i32, %struct.Op*, i32, i32, i32*, %struct.Mem**, %struct.Mem*, i32, %struct.Cursor**, i32, %struct.Mem*, i8**, i32, i32, i32, %struct.Mem*, i32, i32, %struct.Fifo, i32, i32, %struct.Context*, i32, i32, i32, i32, i32, [25 x i32], i32, i32, i8**, i8*, %struct.Mem*, i8, i8, i8, i8, i8, i8, i32, i64, i32, %struct.BtreeMutexArray, i32, i8*, i32 }
+	%struct.VdbeFunc = type { %struct.FuncDef*, i32, [1 x %struct.AuxData] }
+	%struct._OvflCell = type { i8*, i16 }
+	%struct._ht = type { i32, %struct.HashElem* }
+	%struct.sColMap = type { i32, i8* }
+	%struct.sqlite3 = type { %struct.sqlite3_vfs*, i32, %struct.Db*, i32, i32, i32, i32, i8, i8, i8, i8, i32, %struct.CollSeq*, i64, i64, i32, i32, i32, %struct.sqlite3_mutex*, %struct.sqlite3InitInfo, i32, i8**, %struct.Vdbe*, i32, void (i8*, i8*)*, i8*, void (i8*, i8*, i64)*, i8*, i8*, i32 (i8*)*, i8*, void (i8*)*, i8*, void (i8*, i32, i8*, i8*, i64)*, void (i8*, %struct.sqlite3*, i32, i8*)*, void (i8*, %struct.sqlite3*, i32, i8*)*, i8*, %struct.Mem*, i8*, i8*, %2, i32 (i8*, i32, i8*, i8*, i8*, i8*)*, i8*, i32 (i8*)*, i8*, i32, %struct.Hash, %struct.Table*, %struct.sqlite3_vtab**, i32, %struct.Hash, %struct.Hash, %struct.BusyHandler, i32, [2 x %struct.Db], i8 }
+	%struct.sqlite3InitInfo = type { i32, i32, i8 }
+	%struct.sqlite3_context = type { %struct.FuncDef*, %struct.VdbeFunc*, %struct.Mem, %struct.Mem*, i32, %struct.CollSeq* }
+	%struct.sqlite3_file = type { %struct.sqlite3_io_methods* }
+	%struct.sqlite3_index_constraint = type { i32, i8, i8, i32 }
+	%struct.sqlite3_index_constraint_usage = type { i32, i8 }
+	%struct.sqlite3_index_info = type { i32, %struct.sqlite3_index_constraint*, i32, %struct.sqlite3_index_constraint_usage*, %struct.sqlite3_index_constraint_usage*, i32, i8*, i32, i32, double }
+	%struct.sqlite3_io_methods = type { i32, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i64)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i64*)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i32, i8*)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*)* }
+	%struct.sqlite3_module = type { i32, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_index_info*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_vtab_cursor**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, i32, i8*, i32, %struct.Mem**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, %struct.sqlite3_context*, i32)*, i32 (%struct.sqlite3_vtab_cursor*, i64*)*, i32 (%struct.sqlite3_vtab*, i32, %struct.Mem**, i64*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, i32, i8*, void (%struct.sqlite3_context*, i32, %struct.Mem**)**, i8**)*, i32 (%struct.sqlite3_vtab*, i8*)* }
+	%struct.sqlite3_mutex = type opaque
+	%struct.sqlite3_vfs = type { i32, i32, i32, %struct.sqlite3_vfs*, i8*, i8*, i32 (%struct.sqlite3_vfs*, i8*, %struct.sqlite3_file*, i32, i32*)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i8*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*)*, void (%struct.sqlite3_vfs*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*, i8*)*, void (%struct.sqlite3_vfs*, i8*)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i32)*, i32 (%struct.sqlite3_vfs*, double*)* }
+	%struct.sqlite3_vtab = type { %struct.sqlite3_module*, i32, i8* }
+	%struct.sqlite3_vtab_cursor = type { %struct.sqlite3_vtab* }
+@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%struct.MemPage*, i32, i32)* @dropCell to i8*)], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define fastcc void @dropCell(%struct.MemPage* nocapture %pPage, i32 %idx, i32 %sz) nounwind ssp {
+entry:
+	%0 = getelementptr %struct.MemPage* %pPage, i64 0, i32 18		; <i8**> [#uses=1]
+	%1 = load i8** %0, align 8		; <i8*> [#uses=34]
+	%2 = getelementptr %struct.MemPage* %pPage, i64 0, i32 12		; <i16*> [#uses=1]
+	%3 = load i16* %2, align 2		; <i16> [#uses=1]
+	%4 = zext i16 %3 to i32		; <i32> [#uses=2]
+	%5 = shl i32 %idx, 1		; <i32> [#uses=2]
+	%6 = add i32 %4, %5		; <i32> [#uses=1]
+	%7 = sext i32 %6 to i64		; <i64> [#uses=2]
+	%8 = getelementptr i8* %1, i64 %7		; <i8*> [#uses=1]
+	%9 = load i8* %8, align 1		; <i8> [#uses=2]
+	%10 = zext i8 %9 to i32		; <i32> [#uses=1]
+	%11 = shl i32 %10, 8		; <i32> [#uses=1]
+	%.sum3 = add i64 %7, 1		; <i64> [#uses=1]
+	%12 = getelementptr i8* %1, i64 %.sum3		; <i8*> [#uses=1]
+	%13 = load i8* %12, align 1		; <i8> [#uses=2]
+	%14 = zext i8 %13 to i32		; <i32> [#uses=1]
+	%15 = or i32 %11, %14		; <i32> [#uses=3]
+	%16 = icmp slt i32 %sz, 4		; <i1> [#uses=1]
+	%size_addr.0.i = select i1 %16, i32 4, i32 %sz		; <i32> [#uses=3]
+	%17 = getelementptr %struct.MemPage* %pPage, i64 0, i32 8		; <i8*> [#uses=5]
+	%18 = load i8* %17, align 8		; <i8> [#uses=1]
+	%19 = zext i8 %18 to i32		; <i32> [#uses=4]
+	%20 = add i32 %19, 1		; <i32> [#uses=2]
+	br label %bb3.i
+
+bb3.i:		; preds = %bb3.i, %entry
+	%addr.0.i = phi i32 [ %20, %entry ], [ %29, %bb3.i ]		; <i32> [#uses=1]
+	%21 = sext i32 %addr.0.i to i64		; <i64> [#uses=2]
+	%22 = getelementptr i8* %1, i64 %21		; <i8*> [#uses=2]
+	%23 = load i8* %22, align 1		; <i8> [#uses=2]
+	%24 = zext i8 %23 to i32		; <i32> [#uses=1]
+	%25 = shl i32 %24, 8		; <i32> [#uses=1]
+	%.sum34.i = add i64 %21, 1		; <i64> [#uses=1]
+	%26 = getelementptr i8* %1, i64 %.sum34.i		; <i8*> [#uses=2]
+	%27 = load i8* %26, align 1		; <i8> [#uses=2]
+	%28 = zext i8 %27 to i32		; <i32> [#uses=1]
+	%29 = or i32 %25, %28		; <i32> [#uses=3]
+	%.not.i = icmp uge i32 %29, %15		; <i1> [#uses=1]
+	%30 = icmp eq i32 %29, 0		; <i1> [#uses=1]
+	%or.cond.i = or i1 %30, %.not.i		; <i1> [#uses=1]
+	br i1 %or.cond.i, label %bb5.i, label %bb3.i
+
+bb5.i:		; preds = %bb3.i
+	store i8 %9, i8* %22, align 1
+	store i8 %13, i8* %26, align 1
+	%31 = zext i32 %15 to i64		; <i64> [#uses=2]
+	%32 = getelementptr i8* %1, i64 %31		; <i8*> [#uses=1]
+	store i8 %23, i8* %32, align 1
+	%.sum32.i = add i64 %31, 1		; <i64> [#uses=1]
+	%33 = getelementptr i8* %1, i64 %.sum32.i		; <i8*> [#uses=1]
+	store i8 %27, i8* %33, align 1
+	%34 = add i32 %15, 2		; <i32> [#uses=1]
+	%35 = zext i32 %34 to i64		; <i64> [#uses=2]
+	%36 = getelementptr i8* %1, i64 %35		; <i8*> [#uses=1]
+	%37 = lshr i32 %size_addr.0.i, 8		; <i32> [#uses=1]
+	%38 = trunc i32 %37 to i8		; <i8> [#uses=1]
+	store i8 %38, i8* %36, align 1
+	%39 = trunc i32 %size_addr.0.i to i8		; <i8> [#uses=1]
+	%.sum31.i = add i64 %35, 1		; <i64> [#uses=1]
+	%40 = getelementptr i8* %1, i64 %.sum31.i		; <i8*> [#uses=1]
+	store i8 %39, i8* %40, align 1
+	%41 = getelementptr %struct.MemPage* %pPage, i64 0, i32 14		; <i16*> [#uses=4]
+	%42 = load i16* %41, align 2		; <i16> [#uses=1]
+	%43 = trunc i32 %size_addr.0.i to i16		; <i16> [#uses=1]
+	%44 = add i16 %42, %43		; <i16> [#uses=1]
+	store i16 %44, i16* %41, align 2
+	%45 = load i8* %17, align 8		; <i8> [#uses=1]
+	%46 = zext i8 %45 to i32		; <i32> [#uses=1]
+	%47 = add i32 %46, 1		; <i32> [#uses=1]
+	br label %bb11.outer.i
+
+bb11.outer.i:		; preds = %bb6.i, %bb5.i
+	%addr.1.ph.i = phi i32 [ %47, %bb5.i ], [ %111, %bb6.i ]		; <i32> [#uses=1]
+	%48 = sext i32 %addr.1.ph.i to i64		; <i64> [#uses=2]
+	%49 = getelementptr i8* %1, i64 %48		; <i8*> [#uses=1]
+	%.sum30.i = add i64 %48, 1		; <i64> [#uses=1]
+	%50 = getelementptr i8* %1, i64 %.sum30.i		; <i8*> [#uses=1]
+	br label %bb11.i
+
+bb6.i:		; preds = %bb11.i
+	%51 = zext i32 %111 to i64		; <i64> [#uses=2]
+	%52 = getelementptr i8* %1, i64 %51		; <i8*> [#uses=2]
+	%53 = load i8* %52, align 1		; <i8> [#uses=1]
+	%54 = zext i8 %53 to i32		; <i32> [#uses=1]
+	%55 = shl i32 %54, 8		; <i32> [#uses=1]
+	%.sum24.i = add i64 %51, 1		; <i64> [#uses=1]
+	%56 = getelementptr i8* %1, i64 %.sum24.i		; <i8*> [#uses=2]
+	%57 = load i8* %56, align 1		; <i8> [#uses=3]
+	%58 = zext i8 %57 to i32		; <i32> [#uses=1]
+	%59 = or i32 %55, %58		; <i32> [#uses=5]
+	%60 = add i32 %111, 2		; <i32> [#uses=1]
+	%61 = zext i32 %60 to i64		; <i64> [#uses=2]
+	%62 = getelementptr i8* %1, i64 %61		; <i8*> [#uses=2]
+	%63 = load i8* %62, align 1		; <i8> [#uses=1]
+	%64 = zext i8 %63 to i32		; <i32> [#uses=1]
+	%65 = shl i32 %64, 8		; <i32> [#uses=1]
+	%.sum23.i = add i64 %61, 1		; <i64> [#uses=1]
+	%66 = getelementptr i8* %1, i64 %.sum23.i		; <i8*> [#uses=2]
+	%67 = load i8* %66, align 1		; <i8> [#uses=2]
+	%68 = zext i8 %67 to i32		; <i32> [#uses=1]
+	%69 = or i32 %65, %68		; <i32> [#uses=1]
+	%70 = add i32 %111, 3		; <i32> [#uses=1]
+	%71 = add i32 %70, %69		; <i32> [#uses=1]
+	%72 = icmp sge i32 %71, %59		; <i1> [#uses=1]
+	%73 = icmp ne i32 %59, 0		; <i1> [#uses=1]
+	%74 = and i1 %72, %73		; <i1> [#uses=1]
+	br i1 %74, label %bb9.i, label %bb11.outer.i
+
+bb9.i:		; preds = %bb6.i
+	%75 = load i8* %17, align 8		; <i8> [#uses=1]
+	%76 = zext i8 %75 to i32		; <i32> [#uses=1]
+	%77 = add i32 %76, 7		; <i32> [#uses=1]
+	%78 = zext i32 %77 to i64		; <i64> [#uses=1]
+	%79 = getelementptr i8* %1, i64 %78		; <i8*> [#uses=2]
+	%80 = load i8* %79, align 1		; <i8> [#uses=1]
+	%81 = sub i8 %109, %57		; <i8> [#uses=1]
+	%82 = add i8 %81, %67		; <i8> [#uses=1]
+	%83 = add i8 %82, %80		; <i8> [#uses=1]
+	store i8 %83, i8* %79, align 1
+	%84 = zext i32 %59 to i64		; <i64> [#uses=2]
+	%85 = getelementptr i8* %1, i64 %84		; <i8*> [#uses=1]
+	%86 = load i8* %85, align 1		; <i8> [#uses=1]
+	store i8 %86, i8* %52, align 1
+	%.sum22.i = add i64 %84, 1		; <i64> [#uses=1]
+	%87 = getelementptr i8* %1, i64 %.sum22.i		; <i8*> [#uses=1]
+	%88 = load i8* %87, align 1		; <i8> [#uses=1]
+	store i8 %88, i8* %56, align 1
+	%89 = add i32 %59, 2		; <i32> [#uses=1]
+	%90 = zext i32 %89 to i64		; <i64> [#uses=2]
+	%91 = getelementptr i8* %1, i64 %90		; <i8*> [#uses=1]
+	%92 = load i8* %91, align 1		; <i8> [#uses=1]
+	%93 = zext i8 %92 to i32		; <i32> [#uses=1]
+	%94 = shl i32 %93, 8		; <i32> [#uses=1]
+	%.sum20.i = add i64 %90, 1		; <i64> [#uses=1]
+	%95 = getelementptr i8* %1, i64 %.sum20.i		; <i8*> [#uses=2]
+	%96 = load i8* %95, align 1		; <i8> [#uses=1]
+	%97 = zext i8 %96 to i32		; <i32> [#uses=1]
+	%98 = or i32 %94, %97		; <i32> [#uses=1]
+	%99 = sub i32 %59, %111		; <i32> [#uses=1]
+	%100 = add i32 %99, %98		; <i32> [#uses=1]
+	%101 = lshr i32 %100, 8		; <i32> [#uses=1]
+	%102 = trunc i32 %101 to i8		; <i8> [#uses=1]
+	store i8 %102, i8* %62, align 1
+	%103 = load i8* %95, align 1		; <i8> [#uses=1]
+	%104 = sub i8 %57, %109		; <i8> [#uses=1]
+	%105 = add i8 %104, %103		; <i8> [#uses=1]
+	store i8 %105, i8* %66, align 1
+	br label %bb11.i
+
+bb11.i:		; preds = %bb9.i, %bb11.outer.i
+	%106 = load i8* %49, align 1		; <i8> [#uses=1]
+	%107 = zext i8 %106 to i32		; <i32> [#uses=1]
+	%108 = shl i32 %107, 8		; <i32> [#uses=1]
+	%109 = load i8* %50, align 1		; <i8> [#uses=3]
+	%110 = zext i8 %109 to i32		; <i32> [#uses=1]
+	%111 = or i32 %108, %110		; <i32> [#uses=6]
+	%112 = icmp eq i32 %111, 0		; <i1> [#uses=1]
+	br i1 %112, label %bb12.i, label %bb6.i
+
+bb12.i:		; preds = %bb11.i
+	%113 = zext i32 %20 to i64		; <i64> [#uses=2]
+	%114 = getelementptr i8* %1, i64 %113		; <i8*> [#uses=2]
+	%115 = load i8* %114, align 1		; <i8> [#uses=2]
+	%116 = add i32 %19, 5		; <i32> [#uses=1]
+	%117 = zext i32 %116 to i64		; <i64> [#uses=2]
+	%118 = getelementptr i8* %1, i64 %117		; <i8*> [#uses=3]
+	%119 = load i8* %118, align 1		; <i8> [#uses=1]
+	%120 = icmp eq i8 %115, %119		; <i1> [#uses=1]
+	br i1 %120, label %bb13.i, label %bb1.preheader
+
+bb13.i:		; preds = %bb12.i
+	%121 = add i32 %19, 2		; <i32> [#uses=1]
+	%122 = zext i32 %121 to i64		; <i64> [#uses=1]
+	%123 = getelementptr i8* %1, i64 %122		; <i8*> [#uses=1]
+	%124 = load i8* %123, align 1		; <i8> [#uses=1]
+	%125 = add i32 %19, 6		; <i32> [#uses=1]
+	%126 = zext i32 %125 to i64		; <i64> [#uses=1]
+	%127 = getelementptr i8* %1, i64 %126		; <i8*> [#uses=1]
+	%128 = load i8* %127, align 1		; <i8> [#uses=1]
+	%129 = icmp eq i8 %124, %128		; <i1> [#uses=1]
+	br i1 %129, label %bb14.i, label %bb1.preheader
+
+bb14.i:		; preds = %bb13.i
+	%130 = zext i8 %115 to i32		; <i32> [#uses=1]
+	%131 = shl i32 %130, 8		; <i32> [#uses=1]
+	%.sum29.i = add i64 %113, 1		; <i64> [#uses=1]
+	%132 = getelementptr i8* %1, i64 %.sum29.i		; <i8*> [#uses=1]
+	%133 = load i8* %132, align 1		; <i8> [#uses=1]
+	%134 = zext i8 %133 to i32		; <i32> [#uses=1]
+	%135 = or i32 %134, %131		; <i32> [#uses=2]
+	%136 = zext i32 %135 to i64		; <i64> [#uses=1]
+	%137 = getelementptr i8* %1, i64 %136		; <i8*> [#uses=1]
+	%138 = bitcast i8* %137 to i16*		; <i16*> [#uses=1]
+	%139 = bitcast i8* %114 to i16*		; <i16*> [#uses=1]
+	%tmp.i = load i16* %138, align 1		; <i16> [#uses=1]
+	store i16 %tmp.i, i16* %139, align 1
+	%140 = load i8* %118, align 1		; <i8> [#uses=1]
+	%141 = zext i8 %140 to i32		; <i32> [#uses=1]
+	%142 = shl i32 %141, 8		; <i32> [#uses=1]
+	%.sum28.i = add i64 %117, 1		; <i64> [#uses=1]
+	%143 = getelementptr i8* %1, i64 %.sum28.i		; <i8*> [#uses=2]
+	%144 = load i8* %143, align 1		; <i8> [#uses=2]
+	%145 = zext i8 %144 to i32		; <i32> [#uses=1]
+	%146 = or i32 %142, %145		; <i32> [#uses=1]
+	%147 = add i32 %135, 2		; <i32> [#uses=1]
+	%148 = zext i32 %147 to i64		; <i64> [#uses=2]
+	%149 = getelementptr i8* %1, i64 %148		; <i8*> [#uses=1]
+	%150 = load i8* %149, align 1		; <i8> [#uses=1]
+	%151 = zext i8 %150 to i32		; <i32> [#uses=1]
+	%152 = shl i32 %151, 8		; <i32> [#uses=1]
+	%.sum27.i = add i64 %148, 1		; <i64> [#uses=1]
+	%153 = getelementptr i8* %1, i64 %.sum27.i		; <i8*> [#uses=2]
+	%154 = load i8* %153, align 1		; <i8> [#uses=1]
+	%155 = zext i8 %154 to i32		; <i32> [#uses=1]
+	%156 = or i32 %152, %155		; <i32> [#uses=1]
+	%157 = add i32 %156, %146		; <i32> [#uses=1]
+	%158 = lshr i32 %157, 8		; <i32> [#uses=1]
+	%159 = trunc i32 %158 to i8		; <i8> [#uses=1]
+	store i8 %159, i8* %118, align 1
+	%160 = load i8* %153, align 1		; <i8> [#uses=1]
+	%161 = add i8 %160, %144		; <i8> [#uses=1]
+	store i8 %161, i8* %143, align 1
+	br label %bb1.preheader
+
+bb1.preheader:		; preds = %bb14.i, %bb13.i, %bb12.i
+	%i.08 = add i32 %idx, 1		; <i32> [#uses=2]
+	%162 = getelementptr %struct.MemPage* %pPage, i64 0, i32 15		; <i16*> [#uses=4]
+	%163 = load i16* %162, align 4		; <i16> [#uses=2]
+	%164 = zext i16 %163 to i32		; <i32> [#uses=1]
+	%165 = icmp sgt i32 %164, %i.08		; <i1> [#uses=1]
+	br i1 %165, label %bb, label %bb2
+
+bb:		; preds = %bb, %bb1.preheader
+	%indvar = phi i64 [ 0, %bb1.preheader ], [ %indvar.next, %bb ]		; <i64> [#uses=3]
+	%tmp16 = add i32 %5, %4		; <i32> [#uses=1]
+	%tmp.17 = sext i32 %tmp16 to i64		; <i64> [#uses=1]
+	%tmp19 = shl i64 %indvar, 1		; <i64> [#uses=1]
+	%ctg2.sum = add i64 %tmp.17, %tmp19		; <i64> [#uses=4]
+	%ctg229 = getelementptr i8* %1, i64 %ctg2.sum		; <i8*> [#uses=1]
+	%ctg229.sum31 = add i64 %ctg2.sum, 2		; <i64> [#uses=1]
+	%166 = getelementptr i8* %1, i64 %ctg229.sum31		; <i8*> [#uses=1]
+	%167 = load i8* %166, align 1		; <i8> [#uses=1]
+	store i8 %167, i8* %ctg229
+	%ctg229.sum30 = add i64 %ctg2.sum, 3		; <i64> [#uses=1]
+	%168 = getelementptr i8* %1, i64 %ctg229.sum30		; <i8*> [#uses=1]
+	%169 = load i8* %168, align 1		; <i8> [#uses=1]
+	%ctg229.sum = add i64 %ctg2.sum, 1		; <i64> [#uses=1]
+	%170 = getelementptr i8* %1, i64 %ctg229.sum		; <i8*> [#uses=1]
+	store i8 %169, i8* %170, align 1
+	%indvar15 = trunc i64 %indvar to i32		; <i32> [#uses=1]
+	%i.09 = add i32 %indvar15, %i.08		; <i32> [#uses=1]
+	%i.0 = add i32 %i.09, 1		; <i32> [#uses=1]
+	%171 = load i16* %162, align 4		; <i16> [#uses=2]
+	%172 = zext i16 %171 to i32		; <i32> [#uses=1]
+	%173 = icmp sgt i32 %172, %i.0		; <i1> [#uses=1]
+	%indvar.next = add i64 %indvar, 1		; <i64> [#uses=1]
+	br i1 %173, label %bb, label %bb2
+
+bb2:		; preds = %bb, %bb1.preheader
+	%174 = phi i16 [ %163, %bb1.preheader ], [ %171, %bb ]		; <i16> [#uses=1]
+	%175 = add i16 %174, -1		; <i16> [#uses=2]
+	store i16 %175, i16* %162, align 4
+	%176 = load i8* %17, align 8		; <i8> [#uses=1]
+	%177 = zext i8 %176 to i32		; <i32> [#uses=1]
+	%178 = add i32 %177, 3		; <i32> [#uses=1]
+	%179 = zext i32 %178 to i64		; <i64> [#uses=1]
+	%180 = getelementptr i8* %1, i64 %179		; <i8*> [#uses=1]
+	%181 = lshr i16 %175, 8		; <i16> [#uses=1]
+	%182 = trunc i16 %181 to i8		; <i8> [#uses=1]
+	store i8 %182, i8* %180, align 1
+	%183 = load i8* %17, align 8		; <i8> [#uses=1]
+	%184 = zext i8 %183 to i32		; <i32> [#uses=1]
+	%185 = add i32 %184, 3		; <i32> [#uses=1]
+	%186 = zext i32 %185 to i64		; <i64> [#uses=1]
+	%187 = load i16* %162, align 4		; <i16> [#uses=1]
+	%188 = trunc i16 %187 to i8		; <i8> [#uses=1]
+	%.sum = add i64 %186, 1		; <i64> [#uses=1]
+	%189 = getelementptr i8* %1, i64 %.sum		; <i8*> [#uses=1]
+	store i8 %188, i8* %189, align 1
+	%190 = load i16* %41, align 2		; <i16> [#uses=1]
+	%191 = add i16 %190, 2		; <i16> [#uses=1]
+	store i16 %191, i16* %41, align 2
+	%192 = getelementptr %struct.MemPage* %pPage, i64 0, i32 1		; <i8*> [#uses=1]
+	store i8 1, i8* %192, align 1
+	ret void
+}
diff --git a/test/CodeGen/X86/stdarg.ll b/test/CodeGen/X86/stdarg.ll
new file mode 100644
index 0000000..9778fa1
--- /dev/null
+++ b/test/CodeGen/X86/stdarg.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86-64 | grep {testb	\[%\]al, \[%\]al}
+
+%struct.__va_list_tag = type { i32, i32, i8*, i8* }
+
+define void @foo(i32 %x, ...) nounwind {
+entry:
+  %ap = alloca [1 x %struct.__va_list_tag], align 8; <[1 x %struct.__va_list_tag]*> [#uses=2]
+  %ap12 = bitcast [1 x %struct.__va_list_tag]* %ap to i8*; <i8*> [#uses=2]
+  call void @llvm.va_start(i8* %ap12)
+  %ap3 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0; <%struct.__va_list_tag*> [#uses=1]
+  call void @bar(%struct.__va_list_tag* %ap3) nounwind
+  call void @llvm.va_end(i8* %ap12)
+  ret void
+}
+
+declare void @llvm.va_start(i8*) nounwind
+
+declare void @bar(%struct.__va_list_tag*)
+
+declare void @llvm.va_end(i8*) nounwind
diff --git a/test/CodeGen/X86/store-empty-member.ll b/test/CodeGen/X86/store-empty-member.ll
new file mode 100644
index 0000000..37f86c6
--- /dev/null
+++ b/test/CodeGen/X86/store-empty-member.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+; Don't crash on an empty struct member.
+
+; CHECK: movl  $2, 4(%esp)
+; CHECK: movl  $1, (%esp)
+
+%testType = type {i32, [0 x i32], i32}
+
+define void @foo() nounwind {
+  %1 = alloca %testType
+  volatile store %testType {i32 1, [0 x i32] zeroinitializer, i32 2}, %testType* %1
+  ret void
+}
diff --git a/test/CodeGen/X86/store-fp-constant.ll b/test/CodeGen/X86/store-fp-constant.ll
new file mode 100644
index 0000000..206886b
--- /dev/null
+++ b/test/CodeGen/X86/store-fp-constant.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86 | not grep rodata
+; RUN: llc < %s -march=x86 | not grep literal
+;
+; Check that no FP constants in this testcase end up in the
+; constant pool.
+
+@G = external global float              ; <float*> [#uses=1]
+
+declare void @extfloat(float)
+
+declare void @extdouble(double)
+
+define void @testfloatstore() {
+        call void @extfloat( float 0x40934999A0000000 )
+        call void @extdouble( double 0x409349A631F8A090 )
+        store float 0x402A064C20000000, float* @G
+        ret void
+}
+
diff --git a/test/CodeGen/X86/store-global-address.ll b/test/CodeGen/X86/store-global-address.ll
new file mode 100644
index 0000000..c8d4cbc
--- /dev/null
+++ b/test/CodeGen/X86/store-global-address.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86 | grep movl | count 1
+
+@dst = global i32 0             ; <i32*> [#uses=1]
+@ptr = global i32* null         ; <i32**> [#uses=1]
+
+define void @test() {
+        store i32* @dst, i32** @ptr
+        ret void
+}
+
diff --git a/test/CodeGen/X86/store_op_load_fold.ll b/test/CodeGen/X86/store_op_load_fold.ll
new file mode 100644
index 0000000..66d0e47
--- /dev/null
+++ b/test/CodeGen/X86/store_op_load_fold.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86 | not grep mov
+;
+; Test that the add and load are folded into the store instruction.
+
+@X = internal global i16 0              ; <i16*> [#uses=2]
+
+define void @foo() {
+        %tmp.0 = load i16* @X           ; <i16> [#uses=1]
+        %tmp.3 = add i16 %tmp.0, 329            ; <i16> [#uses=1]
+        store i16 %tmp.3, i16* @X
+        ret void
+}
+
diff --git a/test/CodeGen/X86/store_op_load_fold2.ll b/test/CodeGen/X86/store_op_load_fold2.ll
new file mode 100644
index 0000000..0ccfe47
--- /dev/null
+++ b/test/CodeGen/X86/store_op_load_fold2.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
+; RUN:   grep {and	DWORD PTR} | count 2
+
+target datalayout = "e-p:32:32"
+        %struct.Macroblock = type { i32, i32, i32, i32, i32, [8 x i32], %struct.Macroblock*, %struct.Macroblock*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+
+define internal fastcc i32 @dct_chroma(i32 %uv, i32 %cr_cbp) {
+entry:
+        br i1 true, label %cond_true2732.preheader, label %cond_true129
+cond_true129:           ; preds = %entry
+        ret i32 0
+cond_true2732.preheader:                ; preds = %entry
+        %tmp2666 = getelementptr %struct.Macroblock* null, i32 0, i32 13                ; <i64*> [#uses=2]
+        %tmp2674 = trunc i32 0 to i8            ; <i8> [#uses=1]
+        br i1 true, label %cond_true2732.preheader.split.us, label %cond_true2732.preheader.split
+cond_true2732.preheader.split.us:               ; preds = %cond_true2732.preheader
+        br i1 true, label %cond_true2732.outer.us.us, label %cond_true2732.outer.us
+cond_true2732.outer.us.us:              ; preds = %cond_true2732.preheader.split.us
+        %tmp2667.us.us = load i64* %tmp2666             ; <i64> [#uses=1]
+        %tmp2670.us.us = load i64* null         ; <i64> [#uses=1]
+        %shift.upgrd.1 = zext i8 %tmp2674 to i64                ; <i64> [#uses=1]
+        %tmp2675.us.us = shl i64 %tmp2670.us.us, %shift.upgrd.1         ; <i64> [#uses=1]
+        %tmp2675not.us.us = xor i64 %tmp2675.us.us, -1          ; <i64> [#uses=1]
+        %tmp2676.us.us = and i64 %tmp2667.us.us, %tmp2675not.us.us              ; <i64> [#uses=1]
+        store i64 %tmp2676.us.us, i64* %tmp2666
+        ret i32 0
+cond_true2732.outer.us:         ; preds = %cond_true2732.preheader.split.us
+        ret i32 0
+cond_true2732.preheader.split:          ; preds = %cond_true2732.preheader
+        ret i32 0
+cond_next2752:          ; No predecessors!
+        ret i32 0
+}
+
diff --git a/test/CodeGen/X86/storetrunc-fp.ll b/test/CodeGen/X86/storetrunc-fp.ll
new file mode 100644
index 0000000..03ad093
--- /dev/null
+++ b/test/CodeGen/X86/storetrunc-fp.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 | not grep flds
+
+define void @foo(x86_fp80 %a, x86_fp80 %b, float* %fp) {
+	%c = fadd x86_fp80 %a, %b
+	%d = fptrunc x86_fp80 %c to float
+	store float %d, float* %fp
+	ret void
+}
diff --git a/test/CodeGen/X86/stride-nine-with-base-reg.ll b/test/CodeGen/X86/stride-nine-with-base-reg.ll
new file mode 100644
index 0000000..f4847a3
--- /dev/null
+++ b/test/CodeGen/X86/stride-nine-with-base-reg.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=x86 -relocation-model=static | not grep lea
+; RUN: llc < %s -march=x86-64 | not grep lea
+
+; P should be sunk into the loop and folded into the address mode. There
+; shouldn't be any lea instructions inside the loop.
+
+@B = external global [1000 x i8], align 32
+@A = external global [1000 x i8], align 32
+@P = external global [1000 x i8], align 32
+@Q = external global [1000 x i8], align 32
+
+define void @foo(i32 %m, i32 %p) nounwind {
+entry:
+	%tmp1 = icmp sgt i32 %m, 0
+	br i1 %tmp1, label %bb, label %return
+
+bb:
+	%i.019.0 = phi i32 [ %indvar.next, %bb ], [ 0, %entry ]
+	%tmp2 = getelementptr [1000 x i8]* @B, i32 0, i32 %i.019.0
+	%tmp3 = load i8* %tmp2, align 4
+	%tmp4 = mul i8 %tmp3, 2
+	%tmp5 = getelementptr [1000 x i8]* @A, i32 0, i32 %i.019.0
+	store i8 %tmp4, i8* %tmp5, align 4
+	%tmp8 = mul i32 %i.019.0, 9
+        %tmp0 = add i32 %tmp8, %p
+	%tmp10 = getelementptr [1000 x i8]* @P, i32 0, i32 %tmp0
+	store i8 17, i8* %tmp10, align 4
+	%tmp11 = getelementptr [1000 x i8]* @Q, i32 0, i32 %tmp0
+	store i8 19, i8* %tmp11, align 4
+	%indvar.next = add i32 %i.019.0, 1
+	%exitcond = icmp eq i32 %indvar.next, %m
+	br i1 %exitcond, label %return, label %bb
+
+return:
+	ret void
+}
+
diff --git a/test/CodeGen/X86/stride-reuse.ll b/test/CodeGen/X86/stride-reuse.ll
new file mode 100644
index 0000000..5cbd895
--- /dev/null
+++ b/test/CodeGen/X86/stride-reuse.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -march=x86 | not grep lea
+; RUN: llc < %s -march=x86-64 | not grep lea
+
+@B = external global [1000 x float], align 32
+@A = external global [1000 x float], align 32
+@P = external global [1000 x i32], align 32
+
+define void @foo(i32 %m) nounwind {
+entry:
+	%tmp1 = icmp sgt i32 %m, 0
+	br i1 %tmp1, label %bb, label %return
+
+bb:
+	%i.019.0 = phi i32 [ %indvar.next, %bb ], [ 0, %entry ]
+	%tmp2 = getelementptr [1000 x float]* @B, i32 0, i32 %i.019.0
+	%tmp3 = load float* %tmp2, align 4
+	%tmp4 = fmul float %tmp3, 2.000000e+00
+	%tmp5 = getelementptr [1000 x float]* @A, i32 0, i32 %i.019.0
+	store float %tmp4, float* %tmp5, align 4
+	%tmp8 = shl i32 %i.019.0, 1
+	%tmp9 = add i32 %tmp8, 64
+	%tmp10 = getelementptr [1000 x i32]* @P, i32 0, i32 %i.019.0
+	store i32 %tmp9, i32* %tmp10, align 4
+	%indvar.next = add i32 %i.019.0, 1
+	%exitcond = icmp eq i32 %indvar.next, %m
+	br i1 %exitcond, label %return, label %bb
+
+return:
+	ret void
+}
diff --git a/test/CodeGen/X86/sub-with-overflow.ll b/test/CodeGen/X86/sub-with-overflow.ll
new file mode 100644
index 0000000..19f4079
--- /dev/null
+++ b/test/CodeGen/X86/sub-with-overflow.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -march=x86 | grep {jo} | count 1
+; RUN: llc < %s -march=x86 | grep {jb} | count 1
+
+@ok = internal constant [4 x i8] c"%d\0A\00"
+@no = internal constant [4 x i8] c"no\0A\00"
+
+define i1 @func1(i32 %v1, i32 %v2) nounwind {
+entry:
+  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
+  %sum = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %overflow, label %normal
+
+normal:
+  %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+  ret i1 true
+
+overflow:
+  %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
+  ret i1 false
+}
+
+define i1 @func2(i32 %v1, i32 %v2) nounwind {
+entry:
+  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
+  %sum = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %carry, label %normal
+
+normal:
+  %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+  ret i1 true
+
+carry:
+  %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
+  ret i1 false
+}
+
+declare i32 @printf(i8*, ...) nounwind
+declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32)
+declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32)
diff --git a/test/CodeGen/X86/subreg-to-reg-0.ll b/test/CodeGen/X86/subreg-to-reg-0.ll
new file mode 100644
index 0000000..d718c85
--- /dev/null
+++ b/test/CodeGen/X86/subreg-to-reg-0.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86-64 | grep mov | count 1
+
+; Do eliminate the zero-extension instruction and rely on
+; x86-64's implicit zero-extension!
+
+define i64 @foo(i32* %p) nounwind {
+  %t = load i32* %p
+  %n = add i32 %t, 1
+  %z = zext i32 %n to i64
+  ret i64 %z
+}
diff --git a/test/CodeGen/X86/subreg-to-reg-1.ll b/test/CodeGen/X86/subreg-to-reg-1.ll
new file mode 100644
index 0000000..a297728
--- /dev/null
+++ b/test/CodeGen/X86/subreg-to-reg-1.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86-64 | grep {leal	.*), %e.\*} | count 1
+
+; Don't eliminate or coalesce away the explicit zero-extension!
+; This is currently using an leal because of a 3-addressification detail,
+; though this isn't necessary; the point of this test is to make sure
+; a 32-bit add is used.
+
+define i64 @foo(i64 %a) nounwind {
+  %b = add i64 %a, 4294967295
+  %c = and i64 %b, 4294967295
+  %d = add i64 %c, 1
+  ret i64 %d
+}
diff --git a/test/CodeGen/X86/subreg-to-reg-2.ll b/test/CodeGen/X86/subreg-to-reg-2.ll
new file mode 100644
index 0000000..49d2e88
--- /dev/null
+++ b/test/CodeGen/X86/subreg-to-reg-2.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movl
+; rdar://6707985
+
+	%XXOO = type { %"struct.XXC::XXCC", i8*, %"struct.XXC::XXOO::$_71" }
+	%XXValue = type opaque
+	%"struct.XXC::ArrayStorage" = type { i32, i32, i32, i8*, i8*, [1 x %XXValue*] }
+	%"struct.XXC::XXArray" = type { %XXOO, i32, %"struct.XXC::ArrayStorage"* }
+	%"struct.XXC::XXCC" = type { i32 (...)**, i8* }
+	%"struct.XXC::XXOO::$_71" = type { [2 x %XXValue*] }
+
+define internal fastcc %XXValue* @t(i64* %out, %"struct.XXC::ArrayStorage"* %tmp9) nounwind {
+prologue:
+	%array = load %XXValue** inttoptr (i64 11111111 to %XXValue**)		; <%XXValue*> [#uses=0]
+	%index = load %XXValue** inttoptr (i64 22222222 to %XXValue**)		; <%XXValue*> [#uses=1]
+	%tmp = ptrtoint %XXValue* %index to i64		; <i64> [#uses=2]
+	store i64 %tmp, i64* %out
+	%tmp6 = trunc i64 %tmp to i32		; <i32> [#uses=1]
+	br label %bb5
+
+bb5:		; preds = %prologue
+	%tmp10 = zext i32 %tmp6 to i64		; <i64> [#uses=1]
+	%tmp11 = getelementptr %"struct.XXC::ArrayStorage"* %tmp9, i64 0, i32 5, i64 %tmp10		; <%XXValue**> [#uses=1]
+	%tmp12 = load %XXValue** %tmp11, align 8		; <%XXValue*> [#uses=1]
+	ret %XXValue* %tmp12
+}
diff --git a/test/CodeGen/X86/subreg-to-reg-3.ll b/test/CodeGen/X86/subreg-to-reg-3.ll
new file mode 100644
index 0000000..931ae75
--- /dev/null
+++ b/test/CodeGen/X86/subreg-to-reg-3.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86-64 | grep imull
+
+; Don't eliminate or coalesce away the explicit zero-extension!
+
+define i64 @foo(i64 %a) {
+  %b = mul i64 %a, 7823
+  %c = and i64 %b, 4294967295
+  %d = add i64 %c, 1
+  ret i64 %d
+}
diff --git a/test/CodeGen/X86/subreg-to-reg-4.ll b/test/CodeGen/X86/subreg-to-reg-4.ll
new file mode 100644
index 0000000..0ea5541
--- /dev/null
+++ b/test/CodeGen/X86/subreg-to-reg-4.ll
@@ -0,0 +1,135 @@
+; RUN: llc < %s -march=x86-64 > %t
+; RUN: not grep leaq %t
+; RUN: not grep incq %t
+; RUN: not grep decq %t
+; RUN: not grep negq %t
+; RUN: not grep addq %t
+; RUN: not grep subq %t
+; RUN: not grep {movl	%} %t
+
+; Utilize implicit zero-extension on x86-64 to eliminate explicit
+; zero-extensions. Shrink 64-bit adds to 32-bit when the high
+; 32-bits will be zeroed.
+
+define void @bar(i64 %x, i64 %y, i64* %z) nounwind readnone {
+entry:
+	%t0 = add i64 %x, %y
+	%t1 = and i64 %t0, 4294967295
+        store i64 %t1, i64* %z
+	ret void
+}
+define void @easy(i32 %x, i32 %y, i64* %z) nounwind readnone {
+entry:
+	%t0 = add i32 %x, %y
+        %tn = zext i32 %t0 to i64
+	%t1 = and i64 %tn, 4294967295
+        store i64 %t1, i64* %z
+	ret void
+}
+define void @cola(i64 *%x, i64 %y, i64* %z, i64 %u) nounwind readnone {
+entry:
+        %p = load i64* %x
+	%t0 = add i64 %p, %y
+	%t1 = and i64 %t0, 4294967295
+        %t2 = xor i64 %t1, %u
+        store i64 %t2, i64* %z
+	ret void
+}
+define void @yaks(i64 *%x, i64 %y, i64* %z, i64 %u) nounwind readnone {
+entry:
+        %p = load i64* %x
+	%t0 = add i64 %p, %y
+        %t1 = xor i64 %t0, %u
+	%t2 = and i64 %t1, 4294967295
+        store i64 %t2, i64* %z
+	ret void
+}
+define void @foo(i64 *%x, i64 *%y, i64* %z) nounwind readnone {
+entry:
+        %a = load i64* %x
+        %b = load i64* %y
+	%t0 = add i64 %a, %b
+	%t1 = and i64 %t0, 4294967295
+        store i64 %t1, i64* %z
+	ret void
+}
+define void @avo(i64 %x, i64* %z, i64 %u) nounwind readnone {
+entry:
+	%t0 = add i64 %x, 734847
+	%t1 = and i64 %t0, 4294967295
+        %t2 = xor i64 %t1, %u
+        store i64 %t2, i64* %z
+	ret void
+}
+define void @phe(i64 %x, i64* %z, i64 %u) nounwind readnone {
+entry:
+	%t0 = add i64 %x, 734847
+        %t1 = xor i64 %t0, %u
+	%t2 = and i64 %t1, 4294967295
+        store i64 %t2, i64* %z
+	ret void
+}
+define void @oze(i64 %y, i64* %z) nounwind readnone {
+entry:
+	%t0 = add i64 %y, 1
+	%t1 = and i64 %t0, 4294967295
+        store i64 %t1, i64* %z
+	ret void
+}
+
+define void @sbar(i64 %x, i64 %y, i64* %z) nounwind readnone {
+entry:
+	%t0 = sub i64 %x, %y
+	%t1 = and i64 %t0, 4294967295
+        store i64 %t1, i64* %z
+	ret void
+}
+define void @seasy(i32 %x, i32 %y, i64* %z) nounwind readnone {
+entry:
+	%t0 = sub i32 %x, %y
+        %tn = zext i32 %t0 to i64
+	%t1 = and i64 %tn, 4294967295
+        store i64 %t1, i64* %z
+	ret void
+}
+define void @scola(i64 *%x, i64 %y, i64* %z, i64 %u) nounwind readnone {
+entry:
+        %p = load i64* %x
+	%t0 = sub i64 %p, %y
+	%t1 = and i64 %t0, 4294967295
+        %t2 = xor i64 %t1, %u
+        store i64 %t2, i64* %z
+	ret void
+}
+define void @syaks(i64 *%x, i64 %y, i64* %z, i64 %u) nounwind readnone {
+entry:
+        %p = load i64* %x
+	%t0 = sub i64 %p, %y
+        %t1 = xor i64 %t0, %u
+	%t2 = and i64 %t1, 4294967295
+        store i64 %t2, i64* %z
+	ret void
+}
+define void @sfoo(i64 *%x, i64 *%y, i64* %z) nounwind readnone {
+entry:
+        %a = load i64* %x
+        %b = load i64* %y
+	%t0 = sub i64 %a, %b
+	%t1 = and i64 %t0, 4294967295
+        store i64 %t1, i64* %z
+	ret void
+}
+define void @swya(i64 %y, i64* %z) nounwind readnone {
+entry:
+	%t0 = sub i64 0, %y
+	%t1 = and i64 %t0, 4294967295
+        store i64 %t1, i64* %z
+	ret void
+}
+define void @soze(i64 %y, i64* %z) nounwind readnone {
+entry:
+	%t0 = sub i64 %y, 1
+	%t1 = and i64 %t0, 4294967295
+        store i64 %t1, i64* %z
+	ret void
+}
diff --git a/test/CodeGen/X86/subreg-to-reg-6.ll b/test/CodeGen/X86/subreg-to-reg-6.ll
new file mode 100644
index 0000000..76430cd
--- /dev/null
+++ b/test/CodeGen/X86/subreg-to-reg-6.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=x86-64
+
+define i64 @foo() nounwind {
+entry:
+	%t0 = load i32* null, align 8
+	switch i32 %t0, label %bb65 [
+		i32 16, label %bb
+		i32 12, label %bb56
+	]
+
+bb:
+	br label %bb65
+
+bb56:
+	unreachable
+
+bb65:
+	%a = phi i64 [ 0, %bb ], [ 0, %entry ]
+	tail call void asm "", "{cx}"(i64 %a) nounwind
+	%t15 = and i64 %a, 4294967295
+	ret i64 %t15
+}
+
+define i64 @bar(i64 %t0) nounwind {
+	call void asm "", "{cx}"(i64 0) nounwind
+	%t1 = sub i64 0, %t0
+	%t2 = and i64 %t1, 4294967295
+	ret i64 %t2
+}
diff --git a/test/CodeGen/X86/switch-crit-edge-constant.ll b/test/CodeGen/X86/switch-crit-edge-constant.ll
new file mode 100644
index 0000000..1f2ab0d
--- /dev/null
+++ b/test/CodeGen/X86/switch-crit-edge-constant.ll
@@ -0,0 +1,52 @@
+; PR925
+; RUN: llc < %s -march=x86 | \
+; RUN:   grep mov.*str1 | count 1
+
+target datalayout = "e-p:32:32"
+target triple = "i686-apple-darwin8.7.2"
+@str1 = internal constant [5 x i8] c"bonk\00"		; <[5 x i8]*> [#uses=1]
+@str2 = internal constant [5 x i8] c"bork\00"		; <[5 x i8]*> [#uses=1]
+@str = internal constant [8 x i8] c"perfwap\00"		; <[8 x i8]*> [#uses=1]
+
+define void @foo(i32 %C) {
+entry:
+	switch i32 %C, label %bb2 [
+		 i32 1, label %blahaha
+		 i32 2, label %blahaha
+		 i32 3, label %blahaha
+		 i32 4, label %blahaha
+		 i32 5, label %blahaha
+		 i32 6, label %blahaha
+		 i32 7, label %blahaha
+		 i32 8, label %blahaha
+		 i32 9, label %blahaha
+		 i32 10, label %blahaha
+	]
+
+bb2:		; preds = %entry
+	%tmp5 = and i32 %C, 123		; <i32> [#uses=1]
+	%tmp = icmp eq i32 %tmp5, 0		; <i1> [#uses=1]
+	br i1 %tmp, label %blahaha, label %cond_true
+
+cond_true:		; preds = %bb2
+	br label %blahaha
+
+blahaha:		; preds = %cond_true, %bb2, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry
+	%s.0 = phi i8* [ getelementptr ([8 x i8]* @str, i32 0, i64 0), %cond_true ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str2, i32 0, i64 0), %bb2 ]		; <i8*> [#uses=13]
+	%tmp8 = tail call i32 (i8*, ...)* @printf( i8* %s.0 )		; <i32> [#uses=0]
+	%tmp10 = tail call i32 (i8*, ...)* @printf( i8* %s.0 )		; <i32> [#uses=0]
+	%tmp12 = tail call i32 (i8*, ...)* @printf( i8* %s.0 )		; <i32> [#uses=0]
+	%tmp14 = tail call i32 (i8*, ...)* @printf( i8* %s.0 )		; <i32> [#uses=0]
+	%tmp16 = tail call i32 (i8*, ...)* @printf( i8* %s.0 )		; <i32> [#uses=0]
+	%tmp18 = tail call i32 (i8*, ...)* @printf( i8* %s.0 )		; <i32> [#uses=0]
+	%tmp20 = tail call i32 (i8*, ...)* @printf( i8* %s.0 )		; <i32> [#uses=0]
+	%tmp22 = tail call i32 (i8*, ...)* @printf( i8* %s.0 )		; <i32> [#uses=0]
+	%tmp24 = tail call i32 (i8*, ...)* @printf( i8* %s.0 )		; <i32> [#uses=0]
+	%tmp26 = tail call i32 (i8*, ...)* @printf( i8* %s.0 )		; <i32> [#uses=0]
+	%tmp28 = tail call i32 (i8*, ...)* @printf( i8* %s.0 )		; <i32> [#uses=0]
+	%tmp30 = tail call i32 (i8*, ...)* @printf( i8* %s.0 )		; <i32> [#uses=0]
+	%tmp32 = tail call i32 (i8*, ...)* @printf( i8* %s.0 )		; <i32> [#uses=0]
+	ret void
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/X86/switch-zextload.ll b/test/CodeGen/X86/switch-zextload.ll
new file mode 100644
index 0000000..55425bc
--- /dev/null
+++ b/test/CodeGen/X86/switch-zextload.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=x86 | grep mov | count 1
+
+; Do a zextload instead of a load and a separate zext.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.6"
+	%struct.move_s = type { i32, i32, i32, i32, i32, i32 }
+	%struct.node_t = type { i8, i8, i8, i8, i32, i32, %struct.node_t**, %struct.node_t*, %struct.move_s }
+
+define fastcc void @set_proof_and_disproof_numbers(%struct.node_t* nocapture %node) nounwind {
+entry:
+	%0 = load i8* null, align 1		; <i8> [#uses=1]
+	switch i8 %0, label %return [
+		i8 2, label %bb31
+		i8 0, label %bb80
+		i8 1, label %bb82
+		i8 3, label %bb84
+	]
+
+bb31:		; preds = %entry
+	unreachable
+
+bb80:		; preds = %entry
+	ret void
+
+bb82:		; preds = %entry
+	ret void
+
+bb84:		; preds = %entry
+	ret void
+
+return:		; preds = %entry
+	ret void
+}
diff --git a/test/CodeGen/X86/swizzle.ll b/test/CodeGen/X86/swizzle.ll
new file mode 100644
index 0000000..23e0c24
--- /dev/null
+++ b/test/CodeGen/X86/swizzle.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movlps
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep movups
+; rdar://6523650
+
+	%struct.vector4_t = type { <4 x float> }
+
+define void @swizzle(i8* nocapture %a, %struct.vector4_t* nocapture %b, %struct.vector4_t* nocapture %c) nounwind {
+entry:
+	%0 = getelementptr %struct.vector4_t* %b, i32 0, i32 0		; <<4 x float>*> [#uses=2]
+	%1 = load <4 x float>* %0, align 4		; <<4 x float>> [#uses=1]
+	%tmp.i = bitcast i8* %a to double*		; <double*> [#uses=1]
+	%tmp1.i = load double* %tmp.i		; <double> [#uses=1]
+	%2 = insertelement <2 x double> undef, double %tmp1.i, i32 0		; <<2 x double>> [#uses=1]
+	%tmp2.i = bitcast <2 x double> %2 to <4 x float>		; <<4 x float>> [#uses=1]
+	%3 = shufflevector <4 x float> %1, <4 x float> %tmp2.i, <4 x i32> < i32 4, i32 5, i32 2, i32 3 >		; <<4 x float>> [#uses=1]
+	store <4 x float> %3, <4 x float>* %0, align 4
+	ret void
+}
diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll
new file mode 100644
index 0000000..7b21e1b
--- /dev/null
+++ b/test/CodeGen/X86/tail-opts.ll
@@ -0,0 +1,408 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -asm-verbose=false -post-RA-scheduler=true | FileCheck %s
+
+declare void @bar(i32)
+declare void @car(i32)
+declare void @dar(i32)
+declare void @ear(i32)
+declare void @far(i32)
+declare i1 @qux()
+
+@GHJK = global i32 0
+@HABC = global i32 0
+
+; BranchFolding should tail-merge the stores since they all precede
+; direct branches to the same place.
+
+; CHECK: tail_merge_me:
+; CHECK-NOT:  GHJK
+; CHECK:      movl $0, GHJK(%rip)
+; CHECK-NEXT: movl $1, HABC(%rip)
+; CHECK-NOT:  GHJK
+
+define void @tail_merge_me() nounwind {
+entry:
+  %a = call i1 @qux()
+  br i1 %a, label %A, label %next
+next:
+  %b = call i1 @qux()
+  br i1 %b, label %B, label %C
+
+A:
+  call void @bar(i32 0)
+  store i32 0, i32* @GHJK
+  br label %M
+
+B:
+  call void @car(i32 1)
+  store i32 0, i32* @GHJK
+  br label %M
+
+C:
+  call void @dar(i32 2)
+  store i32 0, i32* @GHJK
+  br label %M
+
+M:
+  store i32 1, i32* @HABC
+  %c = call i1 @qux()
+  br i1 %c, label %return, label %altret
+
+return:
+  call void @ear(i32 1000)
+  ret void
+altret:
+  call void @far(i32 1001)
+  ret void
+}
+
+declare i8* @choose(i8*, i8*)
+
+; BranchFolding should tail-duplicate the indirect jump to avoid
+; redundant branching.
+
+; CHECK: tail_duplicate_me:
+; CHECK:      movl $0, GHJK(%rip)
+; CHECK-NEXT: jmpq *%rbx
+; CHECK:      movl $0, GHJK(%rip)
+; CHECK-NEXT: jmpq *%rbx
+; CHECK:      movl $0, GHJK(%rip)
+; CHECK-NEXT: jmpq *%rbx
+
+define void @tail_duplicate_me() nounwind {
+entry:
+  %a = call i1 @qux()
+  %c = call i8* @choose(i8* blockaddress(@tail_duplicate_me, %return),
+                        i8* blockaddress(@tail_duplicate_me, %altret))
+  br i1 %a, label %A, label %next
+next:
+  %b = call i1 @qux()
+  br i1 %b, label %B, label %C
+
+A:
+  call void @bar(i32 0)
+  store i32 0, i32* @GHJK
+  br label %M
+
+B:
+  call void @car(i32 1)
+  store i32 0, i32* @GHJK
+  br label %M
+
+C:
+  call void @dar(i32 2)
+  store i32 0, i32* @GHJK
+  br label %M
+
+M:
+  indirectbr i8* %c, [label %return, label %altret]
+
+return:
+  call void @ear(i32 1000)
+  ret void
+altret:
+  call void @far(i32 1001)
+  ret void
+}
+
+; BranchFolding shouldn't try to merge the tails of two blocks
+; with only a branch in common, regardless of the fallthrough situation.
+
+; CHECK: dont_merge_oddly:
+; CHECK-NOT:   ret
+; CHECK:        ucomiss %xmm1, %xmm2
+; CHECK-NEXT:   jbe .LBB3_3
+; CHECK-NEXT:   ucomiss %xmm0, %xmm1
+; CHECK-NEXT:   ja .LBB3_4
+; CHECK-NEXT: .LBB3_2:
+; CHECK-NEXT:   movb $1, %al
+; CHECK-NEXT:   ret
+; CHECK-NEXT: .LBB3_3:
+; CHECK-NEXT:   ucomiss %xmm0, %xmm2
+; CHECK-NEXT:   jbe .LBB3_2
+; CHECK-NEXT: .LBB3_4:
+; CHECK-NEXT:   xorb %al, %al
+; CHECK-NEXT:   ret
+
+define i1 @dont_merge_oddly(float* %result) nounwind {
+entry:
+  %tmp4 = getelementptr float* %result, i32 2
+  %tmp5 = load float* %tmp4, align 4
+  %tmp7 = getelementptr float* %result, i32 4
+  %tmp8 = load float* %tmp7, align 4
+  %tmp10 = getelementptr float* %result, i32 6
+  %tmp11 = load float* %tmp10, align 4
+  %tmp12 = fcmp olt float %tmp8, %tmp11
+  br i1 %tmp12, label %bb, label %bb21
+
+bb:
+  %tmp23469 = fcmp olt float %tmp5, %tmp8
+  br i1 %tmp23469, label %bb26, label %bb30
+
+bb21:
+  %tmp23 = fcmp olt float %tmp5, %tmp11
+  br i1 %tmp23, label %bb26, label %bb30
+
+bb26:
+  ret i1 0
+
+bb30:
+  ret i1 1
+}
+
+; Do any-size tail-merging when two candidate blocks will both require
+; an unconditional jump to complete a two-way conditional branch.
+
+; CHECK: c_expand_expr_stmt:
+; CHECK:        jmp .LBB4_7
+; CHECK-NEXT: .LBB4_12:
+; CHECK-NEXT:   movq 8(%rax), %rax
+; CHECK-NEXT:   movb 16(%rax), %al
+; CHECK-NEXT:   cmpb $16, %al
+; CHECK-NEXT:   je .LBB4_6
+; CHECK-NEXT:   cmpb $23, %al
+; CHECK-NEXT:   je .LBB4_6
+; CHECK-NEXT:   jmp .LBB4_15
+; CHECK-NEXT: .LBB4_14:
+; CHECK-NEXT:   cmpb $23, %bl
+; CHECK-NEXT:   jne .LBB4_15
+; CHECK-NEXT: .LBB4_15:
+
+%0 = type { %struct.rtx_def* }
+%struct.lang_decl = type opaque
+%struct.rtx_def = type { i16, i8, i8, [1 x %union.rtunion] }
+%struct.tree_decl = type { [24 x i8], i8*, i32, %union.tree_node*, i32, i8, i8, i8, i8, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %struct.rtx_def*, %union..2anon, %0, %union.tree_node*, %struct.lang_decl* }
+%union..2anon = type { i32 }
+%union.rtunion = type { i8* }
+%union.tree_node = type { %struct.tree_decl }
+
+define fastcc void @c_expand_expr_stmt(%union.tree_node* %expr) nounwind {
+entry:
+  %tmp4 = load i8* null, align 8                  ; <i8> [#uses=3]
+  switch i8 %tmp4, label %bb3 [
+    i8 18, label %bb
+  ]
+
+bb:                                               ; preds = %entry
+  switch i32 undef, label %bb1 [
+    i32 0, label %bb2.i
+    i32 37, label %bb.i
+  ]
+
+bb.i:                                             ; preds = %bb
+  switch i32 undef, label %bb1 [
+    i32 0, label %lvalue_p.exit
+  ]
+
+bb2.i:                                            ; preds = %bb
+  br label %bb3
+
+lvalue_p.exit:                                    ; preds = %bb.i
+  %tmp21 = load %union.tree_node** null, align 8  ; <%union.tree_node*> [#uses=3]
+  %tmp22 = getelementptr inbounds %union.tree_node* %tmp21, i64 0, i32 0, i32 0, i64 0 ; <i8*> [#uses=1]
+  %tmp23 = load i8* %tmp22, align 8               ; <i8> [#uses=1]
+  %tmp24 = zext i8 %tmp23 to i32                  ; <i32> [#uses=1]
+  switch i32 %tmp24, label %lvalue_p.exit4 [
+    i32 0, label %bb2.i3
+    i32 2, label %bb.i1
+  ]
+
+bb.i1:                                            ; preds = %lvalue_p.exit
+  %tmp25 = getelementptr inbounds %union.tree_node* %tmp21, i64 0, i32 0, i32 2 ; <i32*> [#uses=1]
+  %tmp26 = bitcast i32* %tmp25 to %union.tree_node** ; <%union.tree_node**> [#uses=1]
+  %tmp27 = load %union.tree_node** %tmp26, align 8 ; <%union.tree_node*> [#uses=2]
+  %tmp28 = getelementptr inbounds %union.tree_node* %tmp27, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1]
+  %tmp29 = load i8* %tmp28, align 8               ; <i8> [#uses=1]
+  %tmp30 = zext i8 %tmp29 to i32                  ; <i32> [#uses=1]
+  switch i32 %tmp30, label %lvalue_p.exit4 [
+    i32 0, label %bb2.i.i2
+    i32 2, label %bb.i.i
+  ]
+
+bb.i.i:                                           ; preds = %bb.i1
+  %tmp34 = tail call fastcc i32 @lvalue_p(%union.tree_node* null) nounwind ; <i32> [#uses=1]
+  %phitmp = icmp ne i32 %tmp34, 0                 ; <i1> [#uses=1]
+  br label %lvalue_p.exit4
+
+bb2.i.i2:                                         ; preds = %bb.i1
+  %tmp35 = getelementptr inbounds %union.tree_node* %tmp27, i64 0, i32 0, i32 0, i64 8 ; <i8*> [#uses=1]
+  %tmp36 = bitcast i8* %tmp35 to %union.tree_node** ; <%union.tree_node**> [#uses=1]
+  %tmp37 = load %union.tree_node** %tmp36, align 8 ; <%union.tree_node*> [#uses=1]
+  %tmp38 = getelementptr inbounds %union.tree_node* %tmp37, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1]
+  %tmp39 = load i8* %tmp38, align 8               ; <i8> [#uses=1]
+  switch i8 %tmp39, label %bb2 [
+    i8 16, label %lvalue_p.exit4
+    i8 23, label %lvalue_p.exit4
+  ]
+
+bb2.i3:                                           ; preds = %lvalue_p.exit
+  %tmp40 = getelementptr inbounds %union.tree_node* %tmp21, i64 0, i32 0, i32 0, i64 8 ; <i8*> [#uses=1]
+  %tmp41 = bitcast i8* %tmp40 to %union.tree_node** ; <%union.tree_node**> [#uses=1]
+  %tmp42 = load %union.tree_node** %tmp41, align 8 ; <%union.tree_node*> [#uses=1]
+  %tmp43 = getelementptr inbounds %union.tree_node* %tmp42, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1]
+  %tmp44 = load i8* %tmp43, align 8               ; <i8> [#uses=1]
+  switch i8 %tmp44, label %bb2 [
+    i8 16, label %lvalue_p.exit4
+    i8 23, label %lvalue_p.exit4
+  ]
+
+lvalue_p.exit4:                                   ; preds = %bb2.i3, %bb2.i3, %bb2.i.i2, %bb2.i.i2, %bb.i.i, %bb.i1, %lvalue_p.exit
+  %tmp45 = phi i1 [ %phitmp, %bb.i.i ], [ false, %bb2.i.i2 ], [ false, %bb2.i.i2 ], [ false, %bb.i1 ], [ false, %bb2.i3 ], [ false, %bb2.i3 ], [ false, %lvalue_p.exit ] ; <i1> [#uses=1]
+  %tmp46 = icmp eq i8 %tmp4, 0                    ; <i1> [#uses=1]
+  %or.cond = or i1 %tmp45, %tmp46                 ; <i1> [#uses=1]
+  br i1 %or.cond, label %bb2, label %bb3
+
+bb1:                                              ; preds = %bb2.i.i, %bb.i, %bb
+  %.old = icmp eq i8 %tmp4, 23                    ; <i1> [#uses=1]
+  br i1 %.old, label %bb2, label %bb3
+
+bb2:                                              ; preds = %bb1, %lvalue_p.exit4, %bb2.i3, %bb2.i.i2
+  br label %bb3
+
+bb3:                                              ; preds = %bb2, %bb1, %lvalue_p.exit4, %bb2.i, %entry
+  %expr_addr.0 = phi %union.tree_node* [ null, %bb2 ], [ %expr, %bb2.i ], [ %expr, %entry ], [ %expr, %bb1 ], [ %expr, %lvalue_p.exit4 ] ; <%union.tree_node*> [#uses=0]
+  unreachable
+}
+
+declare fastcc i32 @lvalue_p(%union.tree_node* nocapture) nounwind readonly
+
+declare fastcc %union.tree_node* @default_conversion(%union.tree_node*) nounwind
+
+
+; If one tail merging candidate falls through into the other,
+; tail merging is likely profitable regardless of how few
+; instructions are involved. This function should have only
+; one ret instruction.
+
+; CHECK: foo:
+; CHECK:        callq func
+; CHECK-NEXT: .LBB5_2:
+; CHECK-NEXT:   addq $8, %rsp
+; CHECK-NEXT:   ret
+
+define void @foo(i1* %V) nounwind {
+entry:
+  %t0 = icmp eq i1* %V, null
+  br i1 %t0, label %return, label %bb
+
+bb:
+  call void @func()
+  ret void
+
+return:
+  ret void
+}
+
+declare void @func()
+
+; one - One instruction may be tail-duplicated even with optsize.
+
+; CHECK: one:
+; CHECK: movl $0, XYZ(%rip)
+; CHECK: movl $0, XYZ(%rip)
+
+@XYZ = external global i32
+
+define void @one() nounwind optsize {
+entry:
+  %0 = icmp eq i32 undef, 0
+  br i1 %0, label %bbx, label %bby
+
+bby:
+  switch i32 undef, label %bb7 [
+    i32 16, label %return
+  ]
+
+bb7:
+  volatile store i32 0, i32* @XYZ
+  unreachable
+
+bbx:
+  switch i32 undef, label %bb12 [
+    i32 128, label %return
+  ]
+
+bb12:
+  volatile store i32 0, i32* @XYZ
+  unreachable
+
+return:
+  ret void
+}
+
+; two - Same as one, but with two instructions in the common
+; tail instead of one. This is too much to be merged, given
+; the optsize attribute.
+
+; CHECK: two:
+; CHECK-NOT: XYZ
+; CHECK: movl $0, XYZ(%rip)
+; CHECK: movl $1, XYZ(%rip)
+; CHECK-NOT: XYZ
+; CHECK: ret
+
+define void @two() nounwind optsize {
+entry:
+  %0 = icmp eq i32 undef, 0
+  br i1 %0, label %bbx, label %bby
+
+bby:
+  switch i32 undef, label %bb7 [
+    i32 16, label %return
+  ]
+
+bb7:
+  volatile store i32 0, i32* @XYZ
+  volatile store i32 1, i32* @XYZ
+  unreachable
+
+bbx:
+  switch i32 undef, label %bb12 [
+    i32 128, label %return
+  ]
+
+bb12:
+  volatile store i32 0, i32* @XYZ
+  volatile store i32 1, i32* @XYZ
+  unreachable
+
+return:
+  ret void
+}
+
+; two_nosize - Same as two, but without the optsize attribute.
+; Now two instructions are enough to be tail-duplicated.
+
+; CHECK: two_nosize:
+; CHECK: movl $0, XYZ(%rip)
+; CHECK: movl $1, XYZ(%rip)
+; CHECK: movl $0, XYZ(%rip)
+; CHECK: movl $1, XYZ(%rip)
+
+define void @two_nosize() nounwind {
+entry:
+  %0 = icmp eq i32 undef, 0
+  br i1 %0, label %bbx, label %bby
+
+bby:
+  switch i32 undef, label %bb7 [
+    i32 16, label %return
+  ]
+
+bb7:
+  volatile store i32 0, i32* @XYZ
+  volatile store i32 1, i32* @XYZ
+  unreachable
+
+bbx:
+  switch i32 undef, label %bb12 [
+    i32 128, label %return
+  ]
+
+bb12:
+  volatile store i32 0, i32* @XYZ
+  volatile store i32 1, i32* @XYZ
+  unreachable
+
+return:
+  ret void
+}
diff --git a/test/CodeGen/X86/tailcall-fastisel.ll b/test/CodeGen/X86/tailcall-fastisel.ll
new file mode 100644
index 0000000..d54fb41
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-fastisel.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86-64 -tailcallopt -fast-isel | grep TAILCALL
+
+; Fast-isel shouldn't attempt to handle this tail call, and it should
+; cleanly terminate instruction selection in the block after it's
+; done to avoid emitting invalid MachineInstrs.
+
+%0 = type { i64, i32, i8* }
+
+define fastcc i8* @"visit_array_aux<`Reference>"(%0 %arg, i32 %arg1) nounwind {
+fail:                                             ; preds = %entry
+  %tmp20 = tail call fastcc i8* @"visit_array_aux<`Reference>"(%0 %arg, i32 undef) ; <i8*> [#uses=1]
+  ret i8* %tmp20
+}
diff --git a/test/CodeGen/X86/tailcall-i1.ll b/test/CodeGen/X86/tailcall-i1.ll
new file mode 100644
index 0000000..8ef1f11
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-i1.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
+define fastcc i1 @i1test(i32, i32, i32, i32) {
+  entry:
+  %4 = tail call fastcc i1 @i1test( i32 %0, i32 %1, i32 %2, i32 %3)
+  ret i1 %4
+}
diff --git a/test/CodeGen/X86/tailcall-largecode.ll b/test/CodeGen/X86/tailcall-largecode.ll
new file mode 100644
index 0000000..8ddc405
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-largecode.ll
@@ -0,0 +1,71 @@
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -tailcallopt -code-model=large | FileCheck %s
+
+declare fastcc i32 @callee(i32 %arg)
+define fastcc i32 @directcall(i32 %arg) {
+entry:
+; This is the large code model, so &callee may not fit into the jmp
+; instruction.  Instead, stick it into a register.
+;  CHECK: movabsq $callee, [[REGISTER:%r[a-z0-9]+]]
+;  CHECK: jmpq    *[[REGISTER]]  # TAILCALL
+  %res = tail call fastcc i32 @callee(i32 %arg)
+  ret i32 %res
+}
+
+; Check that the register used for an indirect tail call doesn't
+; clobber any of the arguments.
+define fastcc i32 @indirect_manyargs(i32(i32,i32,i32,i32,i32,i32,i32)* %target) {
+; Adjust the stack to enter the function.  (The amount of the
+; adjustment may change in the future, in which case the location of
+; the stack argument and the return adjustment will change too.)
+;  CHECK: subq $8, %rsp
+; Put the call target into R11, which won't be clobbered while restoring
+; callee-saved registers and won't be used for passing arguments.
+;  CHECK: movq %rdi, %r11
+; Pass the stack argument.
+;  CHECK: movl $7, 16(%rsp)
+; Pass the register arguments, in the right registers.
+;  CHECK: movl $1, %edi
+;  CHECK: movl $2, %esi
+;  CHECK: movl $3, %edx
+;  CHECK: movl $4, %ecx
+;  CHECK: movl $5, %r8d
+;  CHECK: movl $6, %r9d
+; Adjust the stack to "return".
+;  CHECK: addq $8, %rsp
+; And tail-call to the target.
+;  CHECK: jmpq *%r11  # TAILCALL
+  %res = tail call fastcc i32 %target(i32 1, i32 2, i32 3, i32 4, i32 5,
+                                      i32 6, i32 7)
+  ret i32 %res
+}
+
+; Check that the register used for a direct tail call doesn't clobber
+; any of the arguments.
+declare fastcc i32 @manyargs_callee(i32,i32,i32,i32,i32,i32,i32)
+define fastcc i32 @direct_manyargs() {
+; Adjust the stack to enter the function.  (The amount of the
+; adjustment may change in the future, in which case the location of
+; the stack argument and the return adjustment will change too.)
+;  CHECK: subq $8, %rsp
+; Pass the stack argument.
+;  CHECK: movl $7, 16(%rsp)
+; Pass the register arguments, in the right registers.
+;  CHECK: movl $1, %edi
+;  CHECK: movl $2, %esi
+;  CHECK: movl $3, %edx
+;  CHECK: movl $4, %ecx
+;  CHECK: movl $5, %r8d
+;  CHECK: movl $6, %r9d
+; This is the large code model, so &manyargs_callee may not fit into
+; the jmp instruction.  Put it into R11, which won't be clobbered
+; while restoring callee-saved registers and won't be used for passing
+; arguments.
+;  CHECK: movabsq $manyargs_callee, %r11
+; Adjust the stack to "return".
+;  CHECK: addq $8, %rsp
+; And tail-call to the target.
+;  CHECK: jmpq *%r11  # TAILCALL
+  %res = tail call fastcc i32 @manyargs_callee(i32 1, i32 2, i32 3, i32 4,
+                                               i32 5, i32 6, i32 7)
+  ret i32 %res
+}
diff --git a/test/CodeGen/X86/tailcall-stackalign.ll b/test/CodeGen/X86/tailcall-stackalign.ll
new file mode 100644
index 0000000..0233139
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-stackalign.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s  -mtriple=i686-unknown-linux  -tailcallopt | FileCheck %s
+; Linux has 8-byte alignment, so the params cause stack size 20 when tailcallopt
+; is enabled; ensure that a normal fastcc call has matching stack size
+
+
+define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+       ret i32 %a3
+}
+
+define fastcc i32 @tailcaller(i32 %in1, i32 %in2, i32 %in3, i32 %in4) {
+       %tmp11 = tail call fastcc i32 @tailcallee(i32 %in1, i32 %in2,
+                                                 i32 %in1, i32 %in2)
+       ret i32 %tmp11
+}
+
+define i32 @main(i32 %argc, i8** %argv) {
+ %tmp1 = call fastcc i32 @tailcaller( i32 1, i32 2, i32 3, i32 4 )
+ ; expect match subl [stacksize] here
+ ret i32 0
+}
+
+; CHECK: call tailcaller
+; CHECK-NEXT: subl $12
diff --git a/test/CodeGen/X86/tailcall-structret.ll b/test/CodeGen/X86/tailcall-structret.ll
new file mode 100644
index 0000000..d8be4b2
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-structret.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
+define fastcc { { i8*, i8* }*, i8*} @init({ { i8*, i8* }*, i8*}, i32) {
+entry:
+      %2 = tail call fastcc { { i8*, i8* }*, i8* } @init({ { i8*, i8*}*, i8*} %0, i32 %1)
+      ret { { i8*, i8* }*, i8*} %2
+}
diff --git a/test/CodeGen/X86/tailcall-void.ll b/test/CodeGen/X86/tailcall-void.ll
new file mode 100644
index 0000000..4e578d1
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-void.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
+define fastcc void @i1test(i32, i32, i32, i32) {
+  entry:
+   tail call fastcc void @i1test( i32 %0, i32 %1, i32 %2, i32 %3)
+   ret void 
+}
diff --git a/test/CodeGen/X86/tailcall1.ll b/test/CodeGen/X86/tailcall1.ll
new file mode 100644
index 0000000..f7ff5d5
--- /dev/null
+++ b/test/CodeGen/X86/tailcall1.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL | count 5
+
+; With -tailcallopt, CodeGen guarantees a tail call optimization
+; for all of these.
+
+declare fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4)
+
+define fastcc i32 @tailcaller(i32 %in1, i32 %in2) nounwind {
+entry:
+  %tmp11 = tail call fastcc i32 @tailcallee(i32 %in1, i32 %in2, i32 %in1, i32 %in2)
+  ret i32 %tmp11
+}
+
+declare fastcc i8* @alias_callee()
+
+define fastcc noalias i8* @noalias_caller() nounwind {
+  %p = tail call fastcc i8* @alias_callee()
+  ret i8* %p
+}
+
+declare fastcc noalias i8* @noalias_callee()
+
+define fastcc i8* @alias_caller() nounwind {
+  %p = tail call fastcc noalias i8* @noalias_callee()
+  ret i8* %p
+}
+
+declare fastcc i32 @i32_callee()
+
+define fastcc i32 @ret_undef() nounwind {
+  %p = tail call fastcc i32 @i32_callee()
+  ret i32 undef
+}
+
+declare fastcc void @does_not_return()
+
+define fastcc i32 @noret() nounwind {
+  tail call fastcc void @does_not_return()
+  unreachable
+}
diff --git a/test/CodeGen/X86/tailcall2.ll b/test/CodeGen/X86/tailcall2.ll
new file mode 100644
index 0000000..80bab61
--- /dev/null
+++ b/test/CodeGen/X86/tailcall2.ll
@@ -0,0 +1,197 @@
+; RUN: llc < %s -march=x86    -asm-verbose=false | FileCheck %s -check-prefix=32
+; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s -check-prefix=64
+
+define void @t1(i32 %x) nounwind ssp {
+entry:
+; 32: t1:
+; 32: jmp {{_?}}foo
+
+; 64: t1:
+; 64: jmp {{_?}}foo
+  tail call void @foo() nounwind
+  ret void
+}
+
+declare void @foo()
+
+define void @t2() nounwind ssp {
+entry:
+; 32: t2:
+; 32: jmp {{_?}}foo2
+
+; 64: t2:
+; 64: jmp {{_?}}foo2
+  %0 = tail call i32 @foo2() nounwind
+  ret void
+}
+
+declare i32 @foo2()
+
+define void @t3() nounwind ssp {
+entry:
+; 32: t3:
+; 32: jmp {{_?}}foo3
+
+; 64: t3:
+; 64: jmp {{_?}}foo3
+  %0 = tail call i32 @foo3() nounwind
+  ret void
+}
+
+declare i32 @foo3()
+
+define void @t4(void (i32)* nocapture %x) nounwind ssp {
+entry:
+; 32: t4:
+; 32: call *
+; FIXME: gcc can generate a tailcall for this. But it's tricky.
+
+; 64: t4:
+; 64-NOT: call
+; 64: jmpq *
+  tail call void %x(i32 0) nounwind
+  ret void
+}
+
+define void @t5(void ()* nocapture %x) nounwind ssp {
+entry:
+; 32: t5:
+; 32-NOT: call
+; 32: jmpl *
+
+; 64: t5:
+; 64-NOT: call
+; 64: jmpq *
+  tail call void %x() nounwind
+  ret void
+}
+
+define i32 @t6(i32 %x) nounwind ssp {
+entry:
+; 32: t6:
+; 32: call {{_?}}t6
+; 32: jmp {{_?}}bar
+
+; 64: t6:
+; 64: jmp {{_?}}t6
+; 64: jmp {{_?}}bar
+  %0 = icmp slt i32 %x, 10
+  br i1 %0, label %bb, label %bb1
+
+bb:
+  %1 = add nsw i32 %x, -1
+  %2 = tail call i32 @t6(i32 %1) nounwind ssp
+  ret i32 %2
+
+bb1:
+  %3 = tail call i32 @bar(i32 %x) nounwind
+  ret i32 %3
+}
+
+declare i32 @bar(i32)
+
+define i32 @t7(i32 %a, i32 %b, i32 %c) nounwind ssp {
+entry:
+; 32: t7:
+; 32: jmp {{_?}}bar2
+
+; 64: t7:
+; 64: jmp {{_?}}bar2
+  %0 = tail call i32 @bar2(i32 %a, i32 %b, i32 %c) nounwind
+  ret i32 %0
+}
+
+declare i32 @bar2(i32, i32, i32)
+
+define signext i16 @t8() nounwind ssp {
+entry:
+; 32: t8:
+; 32: call {{_?}}bar3
+
+; 64: t8:
+; 64: callq {{_?}}bar3
+  %0 = tail call signext i16 @bar3() nounwind      ; <i16> [#uses=1]
+  ret i16 %0
+}
+
+declare signext i16 @bar3()
+
+define signext i16 @t9(i32 (i32)* nocapture %x) nounwind ssp {
+entry:
+; 32: t9:
+; 32: call *
+
+; 64: t9:
+; 64: callq *
+  %0 = bitcast i32 (i32)* %x to i16 (i32)*
+  %1 = tail call signext i16 %0(i32 0) nounwind
+  ret i16 %1
+}
+
+define void @t10() nounwind ssp {
+entry:
+; 32: t10:
+; 32: call
+
+; 64: t10:
+; 64: callq
+  %0 = tail call i32 @foo4() noreturn nounwind
+  unreachable
+}
+
+declare i32 @foo4()
+
+define i32 @t11(i32 %x, i32 %y, i32 %z.0, i32 %z.1, i32 %z.2) nounwind ssp {
+; In 32-bit mode, it's emitting a bunch of dead loads that are not being
+; eliminated currently.
+
+; 32: t11:
+; 32-NOT: subl ${{[0-9]+}}, %esp
+; 32: jne
+; 32-NOT: movl
+; 32-NOT: addl ${{[0-9]+}}, %esp
+; 32: jmp {{_?}}foo5
+
+; 64: t11:
+; 64-NOT: subq ${{[0-9]+}}, %esp
+; 64-NOT: addq ${{[0-9]+}}, %esp
+; 64: jmp {{_?}}foo5
+entry:
+  %0 = icmp eq i32 %x, 0
+  br i1 %0, label %bb6, label %bb
+
+bb:
+  %1 = tail call i32 @foo5(i32 %x, i32 %y, i32 %z.0, i32 %z.1, i32 %z.2) nounwind
+  ret i32 %1
+
+bb6:
+  ret i32 0
+}
+
+declare i32 @foo5(i32, i32, i32, i32, i32)
+
+%struct.t = type { i32, i32, i32, i32, i32 }
+
+define i32 @t12(i32 %x, i32 %y, %struct.t* byval align 4 %z) nounwind ssp {
+; 32: t12:
+; 32-NOT: subl ${{[0-9]+}}, %esp
+; 32-NOT: addl ${{[0-9]+}}, %esp
+; 32: jmp {{_?}}foo6
+
+; 64: t12:
+; 64-NOT: subq ${{[0-9]+}}, %esp
+; 64-NOT: addq ${{[0-9]+}}, %esp
+; 64: jmp {{_?}}foo6
+entry:
+  %0 = icmp eq i32 %x, 0
+  br i1 %0, label %bb2, label %bb
+
+bb:
+  %1 = tail call i32 @foo6(i32 %x, i32 %y, %struct.t* byval align 4 %z) nounwind
+  ret i32 %1
+
+bb2:
+  ret i32 0
+}
+
+declare i32 @foo6(i32, i32, %struct.t* byval align 4)
diff --git a/test/CodeGen/X86/tailcallbyval.ll b/test/CodeGen/X86/tailcallbyval.ll
new file mode 100644
index 0000000..7002560
--- /dev/null
+++ b/test/CodeGen/X86/tailcallbyval.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
+; RUN: llc < %s -march=x86 -tailcallopt | grep {movl\[\[:space:\]\]*4(%esp), %eax} | count 1
+%struct.s = type {i32, i32, i32, i32, i32, i32, i32, i32,
+                  i32, i32, i32, i32, i32, i32, i32, i32,
+                  i32, i32, i32, i32, i32, i32, i32, i32 }
+
+define  fastcc i32 @tailcallee(%struct.s* byval %a) nounwind {
+entry:
+        %tmp2 = getelementptr %struct.s* %a, i32 0, i32 0
+        %tmp3 = load i32* %tmp2
+        ret i32 %tmp3
+}
+
+define  fastcc i32 @tailcaller(%struct.s* byval %a) nounwind {
+entry:
+        %tmp4 = tail call fastcc i32 @tailcallee(%struct.s* %a byval)
+        ret i32 %tmp4
+}
diff --git a/test/CodeGen/X86/tailcallbyval64.ll b/test/CodeGen/X86/tailcallbyval64.ll
new file mode 100644
index 0000000..7c685b8
--- /dev/null
+++ b/test/CodeGen/X86/tailcallbyval64.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=x86-64  -tailcallopt  | grep TAILCALL
+; Expect 2 rep;movs because of tail call byval lowering.
+; RUN: llc < %s -march=x86-64  -tailcallopt  | grep rep | wc -l | grep 2
+; A sequence of copyto/copyfrom virtual registers is used to deal with byval
+; lowering appearing after moving arguments to registers. The following two
+; checks verify that the register allocator changes those sequences to direct
+; moves to argument register where it can (for registers that are not used in 
+; byval lowering - not rsi, not rdi, not rcx).
+; Expect argument 4 to be moved directly to register edx.
+; RUN: llc < %s -march=x86-64  -tailcallopt  | grep movl | grep {7} | grep edx
+; Expect argument 6 to be moved directly to register r8.
+; RUN: llc < %s -march=x86-64  -tailcallopt  | grep movl | grep {17} | grep r8
+
+%struct.s = type { i64, i64, i64, i64, i64, i64, i64, i64,
+                   i64, i64, i64, i64, i64, i64, i64, i64,
+                   i64, i64, i64, i64, i64, i64, i64, i64 }
+
+declare  fastcc i64 @tailcallee(%struct.s* byval %a, i64 %val, i64 %val2, i64 %val3, i64 %val4, i64 %val5)
+
+
+define  fastcc i64 @tailcaller(i64 %b, %struct.s* byval %a) {
+entry:
+        %tmp2 = getelementptr %struct.s* %a, i32 0, i32 1
+        %tmp3 = load i64* %tmp2, align 8
+        %tmp4 = tail call fastcc i64 @tailcallee(%struct.s* %a byval, i64 %tmp3, i64 %b, i64 7, i64 13, i64 17)
+        ret i64 %tmp4
+}
+
+
diff --git a/test/CodeGen/X86/tailcallfp.ll b/test/CodeGen/X86/tailcallfp.ll
new file mode 100644
index 0000000..c0b609a
--- /dev/null
+++ b/test/CodeGen/X86/tailcallfp.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel -tailcallopt | not grep call
+define fastcc i32 @bar(i32 %X, i32(double, i32) *%FP) {
+     %Y = tail call fastcc i32 %FP(double 0.0, i32 %X)
+     ret i32 %Y
+}
diff --git a/test/CodeGen/X86/tailcallfp2.ll b/test/CodeGen/X86/tailcallfp2.ll
new file mode 100644
index 0000000..3841f51
--- /dev/null
+++ b/test/CodeGen/X86/tailcallfp2.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86 -tailcallopt | grep {jmp} | grep {\\*%eax}
+
+declare i32 @putchar(i32)
+
+define fastcc i32 @checktail(i32 %x, i32* %f, i32 %g) nounwind {
+        %tmp1 = icmp sgt i32 %x, 0
+        br i1 %tmp1, label %if-then, label %if-else
+
+if-then:
+        %fun_ptr = bitcast i32* %f to i32(i32, i32*, i32)* 
+        %arg1    = add i32 %x, -1
+        call i32 @putchar(i32 90)       
+        %res = tail call fastcc i32 %fun_ptr( i32 %arg1, i32 * %f, i32 %g)
+        ret i32 %res
+
+if-else:
+        ret i32  %x
+}
+
+
+define i32 @main() nounwind { 
+ %f   = bitcast i32 (i32, i32*, i32)* @checktail to i32*
+ %res = tail call fastcc i32 @checktail( i32 10, i32* %f,i32 10)
+ ret i32 %res
+}
diff --git a/test/CodeGen/X86/tailcallpic1.ll b/test/CodeGen/X86/tailcallpic1.ll
new file mode 100644
index 0000000..60e3be5
--- /dev/null
+++ b/test/CodeGen/X86/tailcallpic1.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s  -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep TAILCALL
+
+define protected fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+entry:
+	ret i32 %a3
+}
+
+define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
+entry:
+	%tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 )		; <i32> [#uses=1]
+	ret i32 %tmp11
+}
diff --git a/test/CodeGen/X86/tailcallpic2.ll b/test/CodeGen/X86/tailcallpic2.ll
new file mode 100644
index 0000000..eaa7631
--- /dev/null
+++ b/test/CodeGen/X86/tailcallpic2.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s  -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep -v TAILCALL
+
+define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+entry:
+	ret i32 %a3
+}
+
+define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
+entry:
+	%tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 )		; <i32> [#uses=1]
+	ret i32 %tmp11
+}
diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll
new file mode 100644
index 0000000..d05dff8
--- /dev/null
+++ b/test/CodeGen/X86/tailcallstack64.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -tailcallopt -march=x86-64 -post-RA-scheduler=true | FileCheck %s
+
+; Check that lowered arguments on the stack do not overwrite each other.
+; Add %in1 %p1 to a different temporary register (%eax).
+; CHECK: movl  %edi, %eax
+; Move param %in1 to temp register (%r10d).
+; CHECK: movl  40(%rsp), %r10d
+; Move param %in2 to stack.
+; CHECK: movl  %r10d, 32(%rsp)
+; Move result of addition to stack.
+; CHECK: movl  %eax, 40(%rsp)
+; Eventually, do a TAILCALL
+; CHECK: TAILCALL
+
+declare fastcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %a, i32 %b) nounwind
+
+define fastcc i32 @tailcaller(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %in1, i32 %in2) nounwind {
+entry:
+        %tmp = add i32 %in1, %p1
+        %retval = tail call fastcc i32 @tailcallee(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %in2,i32 %tmp)
+        ret i32 %retval
+}
+
diff --git a/test/CodeGen/X86/test-nofold.ll b/test/CodeGen/X86/test-nofold.ll
new file mode 100644
index 0000000..f1063dc
--- /dev/null
+++ b/test/CodeGen/X86/test-nofold.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
+; rdar://5752025
+
+; We want:
+;      CHECK: movl	4(%esp), %ecx
+; CHECK-NEXT: andl	$15, %ecx
+; CHECK-NEXT: movl	$42, %eax
+; CHECK-NEXT: cmovel	%ecx, %eax
+; CHECK-NEXT: ret
+;
+; We don't want:
+;	movl	4(%esp), %eax
+;	movl	%eax, %ecx     # bad: extra copy
+;	andl	$15, %ecx
+;	testl	$15, %eax      # bad: peep obstructed
+;	movl	$42, %eax
+;	cmovel	%ecx, %eax
+;	ret
+;
+; We also don't want:
+;	movl	$15, %ecx      # bad: larger encoding
+;	andl	4(%esp), %ecx
+;	movl	$42, %eax
+;	cmovel	%ecx, %eax
+;	ret
+;
+; We also don't want:
+;	movl	4(%esp), %ecx
+;	andl	$15, %ecx
+;	testl	%ecx, %ecx     # bad: unnecessary test
+;	movl	$42, %eax
+;	cmovel	%ecx, %eax
+;	ret
+
+define i32 @t1(i32 %X) nounwind  {
+entry:
+	%tmp2 = and i32 %X, 15		; <i32> [#uses=2]
+	%tmp4 = icmp eq i32 %tmp2, 0		; <i1> [#uses=1]
+	%retval = select i1 %tmp4, i32 %tmp2, i32 42		; <i32> [#uses=1]
+	ret i32 %retval
+}
+
diff --git a/test/CodeGen/X86/test-shrink-bug.ll b/test/CodeGen/X86/test-shrink-bug.ll
new file mode 100644
index 0000000..64631ea
--- /dev/null
+++ b/test/CodeGen/X86/test-shrink-bug.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s | FileCheck %s
+
+; Codegen shouldn't reduce the comparison down to testb $-1, %al
+; because that changes the result of the signed test.
+; PR5132
+; CHECK: testw  $255, %ax
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin10.0"
+
+@g_14 = global i8 -6, align 1                     ; <i8*> [#uses=1]
+
+declare i32 @func_16(i8 signext %p_19, i32 %p_20) nounwind
+
+define i32 @func_35(i64 %p_38) nounwind ssp {
+entry:
+  %tmp = load i8* @g_14                           ; <i8> [#uses=2]
+  %conv = zext i8 %tmp to i32                     ; <i32> [#uses=1]
+  %cmp = icmp sle i32 1, %conv                    ; <i1> [#uses=1]
+  %conv2 = zext i1 %cmp to i32                    ; <i32> [#uses=1]
+  %call = call i32 @func_16(i8 signext %tmp, i32 %conv2) ssp ; <i32> [#uses=1]
+  ret i32 1
+}
diff --git a/test/CodeGen/X86/test-shrink.ll b/test/CodeGen/X86/test-shrink.ll
new file mode 100644
index 0000000..1d63693
--- /dev/null
+++ b/test/CodeGen/X86/test-shrink.ll
@@ -0,0 +1,158 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK-64
+; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=CHECK-32
+
+; CHECK-64: g64xh:
+; CHECK-64:   testb $8, %ah
+; CHECK-64:   ret
+; CHECK-32: g64xh:
+; CHECK-32:   testb $8, %ah
+; CHECK-32:   ret
+define void @g64xh(i64 inreg %x) nounwind {
+  %t = and i64 %x, 2048
+  %s = icmp eq i64 %t, 0
+  br i1 %s, label %yes, label %no
+
+yes:
+  call void @bar()
+  ret void
+no:
+  ret void
+}
+; CHECK-64: g64xl:
+; CHECK-64:   testb $8, %dil
+; CHECK-64:   ret
+; CHECK-32: g64xl:
+; CHECK-32:   testb $8, %al
+; CHECK-32:   ret
+define void @g64xl(i64 inreg %x) nounwind {
+  %t = and i64 %x, 8
+  %s = icmp eq i64 %t, 0
+  br i1 %s, label %yes, label %no
+
+yes:
+  call void @bar()
+  ret void
+no:
+  ret void
+}
+; CHECK-64: g32xh:
+; CHECK-64:   testb $8, %ah
+; CHECK-64:   ret
+; CHECK-32: g32xh:
+; CHECK-32:   testb $8, %ah
+; CHECK-32:   ret
+define void @g32xh(i32 inreg %x) nounwind {
+  %t = and i32 %x, 2048
+  %s = icmp eq i32 %t, 0
+  br i1 %s, label %yes, label %no
+
+yes:
+  call void @bar()
+  ret void
+no:
+  ret void
+}
+; CHECK-64: g32xl:
+; CHECK-64:   testb $8, %dil
+; CHECK-64:   ret
+; CHECK-32: g32xl:
+; CHECK-32:   testb $8, %al
+; CHECK-32:   ret
+define void @g32xl(i32 inreg %x) nounwind {
+  %t = and i32 %x, 8
+  %s = icmp eq i32 %t, 0
+  br i1 %s, label %yes, label %no
+
+yes:
+  call void @bar()
+  ret void
+no:
+  ret void
+}
+; CHECK-64: g16xh:
+; CHECK-64:   testb $8, %ah
+; CHECK-64:   ret
+; CHECK-32: g16xh:
+; CHECK-32:   testb $8, %ah
+; CHECK-32:   ret
+define void @g16xh(i16 inreg %x) nounwind {
+  %t = and i16 %x, 2048
+  %s = icmp eq i16 %t, 0
+  br i1 %s, label %yes, label %no
+
+yes:
+  call void @bar()
+  ret void
+no:
+  ret void
+}
+; CHECK-64: g16xl:
+; CHECK-64:   testb $8, %dil
+; CHECK-64:   ret
+; CHECK-32: g16xl:
+; CHECK-32:   testb $8, %al
+; CHECK-32:   ret
+define void @g16xl(i16 inreg %x) nounwind {
+  %t = and i16 %x, 8
+  %s = icmp eq i16 %t, 0
+  br i1 %s, label %yes, label %no
+
+yes:
+  call void @bar()
+  ret void
+no:
+  ret void
+}
+; CHECK-64: g64x16:
+; CHECK-64:   testw $-32640, %di
+; CHECK-64:   ret
+; CHECK-32: g64x16:
+; CHECK-32:   testw $-32640, %ax
+; CHECK-32:   ret
+define void @g64x16(i64 inreg %x) nounwind {
+  %t = and i64 %x, 32896
+  %s = icmp eq i64 %t, 0
+  br i1 %s, label %yes, label %no
+
+yes:
+  call void @bar()
+  ret void
+no:
+  ret void
+}
+; CHECK-64: g32x16:
+; CHECK-64:   testw $-32640, %di
+; CHECK-64:   ret
+; CHECK-32: g32x16:
+; CHECK-32:   testw $-32640, %ax
+; CHECK-32:   ret
+define void @g32x16(i32 inreg %x) nounwind {
+  %t = and i32 %x, 32896
+  %s = icmp eq i32 %t, 0
+  br i1 %s, label %yes, label %no
+
+yes:
+  call void @bar()
+  ret void
+no:
+  ret void
+}
+; CHECK-64: g64x32:
+; CHECK-64:   testl $268468352, %edi
+; CHECK-64:   ret
+; CHECK-32: g64x32:
+; CHECK-32:   testl $268468352, %eax
+; CHECK-32:   ret
+define void @g64x32(i64 inreg %x) nounwind {
+  %t = and i64 %x, 268468352
+  %s = icmp eq i64 %t, 0
+  br i1 %s, label %yes, label %no
+
+yes:
+  call void @bar()
+  ret void
+no:
+  ret void
+}
+
+declare void @bar()
diff --git a/test/CodeGen/X86/testl-commute.ll b/test/CodeGen/X86/testl-commute.ll
new file mode 100644
index 0000000..3d5f672
--- /dev/null
+++ b/test/CodeGen/X86/testl-commute.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s | grep {testl.*\(%r.i\), %} | count 3
+; rdar://5671654
+; The loads should fold into the testl instructions, no matter how
+; the inputs are commuted.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin7"
+
+define i32 @test(i32* %P, i32* %G) nounwind {
+entry:
+	%0 = load i32* %P, align 4		; <i32> [#uses=3]
+	%1 = load i32* %G, align 4		; <i32> [#uses=1]
+	%2 = and i32 %1, %0		; <i32> [#uses=1]
+	%3 = icmp eq i32 %2, 0		; <i1> [#uses=1]
+	br i1 %3, label %bb1, label %bb
+
+bb:		; preds = %entry
+	%4 = tail call i32 @bar() nounwind		; <i32> [#uses=0]
+	ret i32 %0
+
+bb1:		; preds = %entry
+	ret i32 %0
+}
+
+define i32 @test2(i32* %P, i32* %G) nounwind {
+entry:
+	%0 = load i32* %P, align 4		; <i32> [#uses=3]
+	%1 = load i32* %G, align 4		; <i32> [#uses=1]
+	%2 = and i32 %0, %1		; <i32> [#uses=1]
+	%3 = icmp eq i32 %2, 0		; <i1> [#uses=1]
+	br i1 %3, label %bb1, label %bb
+
+bb:		; preds = %entry
+	%4 = tail call i32 @bar() nounwind		; <i32> [#uses=0]
+	ret i32 %0
+
+bb1:		; preds = %entry
+	ret i32 %0
+}
+define i32 @test3(i32* %P, i32* %G) nounwind {
+entry:
+	%0 = load i32* %P, align 4		; <i32> [#uses=3]
+	%1 = load i32* %G, align 4		; <i32> [#uses=1]
+	%2 = and i32 %0, %1		; <i32> [#uses=1]
+	%3 = icmp eq i32 %2, 0		; <i1> [#uses=1]
+	br i1 %3, label %bb1, label %bb
+
+bb:		; preds = %entry
+	%4 = tail call i32 @bar() nounwind		; <i32> [#uses=0]
+	ret i32 %1
+
+bb1:		; preds = %entry
+	ret i32 %1
+}
+
+declare i32 @bar()
diff --git a/test/CodeGen/X86/tls-pic.ll b/test/CodeGen/X86/tls-pic.ll
new file mode 100644
index 0000000..4cad837
--- /dev/null
+++ b/test/CodeGen/X86/tls-pic.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X32 %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X64 %s
+
+@i = thread_local global i32 15
+
+define i32 @f1() {
+entry:
+	%tmp1 = load i32* @i
+	ret i32 %tmp1
+}
+
+; X32: f1:
+; X32:   leal i@TLSGD(,%ebx), %eax
+; X32:   call ___tls_get_addr@PLT
+
+; X64: f1:
+; X64:   leaq i@TLSGD(%rip), %rdi
+; X64:   call __tls_get_addr@PLT
+
+
+@i2 = external thread_local global i32
+
+define i32* @f2() {
+entry:
+	ret i32* @i
+}
+
+; X32: f2:
+; X32:   leal i@TLSGD(,%ebx), %eax
+; X32:   call ___tls_get_addr@PLT
+
+; X64: f2:
+; X64:   leaq i@TLSGD(%rip), %rdi
+; X64:   call __tls_get_addr@PLT
+
+
+
+define i32 @f3() {
+entry:
+	%tmp1 = load i32* @i		; <i32> [#uses=1]
+	ret i32 %tmp1
+}
+
+; X32: f3:
+; X32:   leal	i@TLSGD(,%ebx), %eax
+; X32:   call ___tls_get_addr@PLT
+
+; X64: f3:
+; X64:   leaq i@TLSGD(%rip), %rdi
+; X64:   call __tls_get_addr@PLT
+
+
+define i32* @f4() nounwind {
+entry:
+	ret i32* @i
+}
+
+; X32: f4:
+; X32:   leal	i@TLSGD(,%ebx), %eax
+; X32:   call ___tls_get_addr@PLT
+
+; X64: f4:
+; X64:   leaq i@TLSGD(%rip), %rdi
+; X64:   call __tls_get_addr@PLT
+
+
+
diff --git a/test/CodeGen/X86/tls1.ll b/test/CodeGen/X86/tls1.ll
new file mode 100644
index 0000000..0cae5c4
--- /dev/null
+++ b/test/CodeGen/X86/tls1.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: grep {movl	%gs:i@NTPOFF, %eax} %t
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: grep {movl	%fs:i@TPOFF, %eax} %t2
+
+@i = thread_local global i32 15
+
+define i32 @f() nounwind {
+entry:
+	%tmp1 = load i32* @i
+	ret i32 %tmp1
+}
diff --git a/test/CodeGen/X86/tls10.ll b/test/CodeGen/X86/tls10.ll
new file mode 100644
index 0000000..fb61596
--- /dev/null
+++ b/test/CodeGen/X86/tls10.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: grep {movl	%gs:0, %eax} %t
+; RUN: grep {leal	i@NTPOFF(%eax), %eax} %t
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: grep {movq	%fs:0, %rax} %t2
+; RUN: grep {leaq	i@TPOFF(%rax), %rax} %t2
+
+@i = external hidden thread_local global i32
+
+define i32* @f() {
+entry:
+	ret i32* @i
+}
diff --git a/test/CodeGen/X86/tls11.ll b/test/CodeGen/X86/tls11.ll
new file mode 100644
index 0000000..a2c1a1f
--- /dev/null
+++ b/test/CodeGen/X86/tls11.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: grep {movw	%gs:i@NTPOFF, %ax} %t
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: grep {movw	%fs:i@TPOFF, %ax} %t2
+
+@i = thread_local global i16 15
+
+define i16 @f() {
+entry:
+	%tmp1 = load i16* @i
+	ret i16 %tmp1
+}
diff --git a/test/CodeGen/X86/tls12.ll b/test/CodeGen/X86/tls12.ll
new file mode 100644
index 0000000..c29f6ad
--- /dev/null
+++ b/test/CodeGen/X86/tls12.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: grep {movb	%gs:i@NTPOFF, %al} %t
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: grep {movb	%fs:i@TPOFF, %al} %t2
+
+@i = thread_local global i8 15
+
+define i8 @f() {
+entry:
+	%tmp1 = load i8* @i
+	ret i8 %tmp1
+}
diff --git a/test/CodeGen/X86/tls13.ll b/test/CodeGen/X86/tls13.ll
new file mode 100644
index 0000000..08778ec
--- /dev/null
+++ b/test/CodeGen/X86/tls13.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: grep {movswl	%gs:i@NTPOFF, %eax} %t
+; RUN: grep {movzwl	%gs:j@NTPOFF, %eax} %t
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: grep {movswl	%fs:i@TPOFF, %edi} %t2
+; RUN: grep {movzwl	%fs:j@TPOFF, %edi} %t2
+
+@i = thread_local global i16 0
+@j = thread_local global i16 0
+
+define void @f() nounwind optsize {
+entry:
+        %0 = load i16* @i, align 2
+        %1 = sext i16 %0 to i32
+        tail call void @g(i32 %1) nounwind
+        %2 = load i16* @j, align 2
+        %3 = zext i16 %2 to i32
+        tail call void @h(i32 %3) nounwind
+        ret void
+}
+
+declare void @g(i32)
+
+declare void @h(i32)
diff --git a/test/CodeGen/X86/tls14.ll b/test/CodeGen/X86/tls14.ll
new file mode 100644
index 0000000..88426dd
--- /dev/null
+++ b/test/CodeGen/X86/tls14.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: grep {movsbl	%gs:i@NTPOFF, %eax} %t
+; RUN: grep {movzbl	%gs:j@NTPOFF, %eax} %t
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: grep {movsbl	%fs:i@TPOFF, %edi} %t2
+; RUN: grep {movzbl	%fs:j@TPOFF, %edi} %t2
+
+@i = thread_local global i8 0
+@j = thread_local global i8 0
+
+define void @f() nounwind optsize {
+entry:
+        %0 = load i8* @i, align 2
+        %1 = sext i8 %0 to i32
+        tail call void @g(i32 %1) nounwind
+        %2 = load i8* @j, align 2
+        %3 = zext i8 %2 to i32
+        tail call void @h(i32 %3) nounwind
+        ret void
+}
+
+declare void @g(i32)
+
+declare void @h(i32)
diff --git a/test/CodeGen/X86/tls15.ll b/test/CodeGen/X86/tls15.ll
new file mode 100644
index 0000000..7abf070
--- /dev/null
+++ b/test/CodeGen/X86/tls15.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: grep {movl	%gs:0, %eax} %t | count 1
+; RUN: grep {leal	i@NTPOFF(%eax), %ecx} %t
+; RUN: grep {leal	j@NTPOFF(%eax), %eax} %t
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: grep {movq	%fs:0, %rax} %t2 | count 1
+; RUN: grep {leaq	i@TPOFF(%rax), %rcx} %t2
+; RUN: grep {leaq	j@TPOFF(%rax), %rax} %t2
+
+@i = thread_local global i32 0
+@j = thread_local global i32 0
+
+define void @f(i32** %a, i32** %b) {
+entry:
+	store i32* @i, i32** %a, align 8
+	store i32* @j, i32** %b, align 8
+	ret void
+}
diff --git a/test/CodeGen/X86/tls2.ll b/test/CodeGen/X86/tls2.ll
new file mode 100644
index 0000000..5a94296
--- /dev/null
+++ b/test/CodeGen/X86/tls2.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: grep {movl	%gs:0, %eax} %t
+; RUN: grep {leal	i@NTPOFF(%eax), %eax} %t
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: grep {movq	%fs:0, %rax} %t2
+; RUN: grep {leaq	i@TPOFF(%rax), %rax} %t2
+
+@i = thread_local global i32 15
+
+define i32* @f() {
+entry:
+	ret i32* @i
+}
diff --git a/test/CodeGen/X86/tls3.ll b/test/CodeGen/X86/tls3.ll
new file mode 100644
index 0000000..7327cc4
--- /dev/null
+++ b/test/CodeGen/X86/tls3.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: grep {movl	i@INDNTPOFF, %eax} %t
+; RUN: grep {movl	%gs:(%eax), %eax} %t
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: grep {movq	i@GOTTPOFF(%rip), %rax} %t2
+; RUN: grep {movl	%fs:(%rax), %eax} %t2
+
+@i = external thread_local global i32		; <i32*> [#uses=2]
+
+define i32 @f() nounwind {
+entry:
+	%tmp1 = load i32* @i		; <i32> [#uses=1]
+	ret i32 %tmp1
+}
diff --git a/test/CodeGen/X86/tls4.ll b/test/CodeGen/X86/tls4.ll
new file mode 100644
index 0000000..d2e40e3
--- /dev/null
+++ b/test/CodeGen/X86/tls4.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: grep {movl	%gs:0, %eax} %t
+; RUN: grep {addl	i@INDNTPOFF, %eax} %t
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: grep {movq	%fs:0, %rax} %t2
+; RUN: grep {addq	i@GOTTPOFF(%rip), %rax} %t2
+
+@i = external thread_local global i32		; <i32*> [#uses=2]
+
+define i32* @f() {
+entry:
+	ret i32* @i
+}
diff --git a/test/CodeGen/X86/tls5.ll b/test/CodeGen/X86/tls5.ll
new file mode 100644
index 0000000..4d2cc02
--- /dev/null
+++ b/test/CodeGen/X86/tls5.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: grep {movl	%gs:i@NTPOFF, %eax} %t
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: grep {movl	%fs:i@TPOFF, %eax} %t2
+
+@i = internal thread_local global i32 15
+
+define i32 @f() {
+entry:
+	%tmp1 = load i32* @i
+	ret i32 %tmp1
+}
diff --git a/test/CodeGen/X86/tls6.ll b/test/CodeGen/X86/tls6.ll
new file mode 100644
index 0000000..505106e
--- /dev/null
+++ b/test/CodeGen/X86/tls6.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: grep {movl	%gs:0, %eax} %t
+; RUN: grep {leal	i@NTPOFF(%eax), %eax} %t
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: grep {movq	%fs:0, %rax} %t2
+; RUN: grep {leaq	i@TPOFF(%rax), %rax} %t2
+
+@i = internal thread_local global i32 15
+
+define i32* @f() {
+entry:
+	ret i32* @i
+}
diff --git a/test/CodeGen/X86/tls7.ll b/test/CodeGen/X86/tls7.ll
new file mode 100644
index 0000000..e9116e7
--- /dev/null
+++ b/test/CodeGen/X86/tls7.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: grep {movl	%gs:i@NTPOFF, %eax} %t
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: grep {movl	%fs:i@TPOFF, %eax} %t2
+
+@i = hidden thread_local global i32 15
+
+define i32 @f() {
+entry:
+	%tmp1 = load i32* @i
+	ret i32 %tmp1
+}
diff --git a/test/CodeGen/X86/tls8.ll b/test/CodeGen/X86/tls8.ll
new file mode 100644
index 0000000..375af94
--- /dev/null
+++ b/test/CodeGen/X86/tls8.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: grep {movl	%gs:0, %eax} %t
+; RUN: grep {leal	i@NTPOFF(%eax), %eax} %t
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: grep {movq	%fs:0, %rax} %t2
+; RUN: grep {leaq	i@TPOFF(%rax), %rax} %t2
+
+@i = hidden thread_local global i32 15
+
+define i32* @f() {
+entry:
+	ret i32* @i
+}
diff --git a/test/CodeGen/X86/tls9.ll b/test/CodeGen/X86/tls9.ll
new file mode 100644
index 0000000..214146f
--- /dev/null
+++ b/test/CodeGen/X86/tls9.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
+; RUN: grep {movl	%gs:i@NTPOFF, %eax} %t
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
+; RUN: grep {movl	%fs:i@TPOFF, %eax} %t2
+
+@i = external hidden thread_local global i32
+
+define i32 @f() {
+entry:
+	%tmp1 = load i32* @i
+	ret i32 %tmp1
+}
diff --git a/test/CodeGen/X86/trap.ll b/test/CodeGen/X86/trap.ll
new file mode 100644
index 0000000..03ae6bf
--- /dev/null
+++ b/test/CodeGen/X86/trap.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86 -mcpu=yonah | grep ud2
+define i32 @test() noreturn nounwind  {
+entry:
+	tail call void @llvm.trap( )
+	unreachable
+}
+
+declare void @llvm.trap() nounwind 
+
diff --git a/test/CodeGen/X86/trunc-to-bool.ll b/test/CodeGen/X86/trunc-to-bool.ll
new file mode 100644
index 0000000..bfab1ae
--- /dev/null
+++ b/test/CodeGen/X86/trunc-to-bool.ll
@@ -0,0 +1,54 @@
+; An integer truncation to i1 should be done with an and instruction to make
+; sure only the LSBit survives. Test that this is the case both for a returned
+; value and as the operand of a branch.
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+define i1 @test1(i32 %X) zeroext {
+    %Y = trunc i32 %X to i1
+    ret i1 %Y
+}
+; CHECK: andl $1, %eax
+
+define i1 @test2(i32 %val, i32 %mask) {
+entry:
+    %shifted = ashr i32 %val, %mask
+    %anded = and i32 %shifted, 1
+    %trunced = trunc i32 %anded to i1
+    br i1 %trunced, label %ret_true, label %ret_false
+ret_true:
+    ret i1 true
+ret_false:
+    ret i1 false
+}
+; CHECK: testb $1, %al
+
+define i32 @test3(i8* %ptr) {
+    %val = load i8* %ptr
+    %tmp = trunc i8 %val to i1
+    br i1 %tmp, label %cond_true, label %cond_false
+cond_true:
+    ret i32 21
+cond_false:
+    ret i32 42
+}
+; CHECK: testb $1, %al
+
+define i32 @test4(i8* %ptr) {
+    %tmp = ptrtoint i8* %ptr to i1
+    br i1 %tmp, label %cond_true, label %cond_false
+cond_true:
+    ret i32 21
+cond_false:
+    ret i32 42
+}
+; CHECK: testb $1, %al
+
+define i32 @test6(double %d) {
+    %tmp = fptosi double %d to i1
+    br i1 %tmp, label %cond_true, label %cond_false
+cond_true:
+    ret i32 21
+cond_false:
+    ret i32 42
+}
+; CHECK: testb $1
diff --git a/test/CodeGen/X86/twoaddr-coalesce-2.ll b/test/CodeGen/X86/twoaddr-coalesce-2.ll
new file mode 100644
index 0000000..6f16a25
--- /dev/null
+++ b/test/CodeGen/X86/twoaddr-coalesce-2.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& \
+; RUN:   grep {twoaddrinstr} | grep {Number of instructions aggressively commuted}
+; rdar://6480363
+
+target triple = "i386-apple-darwin9.6"
+
+define <2 x double> @t(<2 x double> %A, <2 x double> %B, <2 x double> %C) nounwind readnone {
+entry:
+	%tmp.i3 = bitcast <2 x double> %B to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%tmp2.i = or <2 x i64> %tmp.i3, <i64 4607632778762754458, i64 4607632778762754458>		; <<2 x i64>> [#uses=1]
+	%tmp3.i = bitcast <2 x i64> %tmp2.i to <2 x double>		; <<2 x double>> [#uses=1]
+	%tmp.i2 = fadd <2 x double> %tmp3.i, %A		; <<2 x double>> [#uses=1]
+	%tmp.i = fadd <2 x double> %tmp.i2, %C		; <<2 x double>> [#uses=1]
+	ret <2 x double> %tmp.i
+}
diff --git a/test/CodeGen/X86/twoaddr-coalesce.ll b/test/CodeGen/X86/twoaddr-coalesce.ll
new file mode 100644
index 0000000..d0e13f6
--- /dev/null
+++ b/test/CodeGen/X86/twoaddr-coalesce.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86 | grep mov | count 5
+; rdar://6523745
+
+@"\01LC" = internal constant [4 x i8] c"%d\0A\00"		; <[4 x i8]*> [#uses=1]
+
+define i32 @main() nounwind {
+bb1.thread:
+	br label %bb1
+
+bb1:		; preds = %bb1, %bb1.thread
+	%i.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ]		; <i32> [#uses=2]
+	%0 = trunc i32 %i.0.reg2mem.0 to i8		; <i8> [#uses=1]
+	%1 = sdiv i8 %0, 2		; <i8> [#uses=1]
+	%2 = sext i8 %1 to i32		; <i32> [#uses=1]
+	%3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), i32 %2) nounwind		; <i32> [#uses=0]
+	%indvar.next = add i32 %i.0.reg2mem.0, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next, 258		; <i1> [#uses=1]
+	br i1 %exitcond, label %bb2, label %bb1
+
+bb2:		; preds = %bb1
+	ret i32 0
+}
+
+declare i32 @printf(i8*, ...) nounwind
diff --git a/test/CodeGen/X86/twoaddr-lea.ll b/test/CodeGen/X86/twoaddr-lea.ll
new file mode 100644
index 0000000..a245ed7
--- /dev/null
+++ b/test/CodeGen/X86/twoaddr-lea.ll
@@ -0,0 +1,24 @@
+;; X's live range extends beyond the shift, so the register allocator
+;; cannot coalesce it with Y.  Because of this, a copy needs to be
+;; emitted before the shift to save the register value before it is
+;; clobbered.  However, this copy is not needed if the register
+;; allocator turns the shift into an LEA.  This also occurs for ADD.
+
+; Check that the shift gets turned into an LEA.
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
+; RUN:   not grep {mov E.X, E.X}
+
+@G = external global i32                ; <i32*> [#uses=3]
+
+define i32 @test1(i32 %X, i32 %Y) {
+        %Z = add i32 %X, %Y             ; <i32> [#uses=1]
+        volatile store i32 %Y, i32* @G
+        volatile store i32 %Z, i32* @G
+        ret i32 %X
+}
+
+define i32 @test2(i32 %X) {
+        %Z = add i32 %X, 1              ; <i32> [#uses=1]
+        volatile store i32 %Z, i32* @G
+        ret i32 %X
+}
diff --git a/test/CodeGen/X86/twoaddr-pass-sink.ll b/test/CodeGen/X86/twoaddr-pass-sink.ll
new file mode 100644
index 0000000..077fee0
--- /dev/null
+++ b/test/CodeGen/X86/twoaddr-pass-sink.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& grep {Number of 3-address instructions sunk}
+
+define void @t2(<2 x i64>* %vDct, <2 x i64>* %vYp, i8* %skiplist, <2 x i64> %a1) nounwind  {
+entry:
+	%tmp25 = bitcast <2 x i64> %a1 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	br label %bb
+bb:		; preds = %bb, %entry
+	%skiplist_addr.0.rec = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]		; <i32> [#uses=3]
+	%vYp_addr.0.rec = shl i32 %skiplist_addr.0.rec, 3		; <i32> [#uses=3]
+	%vDct_addr.0 = getelementptr <2 x i64>* %vDct, i32 %vYp_addr.0.rec		; <<2 x i64>*> [#uses=1]
+	%vYp_addr.0 = getelementptr <2 x i64>* %vYp, i32 %vYp_addr.0.rec		; <<2 x i64>*> [#uses=1]
+	%skiplist_addr.0 = getelementptr i8* %skiplist, i32 %skiplist_addr.0.rec		; <i8*> [#uses=1]
+	%vDct_addr.0.sum43 = or i32 %vYp_addr.0.rec, 1		; <i32> [#uses=1]
+	%tmp7 = getelementptr <2 x i64>* %vDct, i32 %vDct_addr.0.sum43		; <<2 x i64>*> [#uses=1]
+	%tmp8 = load <2 x i64>* %tmp7, align 16		; <<2 x i64>> [#uses=1]
+	%tmp11 = load <2 x i64>* %vDct_addr.0, align 16		; <<2 x i64>> [#uses=1]
+	%tmp13 = bitcast <2 x i64> %tmp8 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp15 = bitcast <2 x i64> %tmp11 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp16 = shufflevector <8 x i16> %tmp15, <8 x i16> %tmp13, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 >		; <<8 x i16>> [#uses=1]
+	%tmp26 = mul <8 x i16> %tmp25, %tmp16		; <<8 x i16>> [#uses=1]
+	%tmp27 = bitcast <8 x i16> %tmp26 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	store <2 x i64> %tmp27, <2 x i64>* %vYp_addr.0, align 16
+	%tmp37 = load i8* %skiplist_addr.0, align 1		; <i8> [#uses=1]
+	%tmp38 = icmp eq i8 %tmp37, 0		; <i1> [#uses=1]
+	%indvar.next = add i32 %skiplist_addr.0.rec, 1		; <i32> [#uses=1]
+	br i1 %tmp38, label %return, label %bb
+return:		; preds = %bb
+	ret void
+}
diff --git a/test/CodeGen/X86/twoaddr-remat.ll b/test/CodeGen/X86/twoaddr-remat.ll
new file mode 100644
index 0000000..4940c78
--- /dev/null
+++ b/test/CodeGen/X86/twoaddr-remat.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -march=x86 | grep 59796 | count 3
+
+	%Args = type %Value*
+	%Exec = type opaque*
+	%Identifier = type opaque*
+	%JSFunction = type %Value (%Exec, %Scope, %Value, %Args)
+	%PropertyNameArray = type opaque*
+	%Scope = type opaque*
+	%Value = type opaque*
+
+declare i1 @X1(%Exec) readonly 
+
+declare %Value @X2(%Exec)
+
+declare i32 @X3(%Exec, %Value)
+
+declare %Value @X4(i32) readnone 
+
+define internal %Value @fast3bitlookup(%Exec %exec, %Scope %scope, %Value %this, %Args %args) nounwind {
+prologue:
+	%eh_check = tail call i1 @X1( %Exec %exec ) readonly 		; <i1> [#uses=1]
+	br i1 %eh_check, label %exception, label %no_exception
+
+exception:		; preds = %no_exception, %prologue
+	%rethrow_result = tail call %Value @X2( %Exec %exec )		; <%Value> [#uses=1]
+	ret %Value %rethrow_result
+
+no_exception:		; preds = %prologue
+	%args_intptr = bitcast %Args %args to i32*		; <i32*> [#uses=1]
+	%argc_val = load i32* %args_intptr		; <i32> [#uses=1]
+	%cmpParamArgc = icmp sgt i32 %argc_val, 0		; <i1> [#uses=1]
+	%arg_ptr = getelementptr %Args %args, i32 1		; <%Args> [#uses=1]
+	%arg_val = load %Args %arg_ptr		; <%Value> [#uses=1]
+	%ext_arg_val = select i1 %cmpParamArgc, %Value %arg_val, %Value inttoptr (i32 5 to %Value)		; <%Value> [#uses=1]
+	%toInt325 = tail call i32 @X3( %Exec %exec, %Value %ext_arg_val )		; <i32> [#uses=3]
+	%eh_check6 = tail call i1 @X1( %Exec %exec ) readonly 		; <i1> [#uses=1]
+	br i1 %eh_check6, label %exception, label %no_exception7
+
+no_exception7:		; preds = %no_exception
+	%shl_tmp_result = shl i32 %toInt325, 1		; <i32> [#uses=1]
+	%rhs_masked13 = and i32 %shl_tmp_result, 14		; <i32> [#uses=1]
+	%ashr_tmp_result = lshr i32 59796, %rhs_masked13		; <i32> [#uses=1]
+	%and_tmp_result15 = and i32 %ashr_tmp_result, 3		; <i32> [#uses=1]
+	%ashr_tmp_result3283 = lshr i32 %toInt325, 2		; <i32> [#uses=1]
+	%rhs_masked38 = and i32 %ashr_tmp_result3283, 14		; <i32> [#uses=1]
+	%ashr_tmp_result39 = lshr i32 59796, %rhs_masked38		; <i32> [#uses=1]
+	%and_tmp_result41 = and i32 %ashr_tmp_result39, 3		; <i32> [#uses=1]
+	%addconv = add i32 %and_tmp_result15, %and_tmp_result41		; <i32> [#uses=1]
+	%ashr_tmp_result6181 = lshr i32 %toInt325, 5		; <i32> [#uses=1]
+	%rhs_masked67 = and i32 %ashr_tmp_result6181, 6		; <i32> [#uses=1]
+	%ashr_tmp_result68 = lshr i32 59796, %rhs_masked67		; <i32> [#uses=1]
+	%and_tmp_result70 = and i32 %ashr_tmp_result68, 3		; <i32> [#uses=1]
+	%addconv82 = add i32 %addconv, %and_tmp_result70		; <i32> [#uses=3]
+	%rangetmp = add i32 %addconv82, 536870912		; <i32> [#uses=1]
+	%rangecmp = icmp ult i32 %rangetmp, 1073741824		; <i1> [#uses=1]
+	br i1 %rangecmp, label %NumberLiteralIntFast, label %NumberLiteralIntSlow
+
+NumberLiteralIntFast:		; preds = %no_exception7
+	%imm_shift = shl i32 %addconv82, 2		; <i32> [#uses=1]
+	%imm_or = or i32 %imm_shift, 3		; <i32> [#uses=1]
+	%imm_val = inttoptr i32 %imm_or to %Value		; <%Value> [#uses=1]
+	ret %Value %imm_val
+
+NumberLiteralIntSlow:		; preds = %no_exception7
+	%toVal = call %Value @X4( i32 %addconv82 )		; <%Value> [#uses=1]
+	ret %Value %toVal
+}
diff --git a/test/CodeGen/X86/uint_to_fp-2.ll b/test/CodeGen/X86/uint_to_fp-2.ll
new file mode 100644
index 0000000..da5105d
--- /dev/null
+++ b/test/CodeGen/X86/uint_to_fp-2.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd | count 1
+; rdar://6504833
+
+define float @f(i32 %x) nounwind readnone {
+entry:
+	%0 = uitofp i32 %x to float
+	ret float %0
+}
diff --git a/test/CodeGen/X86/uint_to_fp.ll b/test/CodeGen/X86/uint_to_fp.ll
new file mode 100644
index 0000000..41ee194
--- /dev/null
+++ b/test/CodeGen/X86/uint_to_fp.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 -mcpu=yonah | not grep {sub.*esp}
+; RUN: llc < %s -march=x86 -mcpu=yonah | grep cvtsi2ss
+; rdar://6034396
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+
+define void @test(i32 %x, float* %y) nounwind  {
+entry:
+	lshr i32 %x, 23		; <i32>:0 [#uses=1]
+	uitofp i32 %0 to float		; <float>:1 [#uses=1]
+	store float %1, float* %y
+	ret void
+}
diff --git a/test/CodeGen/X86/umul-with-carry.ll b/test/CodeGen/X86/umul-with-carry.ll
new file mode 100644
index 0000000..7416051
--- /dev/null
+++ b/test/CodeGen/X86/umul-with-carry.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=x86 | grep {jc} | count 1
+; XFAIL: *
+
+; FIXME: umul-with-overflow not supported yet.
+
+@ok = internal constant [4 x i8] c"%d\0A\00"
+@no = internal constant [4 x i8] c"no\0A\00"
+
+define i1 @func(i32 %v1, i32 %v2) nounwind {
+entry:
+  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
+  %sum = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  br i1 %obit, label %carry, label %normal
+
+normal:
+  %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+  ret i1 true
+
+carry:
+  %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
+  ret i1 false
+}
+
+declare i32 @printf(i8*, ...) nounwind
+declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32)
diff --git a/test/CodeGen/X86/umul-with-overflow.ll b/test/CodeGen/X86/umul-with-overflow.ll
new file mode 100644
index 0000000..d522bd8
--- /dev/null
+++ b/test/CodeGen/X86/umul-with-overflow.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 | grep "\\\\\\\<mul"
+
+declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
+define i1 @a(i32 %x) zeroext nounwind {
+  %res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 3)
+  %obil = extractvalue {i32, i1} %res, 1
+  ret i1 %obil
+}
diff --git a/test/CodeGen/X86/unaligned-load.ll b/test/CodeGen/X86/unaligned-load.ll
new file mode 100644
index 0000000..b61803d
--- /dev/null
+++ b/test/CodeGen/X86/unaligned-load.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=dynamic-no-pic --asm-verbose=0 | FileCheck %s
+
+@.str1 = internal constant [31 x i8] c"DHRYSTONE PROGRAM, SOME STRING\00", align 8
+@.str3 = internal constant [31 x i8] c"DHRYSTONE PROGRAM, 2'ND STRING\00", align 8
+
+define void @func() nounwind ssp {
+entry:
+  %String2Loc = alloca [31 x i8], align 1
+  br label %bb
+
+bb:
+  %String2Loc9 = getelementptr inbounds [31 x i8]* %String2Loc, i64 0, i64 0
+  call void @llvm.memcpy.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8]* @.str3, i64 0, i64 0), i64 31, i32 1)
+; CHECK: movups _.str3
+  br label %bb
+
+return:
+  ret void
+}
+
+declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+
+; CHECK: .align  3
+; CHECK-NEXT: _.str1:
+; CHECK-NEXT: .asciz "DHRYSTONE PROGRAM, SOME STRING"
+; CHECK: .align 3
+; CHECK-NEXT: _.str3:
diff --git a/test/CodeGen/X86/urem-i8-constant.ll b/test/CodeGen/X86/urem-i8-constant.ll
new file mode 100644
index 0000000..e3cb69c
--- /dev/null
+++ b/test/CodeGen/X86/urem-i8-constant.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=x86 | grep 111
+
+define i8 @foo(i8 %tmp325) {
+	%t546 = urem i8 %tmp325, 37
+	ret i8 %t546
+}
diff --git a/test/CodeGen/X86/use-add-flags.ll b/test/CodeGen/X86/use-add-flags.ll
new file mode 100644
index 0000000..2dd2a4a
--- /dev/null
+++ b/test/CodeGen/X86/use-add-flags.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -march=x86-64 -o - | FileCheck %s
+
+; Reuse the flags value from the add instructions instead of emitting separate
+; testl instructions.
+
+; Use the flags on the add.
+
+; CHECK: add_zf:
+;      CHECK: addl    (%rdi), %esi
+; CHECK-NEXT: movl    %edx, %eax
+; CHECK-NEXT: cmovnsl %ecx, %eax
+; CHECK-NEXT: ret
+
+define i32 @add_zf(i32* %x, i32 %y, i32 %a, i32 %b) nounwind {
+	%tmp2 = load i32* %x, align 4		; <i32> [#uses=1]
+	%tmp4 = add i32 %tmp2, %y		; <i32> [#uses=1]
+	%tmp5 = icmp slt i32 %tmp4, 0		; <i1> [#uses=1]
+	%tmp.0 = select i1 %tmp5, i32 %a, i32 %b		; <i32> [#uses=1]
+	ret i32 %tmp.0
+}
+
+declare void @foo(i32)
+
+; Don't use the flags result of the and here, since the and has no
+; other use. A simple test is better.
+
+; CHECK: bar:
+; CHECK: testb   $16, %dil
+
+define void @bar(i32 %x) nounwind {
+  %y = and i32 %x, 16
+  %t = icmp eq i32 %y, 0
+  br i1 %t, label %true, label %false
+true:
+  call void @foo(i32 %x)
+  ret void
+false:
+  ret void
+}
+
+; Do use the flags result of the and here, since the and has another use.
+
+; CHECK: qux:
+;      CHECK: andl    $16, %edi
+; CHECK-NEXT: jne
+
+define void @qux(i32 %x) nounwind {
+  %y = and i32 %x, 16
+  %t = icmp eq i32 %y, 0
+  br i1 %t, label %true, label %false
+true:
+  call void @foo(i32 %y)
+  ret void
+false:
+  ret void
+}
diff --git a/test/CodeGen/X86/v4f32-immediate.ll b/test/CodeGen/X86/v4f32-immediate.ll
new file mode 100644
index 0000000..b5ebaa7
--- /dev/null
+++ b/test/CodeGen/X86/v4f32-immediate.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=x86 -mattr=+sse | grep movaps
+
+define <4 x float> @foo() {
+  ret <4 x float> <float 0x4009C9D0A0000000, float 0x4002666660000000, float 0x3FF3333340000000, float 0x3FB99999A0000000>
+}
diff --git a/test/CodeGen/X86/variable-sized-darwin-bzero.ll b/test/CodeGen/X86/variable-sized-darwin-bzero.ll
new file mode 100644
index 0000000..4817db2
--- /dev/null
+++ b/test/CodeGen/X86/variable-sized-darwin-bzero.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin10 | grep __bzero
+
+declare void @llvm.memset.i64(i8*, i8, i64, i32)
+
+define void @foo(i8* %p, i64 %n) {
+  call void @llvm.memset.i64(i8* %p, i8 0, i64 %n, i32 4)
+  ret void
+}
diff --git a/test/CodeGen/X86/variadic-node-pic.ll b/test/CodeGen/X86/variadic-node-pic.ll
new file mode 100644
index 0000000..1182a30
--- /dev/null
+++ b/test/CodeGen/X86/variadic-node-pic.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -relocation-model=pic -code-model=large
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin8"
+
+declare void @xscanf(i64) nounwind 
+
+define void @foo() nounwind  {
+	call void (i64)* @xscanf( i64 0 ) nounwind
+	unreachable
+}
diff --git a/test/CodeGen/X86/vec-trunc-store.ll b/test/CodeGen/X86/vec-trunc-store.ll
new file mode 100644
index 0000000..ea1a151
--- /dev/null
+++ b/test/CodeGen/X86/vec-trunc-store.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86-64 -disable-mmx | grep punpcklwd | count 2
+
+define void @foo() nounwind {
+  %cti69 = trunc <8 x i32> undef to <8 x i16>     ; <<8 x i16>> [#uses=1]
+  store <8 x i16> %cti69, <8 x i16>* undef
+  ret void
+}
+
+define void @bar() nounwind {
+  %cti44 = trunc <4 x i32> undef to <4 x i16>     ; <<4 x i16>> [#uses=1]
+  store <4 x i16> %cti44, <4 x i16>* undef
+  ret void
+}
diff --git a/test/CodeGen/X86/vec_add.ll b/test/CodeGen/X86/vec_add.ll
new file mode 100644
index 0000000..7c77d11
--- /dev/null
+++ b/test/CodeGen/X86/vec_add.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2
+
+define <2 x i64> @test(<2 x i64> %a, <2 x i64> %b) {
+entry:
+	%tmp9 = add <2 x i64> %b, %a		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %tmp9
+}
diff --git a/test/CodeGen/X86/vec_align.ll b/test/CodeGen/X86/vec_align.ll
new file mode 100644
index 0000000..e273115
--- /dev/null
+++ b/test/CodeGen/X86/vec_align.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -mcpu=yonah -relocation-model=static | grep movaps | count 2
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin8"
+
+%f4 = type <4 x float>
+
+@G = external global { float,float,float,float}, align 16
+
+define %f4 @test1(float %W, float %X, float %Y, float %Z) nounwind {
+        %tmp = insertelement %f4 undef, float %W, i32 0
+        %tmp2 = insertelement %f4 %tmp, float %X, i32 1
+        %tmp4 = insertelement %f4 %tmp2, float %Y, i32 2
+        %tmp6 = insertelement %f4 %tmp4, float %Z, i32 3
+	ret %f4 %tmp6
+}
+
+define %f4 @test2() nounwind {
+	%Wp = getelementptr { float,float,float,float}* @G, i32 0, i32 0
+	%Xp = getelementptr { float,float,float,float}* @G, i32 0, i32 1
+	%Yp = getelementptr { float,float,float,float}* @G, i32 0, i32 2
+	%Zp = getelementptr { float,float,float,float}* @G, i32 0, i32 3
+	
+	%W = load float* %Wp
+	%X = load float* %Xp
+	%Y = load float* %Yp
+	%Z = load float* %Zp
+
+        %tmp = insertelement %f4 undef, float %W, i32 0
+        %tmp2 = insertelement %f4 %tmp, float %X, i32 1
+        %tmp4 = insertelement %f4 %tmp2, float %Y, i32 2
+        %tmp6 = insertelement %f4 %tmp4, float %Z, i32 3
+	ret %f4 %tmp6
+}
+
diff --git a/test/CodeGen/X86/vec_call.ll b/test/CodeGen/X86/vec_call.ll
new file mode 100644
index 0000000..b3efc7b
--- /dev/null
+++ b/test/CodeGen/X86/vec_call.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
+; RUN:   grep {subl.*60}
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
+; RUN:   grep {movaps.*32}
+
+
+define void @test() {
+        tail call void @xx( i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, <2 x i64> bitcast (<4 x i32> < i32 4, i32 3, i32 2, i32 1 > to <2 x i64>), <2 x i64> bitcast (<4 x i32> < i32 8, i32 7, i32 6, i32 5 > to <2 x i64>), <2 x i64> bitcast (<4 x i32> < i32 6, i32 4, i32 2, i32 0 > to <2 x i64>), <2 x i64> bitcast (<4 x i32> < i32 8, i32 4, i32 2, i32 1 > to <2 x i64>), <2 x i64> bitcast (<4 x i32> < i32 0, i32 1, i32 3, i32 9 > to <2 x i64>) )
+        ret void
+}
+
+declare void @xx(i32, i32, i32, i32, i32, i32, i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
+
diff --git a/test/CodeGen/X86/vec_cast.ll b/test/CodeGen/X86/vec_cast.ll
new file mode 100644
index 0000000..1f899b3
--- /dev/null
+++ b/test/CodeGen/X86/vec_cast.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -march=x86-64 
+; RUN: llc < %s -march=x86-64 -disable-mmx
+
+define <8 x i32> @a(<8 x i16> %a) nounwind {
+  %c = sext <8 x i16> %a to <8 x i32>
+  ret <8 x i32> %c
+}
+
+define <3 x i32> @b(<3 x i16> %a) nounwind {
+  %c = sext <3 x i16> %a to <3 x i32>
+  ret <3 x i32> %c
+}
+
+define <1 x i32> @c(<1 x i16> %a) nounwind {
+  %c = sext <1 x i16> %a to <1 x i32>
+  ret <1 x i32> %c
+}
+
+define <8 x i32> @d(<8 x i16> %a) nounwind {
+  %c = zext <8 x i16> %a to <8 x i32>
+  ret <8 x i32> %c
+}
+
+define <3 x i32> @e(<3 x i16> %a) nounwind {
+  %c = zext <3 x i16> %a to <3 x i32>
+  ret <3 x i32> %c
+}
+
+define <1 x i32> @f(<1 x i16> %a) nounwind {
+  %c = zext <1 x i16> %a to <1 x i32>
+  ret <1 x i32> %c
+}
+
+; TODO: Legalize doesn't yet handle this.
+;define <8 x i16> @g(<8 x i32> %a) nounwind {
+;  %c = trunc <8 x i32> %a to <8 x i16>
+;  ret <8 x i16> %c
+;}
+
+define <3 x i16> @h(<3 x i32> %a) nounwind {
+  %c = trunc <3 x i32> %a to <3 x i16>
+  ret <3 x i16> %c
+}
+
+define <1 x i16> @i(<1 x i32> %a) nounwind {
+  %c = trunc <1 x i32> %a to <1 x i16>
+  ret <1 x i16> %c
+}
diff --git a/test/CodeGen/X86/vec_clear.ll b/test/CodeGen/X86/vec_clear.ll
new file mode 100644
index 0000000..166d436
--- /dev/null
+++ b/test/CodeGen/X86/vec_clear.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -o %t
+; RUN: not grep and %t
+; RUN: not grep psrldq %t
+; RUN: grep xorps %t
+
+define <4 x float> @test(<4 x float>* %v1) nounwind {
+        %tmp = load <4 x float>* %v1            ; <<4 x float>> [#uses=1]
+        %tmp15 = bitcast <4 x float> %tmp to <2 x i64>          ; <<2 x i64>> [#uses=1]
+        %tmp24 = and <2 x i64> %tmp15, bitcast (<4 x i32> < i32 0, i32 0, i32 -1, i32 -1 > to <2 x i64>)              ; <<2 x i64>> [#uses=1]
+        %tmp31 = bitcast <2 x i64> %tmp24 to <4 x float>                ; <<4 x float>> [#uses=1]
+        ret <4 x float> %tmp31
+}
+
diff --git a/test/CodeGen/X86/vec_compare-2.ll b/test/CodeGen/X86/vec_compare-2.ll
new file mode 100644
index 0000000..091641b
--- /dev/null
+++ b/test/CodeGen/X86/vec_compare-2.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=x86 -mcpu=penryn -disable-mmx | FileCheck %s
+
+declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+
+declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define void @blackDespeckle_wrapper(i8** %args_list, i64* %gtid, i64 %xend) {
+entry:
+; CHECK-NOT: set
+; CHECK: pcmpgt
+; CHECK: blendvps
+  %shr.i = ashr <4 x i32> zeroinitializer, <i32 3, i32 3, i32 3, i32 3> ; <<4 x i32>> [#uses=1]
+  %cmp318.i = sext <4 x i1> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=1]
+  %sub322.i = sub <4 x i32> %shr.i, zeroinitializer ; <<4 x i32>> [#uses=1]
+  %cmp323.x = icmp slt <4 x i32> zeroinitializer, %sub322.i ; <<4 x i1>> [#uses=1]
+  %cmp323.i = sext <4 x i1> %cmp323.x to <4 x i32> ; <<4 x i32>> [#uses=1]
+  %or.i = or <4 x i32> %cmp318.i, %cmp323.i       ; <<4 x i32>> [#uses=1]
+  %tmp10.i83.i = bitcast <4 x i32> %or.i to <4 x float> ; <<4 x float>> [#uses=1]
+  %0 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> undef, <4 x float> undef, <4 x float> %tmp10.i83.i) nounwind ; <<4 x float>> [#uses=1]
+  %conv.i.i15.i = bitcast <4 x float> %0 to <4 x i32> ; <<4 x i32>> [#uses=1]
+  %swz.i.i28.i = shufflevector <4 x i32> %conv.i.i15.i, <4 x i32> undef, <2 x i32> <i32 0, i32 1> ; <<2 x i32>> [#uses=1]
+  %tmp6.i29.i = bitcast <2 x i32> %swz.i.i28.i to <4 x i16> ; <<4 x i16>> [#uses=1]
+  %swz.i30.i = shufflevector <4 x i16> %tmp6.i29.i, <4 x i16> undef, <2 x i32> <i32 0, i32 1> ; <<2 x i16>> [#uses=1]
+  store <2 x i16> %swz.i30.i, <2 x i16>* undef
+  unreachable
+  ret void
+}
diff --git a/test/CodeGen/X86/vec_compare.ll b/test/CodeGen/X86/vec_compare.ll
new file mode 100644
index 0000000..c8c7257
--- /dev/null
+++ b/test/CodeGen/X86/vec_compare.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
+
+
+define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
+; CHECK: test1:
+; CHECK: pcmpgtd
+; CHECK: ret
+
+	%C = icmp sgt <4 x i32> %A, %B
+        %D = sext <4 x i1> %C to <4 x i32>
+	ret <4 x i32> %D
+}
+
+define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {
+; CHECK: test2:
+; CHECK: pcmp
+; CHECK: pcmp
+; CHECK: xorps
+; CHECK: ret
+	%C = icmp sge <4 x i32> %A, %B
+        %D = sext <4 x i1> %C to <4 x i32>
+	ret <4 x i32> %D
+}
+
+define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind {
+; CHECK: test3:
+; CHECK: pcmpgtd
+; CHECK: movaps
+; CHECK: ret
+	%C = icmp slt <4 x i32> %A, %B
+        %D = sext <4 x i1> %C to <4 x i32>
+	ret <4 x i32> %D
+}
+
+define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
+; CHECK: test4:
+; CHECK: movaps
+; CHECK: pcmpgtd
+; CHECK: ret
+	%C = icmp ugt <4 x i32> %A, %B
+        %D = sext <4 x i1> %C to <4 x i32>
+	ret <4 x i32> %D
+}
diff --git a/test/CodeGen/X86/vec_ctbits.ll b/test/CodeGen/X86/vec_ctbits.ll
new file mode 100644
index 0000000..f0158d6
--- /dev/null
+++ b/test/CodeGen/X86/vec_ctbits.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=x86-64
+
+declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>)
+declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>)
+declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
+
+define <2 x i64> @footz(<2 x i64> %a) nounwind {
+  %c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a)
+  ret <2 x i64> %c
+}
+define <2 x i64> @foolz(<2 x i64> %a) nounwind {
+  %c = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a)
+  ret <2 x i64> %c
+}
+define <2 x i64> @foopop(<2 x i64> %a) nounwind {
+  %c = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+  ret <2 x i64> %c
+}
diff --git a/test/CodeGen/X86/vec_ext_inreg.ll b/test/CodeGen/X86/vec_ext_inreg.ll
new file mode 100644
index 0000000..8d2a3c3
--- /dev/null
+++ b/test/CodeGen/X86/vec_ext_inreg.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -march=x86-64 
+; RUN: llc < %s -march=x86-64 -disable-mmx
+
+define <8 x i32> @a(<8 x i32> %a) nounwind {
+  %b = trunc <8 x i32> %a to <8 x i16>
+  %c = sext <8 x i16> %b to <8 x i32>
+  ret <8 x i32> %c
+}
+
+define <3 x i32> @b(<3 x i32> %a) nounwind {
+  %b = trunc <3 x i32> %a to <3 x i16>
+  %c = sext <3 x i16> %b to <3 x i32>
+  ret <3 x i32> %c
+}
+
+define <1 x i32> @c(<1 x i32> %a) nounwind {
+  %b = trunc <1 x i32> %a to <1 x i16>
+  %c = sext <1 x i16> %b to <1 x i32>
+  ret <1 x i32> %c
+}
+
+define <8 x i32> @d(<8 x i32> %a) nounwind {
+  %b = trunc <8 x i32> %a to <8 x i16>
+  %c = zext <8 x i16> %b to <8 x i32>
+  ret <8 x i32> %c
+}
+
+define <3 x i32> @e(<3 x i32> %a) nounwind {
+  %b = trunc <3 x i32> %a to <3 x i16>
+  %c = zext <3 x i16> %b to <3 x i32>
+  ret <3 x i32> %c
+}
+
+define <1 x i32> @f(<1 x i32> %a) nounwind {
+  %b = trunc <1 x i32> %a to <1 x i16>
+  %c = zext <1 x i16> %b to <1 x i32>
+  ret <1 x i32> %c
+}
diff --git a/test/CodeGen/X86/vec_extract-sse4.ll b/test/CodeGen/X86/vec_extract-sse4.ll
new file mode 100644
index 0000000..dab5dd1
--- /dev/null
+++ b/test/CodeGen/X86/vec_extract-sse4.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=x86 -mattr=+sse41 -o %t
+; RUN: grep extractps   %t | count 1
+; RUN: grep pextrd      %t | count 1
+; RUN: not grep pshufd  %t
+; RUN: not grep movss   %t
+
+define void @t1(float* %R, <4 x float>* %P1) nounwind {
+	%X = load <4 x float>* %P1
+	%tmp = extractelement <4 x float> %X, i32 3
+	store float %tmp, float* %R
+	ret void
+}
+
+define float @t2(<4 x float>* %P1) nounwind {
+	%X = load <4 x float>* %P1
+	%tmp = extractelement <4 x float> %X, i32 2
+	ret float %tmp
+}
+
+define void @t3(i32* %R, <4 x i32>* %P1) nounwind {
+	%X = load <4 x i32>* %P1
+	%tmp = extractelement <4 x i32> %X, i32 3
+	store i32 %tmp, i32* %R
+	ret void
+}
+
+define i32 @t4(<4 x i32>* %P1) nounwind {
+	%X = load <4 x i32>* %P1
+	%tmp = extractelement <4 x i32> %X, i32 3
+	ret i32 %tmp
+}
diff --git a/test/CodeGen/X86/vec_extract.ll b/test/CodeGen/X86/vec_extract.ll
new file mode 100644
index 0000000..b013730
--- /dev/null
+++ b/test/CodeGen/X86/vec_extract.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 -o %t
+; RUN: grep movss    %t | count 3
+; RUN: grep movhlps  %t | count 1
+; RUN: grep pshufd   %t | count 1
+; RUN: grep unpckhpd %t | count 1
+
+define void @test1(<4 x float>* %F, float* %f) nounwind {
+	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
+	%tmp7 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
+	%tmp2 = extractelement <4 x float> %tmp7, i32 0		; <float> [#uses=1]
+	store float %tmp2, float* %f
+	ret void
+}
+
+define float @test2(<4 x float>* %F, float* %f) nounwind {
+	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
+	%tmp7 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
+	%tmp2 = extractelement <4 x float> %tmp7, i32 2		; <float> [#uses=1]
+	ret float %tmp2
+}
+
+define void @test3(float* %R, <4 x float>* %P1) nounwind {
+	%X = load <4 x float>* %P1		; <<4 x float>> [#uses=1]
+	%tmp = extractelement <4 x float> %X, i32 3		; <float> [#uses=1]
+	store float %tmp, float* %R
+	ret void
+}
+
+define double @test4(double %A) nounwind {
+	%tmp1 = call <2 x double> @foo( )		; <<2 x double>> [#uses=1]
+	%tmp2 = extractelement <2 x double> %tmp1, i32 1		; <double> [#uses=1]
+	%tmp3 = fadd double %tmp2, %A		; <double> [#uses=1]
+	ret double %tmp3
+}
+
+declare <2 x double> @foo()
diff --git a/test/CodeGen/X86/vec_fneg.ll b/test/CodeGen/X86/vec_fneg.ll
new file mode 100644
index 0000000..d49c70e
--- /dev/null
+++ b/test/CodeGen/X86/vec_fneg.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2
+
+define <4 x float> @t1(<4 x float> %Q) {
+        %tmp15 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %Q
+	ret <4 x float> %tmp15
+}
+
+define <4 x float> @t2(<4 x float> %Q) {
+        %tmp15 = fsub <4 x float> zeroinitializer, %Q
+	ret <4 x float> %tmp15
+}
diff --git a/test/CodeGen/X86/vec_i64.ll b/test/CodeGen/X86/vec_i64.ll
new file mode 100644
index 0000000..462e16e
--- /dev/null
+++ b/test/CodeGen/X86/vec_i64.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
+; RUN: grep movq %t | count 2
+
+; Used movq to load i64 into a v2i64 when the top i64 is 0.
+
+define <2 x i64> @foo1(i64* %y) nounwind  {
+entry:
+	%tmp1 = load i64* %y, align 8		; <i64> [#uses=1]
+	%s2v = insertelement <2 x i64> undef, i64 %tmp1, i32 0
+	%loadl = shufflevector <2 x i64> zeroinitializer, <2 x i64> %s2v, <2 x i32> <i32 2, i32 1>
+	ret <2 x i64> %loadl
+}
+
+
+define <4 x float> @foo2(i64* %p) nounwind {
+entry:
+	%load = load i64* %p
+	%s2v = insertelement <2 x i64> undef, i64 %load, i32 0
+	%loadl = shufflevector <2 x i64> zeroinitializer, <2 x i64> %s2v, <2 x i32> <i32 2, i32 1>
+	%0 = bitcast <2 x i64> %loadl to <4 x float>
+	ret <4 x float> %0
+}
diff --git a/test/CodeGen/X86/vec_ins_extract-1.ll b/test/CodeGen/X86/vec_ins_extract-1.ll
new file mode 100644
index 0000000..2951193
--- /dev/null
+++ b/test/CodeGen/X86/vec_ins_extract-1.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86 -mcpu=yonah | grep {(%esp,%eax,4)} | count 4
+
+; Inserts and extracts with variable indices must be lowered
+; to memory accesses.
+
+define i32 @t0(i32 inreg %t7, <4 x i32> inreg %t8) nounwind {
+  %t13 = insertelement <4 x i32> %t8, i32 76, i32 %t7
+  %t9 = extractelement <4 x i32> %t13, i32 0
+  ret i32 %t9
+}
+define i32 @t1(i32 inreg %t7, <4 x i32> inreg %t8) nounwind {
+  %t13 = insertelement <4 x i32> %t8, i32 76, i32 0
+  %t9 = extractelement <4 x i32> %t13, i32 %t7
+  ret i32 %t9
+}
+define <4 x i32> @t2(i32 inreg %t7, <4 x i32> inreg %t8) nounwind {
+  %t9 = extractelement <4 x i32> %t8, i32 %t7
+  %t13 = insertelement <4 x i32> %t8, i32 %t9, i32 0
+  ret <4 x i32> %t13
+}
+define <4 x i32> @t3(i32 inreg %t7, <4 x i32> inreg %t8) nounwind {
+  %t9 = extractelement <4 x i32> %t8, i32 0
+  %t13 = insertelement <4 x i32> %t8, i32 %t9, i32 %t7
+  ret <4 x i32> %t13
+}
diff --git a/test/CodeGen/X86/vec_ins_extract.ll b/test/CodeGen/X86/vec_ins_extract.ll
new file mode 100644
index 0000000..daf222e
--- /dev/null
+++ b/test/CodeGen/X86/vec_ins_extract.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -scalarrepl -instcombine | \
+; RUN:   llc -march=x86 -mcpu=yonah | not grep sub.*esp
+
+; This checks that various insert/extract idiom work without going to the
+; stack.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+
+define void @test(<4 x float>* %F, float %f) {
+entry:
+	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
+	%tmp3 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
+	%tmp10 = insertelement <4 x float> %tmp3, float %f, i32 0		; <<4 x float>> [#uses=2]
+	%tmp6 = fadd <4 x float> %tmp10, %tmp10		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp6, <4 x float>* %F
+	ret void
+}
+
+define void @test2(<4 x float>* %F, float %f) {
+entry:
+	%G = alloca <4 x float>, align 16		; <<4 x float>*> [#uses=3]
+	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
+	%tmp3 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp3, <4 x float>* %G
+	%tmp.upgrd.1 = getelementptr <4 x float>* %G, i32 0, i32 2		; <float*> [#uses=1]
+	store float %f, float* %tmp.upgrd.1
+	%tmp4 = load <4 x float>* %G		; <<4 x float>> [#uses=2]
+	%tmp6 = fadd <4 x float> %tmp4, %tmp4		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp6, <4 x float>* %F
+	ret void
+}
+
+define void @test3(<4 x float>* %F, float* %f) {
+entry:
+	%G = alloca <4 x float>, align 16		; <<4 x float>*> [#uses=2]
+	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
+	%tmp3 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp3, <4 x float>* %G
+	%tmp.upgrd.2 = getelementptr <4 x float>* %G, i32 0, i32 2		; <float*> [#uses=1]
+	%tmp.upgrd.3 = load float* %tmp.upgrd.2		; <float> [#uses=1]
+	store float %tmp.upgrd.3, float* %f
+	ret void
+}
+
+define void @test4(<4 x float>* %F, float* %f) {
+entry:
+	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
+	%tmp5.lhs = extractelement <4 x float> %tmp, i32 0		; <float> [#uses=1]
+	%tmp5.rhs = extractelement <4 x float> %tmp, i32 0		; <float> [#uses=1]
+	%tmp5 = fadd float %tmp5.lhs, %tmp5.rhs		; <float> [#uses=1]
+	store float %tmp5, float* %f
+	ret void
+}
diff --git a/test/CodeGen/X86/vec_insert-2.ll b/test/CodeGen/X86/vec_insert-2.ll
new file mode 100644
index 0000000..b08044b
--- /dev/null
+++ b/test/CodeGen/X86/vec_insert-2.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep {\$36,} | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep shufps | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep pinsrw | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movhpd | count 1
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse41 | grep unpcklpd | count 1
+
+define <4 x float> @t1(float %s, <4 x float> %tmp) nounwind {
+        %tmp1 = insertelement <4 x float> %tmp, float %s, i32 3
+        ret <4 x float> %tmp1
+}
+
+define <4 x i32> @t2(i32 %s, <4 x i32> %tmp) nounwind {
+        %tmp1 = insertelement <4 x i32> %tmp, i32 %s, i32 3
+        ret <4 x i32> %tmp1
+}
+
+define <2 x double> @t3(double %s, <2 x double> %tmp) nounwind {
+        %tmp1 = insertelement <2 x double> %tmp, double %s, i32 1
+        ret <2 x double> %tmp1
+}
+
+define <8 x i16> @t4(i16 %s, <8 x i16> %tmp) nounwind {
+        %tmp1 = insertelement <8 x i16> %tmp, i16 %s, i32 5
+        ret <8 x i16> %tmp1
+}
diff --git a/test/CodeGen/X86/vec_insert-3.ll b/test/CodeGen/X86/vec_insert-3.ll
new file mode 100644
index 0000000..a18cd864
--- /dev/null
+++ b/test/CodeGen/X86/vec_insert-3.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse41 | grep punpcklqdq | count 1
+
+define <2 x i64> @t1(i64 %s, <2 x i64> %tmp) nounwind {
+        %tmp1 = insertelement <2 x i64> %tmp, i64 %s, i32 1
+        ret <2 x i64> %tmp1
+}
diff --git a/test/CodeGen/X86/vec_insert-5.ll b/test/CodeGen/X86/vec_insert-5.ll
new file mode 100644
index 0000000..291fc04
--- /dev/null
+++ b/test/CodeGen/X86/vec_insert-5.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
+; RUN: grep psllq %t | grep 32
+; RUN: grep pslldq %t | grep 12
+; RUN: grep psrldq %t | grep 8
+; RUN: grep psrldq %t | grep 12
+
+define void  @t1(i32 %a, <1 x i64>* %P) nounwind {
+       %tmp12 = shl i32 %a, 12
+       %tmp21 = insertelement <2 x i32> undef, i32 %tmp12, i32 1
+       %tmp22 = insertelement <2 x i32> %tmp21, i32 0, i32 0
+       %tmp23 = bitcast <2 x i32> %tmp22 to <1 x i64>
+       store <1 x i64> %tmp23, <1 x i64>* %P
+       ret void
+}
+
+define <4 x float> @t2(<4 x float>* %P) nounwind {
+        %tmp1 = load <4 x float>* %P
+        %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 4, i32 4, i32 4, i32 0 >
+        ret <4 x float> %tmp2
+}
+
+define <4 x float> @t3(<4 x float>* %P) nounwind {
+        %tmp1 = load <4 x float>* %P
+        %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 4, i32 4 >
+        ret <4 x float> %tmp2
+}
+
+define <4 x float> @t4(<4 x float>* %P) nounwind {
+        %tmp1 = load <4 x float>* %P
+        %tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 7, i32 0, i32 0, i32 0 >
+        ret <4 x float> %tmp2
+}
diff --git a/test/CodeGen/X86/vec_insert-6.ll b/test/CodeGen/X86/vec_insert-6.ll
new file mode 100644
index 0000000..54aa43f
--- /dev/null
+++ b/test/CodeGen/X86/vec_insert-6.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pslldq
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 6
+
+define <4 x float> @t3(<4 x float>* %P) nounwind  {
+	%tmp1 = load <4 x float>* %P
+	%tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 4, i32 4, i32 4, i32 0 >
+	ret <4 x float> %tmp2
+}
diff --git a/test/CodeGen/X86/vec_insert-7.ll b/test/CodeGen/X86/vec_insert-7.ll
new file mode 100644
index 0000000..9ede10f
--- /dev/null
+++ b/test/CodeGen/X86/vec_insert-7.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 -mattr=+mmx -mtriple=i686-apple-darwin9 -o - | grep punpckldq
+
+define <2 x i32> @mmx_movzl(<2 x i32> %x) nounwind  {
+entry:
+	%tmp3 = insertelement <2 x i32> %x, i32 32, i32 0		; <<2 x i32>> [#uses=1]
+	%tmp8 = insertelement <2 x i32> %tmp3, i32 0, i32 1		; <<2 x i32>> [#uses=1]
+	ret <2 x i32> %tmp8
+}
diff --git a/test/CodeGen/X86/vec_insert-8.ll b/test/CodeGen/X86/vec_insert-8.ll
new file mode 100644
index 0000000..650951c
--- /dev/null
+++ b/test/CodeGen/X86/vec_insert-8.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 -mattr=+sse41 -o %t
+
+; tests variable insert and extract of a 4 x i32
+
+define <4 x i32> @var_insert(<4 x i32> %x, i32 %val, i32 %idx) nounwind  {
+entry:
+	%tmp3 = insertelement <4 x i32> %x, i32 %val, i32 %idx		; <<4 x i32>> [#uses=1]
+	ret <4 x i32> %tmp3
+}
+
+define i32 @var_extract(<4 x i32> %x, i32 %idx) nounwind  {
+entry:
+	%tmp3 = extractelement <4 x i32> %x, i32 %idx		; <<i32>> [#uses=1]
+	ret i32 %tmp3
+}
diff --git a/test/CodeGen/X86/vec_insert.ll b/test/CodeGen/X86/vec_insert.ll
new file mode 100644
index 0000000..a7274a9
--- /dev/null
+++ b/test/CodeGen/X86/vec_insert.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movss | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | not grep pinsrw
+
+define void @test(<4 x float>* %F, i32 %I) {
+	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=1]
+	%f = sitofp i32 %I to float		; <float> [#uses=1]
+	%tmp1 = insertelement <4 x float> %tmp, float %f, i32 0		; <<4 x float>> [#uses=2]
+	%tmp18 = fadd <4 x float> %tmp1, %tmp1		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp18, <4 x float>* %F
+	ret void
+}
+
+define void @test2(<4 x float>* %F, i32 %I, float %g) {
+	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=1]
+	%f = sitofp i32 %I to float		; <float> [#uses=1]
+	%tmp1 = insertelement <4 x float> %tmp, float %f, i32 2		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp1, <4 x float>* %F
+	ret void
+}
diff --git a/test/CodeGen/X86/vec_insert_4.ll b/test/CodeGen/X86/vec_insert_4.ll
new file mode 100644
index 0000000..2c31e56
--- /dev/null
+++ b/test/CodeGen/X86/vec_insert_4.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86 -mcpu=yonah | grep 1084227584 | count 1
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin9.2.2"
+
+define <8 x float> @f(<8 x float> %a, i32 %b) nounwind  {
+entry:
+	%vecins = insertelement <8 x float> %a, float 5.000000e+00, i32 %b		; <<4 x float>> [#uses=1]
+	ret <8 x float> %vecins
+}
diff --git a/test/CodeGen/X86/vec_loadsingles.ll b/test/CodeGen/X86/vec_loadsingles.ll
new file mode 100644
index 0000000..8812c4f
--- /dev/null
+++ b/test/CodeGen/X86/vec_loadsingles.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
+
+define <4 x float> @a(<4 x float> %a, float* nocapture %p) nounwind readonly {
+entry:
+	%tmp1 = load float* %p
+	%vecins = insertelement <4 x float> undef, float %tmp1, i32 0
+	%add.ptr = getelementptr float* %p, i32 1
+	%tmp5 = load float* %add.ptr
+	%vecins7 = insertelement <4 x float> %vecins, float %tmp5, i32 1
+	ret <4 x float> %vecins7
+}
+
diff --git a/test/CodeGen/X86/vec_logical.ll b/test/CodeGen/X86/vec_logical.ll
new file mode 100644
index 0000000..1dc0b16
--- /dev/null
+++ b/test/CodeGen/X86/vec_logical.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
+; RUN: grep xorps %t | count 2
+; RUN: grep andnps %t
+; RUN: grep movaps %t | count 2
+
+define void @t(<4 x float> %A) {
+	%tmp1277 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %A
+	store <4 x float> %tmp1277, <4 x float>* null
+	ret void
+}
+
+define <4 x float> @t1(<4 x float> %a, <4 x float> %b) {
+entry:
+	%tmp9 = bitcast <4 x float> %a to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp10 = bitcast <4 x float> %b to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp11 = xor <4 x i32> %tmp9, %tmp10		; <<4 x i32>> [#uses=1]
+	%tmp13 = bitcast <4 x i32> %tmp11 to <4 x float>		; <<4 x float>> [#uses=1]
+	ret <4 x float> %tmp13
+}
+
+define <2 x double> @t2(<2 x double> %a, <2 x double> %b) {
+entry:
+	%tmp9 = bitcast <2 x double> %a to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%tmp10 = bitcast <2 x double> %b to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%tmp11 = and <2 x i64> %tmp9, %tmp10		; <<2 x i64>> [#uses=1]
+	%tmp13 = bitcast <2 x i64> %tmp11 to <2 x double>		; <<2 x double>> [#uses=1]
+	ret <2 x double> %tmp13
+}
+
+define void @t3(<4 x float> %a, <4 x float> %b, <4 x float>* %c, <4 x float>* %d) {
+entry:
+	%tmp3 = load <4 x float>* %c		; <<4 x float>> [#uses=1]
+	%tmp11 = bitcast <4 x float> %a to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp12 = bitcast <4 x float> %b to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp13 = xor <4 x i32> %tmp11, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
+	%tmp14 = and <4 x i32> %tmp12, %tmp13		; <<4 x i32>> [#uses=1]
+	%tmp27 = bitcast <4 x float> %tmp3 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp28 = or <4 x i32> %tmp14, %tmp27		; <<4 x i32>> [#uses=1]
+	%tmp30 = bitcast <4 x i32> %tmp28 to <4 x float>		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp30, <4 x float>* %d
+	ret void
+}
diff --git a/test/CodeGen/X86/vec_return.ll b/test/CodeGen/X86/vec_return.ll
new file mode 100644
index 0000000..66762b4
--- /dev/null
+++ b/test/CodeGen/X86/vec_return.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
+; RUN: grep xorps %t | count 1
+; RUN: grep movaps %t | count 1
+; RUN: not grep shuf %t
+
+define <2 x double> @test() {
+	ret <2 x double> zeroinitializer
+}
+
+define <4 x i32> @test2() nounwind  {
+	ret <4 x i32> < i32 0, i32 0, i32 1, i32 0 >
+}
diff --git a/test/CodeGen/X86/vec_select.ll b/test/CodeGen/X86/vec_select.ll
new file mode 100644
index 0000000..033e9f7
--- /dev/null
+++ b/test/CodeGen/X86/vec_select.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mattr=+sse
+
+define void @test(i32 %C, <4 x float>* %A, <4 x float>* %B) {
+        %tmp = load <4 x float>* %A             ; <<4 x float>> [#uses=1]
+        %tmp3 = load <4 x float>* %B            ; <<4 x float>> [#uses=2]
+        %tmp9 = fmul <4 x float> %tmp3, %tmp3            ; <<4 x float>> [#uses=1]
+        %tmp.upgrd.1 = icmp eq i32 %C, 0                ; <i1> [#uses=1]
+        %iftmp.38.0 = select i1 %tmp.upgrd.1, <4 x float> %tmp9, <4 x float> %tmp               ; <<4 x float>> [#uses=1]
+        store <4 x float> %iftmp.38.0, <4 x float>* %A
+        ret void
+}
+
diff --git a/test/CodeGen/X86/vec_set-2.ll b/test/CodeGen/X86/vec_set-2.ll
new file mode 100644
index 0000000..a8f1187
--- /dev/null
+++ b/test/CodeGen/X86/vec_set-2.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movss | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movd | count 1
+
+define <4 x float> @test1(float %a) nounwind {
+	%tmp = insertelement <4 x float> zeroinitializer, float %a, i32 0		; <<4 x float>> [#uses=1]
+	%tmp5 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 1		; <<4 x float>> [#uses=1]
+	%tmp6 = insertelement <4 x float> %tmp5, float 0.000000e+00, i32 2		; <<4 x float>> [#uses=1]
+	%tmp7 = insertelement <4 x float> %tmp6, float 0.000000e+00, i32 3		; <<4 x float>> [#uses=1]
+	ret <4 x float> %tmp7
+}
+
+define <2 x i64> @test(i32 %a) nounwind {
+	%tmp = insertelement <4 x i32> zeroinitializer, i32 %a, i32 0		; <<8 x i16>> [#uses=1]
+	%tmp6 = insertelement <4 x i32> %tmp, i32 0, i32 1		; <<8 x i32>> [#uses=1]
+	%tmp8 = insertelement <4 x i32> %tmp6, i32 0, i32 2		; <<8 x i32>> [#uses=1]
+	%tmp10 = insertelement <4 x i32> %tmp8, i32 0, i32 3		; <<8 x i32>> [#uses=1]
+	%tmp19 = bitcast <4 x i32> %tmp10 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %tmp19
+}
diff --git a/test/CodeGen/X86/vec_set-3.ll b/test/CodeGen/X86/vec_set-3.ll
new file mode 100644
index 0000000..ada17e0
--- /dev/null
+++ b/test/CodeGen/X86/vec_set-3.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
+; RUN: grep pshufd %t | count 2
+
+define <4 x float> @test(float %a) nounwind {
+        %tmp = insertelement <4 x float> zeroinitializer, float %a, i32 1               ; <<4 x float>> [#uses=1]
+        %tmp5 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 2               ; <<4 x float>> [#uses=1]
+        %tmp6 = insertelement <4 x float> %tmp5, float 0.000000e+00, i32 3              ; <<4 x float>> [#uses=1]
+        ret <4 x float> %tmp6
+}
+
+define <2 x i64> @test2(i32 %a) nounwind {
+        %tmp7 = insertelement <4 x i32> zeroinitializer, i32 %a, i32 2          ; <<4 x i32>> [#uses=1]
+        %tmp9 = insertelement <4 x i32> %tmp7, i32 0, i32 3             ; <<4 x i32>> [#uses=1]
+        %tmp10 = bitcast <4 x i32> %tmp9 to <2 x i64>           ; <<2 x i64>> [#uses=1]
+        ret <2 x i64> %tmp10
+}
+
diff --git a/test/CodeGen/X86/vec_set-4.ll b/test/CodeGen/X86/vec_set-4.ll
new file mode 100644
index 0000000..332c8b7
--- /dev/null
+++ b/test/CodeGen/X86/vec_set-4.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pinsrw | count 2
+
+define <2 x i64> @test(i16 %a) nounwind {
+entry:
+	%tmp10 = insertelement <8 x i16> zeroinitializer, i16 %a, i32 3		; <<8 x i16>> [#uses=1]
+	%tmp12 = insertelement <8 x i16> %tmp10, i16 0, i32 4		; <<8 x i16>> [#uses=1]
+	%tmp14 = insertelement <8 x i16> %tmp12, i16 0, i32 5		; <<8 x i16>> [#uses=1]
+	%tmp16 = insertelement <8 x i16> %tmp14, i16 0, i32 6		; <<8 x i16>> [#uses=1]
+	%tmp18 = insertelement <8 x i16> %tmp16, i16 0, i32 7		; <<8 x i16>> [#uses=1]
+	%tmp19 = bitcast <8 x i16> %tmp18 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %tmp19
+}
+
+define <2 x i64> @test2(i8 %a) nounwind {
+entry:
+	%tmp24 = insertelement <16 x i8> zeroinitializer, i8 %a, i32 10		; <<16 x i8>> [#uses=1]
+	%tmp26 = insertelement <16 x i8> %tmp24, i8 0, i32 11		; <<16 x i8>> [#uses=1]
+	%tmp28 = insertelement <16 x i8> %tmp26, i8 0, i32 12		; <<16 x i8>> [#uses=1]
+	%tmp30 = insertelement <16 x i8> %tmp28, i8 0, i32 13		; <<16 x i8>> [#uses=1]
+	%tmp32 = insertelement <16 x i8> %tmp30, i8 0, i32 14		; <<16 x i8>> [#uses=1]
+	%tmp34 = insertelement <16 x i8> %tmp32, i8 0, i32 15		; <<16 x i8>> [#uses=1]
+	%tmp35 = bitcast <16 x i8> %tmp34 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %tmp35
+}
diff --git a/test/CodeGen/X86/vec_set-5.ll b/test/CodeGen/X86/vec_set-5.ll
new file mode 100644
index 0000000..f811a74
--- /dev/null
+++ b/test/CodeGen/X86/vec_set-5.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
+; RUN: grep movlhps   %t | count 1
+; RUN: grep movq      %t | count 2
+
+define <4 x float> @test1(float %a, float %b) nounwind {
+	%tmp = insertelement <4 x float> zeroinitializer, float %a, i32 0		; <<4 x float>> [#uses=1]
+	%tmp6 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 1		; <<4 x float>> [#uses=1]
+	%tmp8 = insertelement <4 x float> %tmp6, float %b, i32 2		; <<4 x float>> [#uses=1]
+	%tmp9 = insertelement <4 x float> %tmp8, float 0.000000e+00, i32 3		; <<4 x float>> [#uses=1]
+	ret <4 x float> %tmp9
+}
+
+define <4 x float> @test2(float %a, float %b) nounwind {
+	%tmp = insertelement <4 x float> zeroinitializer, float %a, i32 0		; <<4 x float>> [#uses=1]
+	%tmp7 = insertelement <4 x float> %tmp, float %b, i32 1		; <<4 x float>> [#uses=1]
+	%tmp8 = insertelement <4 x float> %tmp7, float 0.000000e+00, i32 2		; <<4 x float>> [#uses=1]
+	%tmp9 = insertelement <4 x float> %tmp8, float 0.000000e+00, i32 3		; <<4 x float>> [#uses=1]
+	ret <4 x float> %tmp9
+}
+
+define <2 x i64> @test3(i32 %a, i32 %b) nounwind {
+	%tmp = insertelement <4 x i32> zeroinitializer, i32 %a, i32 0		; <<4 x i32>> [#uses=1]
+	%tmp6 = insertelement <4 x i32> %tmp, i32 %b, i32 1		; <<4 x i32>> [#uses=1]
+	%tmp8 = insertelement <4 x i32> %tmp6, i32 0, i32 2		; <<4 x i32>> [#uses=1]
+	%tmp10 = insertelement <4 x i32> %tmp8, i32 0, i32 3		; <<4 x i32>> [#uses=1]
+	%tmp11 = bitcast <4 x i32> %tmp10 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %tmp11
+}
diff --git a/test/CodeGen/X86/vec_set-6.ll b/test/CodeGen/X86/vec_set-6.ll
new file mode 100644
index 0000000..0713d95
--- /dev/null
+++ b/test/CodeGen/X86/vec_set-6.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
+; RUN: grep movss    %t | count 1
+; RUN: grep movq     %t | count 1
+; RUN: grep shufps   %t | count 1
+
+define <4 x float> @test(float %a, float %b, float %c) nounwind {
+        %tmp = insertelement <4 x float> zeroinitializer, float %a, i32 1               ; <<4 x float>> [#uses=1]
+        %tmp8 = insertelement <4 x float> %tmp, float %b, i32 2         ; <<4 x float>> [#uses=1]
+        %tmp10 = insertelement <4 x float> %tmp8, float %c, i32 3               ; <<4 x float>> [#uses=1]
+        ret <4 x float> %tmp10
+}
+
diff --git a/test/CodeGen/X86/vec_set-7.ll b/test/CodeGen/X86/vec_set-7.ll
new file mode 100644
index 0000000..d993178
--- /dev/null
+++ b/test/CodeGen/X86/vec_set-7.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd | count 1
+
+define <2 x i64> @test(<2 x i64>* %p) nounwind {
+	%tmp = bitcast <2 x i64>* %p to double*		
+	%tmp.upgrd.1 = load double* %tmp	
+	%tmp.upgrd.2 = insertelement <2 x double> undef, double %tmp.upgrd.1, i32 0
+	%tmp5 = insertelement <2 x double> %tmp.upgrd.2, double 0.0, i32 1
+	%tmp.upgrd.3 = bitcast <2 x double> %tmp5 to <2 x i64>
+	ret <2 x i64> %tmp.upgrd.3
+}
+
diff --git a/test/CodeGen/X86/vec_set-8.ll b/test/CodeGen/X86/vec_set-8.ll
new file mode 100644
index 0000000..9697f11
--- /dev/null
+++ b/test/CodeGen/X86/vec_set-8.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86-64 | not grep movsd
+; RUN: llc < %s -march=x86-64 | grep {movd.*%rdi,.*%xmm0}
+
+define <2 x i64> @test(i64 %i) nounwind  {
+entry:
+	%tmp10 = insertelement <2 x i64> undef, i64 %i, i32 0
+	%tmp11 = insertelement <2 x i64> %tmp10, i64 0, i32 1
+	ret <2 x i64> %tmp11
+}
+
diff --git a/test/CodeGen/X86/vec_set-9.ll b/test/CodeGen/X86/vec_set-9.ll
new file mode 100644
index 0000000..3656e5f
--- /dev/null
+++ b/test/CodeGen/X86/vec_set-9.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86-64 | grep movd | count 1
+; RUN: llc < %s -march=x86-64 | grep {movlhps.*%xmm0, %xmm0}
+
+define <2 x i64> @test3(i64 %A) nounwind {
+entry:
+	%B = insertelement <2 x i64> undef, i64 %A, i32 1
+	ret <2 x i64> %B
+}
+
diff --git a/test/CodeGen/X86/vec_set-A.ll b/test/CodeGen/X86/vec_set-A.ll
new file mode 100644
index 0000000..f05eecf
--- /dev/null
+++ b/test/CodeGen/X86/vec_set-A.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {movl.*\$1, %}
+define <2 x i64> @test1() nounwind {
+entry:
+	ret <2 x i64> < i64 1, i64 0 >
+}
+
diff --git a/test/CodeGen/X86/vec_set-B.ll b/test/CodeGen/X86/vec_set-B.ll
new file mode 100644
index 0000000..f5b3e8b
--- /dev/null
+++ b/test/CodeGen/X86/vec_set-B.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep movaps
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep esp | count 2
+
+; These should both generate something like this:
+;_test3:
+;	movl	$1234567, %eax
+;	andl	4(%esp), %eax
+;	movd	%eax, %xmm0
+;	ret
+
+define <2 x i64> @test3(i64 %arg) nounwind {
+entry:
+        %A = and i64 %arg, 1234567
+        %B = insertelement <2 x i64> zeroinitializer, i64 %A, i32 0
+        ret <2 x i64> %B
+}
+
+define <2 x i64> @test2(i64 %arg) nounwind {
+entry:
+	%A = and i64 %arg, 1234567
+	%B = insertelement <2 x i64> undef, i64 %A, i32 0
+	ret <2 x i64> %B
+}
+
diff --git a/test/CodeGen/X86/vec_set-C.ll b/test/CodeGen/X86/vec_set-C.ll
new file mode 100644
index 0000000..7636ac3
--- /dev/null
+++ b/test/CodeGen/X86/vec_set-C.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep mov | count 1
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep movd
+
+define <2 x i64> @t1(i64 %x) nounwind  {
+	%tmp8 = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
+	ret <2 x i64> %tmp8
+}
diff --git a/test/CodeGen/X86/vec_set-D.ll b/test/CodeGen/X86/vec_set-D.ll
new file mode 100644
index 0000000..3d6369e
--- /dev/null
+++ b/test/CodeGen/X86/vec_set-D.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
+
+define <4 x i32> @t(i32 %x, i32 %y) nounwind  {
+	%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
+	%tmp2 = insertelement <4 x i32> %tmp1, i32 %y, i32 1
+	ret <4 x i32> %tmp2
+}
diff --git a/test/CodeGen/X86/vec_set-E.ll b/test/CodeGen/X86/vec_set-E.ll
new file mode 100644
index 0000000..d78be66
--- /dev/null
+++ b/test/CodeGen/X86/vec_set-E.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
+
+define <4 x float> @t(float %X) nounwind  {
+	%tmp11 = insertelement <4 x float> undef, float %X, i32 0
+	%tmp12 = insertelement <4 x float> %tmp11, float %X, i32 1
+	%tmp27 = insertelement <4 x float> %tmp12, float 0.000000e+00, i32 2
+	%tmp28 = insertelement <4 x float> %tmp27, float 0.000000e+00, i32 3
+	ret <4 x float> %tmp28
+}
diff --git a/test/CodeGen/X86/vec_set-F.ll b/test/CodeGen/X86/vec_set-F.ll
new file mode 100644
index 0000000..4f0acb2
--- /dev/null
+++ b/test/CodeGen/X86/vec_set-F.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep mov | count 3
+
+define <2 x i64> @t1(<2 x i64>* %ptr) nounwind  {
+	%tmp45 = bitcast <2 x i64>* %ptr to <2 x i32>*
+	%tmp615 = load <2 x i32>* %tmp45
+	%tmp7 = bitcast <2 x i32> %tmp615 to i64
+	%tmp8 = insertelement <2 x i64> zeroinitializer, i64 %tmp7, i32 0
+	ret <2 x i64> %tmp8
+}
+
+define <2 x i64> @t2(i64 %x) nounwind  {
+	%tmp717 = bitcast i64 %x to double
+	%tmp8 = insertelement <2 x double> undef, double %tmp717, i32 0
+	%tmp9 = insertelement <2 x double> %tmp8, double 0.000000e+00, i32 1
+	%tmp11 = bitcast <2 x double> %tmp9 to <2 x i64>
+	ret <2 x i64> %tmp11
+}
diff --git a/test/CodeGen/X86/vec_set-G.ll b/test/CodeGen/X86/vec_set-G.ll
new file mode 100644
index 0000000..4a542fe
--- /dev/null
+++ b/test/CodeGen/X86/vec_set-G.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movss
+
+define fastcc void @t(<4 x float> %A) nounwind  {
+	%tmp41896 = extractelement <4 x float> %A, i32 0		; <float> [#uses=1]
+	%tmp14082 = insertelement <4 x float> < float 0.000000e+00, float undef, float undef, float undef >, float %tmp41896, i32 1		; <<4 x float>> [#uses=1]
+	%tmp14083 = insertelement <4 x float> %tmp14082, float 0.000000e+00, i32 2		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp14083, <4 x float>* null, align 16
+        ret void
+}
diff --git a/test/CodeGen/X86/vec_set-H.ll b/test/CodeGen/X86/vec_set-H.ll
new file mode 100644
index 0000000..5037e36
--- /dev/null
+++ b/test/CodeGen/X86/vec_set-H.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep movz
+
+define <2 x i64> @doload64(i16 signext  %x) nounwind  {
+entry:
+	%tmp36 = insertelement <8 x i16> undef, i16 %x, i32 0		; <<8 x i16>> [#uses=1]
+	%tmp37 = insertelement <8 x i16> %tmp36, i16 %x, i32 1		; <<8 x i16>> [#uses=1]
+	%tmp38 = insertelement <8 x i16> %tmp37, i16 %x, i32 2		; <<8 x i16>> [#uses=1]
+	%tmp39 = insertelement <8 x i16> %tmp38, i16 %x, i32 3		; <<8 x i16>> [#uses=1]
+	%tmp40 = insertelement <8 x i16> %tmp39, i16 %x, i32 4		; <<8 x i16>> [#uses=1]
+	%tmp41 = insertelement <8 x i16> %tmp40, i16 %x, i32 5		; <<8 x i16>> [#uses=1]
+	%tmp42 = insertelement <8 x i16> %tmp41, i16 %x, i32 6		; <<8 x i16>> [#uses=1]
+	%tmp43 = insertelement <8 x i16> %tmp42, i16 %x, i32 7		; <<8 x i16>> [#uses=1]
+	%tmp46 = bitcast <8 x i16> %tmp43 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %tmp46
+}
diff --git a/test/CodeGen/X86/vec_set-I.ll b/test/CodeGen/X86/vec_set-I.ll
new file mode 100644
index 0000000..64f36f9
--- /dev/null
+++ b/test/CodeGen/X86/vec_set-I.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movd
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep xorp
+
+define void @t1() nounwind  {
+	%tmp298.i.i = load <4 x float>* null, align 16
+	%tmp304.i.i = bitcast <4 x float> %tmp298.i.i to <4 x i32>
+	%tmp305.i.i = and <4 x i32> %tmp304.i.i, < i32 -1, i32 0, i32 0, i32 0 >
+	store <4 x i32> %tmp305.i.i, <4 x i32>* null, align 16
+	unreachable
+}
diff --git a/test/CodeGen/X86/vec_set-J.ll b/test/CodeGen/X86/vec_set-J.ll
new file mode 100644
index 0000000..d90ab85
--- /dev/null
+++ b/test/CodeGen/X86/vec_set-J.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movss
+; PR2472
+
+define <4 x i32> @a(<4 x i32> %a) nounwind {
+entry:
+        %vecext = extractelement <4 x i32> %a, i32 0
+        insertelement <4 x i32> zeroinitializer, i32 %vecext, i32 0
+        %add = add <4 x i32> %a, %0
+        ret <4 x i32> %add
+}
diff --git a/test/CodeGen/X86/vec_set.ll b/test/CodeGen/X86/vec_set.ll
new file mode 100644
index 0000000..c316df8
--- /dev/null
+++ b/test/CodeGen/X86/vec_set.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep punpckl | count 7
+
+define void @test(<8 x i16>* %b, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
+        %tmp = insertelement <8 x i16> zeroinitializer, i16 %a0, i32 0          ; <<8 x i16>> [#uses=1]
+        %tmp2 = insertelement <8 x i16> %tmp, i16 %a1, i32 1            ; <<8 x i16>> [#uses=1]
+        %tmp4 = insertelement <8 x i16> %tmp2, i16 %a2, i32 2           ; <<8 x i16>> [#uses=1]
+        %tmp6 = insertelement <8 x i16> %tmp4, i16 %a3, i32 3           ; <<8 x i16>> [#uses=1]
+        %tmp8 = insertelement <8 x i16> %tmp6, i16 %a4, i32 4           ; <<8 x i16>> [#uses=1]
+        %tmp10 = insertelement <8 x i16> %tmp8, i16 %a5, i32 5          ; <<8 x i16>> [#uses=1]
+        %tmp12 = insertelement <8 x i16> %tmp10, i16 %a6, i32 6         ; <<8 x i16>> [#uses=1]
+        %tmp14 = insertelement <8 x i16> %tmp12, i16 %a7, i32 7         ; <<8 x i16>> [#uses=1]
+        store <8 x i16> %tmp14, <8 x i16>* %b
+        ret void
+}
+
diff --git a/test/CodeGen/X86/vec_shift.ll b/test/CodeGen/X86/vec_shift.ll
new file mode 100644
index 0000000..ddf0469
--- /dev/null
+++ b/test/CodeGen/X86/vec_shift.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep psllw
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep psrlq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep psraw
+
+define <2 x i64> @t1(<2 x i64> %b1, <2 x i64> %c) nounwind  {
+entry:
+	%tmp6 = bitcast <2 x i64> %c to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp8 = bitcast <2 x i64> %b1 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp9 = tail call <8 x i16> @llvm.x86.sse2.psll.w( <8 x i16> %tmp8, <8 x i16> %tmp6 ) nounwind readnone 		; <<8 x i16>> [#uses=1]
+	%tmp10 = bitcast <8 x i16> %tmp9 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %tmp10
+}
+
+define <2 x i64> @t3(<2 x i64> %b1, i32 %c) nounwind  {
+entry:
+	%tmp2 = bitcast <2 x i64> %b1 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp4 = insertelement <4 x i32> undef, i32 %c, i32 0		; <<4 x i32>> [#uses=1]
+	%tmp8 = bitcast <4 x i32> %tmp4 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp9 = tail call <8 x i16> @llvm.x86.sse2.psra.w( <8 x i16> %tmp2, <8 x i16> %tmp8 )		; <<8 x i16>> [#uses=1]
+	%tmp11 = bitcast <8 x i16> %tmp9 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %tmp11
+}
+
+declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone 
+
+define <2 x i64> @t2(<2 x i64> %b1, <2 x i64> %c) nounwind  {
+entry:
+	%tmp9 = tail call <2 x i64> @llvm.x86.sse2.psrl.q( <2 x i64> %b1, <2 x i64> %c ) nounwind readnone 		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %tmp9
+}
+
+declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone 
+
+declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone 
diff --git a/test/CodeGen/X86/vec_shift2.ll b/test/CodeGen/X86/vec_shift2.ll
new file mode 100644
index 0000000..c5f9dc4
--- /dev/null
+++ b/test/CodeGen/X86/vec_shift2.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep CPI
+
+define <2 x i64> @t1(<2 x i64> %b1, <2 x i64> %c) nounwind  {
+	%tmp1 = bitcast <2 x i64> %b1 to <8 x i16>
+	%tmp2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w( <8 x i16> %tmp1, <8 x i16> bitcast (<4 x i32> < i32 14, i32 undef, i32 undef, i32 undef > to <8 x i16>) ) nounwind readnone
+	%tmp3 = bitcast <8 x i16> %tmp2 to <2 x i64>
+	ret <2 x i64> %tmp3
+}
+
+define <4 x i32> @t2(<2 x i64> %b1, <2 x i64> %c) nounwind  {
+	%tmp1 = bitcast <2 x i64> %b1 to <4 x i32>
+	%tmp2 = tail call <4 x i32> @llvm.x86.sse2.psll.d( <4 x i32> %tmp1, <4 x i32> < i32 14, i32 undef, i32 undef, i32 undef > ) nounwind readnone
+	ret <4 x i32> %tmp2
+}
+
+declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone 
+declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone 
diff --git a/test/CodeGen/X86/vec_shift3.ll b/test/CodeGen/X86/vec_shift3.ll
new file mode 100644
index 0000000..1ebf455
--- /dev/null
+++ b/test/CodeGen/X86/vec_shift3.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep psllq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep psraw
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movd | count 2
+
+define <2 x i64> @t1(<2 x i64> %x1, i32 %bits) nounwind  {
+entry:
+	%tmp3 = tail call <2 x i64> @llvm.x86.sse2.pslli.q( <2 x i64> %x1, i32 %bits ) nounwind readnone 		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %tmp3
+}
+
+define <2 x i64> @t2(<2 x i64> %x1) nounwind  {
+entry:
+	%tmp3 = tail call <2 x i64> @llvm.x86.sse2.pslli.q( <2 x i64> %x1, i32 10 ) nounwind readnone 		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %tmp3
+}
+
+define <2 x i64> @t3(<2 x i64> %x1, i32 %bits) nounwind  {
+entry:
+	%tmp2 = bitcast <2 x i64> %x1 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp4 = tail call <8 x i16> @llvm.x86.sse2.psrai.w( <8 x i16> %tmp2, i32 %bits ) nounwind readnone 		; <<8 x i16>> [#uses=1]
+	%tmp5 = bitcast <8 x i16> %tmp4 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %tmp5
+}
+
+declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone 
+declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone 
diff --git a/test/CodeGen/X86/vec_shuffle-10.ll b/test/CodeGen/X86/vec_shuffle-10.ll
new file mode 100644
index 0000000..a63e386
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-10.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
+; RUN: grep unpcklps %t | count 1
+; RUN: grep pshufd   %t | count 1
+; RUN: not grep {sub.*esp} %t
+
+define void @test(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) {
+	%tmp = load <4 x float>* %B		; <<4 x float>> [#uses=2]
+	%tmp3 = load <4 x float>* %A		; <<4 x float>> [#uses=2]
+	%tmp.upgrd.1 = extractelement <4 x float> %tmp3, i32 0		; <float> [#uses=1]
+	%tmp7 = extractelement <4 x float> %tmp, i32 0		; <float> [#uses=1]
+	%tmp8 = extractelement <4 x float> %tmp3, i32 1		; <float> [#uses=1]
+	%tmp9 = extractelement <4 x float> %tmp, i32 1		; <float> [#uses=1]
+	%tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.1, i32 0		; <<4 x float>> [#uses=1]
+	%tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1		; <<4 x float>> [#uses=1]
+	%tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2		; <<4 x float>> [#uses=1]
+	%tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp13, <4 x float>* %res
+	ret void
+}
+
+define void @test2(<4 x float> %X, <4 x float>* %res) {
+	%tmp5 = shufflevector <4 x float> %X, <4 x float> undef, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp5, <4 x float>* %res
+	ret void
+}
diff --git a/test/CodeGen/X86/vec_shuffle-11.ll b/test/CodeGen/X86/vec_shuffle-11.ll
new file mode 100644
index 0000000..640745a
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-11.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | not grep mov
+
+define <4 x i32> @test() nounwind {
+        %tmp131 = call <2 x i64> @llvm.x86.sse2.psrl.dq( <2 x i64> < i64 -1, i64 -1 >, i32 96 )         ; <<2 x i64>> [#uses=1]
+        %tmp137 = bitcast <2 x i64> %tmp131 to <4 x i32>                ; <<4 x i32>> [#uses=1]
+        %tmp138 = and <4 x i32> %tmp137, bitcast (<2 x i64> < i64 -1, i64 -1 > to <4 x i32>)            ; <<4 x i32>> [#uses=1]
+        ret <4 x i32> %tmp138
+}
+
+declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32)
diff --git a/test/CodeGen/X86/vec_shuffle-14.ll b/test/CodeGen/X86/vec_shuffle-14.ll
new file mode 100644
index 0000000..f0cfc44
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-14.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movd | count 1
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep movd | count 2
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep movq | count 3
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep xor
+
+define <4 x i32> @t1(i32 %a) nounwind  {
+entry:
+        %tmp = insertelement <4 x i32> undef, i32 %a, i32 0
+	%tmp6 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %tmp, <4 x i32> < i32 4, i32 1, i32 2, i32 3 >		; <<4 x i32>> [#uses=1]
+	ret <4 x i32> %tmp6
+}
+
+define <2 x i64> @t2(i64 %a) nounwind  {
+entry:
+        %tmp = insertelement <2 x i64> undef, i64 %a, i32 0
+	%tmp6 = shufflevector <2 x i64> zeroinitializer, <2 x i64> %tmp, <2 x i32> < i32 2, i32 1 >		; <<4 x i32>> [#uses=1]
+	ret <2 x i64> %tmp6
+}
+
+define <2 x i64> @t3(<2 x i64>* %a) nounwind  {
+entry:
+	%tmp4 = load <2 x i64>* %a, align 16		; <<2 x i64>> [#uses=1]
+	%tmp6 = bitcast <2 x i64> %tmp4 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp7 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %tmp6, <4 x i32> < i32 4, i32 5, i32 2, i32 3 >		; <<4 x i32>> [#uses=1]
+	%tmp8 = bitcast <4 x i32> %tmp7 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %tmp8
+}
+
+define <2 x i64> @t4(<2 x i64> %a) nounwind  {
+entry:
+	%tmp5 = bitcast <2 x i64> %a to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp6 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %tmp5, <4 x i32> < i32 4, i32 5, i32 2, i32 3 >		; <<4 x i32>> [#uses=1]
+	%tmp7 = bitcast <4 x i32> %tmp6 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %tmp7
+}
+
+define <2 x i64> @t5(<2 x i64> %a) nounwind  {
+entry:
+	%tmp6 = shufflevector <2 x i64> zeroinitializer, <2 x i64> %a, <2 x i32> < i32 2, i32 1 >		; <<4 x i32>> [#uses=1]
+	ret <2 x i64> %tmp6
+}
diff --git a/test/CodeGen/X86/vec_shuffle-15.ll b/test/CodeGen/X86/vec_shuffle-15.ll
new file mode 100644
index 0000000..5a9b8fd
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-15.ll
@@ -0,0 +1,81 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2
+
+define <2 x i64> @t00(<2 x i64> %a, <2 x i64> %b) nounwind  {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 0, i32 0 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t01(<2 x i64> %a, <2 x i64> %b) nounwind  {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 0, i32 1 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t02(<2 x i64> %a, <2 x i64> %b) nounwind  {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 0, i32 2 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t03(<2 x i64> %a, <2 x i64> %b) nounwind  {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 0, i32 3 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t10(<2 x i64> %a, <2 x i64> %b) nounwind  {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 1, i32 0 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t11(<2 x i64> %a, <2 x i64> %b) nounwind  {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 1, i32 1 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t12(<2 x i64> %a, <2 x i64> %b) nounwind  {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 1, i32 2 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t13(<2 x i64> %a, <2 x i64> %b) nounwind  {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 1, i32 3 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t20(<2 x i64> %a, <2 x i64> %b) nounwind  {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 2, i32 0 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t21(<2 x i64> %a, <2 x i64> %b) nounwind  {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 2, i32 1 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t22(<2 x i64> %a, <2 x i64> %b) nounwind  {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 2, i32 2 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t23(<2 x i64> %a, <2 x i64> %b) nounwind  {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 2, i32 3 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t30(<2 x i64> %a, <2 x i64> %b) nounwind  {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 3, i32 0 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t31(<2 x i64> %a, <2 x i64> %b) nounwind  {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 3, i32 1 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t32(<2 x i64> %a, <2 x i64> %b) nounwind  {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 3, i32 2 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t33(<2 x i64> %a, <2 x i64> %b) nounwind  {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 3, i32 3 >
+	ret <2 x i64> %tmp
+}
diff --git a/test/CodeGen/X86/vec_shuffle-16.ll b/test/CodeGen/X86/vec_shuffle-16.ll
new file mode 100644
index 0000000..470f676
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-16.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=x86 -mattr=+sse,-sse2 -mtriple=i386-apple-darwin -o %t
+; RUN: grep shufps %t | count 4
+; RUN: grep movaps %t | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -o %t
+; RUN: grep pshufd %t | count 4
+; RUN: not grep shufps %t
+; RUN: not grep mov %t
+
+define <4 x float> @t1(<4 x float> %a, <4 x float> %b) nounwind  {
+        %tmp1 = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
+        ret <4 x float> %tmp1
+}
+
+define <4 x float> @t2(<4 x float> %A, <4 x float> %B) nounwind {
+	%tmp = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> < i32 3, i32 3, i32 3, i32 3 >
+	ret <4 x float> %tmp
+}
+
+define <4 x float> @t3(<4 x float> %A, <4 x float> %B) nounwind {
+	%tmp = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> < i32 4, i32 4, i32 4, i32 4 >
+	ret <4 x float> %tmp
+}
+
+define <4 x float> @t4(<4 x float> %A, <4 x float> %B) nounwind {
+	%tmp = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> < i32 1, i32 3, i32 2, i32 0 >
+	ret <4 x float> %tmp
+}
diff --git a/test/CodeGen/X86/vec_shuffle-17.ll b/test/CodeGen/X86/vec_shuffle-17.ll
new file mode 100644
index 0000000..9c33abb
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-17.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86-64 | grep {movd.*%rdi, %xmm0}
+; RUN: llc < %s -march=x86-64 | not grep xor
+; PR2108
+
+define <2 x i64> @doload64(i64 %x) nounwind  {
+entry:
+	%tmp717 = bitcast i64 %x to double		; <double> [#uses=1]
+	%tmp8 = insertelement <2 x double> undef, double %tmp717, i32 0		; <<2 x double>> [#uses=1]
+	%tmp9 = insertelement <2 x double> %tmp8, double 0.000000e+00, i32 1		; <<2 x double>> [#uses=1]
+	%tmp11 = bitcast <2 x double> %tmp9 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %tmp11
+}
+
diff --git a/test/CodeGen/X86/vec_shuffle-18.ll b/test/CodeGen/X86/vec_shuffle-18.ll
new file mode 100644
index 0000000..1104a4a
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-18.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8.8.0 | grep mov | count 7
+
+	%struct.vector4_t = type { <4 x float> }
+
+define void @swizzle(i8* %a, %struct.vector4_t* %b, %struct.vector4_t* %c) nounwind  {
+entry:
+	%tmp9 = getelementptr %struct.vector4_t* %b, i32 0, i32 0		; <<4 x float>*> [#uses=2]
+	%tmp10 = load <4 x float>* %tmp9, align 16		; <<4 x float>> [#uses=1]
+	%tmp14 = bitcast i8* %a to double*		; <double*> [#uses=1]
+	%tmp15 = load double* %tmp14		; <double> [#uses=1]
+	%tmp16 = insertelement <2 x double> undef, double %tmp15, i32 0		; <<2 x double>> [#uses=1]
+	%tmp18 = bitcast <2 x double> %tmp16 to <4 x float>		; <<4 x float>> [#uses=1]
+	%tmp19 = shufflevector <4 x float> %tmp10, <4 x float> %tmp18, <4 x i32> < i32 4, i32 5, i32 2, i32 3 >		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp19, <4 x float>* %tmp9, align 16
+	%tmp28 = getelementptr %struct.vector4_t* %c, i32 0, i32 0		; <<4 x float>*> [#uses=2]
+	%tmp29 = load <4 x float>* %tmp28, align 16		; <<4 x float>> [#uses=1]
+	%tmp26 = getelementptr i8* %a, i32 8		; <i8*> [#uses=1]
+	%tmp33 = bitcast i8* %tmp26 to double*		; <double*> [#uses=1]
+	%tmp34 = load double* %tmp33		; <double> [#uses=1]
+	%tmp35 = insertelement <2 x double> undef, double %tmp34, i32 0		; <<2 x double>> [#uses=1]
+	%tmp37 = bitcast <2 x double> %tmp35 to <4 x float>		; <<4 x float>> [#uses=1]
+	%tmp38 = shufflevector <4 x float> %tmp29, <4 x float> %tmp37, <4 x i32> < i32 4, i32 5, i32 2, i32 3 >		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp38, <4 x float>* %tmp28, align 16
+	ret void
+}
diff --git a/test/CodeGen/X86/vec_shuffle-19.ll b/test/CodeGen/X86/vec_shuffle-19.ll
new file mode 100644
index 0000000..9fc09df
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-19.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4
+; PR2485
+
+define <4 x i32> @t(<4 x i32> %a, <4 x i32> %b) nounwind  {
+entry:
+	%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> < i32 4, i32 0, i32 0, i32 0 >		; <<4 x i32>> [#uses=1]
+	ret <4 x i32> %shuffle
+}
diff --git a/test/CodeGen/X86/vec_shuffle-20.ll b/test/CodeGen/X86/vec_shuffle-20.ll
new file mode 100644
index 0000000..6d1bac0
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-20.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3
+
+define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind  {
+entry:
+	shufflevector <4 x float> %fp0, <4 x float> %fp1, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >		; <<4 x float>>:0 [#uses=1]
+	ret <4 x float> %0
+}
diff --git a/test/CodeGen/X86/vec_shuffle-22.ll b/test/CodeGen/X86/vec_shuffle-22.ll
new file mode 100644
index 0000000..6807e4d
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-22.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 -mcpu=pentium-m  | FileCheck %s
+
+define <4 x float> @t1(<4 x float> %a) nounwind  {
+; CHECK: movlhps
+  %tmp1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> < i32 0, i32 1, i32 0, i32 1 >       ; <<4 x float>> [#uses=1]
+  ret <4 x float> %tmp1
+}
+
+define <4 x i32> @t2(<4 x i32>* %a) nounwind {
+; CHECK: pshufd
+; CHECK: ret
+  %tmp1 = load <4 x i32>* %a
+	%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> < i32 0, i32 1, i32 0, i32 1 >		; <<4 x i32>> [#uses=1]
+	ret <4 x i32> %tmp2
+}
diff --git a/test/CodeGen/X86/vec_shuffle-23.ll b/test/CodeGen/X86/vec_shuffle-23.ll
new file mode 100644
index 0000000..05a3a1e
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-23.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2                | not grep punpck
+; RUN: llc < %s -march=x86 -mattr=+sse2                |     grep pshufd
+
+define i32 @t() nounwind {
+entry:
+	%a = alloca <4 x i32>		; <<4 x i32>*> [#uses=2]
+	%b = alloca <4 x i32>		; <<4 x i32>*> [#uses=5]
+	volatile store <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a
+	%tmp = load <4 x i32>* %a		; <<4 x i32>> [#uses=1]
+	store <4 x i32> %tmp, <4 x i32>* %b
+	%tmp1 = load <4 x i32>* %b		; <<4 x i32>> [#uses=1]
+	%tmp2 = load <4 x i32>* %b		; <<4 x i32>> [#uses=1]
+	%punpckldq = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x i32>> [#uses=1]
+	store <4 x i32> %punpckldq, <4 x i32>* %b
+	%tmp3 = load <4 x i32>* %b		; <<4 x i32>> [#uses=1]
+	%result = extractelement <4 x i32> %tmp3, i32 0		; <i32> [#uses=1]
+	ret i32 %result
+}
diff --git a/test/CodeGen/X86/vec_shuffle-24.ll b/test/CodeGen/X86/vec_shuffle-24.ll
new file mode 100644
index 0000000..7562f1d
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-24.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2  |     grep punpck
+
+define i32 @t() nounwind optsize {
+entry:
+	%a = alloca <4 x i32>		; <<4 x i32>*> [#uses=2]
+	%b = alloca <4 x i32>		; <<4 x i32>*> [#uses=5]
+	volatile store <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a
+	%tmp = load <4 x i32>* %a		; <<4 x i32>> [#uses=1]
+	store <4 x i32> %tmp, <4 x i32>* %b
+	%tmp1 = load <4 x i32>* %b		; <<4 x i32>> [#uses=1]
+	%tmp2 = load <4 x i32>* %b		; <<4 x i32>> [#uses=1]
+	%punpckldq = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x i32>> [#uses=1]
+	store <4 x i32> %punpckldq, <4 x i32>* %b
+	%tmp3 = load <4 x i32>* %b		; <<4 x i32>> [#uses=1]
+	%result = extractelement <4 x i32> %tmp3, i32 0		; <i32> [#uses=1]
+	ret i32 %result
+}
diff --git a/test/CodeGen/X86/vec_shuffle-25.ll b/test/CodeGen/X86/vec_shuffle-25.ll
new file mode 100644
index 0000000..d9b2388
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-25.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
+; RUN: grep unpcklps %t | count 3
+; RUN: grep unpckhps %t | count 1
+ 
+; Transpose example using the more generic vector shuffle. We return
+; float8 instead of float16 since x86 can return that in a register.
+; ModuleID = 'transpose2_opt.bc'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-apple-cl.1.0"
+@r0 = common global <4 x float> zeroinitializer, align 16		; <<4 x float>*> [#uses=1]
+@r1 = common global <4 x float> zeroinitializer, align 16		; <<4 x float>*> [#uses=1]
+@r2 = common global <4 x float> zeroinitializer, align 16		; <<4 x float>*> [#uses=1]
+@r3 = common global <4 x float> zeroinitializer, align 16		; <<4 x float>*> [#uses=1]
+
+define <8 x float> @__transpose2(<4 x float> %p0, <4 x float> %p1, <4 x float> %p2, <4 x float> %p3) nounwind {
+entry:
+	%unpcklps = shufflevector <4 x float> %p0, <4 x float> %p2, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>> [#uses=2]
+	%unpckhps = shufflevector <4 x float> %p0, <4 x float> %p2, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x float>> [#uses=2]
+	%unpcklps8 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>> [#uses=2]
+	%unpckhps11 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x float>> [#uses=2]
+	%unpcklps14 = shufflevector <4 x float> %unpcklps, <4 x float> %unpcklps8, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>> [#uses=1]
+	%unpcklps14a = shufflevector <4 x float> %unpcklps14,  <4 x float> undef,  <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+	%unpckhps17 = shufflevector <4 x float> %unpcklps, <4 x float> %unpcklps8, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x float>> [#uses=1]
+	%unpckhps17a = shufflevector <4 x float> %unpckhps17,  <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+	%r1 = shufflevector <16 x float> %unpcklps14a,  <16 x float> %unpckhps17a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+	%unpcklps20 = shufflevector <4 x float> %unpckhps, <4 x float> %unpckhps11, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>> [#uses=1]
+	%unpcklps20a = shufflevector <4 x float> %unpcklps20,  <4 x float> undef,  <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+	%r2 = shufflevector <16 x float> %r1,  <16 x float> %unpcklps20a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
+	%unpckhps23 = shufflevector <4 x float> %unpckhps, <4 x float> %unpckhps11, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x float>> [#uses=1]
+	%unpckhps23a = shufflevector <4 x float> %unpckhps23,  <4 x float> undef,  <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+	%r3 = shufflevector <16 x float> %r2,  <16 x float> %unpckhps23a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
+	%r4 = shufflevector <16 x float> %r3,  <16 x float> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+	ret <8 x float> %r4
+}
diff --git a/test/CodeGen/X86/vec_shuffle-26.ll b/test/CodeGen/X86/vec_shuffle-26.ll
new file mode 100644
index 0000000..086af6b
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-26.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
+; RUN: grep unpcklps %t | count 1
+; RUN: grep unpckhps %t | count 3
+
+; Transpose example using the more generic vector shuffle. Return float8
+; instead of float16
+; ModuleID = 'transpose2_opt.bc'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-apple-cl.1.0"
+@r0 = common global <4 x float> zeroinitializer, align 16		; <<4 x float>*> [#uses=1]
+@r1 = common global <4 x float> zeroinitializer, align 16		; <<4 x float>*> [#uses=1]
+@r2 = common global <4 x float> zeroinitializer, align 16		; <<4 x float>*> [#uses=1]
+@r3 = common global <4 x float> zeroinitializer, align 16		; <<4 x float>*> [#uses=1]
+
+define <8 x float> @__transpose2(<4 x float> %p0, <4 x float> %p1, <4 x float> %p2, <4 x float> %p3) nounwind {
+entry:
+	%unpcklps = shufflevector <4 x float> %p0, <4 x float> %p2, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>> [#uses=2]
+	%unpckhps = shufflevector <4 x float> %p0, <4 x float> %p2, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x float>> [#uses=2]
+	%unpcklps8 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>> [#uses=2]
+	%unpckhps11 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x float>> [#uses=2]
+	%unpcklps14 = shufflevector <4 x float> %unpcklps, <4 x float> %unpcklps8, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>> [#uses=1]
+	%unpckhps17 = shufflevector <4 x float> %unpcklps, <4 x float> %unpcklps8, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x float>> [#uses=1]
+        %r1 = shufflevector <4 x float> %unpcklps14,  <4 x float> %unpckhps17,  <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
+	%unpcklps20 = shufflevector <4 x float> %unpckhps, <4 x float> %unpckhps11, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>> [#uses=1]
+	%unpckhps23 = shufflevector <4 x float> %unpckhps, <4 x float> %unpckhps11, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x float>> [#uses=1]
+        %r2 = shufflevector <4 x float> %unpcklps20,  <4 x float> %unpckhps23,  <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
+;       %r3 = shufflevector <8 x float> %r1,  <8 x float> %r2,  <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15 >; 
+	ret <8 x float> %r2
+}
diff --git a/test/CodeGen/X86/vec_shuffle-27.ll b/test/CodeGen/X86/vec_shuffle-27.ll
new file mode 100644
index 0000000..d700ccb
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-27.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
+; RUN: grep addps %t | count 2
+; RUN: grep mulps %t | count 2
+; RUN: grep subps %t | count 2
+
+; ModuleID = 'vec_shuffle-27.bc'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i686-apple-cl.1.0"
+
+define <8 x float> @my2filter4_1d(<4 x float> %a, <8 x float> %T0, <8 x float> %T1) nounwind readnone {
+entry:
+	%tmp7 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3 >		; <<8 x float>> [#uses=1]
+	%sub = fsub <8 x float> %T1, %T0		; <<8 x float>> [#uses=1]
+	%mul = fmul <8 x float> %sub, %tmp7		; <<8 x float>> [#uses=1]
+	%add = fadd <8 x float> %mul, %T0		; <<8 x float>> [#uses=1]
+	ret <8 x float> %add
+}
diff --git a/test/CodeGen/X86/vec_shuffle-28.ll b/test/CodeGen/X86/vec_shuffle-28.ll
new file mode 100644
index 0000000..343685b
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-28.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
+; RUN: grep pshufb %t | count 1
+
+; FIXME: this test has a superfluous punpcklqdq pre-pshufb currently.
+;        Don't XFAIL it because it's still better than the previous code.
+
+; Pack various elements via shuffles.
+define <8 x i16> @shuf1(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+	%tmp7 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 1, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
+	ret <8 x i16> %tmp7
+}
diff --git a/test/CodeGen/X86/vec_shuffle-3.ll b/test/CodeGen/X86/vec_shuffle-3.ll
new file mode 100644
index 0000000..f4930b0
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-3.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
+; RUN: grep movlhps %t | count 1
+; RUN: grep movhlps %t | count 1
+
+define <4 x float> @test1(<4 x float>* %x, <4 x float>* %y) {
+        %tmp = load <4 x float>* %y             ; <<4 x float>> [#uses=2]
+        %tmp5 = load <4 x float>* %x            ; <<4 x float>> [#uses=2]
+        %tmp9 = fadd <4 x float> %tmp5, %tmp             ; <<4 x float>> [#uses=1]
+        %tmp21 = fsub <4 x float> %tmp5, %tmp            ; <<4 x float>> [#uses=1]
+        %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 >                ; <<4 x float>> [#uses=1]
+        ret <4 x float> %tmp27
+}
+
+define <4 x float> @movhl(<4 x float>* %x, <4 x float>* %y) {
+entry:
+        %tmp = load <4 x float>* %y             ; <<4 x float>> [#uses=1]
+        %tmp3 = load <4 x float>* %x            ; <<4 x float>> [#uses=1]
+        %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 >           ; <<4 x float>> [#uses=1]
+        ret <4 x float> %tmp4
+}
diff --git a/test/CodeGen/X86/vec_shuffle-30.ll b/test/CodeGen/X86/vec_shuffle-30.ll
new file mode 100644
index 0000000..3f69150
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-30.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86 -mattr=sse41 -disable-mmx -o %t
+; RUN: grep pshufhw %t | grep -- -95 | count 1
+; RUN: grep shufps %t | count 1
+; RUN: not grep pslldq %t
+
+; Test case: when creating pshufhw, we incorrectly set the higher-order bit
+; for an undef element.
+define void @test(<8 x i16>* %dest, <8 x i16> %in) nounwind {
+entry:
+  %0 = load <8 x i16>* %dest
+  %1 = shufflevector <8 x i16> %0, <8 x i16> %in, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 13, i32 undef, i32 14, i32 14>
+  store <8 x i16> %1, <8 x i16>* %dest
+  ret void
+}                              
+
+; A test case where we shouldn't generate a punpckldq but a pshufd and a pslldq
+define void @test2(<4 x i32>* %dest, <4 x i32> %in) nounwind {
+entry:
+  %0 = shufflevector <4 x i32> %in, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> < i32 undef, i32 5, i32 undef, i32 2>
+  store <4 x i32> %0, <4 x i32>* %dest
+  ret void
+}
diff --git a/test/CodeGen/X86/vec_shuffle-31.ll b/test/CodeGen/X86/vec_shuffle-31.ll
new file mode 100644
index 0000000..bb06e15
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-31.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
+; RUN: grep pshufb %t | count 1
+
+define <8 x i16> @shuf3(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+	%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 0, i32 1, i32 undef, i32 undef, i32 3, i32 11, i32 undef , i32 undef >
+	ret <8 x i16> %tmp9
+}
diff --git a/test/CodeGen/X86/vec_shuffle-34.ll b/test/CodeGen/X86/vec_shuffle-34.ll
new file mode 100644
index 0000000..d057b3f
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-34.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=x86 -mcpu=core2 | grep pshufb | count 2
+
+define <8 x i16> @shuf2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+	%tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
+	ret <8 x i16> %tmp8
+}
diff --git a/test/CodeGen/X86/vec_shuffle-35.ll b/test/CodeGen/X86/vec_shuffle-35.ll
new file mode 100644
index 0000000..7f0fcb5
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-35.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86 -mcpu=yonah -stack-alignment=16 -o %t
+; RUN: grep pextrw %t | count 13
+; RUN: grep pinsrw %t | count 14
+; RUN: grep rolw %t | count 13
+; RUN: not grep esp %t
+; RUN: not grep ebp %t
+; RUN: llc < %s -march=x86 -mcpu=core2 -stack-alignment=16 -o %t
+; RUN: grep pshufb %t | count 3
+
+define <16 x i8> @shuf1(<16 x i8> %T0) nounwind readnone {
+entry:
+	%tmp8 = shufflevector <16 x i8> %T0, <16 x i8> undef, <16 x i32> < i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 12, i32 13, i32 15 , i32 14 >
+	ret <16 x i8> %tmp8
+}
+
+define <16 x i8> @shuf2(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+entry:
+	%tmp8 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> < i32 undef, i32 undef, i32 3, i32 2, i32 17, i32 16, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 12, i32 13, i32 15 , i32 14 >
+	ret <16 x i8> %tmp8
+}
diff --git a/test/CodeGen/X86/vec_shuffle-36.ll b/test/CodeGen/X86/vec_shuffle-36.ll
new file mode 100644
index 0000000..8a93a7e
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-36.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
+; RUN: grep pshufb %t | count 1
+
+
+define <8 x i16> @shuf6(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+	%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 3, i32 2, i32 0, i32 2, i32 1, i32 5, i32 6 , i32 undef >
+	ret <8 x i16> %tmp9
+}
diff --git a/test/CodeGen/X86/vec_shuffle-4.ll b/test/CodeGen/X86/vec_shuffle-4.ll
new file mode 100644
index 0000000..829fedf
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-4.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
+; RUN: grep shuf %t | count 2
+; RUN: not grep unpck %t
+
+define void @test(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x float>* %C) {
+        %tmp3 = load <4 x float>* %B            ; <<4 x float>> [#uses=1]
+        %tmp5 = load <4 x float>* %C            ; <<4 x float>> [#uses=1]
+        %tmp11 = shufflevector <4 x float> %tmp3, <4 x float> %tmp5, <4 x i32> < i32 1, i32 4, i32 1, i32 5 >         ; <<4 x float>> [#uses=1]
+        store <4 x float> %tmp11, <4 x float>* %res
+        ret void
+}
+
diff --git a/test/CodeGen/X86/vec_shuffle-5.ll b/test/CodeGen/X86/vec_shuffle-5.ll
new file mode 100644
index 0000000..c24167a
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-5.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
+; RUN: grep movhlps %t | count 1
+; RUN: grep shufps  %t | count 1
+
+define void @test() nounwind {
+        %tmp1 = load <4 x float>* null          ; <<4 x float>> [#uses=2]
+        %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 >             ; <<4 x float>> [#uses=1]
+        %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 >                ; <<4 x float>> [#uses=1]
+        %tmp4 = fadd <4 x float> %tmp2, %tmp3            ; <<4 x float>> [#uses=1]
+        store <4 x float> %tmp4, <4 x float>* null
+        ret void
+}
+
diff --git a/test/CodeGen/X86/vec_shuffle-6.ll b/test/CodeGen/X86/vec_shuffle-6.ll
new file mode 100644
index 0000000..f034b0a
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-6.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
+; RUN: grep movapd %t | count 1
+; RUN: grep movaps %t | count 1
+; RUN: grep movups %t | count 2
+
+target triple = "i686-apple-darwin"
+@x = global [4 x i32] [ i32 1, i32 2, i32 3, i32 4 ]		; <[4 x i32]*> [#uses=4]
+
+define <2 x i64> @test1() {
+	%tmp = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 0)		; <i32> [#uses=1]
+	%tmp3 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 1)		; <i32> [#uses=1]
+	%tmp5 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 2)		; <i32> [#uses=1]
+	%tmp7 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 3)		; <i32> [#uses=1]
+	%tmp.upgrd.1 = insertelement <4 x i32> undef, i32 %tmp, i32 0		; <<4 x i32>> [#uses=1]
+	%tmp13 = insertelement <4 x i32> %tmp.upgrd.1, i32 %tmp3, i32 1		; <<4 x i32>> [#uses=1]
+	%tmp14 = insertelement <4 x i32> %tmp13, i32 %tmp5, i32 2		; <<4 x i32>> [#uses=1]
+	%tmp15 = insertelement <4 x i32> %tmp14, i32 %tmp7, i32 3		; <<4 x i32>> [#uses=1]
+	%tmp16 = bitcast <4 x i32> %tmp15 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %tmp16
+}
+
+define <4 x float> @test2(i32 %dummy, float %a, float %b, float %c, float %d) {
+	%tmp = insertelement <4 x float> undef, float %a, i32 0		; <<4 x float>> [#uses=1]
+	%tmp11 = insertelement <4 x float> %tmp, float %b, i32 1		; <<4 x float>> [#uses=1]
+	%tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2		; <<4 x float>> [#uses=1]
+	%tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3		; <<4 x float>> [#uses=1]
+	ret <4 x float> %tmp13
+}
+
+define <4 x float> @test3(float %a, float %b, float %c, float %d) {
+	%tmp = insertelement <4 x float> undef, float %a, i32 0		; <<4 x float>> [#uses=1]
+	%tmp11 = insertelement <4 x float> %tmp, float %b, i32 1		; <<4 x float>> [#uses=1]
+	%tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2		; <<4 x float>> [#uses=1]
+	%tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3		; <<4 x float>> [#uses=1]
+	ret <4 x float> %tmp13
+}
+
+define <2 x double> @test4(double %a, double %b) {
+	%tmp = insertelement <2 x double> undef, double %a, i32 0		; <<2 x double>> [#uses=1]
+	%tmp7 = insertelement <2 x double> %tmp, double %b, i32 1		; <<2 x double>> [#uses=1]
+	ret <2 x double> %tmp7
+}
diff --git a/test/CodeGen/X86/vec_shuffle-7.ll b/test/CodeGen/X86/vec_shuffle-7.ll
new file mode 100644
index 0000000..4cdca09
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-7.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
+; RUN: grep xorps %t | count 1
+; RUN: not grep shufps %t
+
+define void @test() {
+        bitcast <4 x i32> zeroinitializer to <4 x float>                ; <<4 x float>>:1 [#uses=1]
+        shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer         ; <<4 x float>>:2 [#uses=1]
+        store <4 x float> %2, <4 x float>* null
+        unreachable
+}
+
diff --git a/test/CodeGen/X86/vec_shuffle-8.ll b/test/CodeGen/X86/vec_shuffle-8.ll
new file mode 100644
index 0000000..964ce7b
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-8.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | \
+; RUN:   not grep shufps
+
+define void @test(<4 x float>* %res, <4 x float>* %A) {
+        %tmp1 = load <4 x float>* %A            ; <<4 x float>> [#uses=1]
+        %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >          ; <<4 x float>> [#uses=1]
+        store <4 x float> %tmp2, <4 x float>* %res
+        ret void
+}
+
diff --git a/test/CodeGen/X86/vec_shuffle-9.ll b/test/CodeGen/X86/vec_shuffle-9.ll
new file mode 100644
index 0000000..fc16a26
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-9.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+
+define <4 x i32> @test(i8** %ptr) {
+; CHECK: xorps
+; CHECK: punpcklbw
+; CHECK: punpcklwd
+
+	%tmp = load i8** %ptr		; <i8*> [#uses=1]
+	%tmp.upgrd.1 = bitcast i8* %tmp to float*		; <float*> [#uses=1]
+	%tmp.upgrd.2 = load float* %tmp.upgrd.1		; <float> [#uses=1]
+	%tmp.upgrd.3 = insertelement <4 x float> undef, float %tmp.upgrd.2, i32 0		; <<4 x float>> [#uses=1]
+	%tmp9 = insertelement <4 x float> %tmp.upgrd.3, float 0.000000e+00, i32 1		; <<4 x float>> [#uses=1]
+	%tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 2		; <<4 x float>> [#uses=1]
+	%tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 3		; <<4 x float>> [#uses=1]
+	%tmp21 = bitcast <4 x float> %tmp11 to <16 x i8>		; <<16 x i8>> [#uses=1]
+	%tmp22 = shufflevector <16 x i8> %tmp21, <16 x i8> zeroinitializer, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 >		; <<16 x i8>> [#uses=1]
+	%tmp31 = bitcast <16 x i8> %tmp22 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp.upgrd.4 = shufflevector <8 x i16> zeroinitializer, <8 x i16> %tmp31, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 >		; <<8 x i16>> [#uses=1]
+	%tmp36 = bitcast <8 x i16> %tmp.upgrd.4 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	ret <4 x i32> %tmp36
+}
diff --git a/test/CodeGen/X86/vec_shuffle.ll b/test/CodeGen/X86/vec_shuffle.ll
new file mode 100644
index 0000000..c05b79a
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
+; RUN: grep shufp   %t | count 1
+; RUN: grep movupd  %t | count 1
+; RUN: grep pshufhw %t | count 1
+
+define void @test_v4sf(<4 x float>* %P, float %X, float %Y) nounwind {
+	%tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0		; <<4 x float>> [#uses=1]
+	%tmp2 = insertelement <4 x float> %tmp, float %X, i32 1		; <<4 x float>> [#uses=1]
+	%tmp4 = insertelement <4 x float> %tmp2, float %Y, i32 2		; <<4 x float>> [#uses=1]
+	%tmp6 = insertelement <4 x float> %tmp4, float %Y, i32 3		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp6, <4 x float>* %P
+	ret void
+}
+
+define void @test_v2sd(<2 x double>* %P, double %X, double %Y) nounwind {
+	%tmp = insertelement <2 x double> zeroinitializer, double %X, i32 0		; <<2 x double>> [#uses=1]
+	%tmp2 = insertelement <2 x double> %tmp, double %Y, i32 1		; <<2 x double>> [#uses=1]
+	store <2 x double> %tmp2, <2 x double>* %P
+	ret void
+}
+
+define void @test_v8i16(<2 x i64>* %res, <2 x i64>* %A) nounwind {
+	%tmp = load <2 x i64>* %A		; <<2 x i64>> [#uses=1]
+	%tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16>		; <<8 x i16>> [#uses=8]
+	%tmp.upgrd.2 = extractelement <8 x i16> %tmp.upgrd.1, i32 0		; <i16> [#uses=1]
+	%tmp1 = extractelement <8 x i16> %tmp.upgrd.1, i32 1		; <i16> [#uses=1]
+	%tmp2 = extractelement <8 x i16> %tmp.upgrd.1, i32 2		; <i16> [#uses=1]
+	%tmp3 = extractelement <8 x i16> %tmp.upgrd.1, i32 3		; <i16> [#uses=1]
+	%tmp4 = extractelement <8 x i16> %tmp.upgrd.1, i32 6		; <i16> [#uses=1]
+	%tmp5 = extractelement <8 x i16> %tmp.upgrd.1, i32 5		; <i16> [#uses=1]
+	%tmp6 = extractelement <8 x i16> %tmp.upgrd.1, i32 4		; <i16> [#uses=1]
+	%tmp7 = extractelement <8 x i16> %tmp.upgrd.1, i32 7		; <i16> [#uses=1]
+	%tmp8 = insertelement <8 x i16> undef, i16 %tmp.upgrd.2, i32 0		; <<8 x i16>> [#uses=1]
+	%tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 1		; <<8 x i16>> [#uses=1]
+	%tmp10 = insertelement <8 x i16> %tmp9, i16 %tmp2, i32 2		; <<8 x i16>> [#uses=1]
+	%tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 3		; <<8 x i16>> [#uses=1]
+	%tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 4		; <<8 x i16>> [#uses=1]
+	%tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 5		; <<8 x i16>> [#uses=1]
+	%tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 6		; <<8 x i16>> [#uses=1]
+	%tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 7		; <<8 x i16>> [#uses=1]
+	%tmp15.upgrd.3 = bitcast <8 x i16> %tmp15 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	store <2 x i64> %tmp15.upgrd.3, <2 x i64>* %res
+	ret void
+}
diff --git a/test/CodeGen/X86/vec_splat-2.ll b/test/CodeGen/X86/vec_splat-2.ll
new file mode 100644
index 0000000..cde5ae9
--- /dev/null
+++ b/test/CodeGen/X86/vec_splat-2.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pshufd | count 1
+
+define void @test(<2 x i64>* %P, i8 %x) nounwind {
+	%tmp = insertelement <16 x i8> zeroinitializer, i8 %x, i32 0		; <<16 x i8>> [#uses=1]
+	%tmp36 = insertelement <16 x i8> %tmp, i8 %x, i32 1		; <<16 x i8>> [#uses=1]
+	%tmp38 = insertelement <16 x i8> %tmp36, i8 %x, i32 2		; <<16 x i8>> [#uses=1]
+	%tmp40 = insertelement <16 x i8> %tmp38, i8 %x, i32 3		; <<16 x i8>> [#uses=1]
+	%tmp42 = insertelement <16 x i8> %tmp40, i8 %x, i32 4		; <<16 x i8>> [#uses=1]
+	%tmp44 = insertelement <16 x i8> %tmp42, i8 %x, i32 5		; <<16 x i8>> [#uses=1]
+	%tmp46 = insertelement <16 x i8> %tmp44, i8 %x, i32 6		; <<16 x i8>> [#uses=1]
+	%tmp48 = insertelement <16 x i8> %tmp46, i8 %x, i32 7		; <<16 x i8>> [#uses=1]
+	%tmp50 = insertelement <16 x i8> %tmp48, i8 %x, i32 8		; <<16 x i8>> [#uses=1]
+	%tmp52 = insertelement <16 x i8> %tmp50, i8 %x, i32 9		; <<16 x i8>> [#uses=1]
+	%tmp54 = insertelement <16 x i8> %tmp52, i8 %x, i32 10		; <<16 x i8>> [#uses=1]
+	%tmp56 = insertelement <16 x i8> %tmp54, i8 %x, i32 11		; <<16 x i8>> [#uses=1]
+	%tmp58 = insertelement <16 x i8> %tmp56, i8 %x, i32 12		; <<16 x i8>> [#uses=1]
+	%tmp60 = insertelement <16 x i8> %tmp58, i8 %x, i32 13		; <<16 x i8>> [#uses=1]
+	%tmp62 = insertelement <16 x i8> %tmp60, i8 %x, i32 14		; <<16 x i8>> [#uses=1]
+	%tmp64 = insertelement <16 x i8> %tmp62, i8 %x, i32 15		; <<16 x i8>> [#uses=1]
+	%tmp68 = load <2 x i64>* %P		; <<2 x i64>> [#uses=1]
+	%tmp71 = bitcast <2 x i64> %tmp68 to <16 x i8>		; <<16 x i8>> [#uses=1]
+	%tmp73 = add <16 x i8> %tmp71, %tmp64		; <<16 x i8>> [#uses=1]
+	%tmp73.upgrd.1 = bitcast <16 x i8> %tmp73 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	store <2 x i64> %tmp73.upgrd.1, <2 x i64>* %P
+	ret void
+}
diff --git a/test/CodeGen/X86/vec_splat-3.ll b/test/CodeGen/X86/vec_splat-3.ll
new file mode 100644
index 0000000..649b85c
--- /dev/null
+++ b/test/CodeGen/X86/vec_splat-3.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
+; RUN: grep punpcklwd %t | count 4
+; RUN: grep punpckhwd %t | count 4
+; RUN: grep "pshufd" %t | count 8
+
+; Splat test for v8i16
+; Should generate with pshufd with masks $0, $85, $170, $255 (each mask is used twice)
+define <8 x i16> @shuf_8i16_0(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef , i32 undef >
+	ret <8 x i16> %tmp6
+}
+
+define <8 x i16> @shuf_8i16_1(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
+	ret <8 x i16> %tmp6
+}
+
+define <8 x i16> @shuf_8i16_2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 undef , i32 undef >
+	ret <8 x i16> %tmp6
+}
+
+define <8 x i16> @shuf_8i16_3(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 3, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
+	ret <8 x i16> %tmp6
+}
+
+define <8 x i16> @shuf_8i16_4(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 4, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef , i32 undef >
+	ret <8 x i16> %tmp6
+}
+
+define <8 x i16> @shuf_8i16_5(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 5, i32 undef, i32 undef, i32 5, i32 undef, i32 undef, i32 undef , i32 undef >
+	ret <8 x i16> %tmp6
+}
+
+define <8 x i16> @shuf_8i16_6(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 6, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
+	ret <8 x i16> %tmp6
+}
+
+
+define <8 x i16> @shuf_8i16_7(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 7, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef , i32 undef >
+	ret <8 x i16> %tmp6
+}
diff --git a/test/CodeGen/X86/vec_splat-4.ll b/test/CodeGen/X86/vec_splat-4.ll
new file mode 100644
index 0000000..d9941e6
--- /dev/null
+++ b/test/CodeGen/X86/vec_splat-4.ll
@@ -0,0 +1,104 @@
+; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
+; RUN: grep punpcklbw %t | count 16
+; RUN: grep punpckhbw %t | count 16
+; RUN: grep "pshufd" %t | count 16
+
+; Should generate with pshufd with masks $0, $85, $170, $255 (each mask is used 4 times)
+
+; Splat test for v16i8
+define <16 x i8 > @shuf_16i8_0(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
+entry:
+	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 0, i32 0 , i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 >
+	ret <16 x i8 > %tmp6
+}
+
+define <16 x i8 > @shuf_16i8_1(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
+entry:
+	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef  >
+	ret <16 x i8 > %tmp6
+}
+
+define <16 x i8 > @shuf_16i8_2(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
+entry:
+	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 2 , i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2 >
+	ret <16 x i8 > %tmp6
+}
+
+define <16 x i8 > @shuf_16i8_3(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
+entry:
+	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 3, i32 undef, i32 undef, i32 3, i32 undef, i32 3, i32 3 , i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3 >
+	ret <16 x i8 > %tmp6
+}
+
+
+define <16 x i8 > @shuf_16i8_4(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
+entry:
+	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 4, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef , i32 undef  >
+	ret <16 x i8 > %tmp6
+}
+
+define <16 x i8 > @shuf_16i8_5(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
+entry:
+	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 5, i32 undef, i32 undef, i32 5, i32 undef, i32 5, i32 5 , i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5 >
+	ret <16 x i8 > %tmp6
+}
+
+define <16 x i8 > @shuf_16i8_6(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
+entry:
+	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 6, i32 undef, i32 undef, i32 6, i32 undef, i32 6, i32 6 , i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6 >
+	ret <16 x i8 > %tmp6
+}
+
+define <16 x i8 > @shuf_16i8_7(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
+entry:
+	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 7, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef , i32 undef , i32 undef, i32 undef, i32 undef , i32 undef  >
+	ret <16 x i8 > %tmp6
+}
+
+define <16 x i8 > @shuf_16i8_8(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
+entry:
+	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 8, i32 undef, i32 undef, i32 8, i32 undef, i32 8, i32 8 , i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8 >
+	ret <16 x i8 > %tmp6
+}
+
+define <16 x i8 > @shuf_16i8_9(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
+entry:
+	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 9, i32 undef, i32 undef, i32 9, i32 undef, i32 9, i32 9 , i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9 >
+	ret <16 x i8 > %tmp6
+}
+
+define <16 x i8 > @shuf_16i8_10(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
+entry:
+	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 10, i32 undef, i32 undef, i32 10, i32 undef, i32 10, i32 10 , i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10 >
+	ret <16 x i8 > %tmp6
+}
+
+define <16 x i8 > @shuf_16i8_11(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
+entry:
+	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 11, i32 undef, i32 undef, i32 11, i32 undef, i32 11, i32 11 , i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11 >
+	ret <16 x i8 > %tmp6
+}
+
+define <16 x i8 > @shuf_16i8_12(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
+entry:
+	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 12, i32 undef, i32 undef, i32 12, i32 undef, i32 12, i32 12 , i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12 >
+	ret <16 x i8 > %tmp6
+}
+
+define <16 x i8 > @shuf_16i8_13(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
+entry:
+	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 13, i32 undef, i32 undef, i32 13, i32 undef, i32 13, i32 13 , i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13 >
+	ret <16 x i8 > %tmp6
+}
+
+define <16 x i8 > @shuf_16i8_14(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
+entry:
+	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 14, i32 undef, i32 undef, i32 14, i32 undef, i32 14, i32 14 , i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14 >
+	ret <16 x i8 > %tmp6
+}
+
+define <16 x i8 > @shuf_16i8_15(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
+entry:
+	%tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 15, i32 undef, i32 undef, i32 15, i32 undef, i32 15, i32 15 , i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15 >
+	ret <16 x i8 > %tmp6
+}
diff --git a/test/CodeGen/X86/vec_splat.ll b/test/CodeGen/X86/vec_splat.ll
new file mode 100644
index 0000000..a87fbd0
--- /dev/null
+++ b/test/CodeGen/X86/vec_splat.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pshufd
+; RUN: llc < %s -march=x86 -mattr=+sse3 | grep movddup
+
+define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X) nounwind {
+	%tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0		; <<4 x float>> [#uses=1]
+	%tmp2 = insertelement <4 x float> %tmp, float %X, i32 1		; <<4 x float>> [#uses=1]
+	%tmp4 = insertelement <4 x float> %tmp2, float %X, i32 2		; <<4 x float>> [#uses=1]
+	%tmp6 = insertelement <4 x float> %tmp4, float %X, i32 3		; <<4 x float>> [#uses=1]
+	%tmp8 = load <4 x float>* %Q		; <<4 x float>> [#uses=1]
+	%tmp10 = fmul <4 x float> %tmp8, %tmp6		; <<4 x float>> [#uses=1]
+	store <4 x float> %tmp10, <4 x float>* %P
+	ret void
+}
+
+define void @test_v2sd(<2 x double>* %P, <2 x double>* %Q, double %X) nounwind {
+	%tmp = insertelement <2 x double> zeroinitializer, double %X, i32 0		; <<2 x double>> [#uses=1]
+	%tmp2 = insertelement <2 x double> %tmp, double %X, i32 1		; <<2 x double>> [#uses=1]
+	%tmp4 = load <2 x double>* %Q		; <<2 x double>> [#uses=1]
+	%tmp6 = fmul <2 x double> %tmp4, %tmp2		; <<2 x double>> [#uses=1]
+	store <2 x double> %tmp6, <2 x double>* %P
+	ret void
+}
diff --git a/test/CodeGen/X86/vec_ss_load_fold.ll b/test/CodeGen/X86/vec_ss_load_fold.ll
new file mode 100644
index 0000000..b1613fb
--- /dev/null
+++ b/test/CodeGen/X86/vec_ss_load_fold.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -march=x86 -mattr=+sse,+sse2 -o %t
+; RUN: grep minss %t | grep CPI | count 2
+; RUN: grep CPI   %t | not grep movss
+
+target datalayout = "e-p:32:32"
+target triple = "i686-apple-darwin8.7.2"
+
+define i16 @test1(float %f) nounwind {
+	%tmp = insertelement <4 x float> undef, float %f, i32 0		; <<4 x float>> [#uses=1]
+	%tmp10 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 1		; <<4 x float>> [#uses=1]
+	%tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2		; <<4 x float>> [#uses=1]
+	%tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, i32 3		; <<4 x float>> [#uses=1]
+	%tmp28 = tail call <4 x float> @llvm.x86.sse.sub.ss( <4 x float> %tmp12, <4 x float> < float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )		; <<4 x float>> [#uses=1]
+	%tmp37 = tail call <4 x float> @llvm.x86.sse.mul.ss( <4 x float> %tmp28, <4 x float> < float 5.000000e-01, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )		; <<4 x float>> [#uses=1]
+	%tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp37, <4 x float> < float 6.553500e+04, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )		; <<4 x float>> [#uses=1]
+	%tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> zeroinitializer )		; <<4 x float>> [#uses=1]
+	%tmp.upgrd.1 = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 )		; <i32> [#uses=1]
+	%tmp69 = trunc i32 %tmp.upgrd.1 to i16		; <i16> [#uses=1]
+	ret i16 %tmp69
+}
+
+define i16 @test2(float %f) nounwind {
+	%tmp28 = fsub float %f, 1.000000e+00		; <float> [#uses=1]
+	%tmp37 = fmul float %tmp28, 5.000000e-01		; <float> [#uses=1]
+	%tmp375 = insertelement <4 x float> undef, float %tmp37, i32 0		; <<4 x float>> [#uses=1]
+	%tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp375, <4 x float> < float 6.553500e+04, float undef, float undef, float undef > )		; <<4 x float>> [#uses=1]
+	%tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> < float 0.000000e+00, float undef, float undef, float undef > )		; <<4 x float>> [#uses=1]
+	%tmp = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 )		; <i32> [#uses=1]
+	%tmp69 = trunc i32 %tmp to i16		; <i16> [#uses=1]
+	ret i16 %tmp69
+}
+
+declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
+
+declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)
+
+declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
+
+declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
+
+declare i32 @llvm.x86.sse.cvttss2si(<4 x float>)
diff --git a/test/CodeGen/X86/vec_zero-2.ll b/test/CodeGen/X86/vec_zero-2.ll
new file mode 100644
index 0000000..cdb030e
--- /dev/null
+++ b/test/CodeGen/X86/vec_zero-2.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2
+
+define i32 @t() {
+entry:
+	br i1 true, label %bb4743, label %bb1656
+bb1656:		; preds = %entry
+	ret i32 0
+bb1664:		; preds = %entry
+	br i1 false, label %bb5310, label %bb4743
+bb4743:		; preds = %bb1664
+	%tmp5256 = bitcast <2 x i64> zeroinitializer to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp5257 = sub <8 x i16> %tmp5256, zeroinitializer		; <<8 x i16>> [#uses=1]
+	%tmp5258 = bitcast <8 x i16> %tmp5257 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%tmp5265 = bitcast <2 x i64> %tmp5258 to <8 x i16>		; <<8 x i16>> [#uses=1]
+	%tmp5266 = call <16 x i8> @llvm.x86.sse2.packuswb.128( <8 x i16> %tmp5265, <8 x i16> zeroinitializer ) nounwind readnone 		; <<8 x i16>> [#uses=1]
+	%tmp5267 = bitcast <16 x i8> %tmp5266 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	%tmp5294 = and <2 x i64> zeroinitializer, %tmp5267		; <<2 x i64>> [#uses=1]
+	br label %bb5310
+bb5310:		; preds = %bb4743, %bb1664
+	%tmp5294.pn = phi <2 x i64> [ %tmp5294, %bb4743 ], [ zeroinitializer, %bb1664 ]		; <<2 x i64>> [#uses=0]
+	ret i32 0
+}
+
+declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
diff --git a/test/CodeGen/X86/vec_zero.ll b/test/CodeGen/X86/vec_zero.ll
new file mode 100644
index 0000000..ae5af58
--- /dev/null
+++ b/test/CodeGen/X86/vec_zero.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep xorps | count 2
+
+define void @foo(<4 x float>* %P) {
+        %T = load <4 x float>* %P               ; <<4 x float>> [#uses=1]
+        %S = fadd <4 x float> zeroinitializer, %T                ; <<4 x float>> [#uses=1]
+        store <4 x float> %S, <4 x float>* %P
+        ret void
+}
+
+define void @bar(<4 x i32>* %P) {
+        %T = load <4 x i32>* %P         ; <<4 x i32>> [#uses=1]
+        %S = add <4 x i32> zeroinitializer, %T          ; <<4 x i32>> [#uses=1]
+        store <4 x i32> %S, <4 x i32>* %P
+        ret void
+}
+
diff --git a/test/CodeGen/X86/vec_zero_cse.ll b/test/CodeGen/X86/vec_zero_cse.ll
new file mode 100644
index 0000000..296378c
--- /dev/null
+++ b/test/CodeGen/X86/vec_zero_cse.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 1
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep xorps | count 1
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pcmpeqd | count 2
+
+@M1 = external global <1 x i64>
+@M2 = external global <2 x i32>
+
+@S1 = external global <2 x i64>
+@S2 = external global <4 x i32>
+
+define void @test() {
+  store <1 x i64> zeroinitializer, <1 x i64>* @M1
+  store <2 x i32> zeroinitializer, <2 x i32>* @M2
+  ret void
+}
+
+define void @test2() {
+  store <1 x i64> < i64 -1 >, <1 x i64>* @M1
+  store <2 x i32> < i32 -1, i32 -1 >, <2 x i32>* @M2
+  ret void
+}
+
+define void @test3() {
+  store <2 x i64> zeroinitializer, <2 x i64>* @S1
+  store <4 x i32> zeroinitializer, <4 x i32>* @S2
+  ret void
+}
+
+define void @test4() {
+  store <2 x i64> < i64 -1, i64 -1>, <2 x i64>* @S1
+  store <4 x i32> < i32 -1, i32 -1, i32 -1, i32 -1 >, <4 x i32>* @S2
+  ret void
+}
+
+
diff --git a/test/CodeGen/X86/vector-intrinsics.ll b/test/CodeGen/X86/vector-intrinsics.ll
new file mode 100644
index 0000000..edf58b9
--- /dev/null
+++ b/test/CodeGen/X86/vector-intrinsics.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=x86-64 | grep call | count 16
+
+declare <4 x double> @llvm.sin.v4f64(<4 x double> %p)
+declare <4 x double> @llvm.cos.v4f64(<4 x double> %p)
+declare <4 x double> @llvm.pow.v4f64(<4 x double> %p, <4 x double> %q)
+declare <4 x double> @llvm.powi.v4f64(<4 x double> %p, i32)
+
+define <4 x double> @foo(<4 x double> %p)
+{
+  %t = call <4 x double> @llvm.sin.v4f64(<4 x double> %p)
+  ret <4 x double> %t
+}
+define <4 x double> @goo(<4 x double> %p)
+{
+  %t = call <4 x double> @llvm.cos.v4f64(<4 x double> %p)
+  ret <4 x double> %t
+}
+define <4 x double> @moo(<4 x double> %p, <4 x double> %q)
+{
+  %t = call <4 x double> @llvm.pow.v4f64(<4 x double> %p, <4 x double> %q)
+  ret <4 x double> %t
+}
+define <4 x double> @zoo(<4 x double> %p, i32 %q)
+{
+  %t = call <4 x double> @llvm.powi.v4f64(<4 x double> %p, i32 %q)
+  ret <4 x double> %t
+}
diff --git a/test/CodeGen/X86/vector-rem.ll b/test/CodeGen/X86/vector-rem.ll
new file mode 100644
index 0000000..51cd872
--- /dev/null
+++ b/test/CodeGen/X86/vector-rem.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86-64 | grep div | count 8
+; RUN: llc < %s -march=x86-64 | grep fmodf | count 4
+
+define <4 x i32> @foo(<4 x i32> %t, <4 x i32> %u) {
+	%m = srem <4 x i32> %t, %u
+	ret <4 x i32> %m
+}
+define <4 x i32> @bar(<4 x i32> %t, <4 x i32> %u) {
+	%m = urem <4 x i32> %t, %u
+	ret <4 x i32> %m
+}
+define <4 x float> @qux(<4 x float> %t, <4 x float> %u) {
+	%m = frem <4 x float> %t, %u
+	ret <4 x float> %m
+}
diff --git a/test/CodeGen/X86/vector-variable-idx.ll b/test/CodeGen/X86/vector-variable-idx.ll
new file mode 100644
index 0000000..2a4d18c
--- /dev/null
+++ b/test/CodeGen/X86/vector-variable-idx.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86-64 | grep movss | count 2
+; PR2676
+
+define float @foo(<4 x float> %p, i32 %t) {
+  %z = extractelement <4 x float> %p, i32 %t
+  ret float %z
+}
+define <4 x float> @bar(<4 x float> %p, float %f, i32 %t) {
+  %z = insertelement <4 x float> %p, float %f, i32 %t
+  ret <4 x float> %z
+}
diff --git a/test/CodeGen/X86/vector.ll b/test/CodeGen/X86/vector.ll
new file mode 100644
index 0000000..3fff849
--- /dev/null
+++ b/test/CodeGen/X86/vector.ll
@@ -0,0 +1,156 @@
+; Test that vectors are scalarized/lowered correctly.
+; RUN: llc < %s -march=x86 -mcpu=i386 > %t
+; RUN: llc < %s -march=x86 -mcpu=yonah > %t
+
+%d8 = type <8 x double>
+%f1 = type <1 x float>
+%f2 = type <2 x float>
+%f4 = type <4 x float>
+%f8 = type <8 x float>
+%i4 = type <4 x i32>
+
+
+;;; TEST HANDLING OF VARIOUS VECTOR SIZES
+
+define void @test_f1(%f1* %P, %f1* %Q, %f1* %S) {
+        %p = load %f1* %P               ; <%f1> [#uses=1]
+        %q = load %f1* %Q               ; <%f1> [#uses=1]
+        %R = fadd %f1 %p, %q             ; <%f1> [#uses=1]
+        store %f1 %R, %f1* %S
+        ret void
+}
+
+define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
+        %p = load %f2* %P               ; <%f2> [#uses=1]
+        %q = load %f2* %Q               ; <%f2> [#uses=1]
+        %R = fadd %f2 %p, %q             ; <%f2> [#uses=1]
+        store %f2 %R, %f2* %S
+        ret void
+}
+
+define void @test_f4(%f4* %P, %f4* %Q, %f4* %S) {
+        %p = load %f4* %P               ; <%f4> [#uses=1]
+        %q = load %f4* %Q               ; <%f4> [#uses=1]
+        %R = fadd %f4 %p, %q             ; <%f4> [#uses=1]
+        store %f4 %R, %f4* %S
+        ret void
+}
+
+define void @test_f8(%f8* %P, %f8* %Q, %f8* %S) {
+        %p = load %f8* %P               ; <%f8> [#uses=1]
+        %q = load %f8* %Q               ; <%f8> [#uses=1]
+        %R = fadd %f8 %p, %q             ; <%f8> [#uses=1]
+        store %f8 %R, %f8* %S
+        ret void
+}
+
+define void @test_fmul(%f8* %P, %f8* %Q, %f8* %S) {
+        %p = load %f8* %P               ; <%f8> [#uses=1]
+        %q = load %f8* %Q               ; <%f8> [#uses=1]
+        %R = fmul %f8 %p, %q             ; <%f8> [#uses=1]
+        store %f8 %R, %f8* %S
+        ret void
+}
+
+define void @test_div(%f8* %P, %f8* %Q, %f8* %S) {
+        %p = load %f8* %P               ; <%f8> [#uses=1]
+        %q = load %f8* %Q               ; <%f8> [#uses=1]
+        %R = fdiv %f8 %p, %q            ; <%f8> [#uses=1]
+        store %f8 %R, %f8* %S
+        ret void
+}
+
+;;; TEST VECTOR CONSTRUCTS
+
+define void @test_cst(%f4* %P, %f4* %S) {
+        %p = load %f4* %P               ; <%f4> [#uses=1]
+        %R = fadd %f4 %p, < float 0x3FB99999A0000000, float 1.000000e+00, float 2.000000e+00, float 4.500000e+00 >             ; <%f4> [#uses=1]
+        store %f4 %R, %f4* %S
+        ret void
+}
+
+define void @test_zero(%f4* %P, %f4* %S) {
+        %p = load %f4* %P               ; <%f4> [#uses=1]
+        %R = fadd %f4 %p, zeroinitializer                ; <%f4> [#uses=1]
+        store %f4 %R, %f4* %S
+        ret void
+}
+
+define void @test_undef(%f4* %P, %f4* %S) {
+        %p = load %f4* %P               ; <%f4> [#uses=1]
+        %R = fadd %f4 %p, undef          ; <%f4> [#uses=1]
+        store %f4 %R, %f4* %S
+        ret void
+}
+
+define void @test_constant_insert(%f4* %S) {
+        %R = insertelement %f4 zeroinitializer, float 1.000000e+01, i32 0               ; <%f4> [#uses
+        store %f4 %R, %f4* %S
+        ret void
+}
+
+define void @test_variable_buildvector(float %F, %f4* %S) {
+        %R = insertelement %f4 zeroinitializer, float %F, i32 0         ; <%f4> [#uses=1]
+        store %f4 %R, %f4* %S
+        ret void
+}
+
+define void @test_scalar_to_vector(float %F, %f4* %S) {
+        %R = insertelement %f4 undef, float %F, i32 0           ; <%f4> [#uses=1]
+        store %f4 %R, %f4* %S
+        ret void
+}
+
+define float @test_extract_elt(%f8* %P) {
+        %p = load %f8* %P               ; <%f8> [#uses=1]
+        %R = extractelement %f8 %p, i32 3               ; <float> [#uses=1]
+        ret float %R
+}
+
+define double @test_extract_elt2(%d8* %P) {
+        %p = load %d8* %P               ; <%d8> [#uses=1]
+        %R = extractelement %d8 %p, i32 3               ; <double> [#uses=1]
+        ret double %R
+}
+
+define void @test_cast_1(%f4* %b, %i4* %a) {
+        %tmp = load %f4* %b             ; <%f4> [#uses=1]
+        %tmp2 = fadd %f4 %tmp, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >              ; <%f4> [#uses=1]
+        %tmp3 = bitcast %f4 %tmp2 to %i4                ; <%i4> [#uses=1]
+        %tmp4 = add %i4 %tmp3, < i32 1, i32 2, i32 3, i32 4 >           ; <%i4> [#uses=1]
+        store %i4 %tmp4, %i4* %a
+        ret void
+}
+
+define void @test_cast_2(%f8* %a, <8 x i32>* %b) {
+        %T = load %f8* %a               ; <%f8> [#uses=1]
+        %T2 = bitcast %f8 %T to <8 x i32>               ; <<8 x i32>> [#uses=1]
+        store <8 x i32> %T2, <8 x i32>* %b
+        ret void
+}
+
+
+;;; TEST IMPORTANT IDIOMS
+
+define void @splat(%f4* %P, %f4* %Q, float %X) {
+        %tmp = insertelement %f4 undef, float %X, i32 0         ; <%f4> [#uses=1]
+        %tmp2 = insertelement %f4 %tmp, float %X, i32 1         ; <%f4> [#uses=1]
+        %tmp4 = insertelement %f4 %tmp2, float %X, i32 2                ; <%f4> [#uses=1]
+        %tmp6 = insertelement %f4 %tmp4, float %X, i32 3                ; <%f4> [#uses=1]
+        %q = load %f4* %Q               ; <%f4> [#uses=1]
+        %R = fadd %f4 %q, %tmp6          ; <%f4> [#uses=1]
+        store %f4 %R, %f4* %P
+        ret void
+}
+
+define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) {
+        %tmp = insertelement %i4 undef, i32 %X, i32 0           ; <%i4> [#uses=1]
+        %tmp2 = insertelement %i4 %tmp, i32 %X, i32 1           ; <%i4> [#uses=1]
+        %tmp4 = insertelement %i4 %tmp2, i32 %X, i32 2          ; <%i4> [#uses=1]
+        %tmp6 = insertelement %i4 %tmp4, i32 %X, i32 3          ; <%i4> [#uses=1]
+        %q = load %i4* %Q               ; <%i4> [#uses=1]
+        %R = add %i4 %q, %tmp6          ; <%i4> [#uses=1]
+        store %i4 %R, %i4* %P
+        ret void
+}
+
diff --git a/test/CodeGen/X86/vfcmp.ll b/test/CodeGen/X86/vfcmp.ll
new file mode 100644
index 0000000..f5f5293
--- /dev/null
+++ b/test/CodeGen/X86/vfcmp.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2
+; PR2620
+
+
+define void @t2(i32 %m_task_id, i32 %start_x, i32 %end_x) nounwind {
+	%A = fcmp olt <2 x double> zeroinitializer, zeroinitializer		; <<2 x i64>>:1 [#uses=1]
+        sext <2 x i1> %A to <2 x i64>
+	extractelement <2 x i64> %1, i32 1		; <i64>:2 [#uses=1]
+	lshr i64 %2, 63		; <i64>:3 [#uses=1]
+	trunc i64 %3 to i1		; <i1>:4 [#uses=1]
+	zext i1 %4 to i8		; <i8>:5 [#uses=1]
+	insertelement <2 x i8> zeroinitializer, i8 %5, i32 1		; <<2 x i8>>:6 [#uses=1]
+	store <2 x i8> %6, <2 x i8>* null
+	ret void
+}
diff --git a/test/CodeGen/X86/volatile.ll b/test/CodeGen/X86/volatile.ll
new file mode 100644
index 0000000..5e1e0c8
--- /dev/null
+++ b/test/CodeGen/X86/volatile.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86 -mattr=sse2 | grep movsd | count 5
+; RUN: llc < %s -march=x86 -mattr=sse2 -O0 | grep movsd | count 5
+
+@x = external global double
+
+define void @foo() nounwind  {
+  %a = volatile load double* @x
+  volatile store double 0.0, double* @x
+  volatile store double 0.0, double* @x
+  %b = volatile load double* @x
+  ret void
+}
+
+define void @bar() nounwind  {
+  %c = volatile load double* @x
+  ret void
+}
diff --git a/test/CodeGen/X86/vortex-bug.ll b/test/CodeGen/X86/vortex-bug.ll
new file mode 100644
index 0000000..40f1117
--- /dev/null
+++ b/test/CodeGen/X86/vortex-bug.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=x86-64
+
+	%struct.blktkntype = type { i32, i32 }
+	%struct.fieldstruc = type { [128 x i8], %struct.blktkntype*, i32, i32 }
+
+define fastcc i32 @Env_GetFieldStruc(i8* %FieldName, i32* %Status, %struct.fieldstruc* %FieldStruc) nounwind  {
+entry:
+	br label %bb137.i
+
+bb137.i:		; preds = %bb137.i, %entry
+	%FieldName_addr.0209.rec.i = phi i64 [ %tmp139.rec.i, %bb137.i ], [ 0, %entry ]		; <i64> [#uses=1]
+	%tmp147213.i = phi i32 [ %tmp147.i, %bb137.i ], [ 1, %entry ]		; <i32> [#uses=2]
+	%tmp139.rec.i = add i64 %FieldName_addr.0209.rec.i, 1		; <i64> [#uses=2]
+	%tmp141142.i = sext i32 %tmp147213.i to i64		; <i64> [#uses=0]
+	%tmp147.i = add i32 %tmp147213.i, 1		; <i32> [#uses=1]
+	br i1 false, label %bb137.i, label %bb149.i.loopexit
+
+bb149.i.loopexit:		; preds = %bb137.i
+	%tmp139.i = getelementptr i8* %FieldName, i64 %tmp139.rec.i		; <i8*> [#uses=0]
+	unreachable
+}
diff --git a/test/CodeGen/X86/vshift-1.ll b/test/CodeGen/X86/vshift-1.ll
new file mode 100644
index 0000000..ae845e0
--- /dev/null
+++ b/test/CodeGen/X86/vshift-1.ll
@@ -0,0 +1,79 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
+
+; test vector shifts converted to proper SSE2 vector shifts when the shift
+; amounts are the same.
+
+define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind {
+entry:
+; CHECK: shift1a:
+; CHECK: psllq
+  %shl = shl <2 x i64> %val, < i64 32, i64 32 >
+  store <2 x i64> %shl, <2 x i64>* %dst
+  ret void
+}
+
+define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
+entry:
+; CHECK: shift1b:
+; CHECK: movd
+; CHECK-NEXT: psllq
+  %0 = insertelement <2 x i64> undef, i64 %amt, i32 0
+  %1 = insertelement <2 x i64> %0, i64 %amt, i32 1
+  %shl = shl <2 x i64> %val, %1
+  store <2 x i64> %shl, <2 x i64>* %dst
+  ret void
+}
+
+
+define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind {
+entry:
+; CHECK: shift2a:
+; CHECK: pslld
+  %shl = shl <4 x i32> %val, < i32 5, i32 5, i32 5, i32 5 >
+  store <4 x i32> %shl, <4 x i32>* %dst
+  ret void
+}
+
+define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
+entry:
+; CHECK: shift2b:
+; CHECK: movd
+; CHECK-NEXT: pslld
+  %0 = insertelement <4 x i32> undef, i32 %amt, i32 0
+  %1 = insertelement <4 x i32> %0, i32 %amt, i32 1
+  %2 = insertelement <4 x i32> %1, i32 %amt, i32 2
+  %3 = insertelement <4 x i32> %2, i32 %amt, i32 3
+  %shl = shl <4 x i32> %val, %3
+  store <4 x i32> %shl, <4 x i32>* %dst
+  ret void
+}
+
+define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind {
+entry:
+; CHECK: shift3a:
+; CHECK: psllw
+  %shl = shl <8 x i16> %val, < i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5 >
+  store <8 x i16> %shl, <8 x i16>* %dst
+  ret void
+}
+
+; Make sure the shift amount is properly zero extended.
+define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
+entry:
+; CHECK: shift3b:
+; CHECK: movzwl
+; CHECK: movd
+; CHECK-NEXT: psllw
+  %0 = insertelement <8 x i16> undef, i16 %amt, i32 0
+  %1 = insertelement <8 x i16> %0, i16 %amt, i32 1
+  %2 = insertelement <8 x i16> %0, i16 %amt, i32 2
+  %3 = insertelement <8 x i16> %0, i16 %amt, i32 3
+  %4 = insertelement <8 x i16> %0, i16 %amt, i32 4
+  %5 = insertelement <8 x i16> %0, i16 %amt, i32 5
+  %6 = insertelement <8 x i16> %0, i16 %amt, i32 6
+  %7 = insertelement <8 x i16> %0, i16 %amt, i32 7
+  %shl = shl <8 x i16> %val, %7
+  store <8 x i16> %shl, <8 x i16>* %dst
+  ret void
+}
+
diff --git a/test/CodeGen/X86/vshift-2.ll b/test/CodeGen/X86/vshift-2.ll
new file mode 100644
index 0000000..36feb11
--- /dev/null
+++ b/test/CodeGen/X86/vshift-2.ll
@@ -0,0 +1,78 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
+
+; test vector shifts converted to proper SSE2 vector shifts when the shift
+; amounts are the same.
+
+define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind {
+entry:
+; CHECK: shift1a:
+; CHECK: psrlq
+  %lshr = lshr <2 x i64> %val, < i64 32, i64 32 >
+  store <2 x i64> %lshr, <2 x i64>* %dst
+  ret void
+}
+
+define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
+entry:
+; CHECK: shift1b:
+; CHECK: movd
+; CHECK-NEXT: psrlq
+  %0 = insertelement <2 x i64> undef, i64 %amt, i32 0
+  %1 = insertelement <2 x i64> %0, i64 %amt, i32 1
+  %lshr = lshr <2 x i64> %val, %1
+  store <2 x i64> %lshr, <2 x i64>* %dst
+  ret void
+}
+
+define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind {
+entry:
+; CHECK: shift2a:
+; CHECK: psrld
+  %lshr = lshr <4 x i32> %val, < i32 17, i32 17, i32 17, i32 17 >
+  store <4 x i32> %lshr, <4 x i32>* %dst
+  ret void
+}
+
+define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
+entry:
+; CHECK: shift2b:
+; CHECK: movd
+; CHECK-NEXT: psrld
+  %0 = insertelement <4 x i32> undef, i32 %amt, i32 0
+  %1 = insertelement <4 x i32> %0, i32 %amt, i32 1
+  %2 = insertelement <4 x i32> %1, i32 %amt, i32 2
+  %3 = insertelement <4 x i32> %2, i32 %amt, i32 3
+  %lshr = lshr <4 x i32> %val, %3
+  store <4 x i32> %lshr, <4 x i32>* %dst
+  ret void
+}
+
+
+define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind {
+entry:
+; CHECK: shift3a:
+; CHECK: psrlw
+  %lshr = lshr <8 x i16> %val, < i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5 >
+  store <8 x i16> %lshr, <8 x i16>* %dst
+  ret void
+}
+
+; properly zero extend the shift amount
+define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
+entry:
+; CHECK: shift3b:
+; CHECK: movzwl
+; CHECK: movd
+; CHECK-NEXT: psrlw
+  %0 = insertelement <8 x i16> undef, i16 %amt, i32 0
+  %1 = insertelement <8 x i16> %0, i16 %amt, i32 1
+  %2 = insertelement <8 x i16> %0, i16 %amt, i32 2
+  %3 = insertelement <8 x i16> %0, i16 %amt, i32 3
+  %4 = insertelement <8 x i16> %0, i16 %amt, i32 4
+  %5 = insertelement <8 x i16> %0, i16 %amt, i32 5
+  %6 = insertelement <8 x i16> %0, i16 %amt, i32 6
+  %7 = insertelement <8 x i16> %0, i16 %amt, i32 7
+  %lshr = lshr <8 x i16> %val, %7
+  store <8 x i16> %lshr, <8 x i16>* %dst
+  ret void
+}
diff --git a/test/CodeGen/X86/vshift-3.ll b/test/CodeGen/X86/vshift-3.ll
new file mode 100644
index 0000000..20d3f48
--- /dev/null
+++ b/test/CodeGen/X86/vshift-3.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
+
+; test vector shifts converted to proper SSE2 vector shifts when the shift
+; amounts are the same.
+
+; Note that x86 does have ashr 
+
+; shift1a can't use a packed shift
+define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind {
+entry:
+; CHECK: shift1a:
+; CHECK: sarl
+  %ashr = ashr <2 x i64> %val, < i64 32, i64 32 >
+  store <2 x i64> %ashr, <2 x i64>* %dst
+  ret void
+}
+
+define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind {
+entry:
+; CHECK: shift2a:
+; CHECK: psrad	$5
+  %ashr = ashr <4 x i32> %val, < i32 5, i32 5, i32 5, i32 5 >
+  store <4 x i32> %ashr, <4 x i32>* %dst
+  ret void
+}
+
+define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
+entry:
+; CHECK: shift2b:
+; CHECK: movd
+; CHECK-NEXT: psrad
+  %0 = insertelement <4 x i32> undef, i32 %amt, i32 0
+  %1 = insertelement <4 x i32> %0, i32 %amt, i32 1
+  %2 = insertelement <4 x i32> %1, i32 %amt, i32 2
+  %3 = insertelement <4 x i32> %2, i32 %amt, i32 3
+  %ashr = ashr <4 x i32> %val, %3
+  store <4 x i32> %ashr, <4 x i32>* %dst
+  ret void
+}
+
+define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind {
+entry:
+; CHECK: shift3a:
+; CHECK: psraw	$5
+  %ashr = ashr <8 x i16> %val, < i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5 >
+  store <8 x i16> %ashr, <8 x i16>* %dst
+  ret void
+}
+
+define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
+entry:
+; CHECK: shift3b:
+; CHECK: movzwl
+; CHECK: movd
+; CHECK-NEXT: psraw
+  %0 = insertelement <8 x i16> undef, i16 %amt, i32 0
+  %1 = insertelement <8 x i16> %0, i16 %amt, i32 1
+  %2 = insertelement <8 x i16> %0, i16 %amt, i32 2
+  %3 = insertelement <8 x i16> %0, i16 %amt, i32 3
+  %4 = insertelement <8 x i16> %0, i16 %amt, i32 4
+  %5 = insertelement <8 x i16> %0, i16 %amt, i32 5
+  %6 = insertelement <8 x i16> %0, i16 %amt, i32 6
+  %7 = insertelement <8 x i16> %0, i16 %amt, i32 7
+  %ashr = ashr <8 x i16> %val, %7
+  store <8 x i16> %ashr, <8 x i16>* %dst
+  ret void
+}
diff --git a/test/CodeGen/X86/vshift-4.ll b/test/CodeGen/X86/vshift-4.ll
new file mode 100644
index 0000000..9773cbe
--- /dev/null
+++ b/test/CodeGen/X86/vshift-4.ll
@@ -0,0 +1,85 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
+
+; test vector shifts converted to proper SSE2 vector shifts when the shift
+; amounts are the same when using a shuffle splat.
+
+define void @shift1a(<2 x i64> %val, <2 x i64>* %dst, <2 x i64> %sh) nounwind {
+entry:
+; CHECK: shift1a:
+; CHECK: psllq
+  %shamt = shufflevector <2 x i64> %sh, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %shl = shl <2 x i64> %val, %shamt
+  store <2 x i64> %shl, <2 x i64>* %dst
+  ret void
+}
+
+; shift1b can't use a packed shift
+define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, <2 x i64> %sh) nounwind {
+entry:
+; CHECK: shift1b:
+; CHECK: shll
+  %shamt = shufflevector <2 x i64> %sh, <2 x i64> undef, <2 x i32> <i32 0, i32 1>
+  %shl = shl <2 x i64> %val, %shamt
+  store <2 x i64> %shl, <2 x i64>* %dst
+  ret void
+}
+
+define void @shift2a(<4 x i32> %val, <4 x i32>* %dst, <2 x i32> %amt) nounwind {
+entry:
+; CHECK: shift2a:
+; CHECK: pslld
+  %shamt = shufflevector <2 x i32> %amt, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %shl = shl <4 x i32> %val, %shamt
+  store <4 x i32> %shl, <4 x i32>* %dst
+  ret void
+}
+
+define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, <2 x i32> %amt) nounwind {
+entry:
+; CHECK: shift2b:
+; CHECK: pslld
+  %shamt = shufflevector <2 x i32> %amt, <2 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 1, i32 1>
+  %shl = shl <4 x i32> %val, %shamt
+  store <4 x i32> %shl, <4 x i32>* %dst
+  ret void
+}
+
+define void @shift2c(<4 x i32> %val, <4 x i32>* %dst, <2 x i32> %amt) nounwind {
+entry:
+; CHECK: shift2c:
+; CHECK: pslld
+  %shamt = shufflevector <2 x i32> %amt, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %shl = shl <4 x i32> %val, %shamt
+  store <4 x i32> %shl, <4 x i32>* %dst
+  ret void
+}
+
+define void @shift3a(<8 x i16> %val, <8 x i16>* %dst, <8 x i16> %amt) nounwind {
+entry:
+; CHECK: shift3a:
+; CHECK: movzwl
+; CHECK: psllw
+  %shamt = shufflevector <8 x i16> %amt, <8 x i16> undef, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
+  %shl = shl <8 x i16> %val, %shamt
+  store <8 x i16> %shl, <8 x i16>* %dst
+  ret void
+}
+
+define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
+entry:
+; CHECK: shift3b:
+; CHECK: movzwl
+; CHECK: psllw
+  %0 = insertelement <8 x i16> undef, i16 %amt, i32 0
+  %1 = insertelement <8 x i16> %0, i16 %amt, i32 1
+  %2 = insertelement <8 x i16> %0, i16 %amt, i32 2
+  %3 = insertelement <8 x i16> %0, i16 %amt, i32 3
+  %4 = insertelement <8 x i16> %0, i16 %amt, i32 4
+  %5 = insertelement <8 x i16> %0, i16 %amt, i32 5
+  %6 = insertelement <8 x i16> %0, i16 %amt, i32 6
+  %7 = insertelement <8 x i16> %0, i16 %amt, i32 7
+  %shl = shl <8 x i16> %val, %7
+  store <8 x i16> %shl, <8 x i16>* %dst
+  ret void
+}
+
diff --git a/test/CodeGen/X86/vshift-5.ll b/test/CodeGen/X86/vshift-5.ll
new file mode 100644
index 0000000..a543f38
--- /dev/null
+++ b/test/CodeGen/X86/vshift-5.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -disable-mmx | FileCheck %s
+
+; When loading the shift amount from memory, avoid generating the splat.
+
+define void @shift5a(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind {
+entry:
+; CHECK: shift5a:
+; CHECK: movd
+; CHECK-NEXT: pslld
+  %amt = load i32* %pamt 
+  %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
+  %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer 
+  %shl = shl <4 x i32> %val, %shamt
+  store <4 x i32> %shl, <4 x i32>* %dst
+  ret void
+}
+
+
+define void @shift5b(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind {
+entry:
+; CHECK: shift5b:
+; CHECK: movd
+; CHECK-NEXT: psrad
+  %amt = load i32* %pamt 
+  %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
+  %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer 
+  %shr = ashr <4 x i32> %val, %shamt
+  store <4 x i32> %shr, <4 x i32>* %dst
+  ret void
+}
+
+
+define void @shift5c(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
+entry:
+; CHECK: shift5c:
+; CHECK: movd
+; CHECK-NEXT: pslld
+  %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
+  %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
+  %shl = shl <4 x i32> %val, %shamt
+  store <4 x i32> %shl, <4 x i32>* %dst
+  ret void
+}
+
+
+define void @shift5d(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
+entry:
+; CHECK: shift5d:
+; CHECK: movd
+; CHECK-NEXT: psrad
+  %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
+  %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
+  %shr = ashr <4 x i32> %val, %shamt
+  store <4 x i32> %shr, <4 x i32>* %dst
+  ret void
+}
diff --git a/test/CodeGen/X86/vshift_scalar.ll b/test/CodeGen/X86/vshift_scalar.ll
new file mode 100644
index 0000000..9dd8478
--- /dev/null
+++ b/test/CodeGen/X86/vshift_scalar.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s
+
+; Legalization test that requires scalarizing a vector.
+
+define void @update(<1 x i32> %val, <1 x i32>* %dst) nounwind {
+entry:
+	%shl = shl <1 x i32> %val, < i32 2>
+	%shr = ashr <1 x i32> %val, < i32 4>
+	store <1 x i32> %shr, <1 x i32>* %dst
+	ret void
+}
diff --git a/test/CodeGen/X86/vshift_split.ll b/test/CodeGen/X86/vshift_split.ll
new file mode 100644
index 0000000..359d36d
--- /dev/null
+++ b/test/CodeGen/X86/vshift_split.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2
+
+; Example that requires splitting and expanding a vector shift.
+define <2 x i64> @update(<2 x i64> %val) nounwind readnone {
+entry:
+	%shr = lshr <2 x i64> %val, < i64 2, i64 3 >
+	ret <2 x i64> %shr
+}
diff --git a/test/CodeGen/X86/vshift_split2.ll b/test/CodeGen/X86/vshift_split2.ll
new file mode 100644
index 0000000..0f8c2b8
--- /dev/null
+++ b/test/CodeGen/X86/vshift_split2.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86 -mcpu=yonah
+
+; Legalization example that requires splitting a large vector into smaller pieces.
+
+define void @update(<8 x i32> %val, <8 x i32>* %dst) nounwind {
+entry:
+	%shl = shl <8 x i32> %val, < i32 2, i32 2, i32 2, i32 2, i32 4, i32 4, i32 4, i32 4 >
+	%shr = ashr <8 x i32> %val, < i32 2, i32 2, i32 2, i32 2, i32 4, i32 4, i32 4, i32 4 >
+	store <8 x i32> %shr, <8 x i32>* %dst
+	ret void
+}
diff --git a/test/CodeGen/X86/vsplit-and.ll b/test/CodeGen/X86/vsplit-and.ll
new file mode 100644
index 0000000..a247c6e
--- /dev/null
+++ b/test/CodeGen/X86/vsplit-and.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86 -disable-mmx |  FileCheck %s
+
+
+define void @t(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind readonly {
+; CHECK: andb
+  %cmp1 = icmp ne <2 x i64> %src1, zeroinitializer
+  %cmp2 = icmp ne <2 x i64> %src2, zeroinitializer
+  %t1 = and <2 x i1> %cmp1, %cmp2
+  %t2 = sext <2 x i1> %t1 to <2 x i64>
+  store <2 x i64> %t2, <2 x i64>* %dst
+  ret void
+}
+
+define void @t2(<3 x i64>* %dst, <3 x i64> %src1, <3 x i64> %src2) nounwind readonly {
+; CHECK: andb
+  %cmp1 = icmp ne <3 x i64> %src1, zeroinitializer
+  %cmp2 = icmp ne <3 x i64> %src2, zeroinitializer
+  %t1 = and <3 x i1> %cmp1, %cmp2
+  %t2 = sext <3 x i1> %t1 to <3 x i64>
+  store <3 x i64> %t2, <3 x i64>* %dst
+  ret void
+}
diff --git a/test/CodeGen/X86/weak.ll b/test/CodeGen/X86/weak.ll
new file mode 100644
index 0000000..8590e8d
--- /dev/null
+++ b/test/CodeGen/X86/weak.ll
@@ -0,0 +1,4 @@
+; RUN: llc < %s -march=x86
+@a = extern_weak global i32             ; <i32*> [#uses=1]
+@b = global i32* @a             ; <i32**> [#uses=0]
+
diff --git a/test/CodeGen/X86/wide-integer-fold.ll b/test/CodeGen/X86/wide-integer-fold.ll
new file mode 100644
index 0000000..b3b4d24
--- /dev/null
+++ b/test/CodeGen/X86/wide-integer-fold.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; CHECK:  movq  $-65535, %rax
+
+; DAGCombiner should fold this to a simple constant.
+
+define i64 @foo(i192 %a) nounwind {
+  %t = or i192 %a, -22300404916163702203072254898040925442801665
+  %s = and i192 %t, -22300404916163702203072254898040929737768960
+  %u = lshr i192 %s, 128
+  %v = trunc i192 %u to i64
+  ret i64 %v
+}
diff --git a/test/CodeGen/X86/widen_arith-1.ll b/test/CodeGen/X86/widen_arith-1.ll
new file mode 100644
index 0000000..f8d0690
--- /dev/null
+++ b/test/CodeGen/X86/widen_arith-1.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx |  FileCheck %s
+
+; Widen a v3i8 to v16i8 to use a vector add
+
+define void @update(<3 x i8>* %dst, <3 x i8>* %src, i32 %n) nounwind {
+entry:
+; CHECK-NOT: pextrw
+; CHECK: paddb
+; CHECK: pextrb
+	%dst.addr = alloca <3 x i8>*		; <<3 x i8>**> [#uses=2]
+	%src.addr = alloca <3 x i8>*		; <<3 x i8>**> [#uses=2]
+	%n.addr = alloca i32		; <i32*> [#uses=2]
+	%i = alloca i32, align 4		; <i32*> [#uses=6]
+	store <3 x i8>* %dst, <3 x i8>** %dst.addr
+	store <3 x i8>* %src, <3 x i8>** %src.addr
+	store i32 %n, i32* %n.addr
+	store i32 0, i32* %i
+	br label %forcond
+
+forcond:		; preds = %forinc, %entry
+	%tmp = load i32* %i		; <i32> [#uses=1]
+	%tmp1 = load i32* %n.addr		; <i32> [#uses=1]
+	%cmp = icmp slt i32 %tmp, %tmp1		; <i1> [#uses=1]
+	br i1 %cmp, label %forbody, label %afterfor
+
+forbody:		; preds = %forcond
+	%tmp2 = load i32* %i		; <i32> [#uses=1]
+	%tmp3 = load <3 x i8>** %dst.addr		; <<3 x i8>*> [#uses=1]
+	%arrayidx = getelementptr <3 x i8>* %tmp3, i32 %tmp2		; <<3 x i8>*> [#uses=1]
+	%tmp4 = load i32* %i		; <i32> [#uses=1]
+	%tmp5 = load <3 x i8>** %src.addr		; <<3 x i8>*> [#uses=1]
+	%arrayidx6 = getelementptr <3 x i8>* %tmp5, i32 %tmp4		; <<3 x i8>*> [#uses=1]
+	%tmp7 = load <3 x i8>* %arrayidx6		; <<3 x i8>> [#uses=1]
+	%add = add <3 x i8> %tmp7, < i8 1, i8 1, i8 1 >		; <<3 x i8>> [#uses=1]
+	store <3 x i8> %add, <3 x i8>* %arrayidx
+	br label %forinc
+
+forinc:		; preds = %forbody
+	%tmp8 = load i32* %i		; <i32> [#uses=1]
+	%inc = add i32 %tmp8, 1		; <i32> [#uses=1]
+	store i32 %inc, i32* %i
+	br label %forcond
+
+afterfor:		; preds = %forcond
+	ret void
+}
diff --git a/test/CodeGen/X86/widen_arith-2.ll b/test/CodeGen/X86/widen_arith-2.ll
new file mode 100644
index 0000000..fdecaa3
--- /dev/null
+++ b/test/CodeGen/X86/widen_arith-2.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx  | FileCheck %s
+; CHECK: paddb
+; CHECK: pand
+
+; widen v8i8 to v16i8 (checks even power of 2 widening with add & and)
+
+define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind {
+entry:
+	%dst_i.addr = alloca i64*		; <i64**> [#uses=2]
+	%src_i.addr = alloca i64*		; <i64**> [#uses=2]
+	%n.addr = alloca i32		; <i32*> [#uses=2]
+	%i = alloca i32, align 4		; <i32*> [#uses=8]
+	%dst = alloca <8 x i8>*, align 4		; <<8 x i8>**> [#uses=2]
+	%src = alloca <8 x i8>*, align 4		; <<8 x i8>**> [#uses=2]
+	store i64* %dst_i, i64** %dst_i.addr
+	store i64* %src_i, i64** %src_i.addr
+	store i32 %n, i32* %n.addr
+	store i32 0, i32* %i
+	br label %forcond
+
+forcond:		; preds = %forinc, %entry
+	%tmp = load i32* %i		; <i32> [#uses=1]
+	%tmp1 = load i32* %n.addr		; <i32> [#uses=1]
+	%cmp = icmp slt i32 %tmp, %tmp1		; <i1> [#uses=1]
+	br i1 %cmp, label %forbody, label %afterfor
+
+forbody:		; preds = %forcond
+	%tmp2 = load i32* %i		; <i32> [#uses=1]
+	%tmp3 = load i64** %dst_i.addr		; <i64*> [#uses=1]
+	%arrayidx = getelementptr i64* %tmp3, i32 %tmp2		; <i64*> [#uses=1]
+	%conv = bitcast i64* %arrayidx to <8 x i8>*		; <<8 x i8>*> [#uses=1]
+	store <8 x i8>* %conv, <8 x i8>** %dst
+	%tmp4 = load i32* %i		; <i32> [#uses=1]
+	%tmp5 = load i64** %src_i.addr		; <i64*> [#uses=1]
+	%arrayidx6 = getelementptr i64* %tmp5, i32 %tmp4		; <i64*> [#uses=1]
+	%conv7 = bitcast i64* %arrayidx6 to <8 x i8>*		; <<8 x i8>*> [#uses=1]
+	store <8 x i8>* %conv7, <8 x i8>** %src
+	%tmp8 = load i32* %i		; <i32> [#uses=1]
+	%tmp9 = load <8 x i8>** %dst		; <<8 x i8>*> [#uses=1]
+	%arrayidx10 = getelementptr <8 x i8>* %tmp9, i32 %tmp8		; <<8 x i8>*> [#uses=1]
+	%tmp11 = load i32* %i		; <i32> [#uses=1]
+	%tmp12 = load <8 x i8>** %src		; <<8 x i8>*> [#uses=1]
+	%arrayidx13 = getelementptr <8 x i8>* %tmp12, i32 %tmp11		; <<8 x i8>*> [#uses=1]
+	%tmp14 = load <8 x i8>* %arrayidx13		; <<8 x i8>> [#uses=1]
+	%add = add <8 x i8> %tmp14, < i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1 >		; <<8 x i8>> [#uses=1]
+	%and = and <8 x i8> %add, < i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4 >		; <<8 x i8>> [#uses=1]
+	store <8 x i8> %and, <8 x i8>* %arrayidx10
+	br label %forinc
+
+forinc:		; preds = %forbody
+	%tmp15 = load i32* %i		; <i32> [#uses=1]
+	%inc = add i32 %tmp15, 1		; <i32> [#uses=1]
+	store i32 %inc, i32* %i
+	br label %forcond
+
+afterfor:		; preds = %forcond
+	ret void
+}
+
diff --git a/test/CodeGen/X86/widen_arith-3.ll b/test/CodeGen/X86/widen_arith-3.ll
new file mode 100644
index 0000000..1f2c250
--- /dev/null
+++ b/test/CodeGen/X86/widen_arith-3.ll
@@ -0,0 +1,52 @@
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -post-RA-scheduler=true | FileCheck %s
+; CHECK: paddw
+; CHECK: pextrw
+; CHECK: movd
+
+; Widen a v3i16 to v8i16 to do a vector add
+
+@.str = internal constant [4 x i8] c"%d \00"		; <[4 x i8]*> [#uses=1]
+@.str1 = internal constant [2 x i8] c"\0A\00"		; <[2 x i8]*> [#uses=1]
+
+define void @update(<3 x i16>* %dst, <3 x i16>* %src, i32 %n) nounwind {
+entry:
+	%dst.addr = alloca <3 x i16>*		; <<3 x i16>**> [#uses=2]
+	%src.addr = alloca <3 x i16>*		; <<3 x i16>**> [#uses=2]
+	%n.addr = alloca i32		; <i32*> [#uses=2]
+	%v = alloca <3 x i16>, align 8		; <<3 x i16>*> [#uses=1]
+	%i = alloca i32, align 4		; <i32*> [#uses=6]
+	store <3 x i16>* %dst, <3 x i16>** %dst.addr
+	store <3 x i16>* %src, <3 x i16>** %src.addr
+	store i32 %n, i32* %n.addr
+	store <3 x i16> < i16 1, i16 1, i16 1 >, <3 x i16>* %v
+	store i32 0, i32* %i
+	br label %forcond
+
+forcond:		; preds = %forinc, %entry
+	%tmp = load i32* %i		; <i32> [#uses=1]
+	%tmp1 = load i32* %n.addr		; <i32> [#uses=1]
+	%cmp = icmp slt i32 %tmp, %tmp1		; <i1> [#uses=1]
+	br i1 %cmp, label %forbody, label %afterfor
+
+forbody:		; preds = %forcond
+	%tmp2 = load i32* %i		; <i32> [#uses=1]
+	%tmp3 = load <3 x i16>** %dst.addr		; <<3 x i16>*> [#uses=1]
+	%arrayidx = getelementptr <3 x i16>* %tmp3, i32 %tmp2		; <<3 x i16>*> [#uses=1]
+	%tmp4 = load i32* %i		; <i32> [#uses=1]
+	%tmp5 = load <3 x i16>** %src.addr		; <<3 x i16>*> [#uses=1]
+	%arrayidx6 = getelementptr <3 x i16>* %tmp5, i32 %tmp4		; <<3 x i16>*> [#uses=1]
+	%tmp7 = load <3 x i16>* %arrayidx6		; <<3 x i16>> [#uses=1]
+	%add = add <3 x i16> %tmp7, < i16 1, i16 1, i16 1 >		; <<3 x i16>> [#uses=1]
+	store <3 x i16> %add, <3 x i16>* %arrayidx
+	br label %forinc
+
+forinc:		; preds = %forbody
+	%tmp8 = load i32* %i		; <i32> [#uses=1]
+	%inc = add i32 %tmp8, 1		; <i32> [#uses=1]
+	store i32 %inc, i32* %i
+	br label %forcond
+
+afterfor:		; preds = %forcond
+	ret void
+}
+
diff --git a/test/CodeGen/X86/widen_arith-4.ll b/test/CodeGen/X86/widen_arith-4.ll
new file mode 100644
index 0000000..f7506ae
--- /dev/null
+++ b/test/CodeGen/X86/widen_arith-4.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse42 -disable-mmx | FileCheck %s
+; CHECK: psubw
+; CHECK-NEXT: pmullw
+
+; Widen a v5i16 to v8i16 to do a vector sub and multiply
+
+define void @update(<5 x i16>* %dst, <5 x i16>* %src, i32 %n) nounwind {
+entry:
+	%dst.addr = alloca <5 x i16>*		; <<5 x i16>**> [#uses=2]
+	%src.addr = alloca <5 x i16>*		; <<5 x i16>**> [#uses=2]
+	%n.addr = alloca i32		; <i32*> [#uses=2]
+	%v = alloca <5 x i16>, align 16		; <<5 x i16>*> [#uses=1]
+	%i = alloca i32, align 4		; <i32*> [#uses=6]
+	store <5 x i16>* %dst, <5 x i16>** %dst.addr
+	store <5 x i16>* %src, <5 x i16>** %src.addr
+	store i32 %n, i32* %n.addr
+	store <5 x i16> < i16 1, i16 1, i16 1, i16 0, i16 0 >, <5 x i16>* %v
+	store i32 0, i32* %i
+	br label %forcond
+
+forcond:		; preds = %forinc, %entry
+	%tmp = load i32* %i		; <i32> [#uses=1]
+	%tmp1 = load i32* %n.addr		; <i32> [#uses=1]
+	%cmp = icmp slt i32 %tmp, %tmp1		; <i1> [#uses=1]
+	br i1 %cmp, label %forbody, label %afterfor
+
+forbody:		; preds = %forcond
+	%tmp2 = load i32* %i		; <i32> [#uses=1]
+	%tmp3 = load <5 x i16>** %dst.addr		; <<5 x i16>*> [#uses=1]
+	%arrayidx = getelementptr <5 x i16>* %tmp3, i32 %tmp2		; <<5 x i16>*> [#uses=1]
+	%tmp4 = load i32* %i		; <i32> [#uses=1]
+	%tmp5 = load <5 x i16>** %src.addr		; <<5 x i16>*> [#uses=1]
+	%arrayidx6 = getelementptr <5 x i16>* %tmp5, i32 %tmp4		; <<5 x i16>*> [#uses=1]
+	%tmp7 = load <5 x i16>* %arrayidx6		; <<5 x i16>> [#uses=1]
+	%sub = sub <5 x i16> %tmp7, < i16 271, i16 271, i16 271, i16 271, i16 271 >		; <<5 x i16>> [#uses=1]
+	%mul = mul <5 x i16> %sub, < i16 2, i16 2, i16 2, i16 2, i16 2 >		; <<5 x i16>> [#uses=1]
+	store <5 x i16> %mul, <5 x i16>* %arrayidx
+	br label %forinc
+
+forinc:		; preds = %forbody
+	%tmp8 = load i32* %i		; <i32> [#uses=1]
+	%inc = add i32 %tmp8, 1		; <i32> [#uses=1]
+	store i32 %inc, i32* %i
+	br label %forcond
+
+afterfor:		; preds = %forcond
+	ret void
+}
+
diff --git a/test/CodeGen/X86/widen_arith-5.ll b/test/CodeGen/X86/widen_arith-5.ll
new file mode 100644
index 0000000..f7f3408
--- /dev/null
+++ b/test/CodeGen/X86/widen_arith-5.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse42 -disable-mmx  | FileCheck %s
+; CHECK: movaps
+; CHECK: pmulld
+; CHECK: psubd
+
+; widen a v3i32 to v4i32 to do a vector multiply and a subtraction
+
+define void @update(<3 x i32>* %dst, <3 x i32>* %src, i32 %n) nounwind {
+entry:
+	%dst.addr = alloca <3 x i32>*		; <<3 x i32>**> [#uses=2]
+	%src.addr = alloca <3 x i32>*		; <<3 x i32>**> [#uses=2]
+	%n.addr = alloca i32		; <i32*> [#uses=2]
+	%v = alloca <3 x i32>, align 16		; <<3 x i32>*> [#uses=1]
+	%i = alloca i32, align 4		; <i32*> [#uses=6]
+	store <3 x i32>* %dst, <3 x i32>** %dst.addr
+	store <3 x i32>* %src, <3 x i32>** %src.addr
+	store i32 %n, i32* %n.addr
+	store <3 x i32> < i32 1, i32 1, i32 1 >, <3 x i32>* %v
+	store i32 0, i32* %i
+	br label %forcond
+
+forcond:		; preds = %forinc, %entry
+	%tmp = load i32* %i		; <i32> [#uses=1]
+	%tmp1 = load i32* %n.addr		; <i32> [#uses=1]
+	%cmp = icmp slt i32 %tmp, %tmp1		; <i1> [#uses=1]
+	br i1 %cmp, label %forbody, label %afterfor
+
+forbody:		; preds = %forcond
+	%tmp2 = load i32* %i		; <i32> [#uses=1]
+	%tmp3 = load <3 x i32>** %dst.addr		; <<3 x i32>*> [#uses=1]
+	%arrayidx = getelementptr <3 x i32>* %tmp3, i32 %tmp2		; <<3 x i32>*> [#uses=1]
+	%tmp4 = load i32* %i		; <i32> [#uses=1]
+	%tmp5 = load <3 x i32>** %src.addr		; <<3 x i32>*> [#uses=1]
+	%arrayidx6 = getelementptr <3 x i32>* %tmp5, i32 %tmp4		; <<3 x i32>*> [#uses=1]
+	%tmp7 = load <3 x i32>* %arrayidx6		; <<3 x i32>> [#uses=1]
+	%mul = mul <3 x i32> %tmp7, < i32 4, i32 4, i32 4 >		; <<3 x i32>> [#uses=1]
+	%sub = sub <3 x i32> %mul, < i32 3, i32 3, i32 3 >		; <<3 x i32>> [#uses=1]
+	store <3 x i32> %sub, <3 x i32>* %arrayidx
+	br label %forinc
+
+forinc:		; preds = %forbody
+	%tmp8 = load i32* %i		; <i32> [#uses=1]
+	%inc = add i32 %tmp8, 1		; <i32> [#uses=1]
+	store i32 %inc, i32* %i
+	br label %forcond
+
+afterfor:		; preds = %forcond
+	ret void
+}
+
diff --git a/test/CodeGen/X86/widen_arith-6.ll b/test/CodeGen/X86/widen_arith-6.ll
new file mode 100644
index 0000000..538123f
--- /dev/null
+++ b/test/CodeGen/X86/widen_arith-6.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; CHECK: mulps
+; CHECK: addps
+
+; widen a v3f32 to v4f32 to do a vector multiply and an add
+
+define void @update(<3 x float>* %dst, <3 x float>* %src, i32 %n) nounwind {
+entry:
+	%dst.addr = alloca <3 x float>*		; <<3 x float>**> [#uses=2]
+	%src.addr = alloca <3 x float>*		; <<3 x float>**> [#uses=2]
+	%n.addr = alloca i32		; <i32*> [#uses=2]
+	%v = alloca <3 x float>, align 16		; <<3 x float>*> [#uses=2]
+	%i = alloca i32, align 4		; <i32*> [#uses=6]
+	store <3 x float>* %dst, <3 x float>** %dst.addr
+	store <3 x float>* %src, <3 x float>** %src.addr
+	store i32 %n, i32* %n.addr
+	store <3 x float> < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00 >, <3 x float>* %v
+	store i32 0, i32* %i
+	br label %forcond
+
+forcond:		; preds = %forinc, %entry
+	%tmp = load i32* %i		; <i32> [#uses=1]
+	%tmp1 = load i32* %n.addr		; <i32> [#uses=1]
+	%cmp = icmp slt i32 %tmp, %tmp1		; <i1> [#uses=1]
+	br i1 %cmp, label %forbody, label %afterfor
+
+forbody:		; preds = %forcond
+	%tmp2 = load i32* %i		; <i32> [#uses=1]
+	%tmp3 = load <3 x float>** %dst.addr		; <<3 x float>*> [#uses=1]
+	%arrayidx = getelementptr <3 x float>* %tmp3, i32 %tmp2		; <<3 x float>*> [#uses=1]
+	%tmp4 = load i32* %i		; <i32> [#uses=1]
+	%tmp5 = load <3 x float>** %src.addr		; <<3 x float>*> [#uses=1]
+	%arrayidx6 = getelementptr <3 x float>* %tmp5, i32 %tmp4		; <<3 x float>*> [#uses=1]
+	%tmp7 = load <3 x float>* %arrayidx6		; <<3 x float>> [#uses=1]
+	%tmp8 = load <3 x float>* %v		; <<3 x float>> [#uses=1]
+	%mul = fmul <3 x float> %tmp7, %tmp8		; <<3 x float>> [#uses=1]
+	%add = fadd <3 x float> %mul, < float 0x409EE02900000000, float 0x409EE02900000000, float 0x409EE02900000000 >		; <<3 x float>> [#uses=1]
+	store <3 x float> %add, <3 x float>* %arrayidx
+	br label %forinc
+
+forinc:		; preds = %forbody
+	%tmp9 = load i32* %i		; <i32> [#uses=1]
+	%inc = add i32 %tmp9, 1		; <i32> [#uses=1]
+	store i32 %inc, i32* %i
+	br label %forcond
+
+afterfor:		; preds = %forcond
+	ret void
+}
diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll
new file mode 100644
index 0000000..d4ab174
--- /dev/null
+++ b/test/CodeGen/X86/widen_cast-1.ll
@@ -0,0 +1,44 @@
+; RUN: llc -march=x86 -mattr=+sse42 < %s -disable-mmx | FileCheck %s
+; CHECK: paddw
+; CHECK: pextrd
+; CHECK: movd
+
+; bitcast a v4i16 to v2i32
+
+define void @convert(<2 x i32>* %dst, <4 x i16>* %src) nounwind {
+entry:
+	%dst.addr = alloca <2 x i32>*		; <<2 x i32>**> [#uses=2]
+	%src.addr = alloca <4 x i16>*		; <<4 x i16>**> [#uses=2]
+	%i = alloca i32, align 4		; <i32*> [#uses=6]
+	store <2 x i32>* %dst, <2 x i32>** %dst.addr
+	store <4 x i16>* %src, <4 x i16>** %src.addr
+	store i32 0, i32* %i
+	br label %forcond
+
+forcond:		; preds = %forinc, %entry
+	%tmp = load i32* %i		; <i32> [#uses=1]
+	%cmp = icmp slt i32 %tmp, 4		; <i1> [#uses=1]
+	br i1 %cmp, label %forbody, label %afterfor
+
+forbody:		; preds = %forcond
+	%tmp1 = load i32* %i		; <i32> [#uses=1]
+	%tmp2 = load <2 x i32>** %dst.addr		; <<2 x i32>*> [#uses=1]
+	%arrayidx = getelementptr <2 x i32>* %tmp2, i32 %tmp1		; <<2 x i32>*> [#uses=1]
+	%tmp3 = load i32* %i		; <i32> [#uses=1]
+	%tmp4 = load <4 x i16>** %src.addr		; <<4 x i16>*> [#uses=1]
+	%arrayidx5 = getelementptr <4 x i16>* %tmp4, i32 %tmp3		; <<4 x i16>*> [#uses=1]
+	%tmp6 = load <4 x i16>* %arrayidx5		; <<4 x i16>> [#uses=1]
+	%add = add <4 x i16> %tmp6, < i16 1, i16 1, i16 1, i16 1 >		; <<4 x i16>> [#uses=1]
+	%conv = bitcast <4 x i16> %add to <2 x i32>		; <<2 x i32>> [#uses=1]
+	store <2 x i32> %conv, <2 x i32>* %arrayidx
+	br label %forinc
+
+forinc:		; preds = %forbody
+	%tmp7 = load i32* %i		; <i32> [#uses=1]
+	%inc = add i32 %tmp7, 1		; <i32> [#uses=1]
+	store i32 %inc, i32* %i
+	br label %forcond
+
+afterfor:		; preds = %forcond
+	ret void
+}
diff --git a/test/CodeGen/X86/widen_cast-2.ll b/test/CodeGen/X86/widen_cast-2.ll
new file mode 100644
index 0000000..1e626a2
--- /dev/null
+++ b/test/CodeGen/X86/widen_cast-2.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; CHECK: pextrd
+; CHECK: pextrd
+; CHECK: movd
+; CHECK: movaps
+
+
+; bitcast v14i16 to v7i32
+
+define void @convert(<7 x i32>* %dst, <14 x i16>* %src) nounwind {
+entry:
+	%dst.addr = alloca <7 x i32>*		; <<7 x i32>**> [#uses=2]
+	%src.addr = alloca <14 x i16>*		; <<14 x i16>**> [#uses=2]
+	%i = alloca i32, align 4		; <i32*> [#uses=6]
+	store <7 x i32>* %dst, <7 x i32>** %dst.addr
+	store <14 x i16>* %src, <14 x i16>** %src.addr
+	store i32 0, i32* %i
+	br label %forcond
+
+forcond:		; preds = %forinc, %entry
+	%tmp = load i32* %i		; <i32> [#uses=1]
+	%cmp = icmp slt i32 %tmp, 4		; <i1> [#uses=1]
+	br i1 %cmp, label %forbody, label %afterfor
+
+forbody:		; preds = %forcond
+	%tmp1 = load i32* %i		; <i32> [#uses=1]
+	%tmp2 = load <7 x i32>** %dst.addr		; <<7 x i32>*> [#uses=1]
+	%arrayidx = getelementptr <7 x i32>* %tmp2, i32 %tmp1		; <<7 x i32>*> [#uses=1]
+	%tmp3 = load i32* %i		; <i32> [#uses=1]
+	%tmp4 = load <14 x i16>** %src.addr		; <<14 x i16>*> [#uses=1]
+	%arrayidx5 = getelementptr <14 x i16>* %tmp4, i32 %tmp3		; <<14 x i16>*> [#uses=1]
+	%tmp6 = load <14 x i16>* %arrayidx5		; <<14 x i16>> [#uses=1]
+	%add = add <14 x i16> %tmp6, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1 >		; <<14 x i16>> [#uses=1]
+	%conv = bitcast <14 x i16> %add to <7 x i32>		; <<7 x i32>> [#uses=1]
+	store <7 x i32> %conv, <7 x i32>* %arrayidx
+	br label %forinc
+
+forinc:		; preds = %forbody
+	%tmp7 = load i32* %i		; <i32> [#uses=1]
+	%inc = add i32 %tmp7, 1		; <i32> [#uses=1]
+	store i32 %inc, i32* %i
+	br label %forcond
+
+afterfor:		; preds = %forcond
+	ret void
+}
diff --git a/test/CodeGen/X86/widen_cast-3.ll b/test/CodeGen/X86/widen_cast-3.ll
new file mode 100644
index 0000000..02674dd
--- /dev/null
+++ b/test/CodeGen/X86/widen_cast-3.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; CHECK: paddd
+; CHECK: pextrd
+; CHECK: pextrd
+
+; bitcast v3i32 to v12i8
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin10.0.0d2"
+
+define void @convert(<12 x i8>* %dst.addr, <3 x i32> %src) nounwind {
+entry:
+	%add = add <3 x i32> %src, < i32 1, i32 1, i32 1 >		; <<3 x i32>> [#uses=1]
+	%conv = bitcast <3 x i32> %add to <12 x i8>		; <<12 x i8>> [#uses=1]
+	store <12 x i8> %conv, <12 x i8>* %dst.addr
+	ret void
+}
diff --git a/test/CodeGen/X86/widen_cast-4.ll b/test/CodeGen/X86/widen_cast-4.ll
new file mode 100644
index 0000000..5f31e56
--- /dev/null
+++ b/test/CodeGen/X86/widen_cast-4.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; CHECK: sarb
+; CHECK: sarb
+; CHECK: sarb
+; CHECK: sarb
+; CHECK: sarb
+; CHECK: sarb
+; CHECK: sarb
+; CHECK: sarb
+
+; v8i8 that is widen to v16i8 then split
+; FIXME: This is widen to v16i8 and split to 16 and we then rebuild the vector.
+; Unfortunately, we don't split the store so we don't get the code we want.
+
+define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind {
+entry:
+	%dst_i.addr = alloca i64*		; <i64**> [#uses=2]
+	%src_i.addr = alloca i64*		; <i64**> [#uses=2]
+	%n.addr = alloca i32		; <i32*> [#uses=2]
+	%i = alloca i32, align 4		; <i32*> [#uses=8]
+	%dst = alloca <8 x i8>*, align 4		; <<8 x i8>**> [#uses=2]
+	%src = alloca <8 x i8>*, align 4		; <<8 x i8>**> [#uses=2]
+	store i64* %dst_i, i64** %dst_i.addr
+	store i64* %src_i, i64** %src_i.addr
+	store i32 %n, i32* %n.addr
+	store i32 0, i32* %i
+	br label %forcond
+
+forcond:		; preds = %forinc, %entry
+	%tmp = load i32* %i		; <i32> [#uses=1]
+	%tmp1 = load i32* %n.addr		; <i32> [#uses=1]
+	%cmp = icmp slt i32 %tmp, %tmp1		; <i1> [#uses=1]
+	br i1 %cmp, label %forbody, label %afterfor
+
+forbody:		; preds = %forcond
+	%tmp2 = load i32* %i		; <i32> [#uses=1]
+	%tmp3 = load i64** %dst_i.addr		; <i64*> [#uses=1]
+	%arrayidx = getelementptr i64* %tmp3, i32 %tmp2		; <i64*> [#uses=1]
+	%conv = bitcast i64* %arrayidx to <8 x i8>*		; <<8 x i8>*> [#uses=1]
+	store <8 x i8>* %conv, <8 x i8>** %dst
+	%tmp4 = load i32* %i		; <i32> [#uses=1]
+	%tmp5 = load i64** %src_i.addr		; <i64*> [#uses=1]
+	%arrayidx6 = getelementptr i64* %tmp5, i32 %tmp4		; <i64*> [#uses=1]
+	%conv7 = bitcast i64* %arrayidx6 to <8 x i8>*		; <<8 x i8>*> [#uses=1]
+	store <8 x i8>* %conv7, <8 x i8>** %src
+	%tmp8 = load i32* %i		; <i32> [#uses=1]
+	%tmp9 = load <8 x i8>** %dst		; <<8 x i8>*> [#uses=1]
+	%arrayidx10 = getelementptr <8 x i8>* %tmp9, i32 %tmp8		; <<8 x i8>*> [#uses=1]
+	%tmp11 = load i32* %i		; <i32> [#uses=1]
+	%tmp12 = load <8 x i8>** %src		; <<8 x i8>*> [#uses=1]
+	%arrayidx13 = getelementptr <8 x i8>* %tmp12, i32 %tmp11		; <<8 x i8>*> [#uses=1]
+	%tmp14 = load <8 x i8>* %arrayidx13		; <<8 x i8>> [#uses=1]
+	%add = add <8 x i8> %tmp14, < i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1 >		; <<8 x i8>> [#uses=1]
+	%shr = ashr <8 x i8> %add, < i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2 >		; <<8 x i8>> [#uses=1]
+	store <8 x i8> %shr, <8 x i8>* %arrayidx10
+	br label %forinc
+
+forinc:		; preds = %forbody
+	%tmp15 = load i32* %i		; <i32> [#uses=1]
+	%inc = add i32 %tmp15, 1		; <i32> [#uses=1]
+	store i32 %inc, i32* %i
+	br label %forcond
+
+afterfor:		; preds = %forcond
+	ret void
+}
+
diff --git a/test/CodeGen/X86/widen_cast-5.ll b/test/CodeGen/X86/widen_cast-5.ll
new file mode 100644
index 0000000..d1d7fec
--- /dev/null
+++ b/test/CodeGen/X86/widen_cast-5.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; CHECK: movl
+; CHECK: movd
+
+; bitcast an i64 to v2i32
+
+define void @convert(<2 x i32>* %dst.addr, i64 %src) nounwind {
+entry:
+	%conv = bitcast i64 %src to <2 x i32>
+	%xor = xor <2 x i32> %conv, < i32 255, i32 32767 >
+	store <2 x i32> %xor, <2 x i32>* %dst.addr
+	ret void
+}
diff --git a/test/CodeGen/X86/widen_cast-6.ll b/test/CodeGen/X86/widen_cast-6.ll
new file mode 100644
index 0000000..08759bf
--- /dev/null
+++ b/test/CodeGen/X86/widen_cast-6.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86 -mattr=+sse41 -disable-mmx | FileCheck %s
+; CHECK: movd
+
+; Test bit convert that requires widening in the operand.
+
+define i32 @return_v2hi() nounwind {
+entry:
+	%retval12 = bitcast <2 x i16> zeroinitializer to i32		; <i32> [#uses=1]
+	ret i32 %retval12
+}
diff --git a/test/CodeGen/X86/widen_conv-1.ll b/test/CodeGen/X86/widen_conv-1.ll
new file mode 100644
index 0000000..a2029dd
--- /dev/null
+++ b/test/CodeGen/X86/widen_conv-1.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; CHECK: pshufd
+; CHECK: paddd
+
+; truncate v2i64 to v2i32
+
+define void @convert(<2 x i32>* %dst.addr, <2 x i64> %src) nounwind {
+entry:
+	%val = trunc <2 x i64> %src to <2 x i32>
+	%add = add <2 x i32> %val, < i32 1, i32 1 >
+	store <2 x i32> %add, <2 x i32>* %dst.addr
+	ret void
+}
diff --git a/test/CodeGen/X86/widen_conv-2.ll b/test/CodeGen/X86/widen_conv-2.ll
new file mode 100644
index 0000000..b24a9b3
--- /dev/null
+++ b/test/CodeGen/X86/widen_conv-2.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; CHECK: movswl
+; CHECK: movswl
+
+; sign extension v2i16 to v2i32
+
+define void @convert(<2 x i32>* %dst.addr, <2 x i16> %src) nounwind {
+entry:
+	%signext = sext <2 x i16> %src to <2 x i32>		; <<2 x i32>> [#uses=1]
+	store <2 x i32> %signext, <2 x i32>* %dst.addr
+	ret void
+}
diff --git a/test/CodeGen/X86/widen_conv-3.ll b/test/CodeGen/X86/widen_conv-3.ll
new file mode 100644
index 0000000..1a40800
--- /dev/null
+++ b/test/CodeGen/X86/widen_conv-3.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; CHECK: cvtsi2ss
+
+; signed to float v2i16 to v2f32
+
+define void @convert(<2 x float>* %dst.addr, <2 x i16> %src) nounwind {
+entry:
+	%val = sitofp <2 x i16> %src to <2 x float>
+	store <2 x float> %val, <2 x float>* %dst.addr
+	ret void
+}
diff --git a/test/CodeGen/X86/widen_conv-4.ll b/test/CodeGen/X86/widen_conv-4.ll
new file mode 100644
index 0000000..e505b62
--- /dev/null
+++ b/test/CodeGen/X86/widen_conv-4.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; CHECK: cvtsi2ss
+
+; signed to float v7i16 to v7f32
+
+define void @convert(<7 x float>* %dst.addr, <7 x i16> %src) nounwind {
+entry:
+	%val = sitofp <7 x i16> %src to <7 x float>
+	store <7 x float> %val, <7 x float>* %dst.addr
+	ret void
+}
diff --git a/test/CodeGen/X86/widen_extract-1.ll b/test/CodeGen/X86/widen_extract-1.ll
new file mode 100644
index 0000000..308e6b8
--- /dev/null
+++ b/test/CodeGen/X86/widen_extract-1.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse42 -disable-mmx | FileCheck %s
+; widen extract subvector
+
+define void @convert(<2 x double>* %dst.addr, <3 x double> %src)  {
+entry:
+; CHECK: convert:
+; CHECK: unpcklpd {{%xmm[0-7]}}, {{%xmm[0-7]}}
+; CHECK-NEXT: movapd
+  %val = shufflevector <3 x double> %src, <3 x double> undef, <2 x i32> < i32 0, i32 1>
+  store <2 x double> %val, <2 x double>* %dst.addr
+  ret void
+}
diff --git a/test/CodeGen/X86/widen_load-0.ll b/test/CodeGen/X86/widen_load-0.ll
new file mode 100644
index 0000000..f6c4af0
--- /dev/null
+++ b/test/CodeGen/X86/widen_load-0.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -o - -march=x86-64 | FileCheck %s
+; PR4891
+
+; Both loads should happen before either store.
+
+; CHECK: movl  (%rdi), %eax
+; CHECK: movl  (%rsi), %ecx
+; CHECK: movl  %ecx, (%rdi)
+; CHECK: movl  %eax, (%rsi)
+
+define void @short2_int_swap(<2 x i16>* nocapture %b, i32* nocapture %c) nounwind {
+entry:
+  %0 = load <2 x i16>* %b, align 2                ; <<2 x i16>> [#uses=1]
+  %1 = load i32* %c, align 4                      ; <i32> [#uses=1]
+  %tmp1 = bitcast i32 %1 to <2 x i16>             ; <<2 x i16>> [#uses=1]
+  store <2 x i16> %tmp1, <2 x i16>* %b, align 2
+  %tmp5 = bitcast <2 x i16> %0 to <1 x i32>       ; <<1 x i32>> [#uses=1]
+  %tmp3 = extractelement <1 x i32> %tmp5, i32 0   ; <i32> [#uses=1]
+  store i32 %tmp3, i32* %c, align 4
+  ret void
+}
diff --git a/test/CodeGen/X86/widen_load-1.ll b/test/CodeGen/X86/widen_load-1.ll
new file mode 100644
index 0000000..d397645
--- /dev/null
+++ b/test/CodeGen/X86/widen_load-1.ll
@@ -0,0 +1,45 @@
+; RUN: llc %s -o - -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -disable-mmx | FileCheck %s
+; PR4891
+
+; This load should be before the call, not after.
+
+; CHECK: movaps    compl+128(%rip), %xmm0
+; CHECK: movaps  %xmm0, (%rsp)
+; CHECK: callq   killcommon
+
+@compl = linkonce global [20 x i64] zeroinitializer, align 64 ; <[20 x i64]*> [#uses=1]
+
+declare void @killcommon(i32* noalias)
+
+define void @reset(<2 x float>* noalias %garbage1) {
+"file complex.c, line 27, bb1":
+  %changed = alloca i32, align 4                  ; <i32*> [#uses=3]
+  br label %"file complex.c, line 27, bb13"
+
+"file complex.c, line 27, bb13":                  ; preds = %"file complex.c, line 27, bb1"
+  store i32 0, i32* %changed, align 4
+  %r2 = getelementptr float* bitcast ([20 x i64]* @compl to float*), i64 32 ; <float*> [#uses=1]
+  %r3 = bitcast float* %r2 to <2 x float>*        ; <<2 x float>*> [#uses=1]
+  %r4 = load <2 x float>* %r3, align 4            ; <<2 x float>> [#uses=1]
+  call void @killcommon(i32* %changed)
+  br label %"file complex.c, line 34, bb4"
+
+"file complex.c, line 34, bb4":                   ; preds = %"file complex.c, line 27, bb13"
+  %r5 = load i32* %changed, align 4               ; <i32> [#uses=1]
+  %r6 = icmp eq i32 %r5, 0                        ; <i1> [#uses=1]
+  %r7 = zext i1 %r6 to i32                        ; <i32> [#uses=1]
+  %r8 = icmp ne i32 %r7, 0                        ; <i1> [#uses=1]
+  br i1 %r8, label %"file complex.c, line 34, bb7", label %"file complex.c, line 27, bb5"
+
+"file complex.c, line 27, bb5":                   ; preds = %"file complex.c, line 34, bb4"
+  br label %"file complex.c, line 35, bb6"
+
+"file complex.c, line 35, bb6":                   ; preds = %"file complex.c, line 27, bb5"
+  %r11 = ptrtoint <2 x float>* %garbage1 to i64   ; <i64> [#uses=1]
+  %r12 = inttoptr i64 %r11 to <2 x float>*        ; <<2 x float>*> [#uses=1]
+  store <2 x float> %r4, <2 x float>* %r12, align 4
+  br label %"file complex.c, line 34, bb7"
+
+"file complex.c, line 34, bb7":                   ; preds = %"file complex.c, line 35, bb6", %"file complex.c, line 34, bb4"
+  ret void
+}
diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll
new file mode 100644
index 0000000..11383fa
--- /dev/null
+++ b/test/CodeGen/X86/widen_load-2.ll
@@ -0,0 +1,155 @@
+; RUN: llc < %s -o - -march=x86-64 -mattr=+sse42 -disable-mmx | FileCheck %s
+
+; Test based on pr5626 to load/store
+;
+
+%i32vec3 = type <3 x i32>
+define void @add3i32(%i32vec3*  sret %ret, %i32vec3* %ap, %i32vec3* %bp)  {
+; CHECK: movaps
+; CHECK: paddd
+; CHECK: pextrd
+; CHECK: movq
+	%a = load %i32vec3* %ap, align 16
+	%b = load %i32vec3* %bp, align 16
+	%x = add %i32vec3 %a, %b
+	store %i32vec3 %x, %i32vec3* %ret, align 16
+	ret void
+}
+
+define void @add3i32_2(%i32vec3*  sret %ret, %i32vec3* %ap, %i32vec3* %bp)  {
+; CHECK: movq
+; CHECK: pinsrd
+; CHECK: movq
+; CHECK: pinsrd
+; CHECK: paddd
+; CHECK: pextrd
+; CHECK: movq
+	%a = load %i32vec3* %ap
+	%b = load %i32vec3* %bp
+	%x = add %i32vec3 %a, %b
+	store %i32vec3 %x, %i32vec3* %ret
+	ret void
+}
+
+%i32vec7 = type <7 x i32>
+define void @add7i32(%i32vec7*  sret %ret, %i32vec7* %ap, %i32vec7* %bp)  {
+; CHECK: movaps
+; CHECK: movaps
+; CHECK: paddd
+; CHECK: paddd
+; CHECK: pextrd
+; CHECK: movq
+; CHECK: movaps
+	%a = load %i32vec7* %ap, align 16
+	%b = load %i32vec7* %bp, align 16
+	%x = add %i32vec7 %a, %b
+	store %i32vec7 %x, %i32vec7* %ret, align 16
+	ret void
+}
+
+%i32vec12 = type <12 x i32>
+define void @add12i32(%i32vec12*  sret %ret, %i32vec12* %ap, %i32vec12* %bp)  {
+; CHECK: movaps
+; CHECK: movaps
+; CHECK: movaps
+; CHECK: paddd
+; CHECK: paddd
+; CHECK: paddd
+; CHECK: movaps
+; CHECK: movaps
+; CHECK: movaps
+	%a = load %i32vec12* %ap, align 16
+	%b = load %i32vec12* %bp, align 16
+	%x = add %i32vec12 %a, %b
+	store %i32vec12 %x, %i32vec12* %ret, align 16
+	ret void
+}
+
+
+%i16vec3 = type <3 x i16>
+define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind {
+; CHECK: movaps
+; CHECK: paddw
+; CHECK: movd
+; CHECK: pextrw
+	%a = load %i16vec3* %ap, align 16
+	%b = load %i16vec3* %bp, align 16
+	%x = add %i16vec3 %a, %b
+	store %i16vec3 %x, %i16vec3* %ret, align 16
+	ret void
+}
+
+%i16vec4 = type <4 x i16>
+define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp) nounwind {
+; CHECK: movaps
+; CHECK: paddw
+; CHECK: movq
+	%a = load %i16vec4* %ap, align 16
+	%b = load %i16vec4* %bp, align 16
+	%x = add %i16vec4 %a, %b
+	store %i16vec4 %x, %i16vec4* %ret, align 16
+	ret void
+}
+
+%i16vec12 = type <12 x i16>
+define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12* %bp) nounwind {
+; CHECK: movaps
+; CHECK: movaps
+; CHECK: paddw
+; CHECK: paddw
+; CHECK: movq
+; CHECK: movaps
+	%a = load %i16vec12* %ap, align 16
+	%b = load %i16vec12* %bp, align 16
+	%x = add %i16vec12 %a, %b
+	store %i16vec12 %x, %i16vec12* %ret, align 16
+	ret void
+}
+
+%i16vec18 = type <18 x i16>
+define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* %bp) nounwind {
+; CHECK: movaps
+; CHECK: movaps
+; CHECK: movaps
+; CHECK: paddw
+; CHECK: paddw
+; CHECK: paddw
+; CHECK: movd
+; CHECK: movaps
+; CHECK: movaps
+	%a = load %i16vec18* %ap, align 16
+	%b = load %i16vec18* %bp, align 16
+	%x = add %i16vec18 %a, %b
+	store %i16vec18 %x, %i16vec18* %ret, align 16
+	ret void
+}
+
+
+%i8vec3 = type <3 x i8>
+define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind {
+; CHECK: movaps
+; CHECK: paddb
+; CHECK: pextrb
+; CHECK: movb
+	%a = load %i8vec3* %ap, align 16
+	%b = load %i8vec3* %bp, align 16
+	%x = add %i8vec3 %a, %b
+	store %i8vec3 %x, %i8vec3* %ret, align 16
+	ret void
+}
+
+%i8vec31 = type <31 x i8>
+define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp) nounwind {
+; CHECK: movaps
+; CHECK: movaps
+; CHECK: paddb
+; CHECK: paddb
+; CHECK: movq
+; CHECK: pextrb
+; CHECK: pextrw
+	%a = load %i8vec31* %ap, align 16
+	%b = load %i8vec31* %bp, align 16
+	%x = add %i8vec31 %a, %b
+	store %i8vec31 %x, %i8vec31* %ret, align 16
+	ret void
+}
\ No newline at end of file
diff --git a/test/CodeGen/X86/widen_select-1.ll b/test/CodeGen/X86/widen_select-1.ll
new file mode 100644
index 0000000..d9de892
--- /dev/null
+++ b/test/CodeGen/X86/widen_select-1.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; CHECK: jne
+
+; widening select v6i32 and then a sub
+
+define void @select(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2) nounwind {
+entry:
+	%x = select i1 %c, <6 x i32> %src1, <6 x i32> %src2
+	%val = sub <6 x i32> %x, < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+	store <6 x i32> %val, <6 x i32>* %dst.addr
+	ret void
+}
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll
new file mode 100644
index 0000000..47dba4b
--- /dev/null
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; CHECK: insertps
+; CHECK: extractps
+
+; widening shuffle v3float and then a add
+
+define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
+entry:
+	%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 1, i32 2>
+	%val = fadd <3 x float> %x, %src2
+	store <3 x float> %val, <3 x float>* %dst.addr
+	ret void
+}
diff --git a/test/CodeGen/X86/widen_shuffle-2.ll b/test/CodeGen/X86/widen_shuffle-2.ll
new file mode 100644
index 0000000..9374a02
--- /dev/null
+++ b/test/CodeGen/X86/widen_shuffle-2.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; CHECK: insertps
+; CHECK: extractps
+
+; widening shuffle v3float and then a add
+
+define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
+entry:
+	%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 4, i32 2>
+	%val = fadd <3 x float> %x, %src2
+	store <3 x float> %val, <3 x float>* %dst.addr
+	ret void
+}
diff --git a/test/CodeGen/X86/x86-64-and-mask.ll b/test/CodeGen/X86/x86-64-and-mask.ll
new file mode 100644
index 0000000..2465f23
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-and-mask.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin8"
+
+; This should be a single mov, not a load of immediate + andq.
+; CHECK: test:
+; CHECK: movl %edi, %eax
+
+define i64 @test(i64 %x) nounwind {
+entry:
+	%tmp123 = and i64 %x, 4294967295		; <i64> [#uses=1]
+	ret i64 %tmp123
+}
+
+; This copy can't be coalesced away because it needs the implicit zero-extend.
+; CHECK: bbb:
+; CHECK: movl %edi, %edi
+
+define void @bbb(i64 %x) nounwind {
+  %t = and i64 %x, 4294967295
+  call void @foo(i64 %t)
+  ret void
+}
+
+; This should use a 32-bit and with implicit zero-extension, not a 64-bit and
+; with a separate mov to materialize the mask.
+; rdar://7527390
+; CHECK: ccc:
+; CHECK: andl $-1048593, %edi
+
+declare void @foo(i64 %x) nounwind
+
+define void @ccc(i64 %x) nounwind {
+  %t = and i64 %x, 4293918703
+  call void @foo(i64 %t)
+  ret void
+}
+
+; This requires a mov and a 64-bit and.
+; CHECK: ddd:
+; CHECK: movabsq $4294967296, %rax
+; CHECK: andq %rax, %rdi
+
+define void @ddd(i64 %x) nounwind {
+  %t = and i64 %x, 4294967296
+  call void @foo(i64 %t)
+  ret void
+}
diff --git a/test/CodeGen/X86/x86-64-arg.ll b/test/CodeGen/X86/x86-64-arg.ll
new file mode 100644
index 0000000..ec8dd8e
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-arg.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s | grep {movl	%edi, %eax}
+; The input value is already sign extended, don't re-extend it.
+; This testcase corresponds to:
+;   int test(short X) { return (int)X; }
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-apple-darwin8"
+
+
+define i32 @test(i16 signext  %X) {
+entry:
+        %tmp12 = sext i16 %X to i32             ; <i32> [#uses=1]
+        ret i32 %tmp12
+}
+
diff --git a/test/CodeGen/X86/x86-64-asm.ll b/test/CodeGen/X86/x86-64-asm.ll
new file mode 100644
index 0000000..2640e59
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-asm.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s
+; PR1029
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @frame_dummy() {
+entry:
+        %tmp1 = tail call void (i8*)* (void (i8*)*)* asm "", "=r,0,~{dirflag},~{fpsr},~{flags}"( void (i8*)* null )           ; <void (i8*)*> [#uses=0]
+        ret void
+}
+
diff --git a/test/CodeGen/X86/x86-64-dead-stack-adjust.ll b/test/CodeGen/X86/x86-64-dead-stack-adjust.ll
new file mode 100644
index 0000000..79316f2
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-dead-stack-adjust.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s | not grep rsp
+; RUN: llc < %s | grep cvttsd2siq
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin8"
+
+define double @a(double %b) nounwind  {
+entry:
+	%tmp12 = fptoui double %b to i32		; <i32> [#uses=1]
+	%tmp123 = uitofp i32 %tmp12 to double		; <double> [#uses=1]
+	ret double %tmp123
+}
diff --git a/test/CodeGen/X86/x86-64-disp.ll b/test/CodeGen/X86/x86-64-disp.ll
new file mode 100644
index 0000000..d8059eb
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-disp.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86-64 | grep mov | count 2
+
+; Fold an offset into an address even if it's not a 32-bit
+; signed integer.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+@call_used_regs = external global [53 x i8], align 32
+
+define fastcc void @foo() nounwind {
+	%t = getelementptr [53 x i8]* @call_used_regs, i64 0, i64 4294967295
+	store i8 1, i8* %t, align 1
+	ret void
+}
diff --git a/test/CodeGen/X86/x86-64-frameaddr.ll b/test/CodeGen/X86/x86-64-frameaddr.ll
new file mode 100644
index 0000000..57163d3
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-frameaddr.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86-64 | grep movq | grep rbp
+
+define i64* @stack_end_address() nounwind  {
+entry:
+	tail call i8* @llvm.frameaddress( i32 0 )
+	bitcast i8* %0 to i64*
+	ret i64* %1
+}
+
+declare i8* @llvm.frameaddress(i32) nounwind readnone 
diff --git a/test/CodeGen/X86/x86-64-gv-offset.ll b/test/CodeGen/X86/x86-64-gv-offset.ll
new file mode 100644
index 0000000..365e4af
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-gv-offset.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep lea
+
+	%struct.x = type { float, double }
+@X = global %struct.x { float 1.000000e+00, double 2.000000e+00 }, align 16		; <%struct.x*> [#uses=2]
+
+define i32 @main() nounwind  {
+entry:
+	%tmp2 = load float* getelementptr (%struct.x* @X, i32 0, i32 0), align 16		; <float> [#uses=1]
+	%tmp4 = load double* getelementptr (%struct.x* @X, i32 0, i32 1), align 8		; <double> [#uses=1]
+	tail call void @t( float %tmp2, double %tmp4 ) nounwind 
+	ret i32 0
+}
+
+declare void @t(float, double)
diff --git a/test/CodeGen/X86/x86-64-jumps.ll b/test/CodeGen/X86/x86-64-jumps.ll
new file mode 100644
index 0000000..11b40c8
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-jumps.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s 
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin10.0"
+
+define i8 @test1() nounwind ssp {
+entry:
+  %0 = select i1 undef, i8* blockaddress(@test1, %bb), i8* blockaddress(@test1, %bb6) ; <i8*> [#uses=1]
+  indirectbr i8* %0, [label %bb, label %bb6]
+
+bb:                                               ; preds = %entry
+  ret i8 1
+
+bb6:                                              ; preds = %entry
+  ret i8 2
+}
+
+
+; PR5930 - Trunc of block address differences.
+@test.array = internal constant [3 x i32] [i32 trunc (i64 sub (i64 ptrtoint (i8* blockaddress(@test2, %foo) to i64), i64 ptrtoint (i8* blockaddress(@test2, %foo) to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i8* blockaddress(@test2, %bar) to i64), i64 ptrtoint (i8* blockaddress(@test2, %foo) to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (i8* blockaddress(@test2, %hack) to i64), i64 ptrtoint (i8* blockaddress(@test2, %foo) to i64)) to i32)] ; <[3 x i32]*> [#uses=1]
+
+define void @test2(i32 %i) nounwind ssp {
+entry:
+  %i.addr = alloca i32                            ; <i32*> [#uses=2]
+  store i32 %i, i32* %i.addr
+  %tmp = load i32* %i.addr                        ; <i32> [#uses=1]
+  %idxprom = sext i32 %tmp to i64                 ; <i64> [#uses=1]
+  %arrayidx = getelementptr inbounds i32* getelementptr inbounds ([3 x i32]* @test.array, i32 0, i32 0), i64 %idxprom ; <i32*> [#uses=1]
+  %tmp1 = load i32* %arrayidx                     ; <i32> [#uses=1]
+  %idx.ext = sext i32 %tmp1 to i64                ; <i64> [#uses=1]
+  %add.ptr = getelementptr i8* blockaddress(@test2, %foo), i64 %idx.ext ; <i8*> [#uses=1]
+  br label %indirectgoto
+
+foo:                                              ; preds = %indirectgoto, %indirectgoto, %indirectgoto, %indirectgoto, %indirectgoto
+  br label %bar
+
+bar:                                              ; preds = %foo, %indirectgoto
+  br label %hack
+
+hack:                                             ; preds = %bar, %indirectgoto
+  ret void
+
+indirectgoto:                                     ; preds = %entry
+  %indirect.goto.dest = phi i8* [ %add.ptr, %entry ] ; <i8*> [#uses=1]
+  indirectbr i8* %indirect.goto.dest, [label %foo, label %foo, label %bar, label %foo, label %hack, label %foo, label %foo]
+}
diff --git a/test/CodeGen/X86/x86-64-malloc.ll b/test/CodeGen/X86/x86-64-malloc.ll
new file mode 100644
index 0000000..b4f1fa6
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-malloc.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=x86-64 | grep {shll.*3, %edi}
+; PR3829
+; The generated code should multiply by 3 (sizeof i8*) as an i32,
+; not as an i64!
+
+define i8** @test(i32 %sz) {
+	%sub = add i32 %sz, 536870911		; <i32> [#uses=1]
+	%call = malloc i8*, i32 %sub		; <i8**> [#uses=1]
+	ret i8** %call
+}
diff --git a/test/CodeGen/X86/x86-64-mem.ll b/test/CodeGen/X86/x86-64-mem.ll
new file mode 100644
index 0000000..d15f516
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-mem.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -o %t1
+; RUN: grep GOTPCREL %t1 | count 4
+; RUN: grep %%rip      %t1 | count 6
+; RUN: grep movq     %t1 | count 6
+; RUN: grep leaq     %t1 | count 1
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=static -o %t2
+; RUN: grep movl %t2 | count 2
+; RUN: grep movq %t2 | count 2
+
+@ptr = external global i32*		; <i32**> [#uses=1]
+@src = external global [0 x i32]		; <[0 x i32]*> [#uses=1]
+@dst = external global [0 x i32]		; <[0 x i32]*> [#uses=1]
+@lptr = internal global i32* null		; <i32**> [#uses=1]
+@ldst = internal global [500 x i32] zeroinitializer, align 32		; <[500 x i32]*> [#uses=1]
+@lsrc = internal global [500 x i32] zeroinitializer, align 32		; <[500 x i32]*> [#uses=0]
+@bsrc = internal global [500000 x i32] zeroinitializer, align 32		; <[500000 x i32]*> [#uses=0]
+@bdst = internal global [500000 x i32] zeroinitializer, align 32		; <[500000 x i32]*> [#uses=0]
+
+define void @test1() nounwind {
+	%tmp = load i32* getelementptr ([0 x i32]* @src, i32 0, i32 0)		; <i32> [#uses=1]
+	store i32 %tmp, i32* getelementptr ([0 x i32]* @dst, i32 0, i32 0)
+	ret void
+}
+
+define void @test2() nounwind {
+	store i32* getelementptr ([0 x i32]* @dst, i32 0, i32 0), i32** @ptr
+	ret void
+}
+
+define void @test3() nounwind {
+	store i32* getelementptr ([500 x i32]* @ldst, i32 0, i32 0), i32** @lptr
+	br label %return
+
+return:		; preds = %0
+	ret void
+}
diff --git a/test/CodeGen/X86/x86-64-pic-1.ll b/test/CodeGen/X86/x86-64-pic-1.ll
new file mode 100644
index 0000000..46f6d33
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-pic-1.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
+; RUN: grep {callq	f@PLT} %t1
+
+define void @g() {
+entry:
+	call void @f( )
+	ret void
+}
+
+declare void @f()
diff --git a/test/CodeGen/X86/x86-64-pic-10.ll b/test/CodeGen/X86/x86-64-pic-10.ll
new file mode 100644
index 0000000..b6f82e2
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-pic-10.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
+; RUN: grep {callq	g@PLT} %t1
+
+@g = alias weak i32 ()* @f
+
+define void @h() {
+entry:
+	%tmp31 = call i32 @g()
+        ret void
+}
+
+declare extern_weak i32 @f()
diff --git a/test/CodeGen/X86/x86-64-pic-11.ll b/test/CodeGen/X86/x86-64-pic-11.ll
new file mode 100644
index 0000000..4db331c
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-pic-11.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
+; RUN: grep {callq	__fixunsxfti@PLT} %t1
+
+define i128 @f(x86_fp80 %a) nounwind {
+entry:
+	%tmp78 = fptoui x86_fp80 %a to i128
+	ret i128 %tmp78
+}
diff --git a/test/CodeGen/X86/x86-64-pic-2.ll b/test/CodeGen/X86/x86-64-pic-2.ll
new file mode 100644
index 0000000..1ce2de7
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-pic-2.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
+; RUN: grep {callq	f} %t1
+; RUN: not grep {callq	f@PLT} %t1
+
+define void @g() {
+entry:
+	call void @f( )
+	ret void
+}
+
+declare hidden void @f()
diff --git a/test/CodeGen/X86/x86-64-pic-3.ll b/test/CodeGen/X86/x86-64-pic-3.ll
new file mode 100644
index 0000000..aa3c888
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-pic-3.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
+; RUN: grep {callq	f} %t1
+; RUN: not grep {callq	f@PLT} %t1
+
+define void @g() {
+entry:
+	call void @f( )
+	ret void
+}
+
+define internal void @f() {
+entry:
+	ret void
+}
diff --git a/test/CodeGen/X86/x86-64-pic-4.ll b/test/CodeGen/X86/x86-64-pic-4.ll
new file mode 100644
index 0000000..90fc119
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-pic-4.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
+; RUN: grep {movq	a@GOTPCREL(%rip),} %t1
+
+@a = global i32 0
+
+define i32 @get_a() {
+entry:
+	%tmp1 = load i32* @a, align 4
+	ret i32 %tmp1
+}
diff --git a/test/CodeGen/X86/x86-64-pic-5.ll b/test/CodeGen/X86/x86-64-pic-5.ll
new file mode 100644
index 0000000..6369bde
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-pic-5.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
+; RUN: grep {movl	a(%rip),} %t1
+; RUN: not grep GOTPCREL %t1
+
+@a = hidden global i32 0
+
+define i32 @get_a() {
+entry:
+	%tmp1 = load i32* @a, align 4
+	ret i32 %tmp1
+}
diff --git a/test/CodeGen/X86/x86-64-pic-6.ll b/test/CodeGen/X86/x86-64-pic-6.ll
new file mode 100644
index 0000000..6e19ad3
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-pic-6.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
+; RUN: grep {movl	a(%rip),} %t1
+; RUN: not grep GOTPCREL %t1
+
+@a = internal global i32 0
+
+define i32 @get_a() nounwind {
+entry:
+	%tmp1 = load i32* @a, align 4
+	ret i32 %tmp1
+}
diff --git a/test/CodeGen/X86/x86-64-pic-7.ll b/test/CodeGen/X86/x86-64-pic-7.ll
new file mode 100644
index 0000000..4d98ee6
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-pic-7.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
+; RUN: grep {movq	f@GOTPCREL(%rip),} %t1
+
+define void ()* @g() nounwind {
+entry:
+	ret void ()* @f
+}
+
+declare void @f()
diff --git a/test/CodeGen/X86/x86-64-pic-8.ll b/test/CodeGen/X86/x86-64-pic-8.ll
new file mode 100644
index 0000000..d3b567c
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-pic-8.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
+; RUN: grep {leaq	f(%rip),} %t1
+; RUN: not grep GOTPCREL %t1
+
+define void ()* @g() {
+entry:
+	ret void ()* @f
+}
+
+declare hidden void @f()
diff --git a/test/CodeGen/X86/x86-64-pic-9.ll b/test/CodeGen/X86/x86-64-pic-9.ll
new file mode 100644
index 0000000..0761031
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-pic-9.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
+; RUN: grep {leaq	f(%rip),} %t1
+; RUN: not grep GOTPCREL %t1
+
+define void ()* @g() nounwind {
+entry:
+	ret void ()* @f
+}
+
+define internal void @f() nounwind {
+entry:
+	ret void
+}
diff --git a/test/CodeGen/X86/x86-64-ret0.ll b/test/CodeGen/X86/x86-64-ret0.ll
new file mode 100644
index 0000000..c74f6d8
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-ret0.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86-64 | grep mov | count 1
+
+define i32 @f() nounwind  {
+	tail call void @t( i32 1 ) nounwind 
+	ret i32 0
+}
+
+declare void @t(i32)
diff --git a/test/CodeGen/X86/x86-64-shortint.ll b/test/CodeGen/X86/x86-64-shortint.ll
new file mode 100644
index 0000000..7f96543
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-shortint.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s | grep movswl
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-apple-darwin8"
+
+
+define void @bar(i16 zeroext  %A) {
+        tail call void @foo( i16 %A signext  )
+        ret void
+}
+declare void @foo(i16 signext )
+
diff --git a/test/CodeGen/X86/x86-64-sret-return.ll b/test/CodeGen/X86/x86-64-sret-return.ll
new file mode 100644
index 0000000..7b5f189
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-sret-return.ll
@@ -0,0 +1,63 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin8"
+	%struct.foo = type { [4 x i64] }
+
+; CHECK: bar:
+; CHECK: movq %rdi, %rax
+define void @bar(%struct.foo* noalias sret  %agg.result, %struct.foo* %d) nounwind  {
+entry:
+	%d_addr = alloca %struct.foo*		; <%struct.foo**> [#uses=2]
+	%memtmp = alloca %struct.foo, align 8		; <%struct.foo*> [#uses=1]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	store %struct.foo* %d, %struct.foo** %d_addr
+	%tmp = load %struct.foo** %d_addr, align 8		; <%struct.foo*> [#uses=1]
+	%tmp1 = getelementptr %struct.foo* %agg.result, i32 0, i32 0		; <[4 x i64]*> [#uses=4]
+	%tmp2 = getelementptr %struct.foo* %tmp, i32 0, i32 0		; <[4 x i64]*> [#uses=4]
+	%tmp3 = getelementptr [4 x i64]* %tmp1, i32 0, i32 0		; <i64*> [#uses=1]
+	%tmp4 = getelementptr [4 x i64]* %tmp2, i32 0, i32 0		; <i64*> [#uses=1]
+	%tmp5 = load i64* %tmp4, align 8		; <i64> [#uses=1]
+	store i64 %tmp5, i64* %tmp3, align 8
+	%tmp6 = getelementptr [4 x i64]* %tmp1, i32 0, i32 1		; <i64*> [#uses=1]
+	%tmp7 = getelementptr [4 x i64]* %tmp2, i32 0, i32 1		; <i64*> [#uses=1]
+	%tmp8 = load i64* %tmp7, align 8		; <i64> [#uses=1]
+	store i64 %tmp8, i64* %tmp6, align 8
+	%tmp9 = getelementptr [4 x i64]* %tmp1, i32 0, i32 2		; <i64*> [#uses=1]
+	%tmp10 = getelementptr [4 x i64]* %tmp2, i32 0, i32 2		; <i64*> [#uses=1]
+	%tmp11 = load i64* %tmp10, align 8		; <i64> [#uses=1]
+	store i64 %tmp11, i64* %tmp9, align 8
+	%tmp12 = getelementptr [4 x i64]* %tmp1, i32 0, i32 3		; <i64*> [#uses=1]
+	%tmp13 = getelementptr [4 x i64]* %tmp2, i32 0, i32 3		; <i64*> [#uses=1]
+	%tmp14 = load i64* %tmp13, align 8		; <i64> [#uses=1]
+	store i64 %tmp14, i64* %tmp12, align 8
+	%tmp15 = getelementptr %struct.foo* %memtmp, i32 0, i32 0		; <[4 x i64]*> [#uses=4]
+	%tmp16 = getelementptr %struct.foo* %agg.result, i32 0, i32 0		; <[4 x i64]*> [#uses=4]
+	%tmp17 = getelementptr [4 x i64]* %tmp15, i32 0, i32 0		; <i64*> [#uses=1]
+	%tmp18 = getelementptr [4 x i64]* %tmp16, i32 0, i32 0		; <i64*> [#uses=1]
+	%tmp19 = load i64* %tmp18, align 8		; <i64> [#uses=1]
+	store i64 %tmp19, i64* %tmp17, align 8
+	%tmp20 = getelementptr [4 x i64]* %tmp15, i32 0, i32 1		; <i64*> [#uses=1]
+	%tmp21 = getelementptr [4 x i64]* %tmp16, i32 0, i32 1		; <i64*> [#uses=1]
+	%tmp22 = load i64* %tmp21, align 8		; <i64> [#uses=1]
+	store i64 %tmp22, i64* %tmp20, align 8
+	%tmp23 = getelementptr [4 x i64]* %tmp15, i32 0, i32 2		; <i64*> [#uses=1]
+	%tmp24 = getelementptr [4 x i64]* %tmp16, i32 0, i32 2		; <i64*> [#uses=1]
+	%tmp25 = load i64* %tmp24, align 8		; <i64> [#uses=1]
+	store i64 %tmp25, i64* %tmp23, align 8
+	%tmp26 = getelementptr [4 x i64]* %tmp15, i32 0, i32 3		; <i64*> [#uses=1]
+	%tmp27 = getelementptr [4 x i64]* %tmp16, i32 0, i32 3		; <i64*> [#uses=1]
+	%tmp28 = load i64* %tmp27, align 8		; <i64> [#uses=1]
+	store i64 %tmp28, i64* %tmp26, align 8
+	br label %return
+
+return:		; preds = %entry
+	ret void
+}
+
+; CHECK: foo:
+; CHECK: movq %rdi, %rax
+define void @foo({ i64 }* noalias nocapture sret %agg.result) nounwind {
+  store { i64 } { i64 0 }, { i64 }* %agg.result
+  ret void
+}
diff --git a/test/CodeGen/X86/x86-64-varargs.ll b/test/CodeGen/X86/x86-64-varargs.ll
new file mode 100644
index 0000000..428f449
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-varargs.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -code-model=large -relocation-model=static | grep call | not grep rax
+
+@.str = internal constant [26 x i8] c"%d, %f, %d, %lld, %d, %f\0A\00"		; <[26 x i8]*> [#uses=1]
+
+declare i32 @printf(i8*, ...) nounwind 
+
+define i32 @main() nounwind  {
+entry:
+	%tmp10.i = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([26 x i8]* @.str, i32 0, i64 0), i32 12, double 0x3FF3EB8520000000, i32 120, i64 123456677890, i32 -10, double 4.500000e+15 ) nounwind 		; <i32> [#uses=0]
+	ret i32 0
+}
diff --git a/test/CodeGen/X86/x86-frameaddr.ll b/test/CodeGen/X86/x86-frameaddr.ll
new file mode 100644
index 0000000..d595874
--- /dev/null
+++ b/test/CodeGen/X86/x86-frameaddr.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86 | grep mov | grep ebp
+
+define i8* @t() nounwind {
+entry:
+	%0 = tail call i8* @llvm.frameaddress(i32 0)
+	ret i8* %0
+}
+
+declare i8* @llvm.frameaddress(i32) nounwind readnone
diff --git a/test/CodeGen/X86/x86-frameaddr2.ll b/test/CodeGen/X86/x86-frameaddr2.ll
new file mode 100644
index 0000000..c509115
--- /dev/null
+++ b/test/CodeGen/X86/x86-frameaddr2.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86 | grep mov | count 3
+
+define i8* @t() nounwind {
+entry:
+	%0 = tail call i8* @llvm.frameaddress(i32 2)
+	ret i8* %0
+}
+
+declare i8* @llvm.frameaddress(i32) nounwind readnone
diff --git a/test/CodeGen/X86/x86-store-gv-addr.ll b/test/CodeGen/X86/x86-store-gv-addr.ll
new file mode 100644
index 0000000..089517a
--- /dev/null
+++ b/test/CodeGen/X86/x86-store-gv-addr.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=static | not grep lea
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -relocation-model=static | not grep lea
+
+@v = external global i32, align 8
+@v_addr = external global i32*, align 8
+
+define void @t() nounwind optsize {
+	store i32* @v, i32** @v_addr, align 8
+	unreachable
+}
diff --git a/test/CodeGen/X86/xmm-r64.ll b/test/CodeGen/X86/xmm-r64.ll
new file mode 100644
index 0000000..2a6b5c7
--- /dev/null
+++ b/test/CodeGen/X86/xmm-r64.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86-64
+
+define <4 x i32> @test() {
+        %tmp1039 = call <4 x i32> @llvm.x86.sse2.psll.d( <4 x i32> zeroinitializer, <4 x i32> zeroinitializer )               ; <<4 x i32>> [#uses=1]
+        %tmp1040 = bitcast <4 x i32> %tmp1039 to <2 x i64>              ; <<2 x i64>> [#uses=1]
+        %tmp1048 = add <2 x i64> %tmp1040, zeroinitializer              ; <<2 x i64>> [#uses=1]
+        %tmp1048.upgrd.1 = bitcast <2 x i64> %tmp1048 to <4 x i32>              ; <<4 x i32>> [#uses=1]
+        ret <4 x i32> %tmp1048.upgrd.1
+}
+
+declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>)
+
diff --git a/test/CodeGen/X86/xor-icmp.ll b/test/CodeGen/X86/xor-icmp.ll
new file mode 100644
index 0000000..a6bdb13
--- /dev/null
+++ b/test/CodeGen/X86/xor-icmp.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=x86    | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64
+
+define i32 @t(i32 %a, i32 %b) nounwind ssp {
+entry:
+; X32:     t:
+; X32:     xorb
+; X32-NOT: andb
+; X32-NOT: shrb
+; X32:     testb $64
+; X32:     jne
+
+; X64:     t:
+; X64-NOT: setne
+; X64:     xorl
+; X64:     testb $64
+; X64:     jne
+  %0 = and i32 %a, 16384
+  %1 = icmp ne i32 %0, 0
+  %2 = and i32 %b, 16384
+  %3 = icmp ne i32 %2, 0
+  %4 = xor i1 %1, %3
+  br i1 %4, label %bb1, label %bb
+
+bb:                                               ; preds = %entry
+  %5 = tail call i32 (...)* @foo() nounwind       ; <i32> [#uses=1]
+  ret i32 %5
+
+bb1:                                              ; preds = %entry
+  %6 = tail call i32 (...)* @bar() nounwind       ; <i32> [#uses=1]
+  ret i32 %6
+}
+
+declare i32 @foo(...)
+
+declare i32 @bar(...)
diff --git a/test/CodeGen/X86/xor.ll b/test/CodeGen/X86/xor.ll
new file mode 100644
index 0000000..9bfff8a
--- /dev/null
+++ b/test/CodeGen/X86/xor.ll
@@ -0,0 +1,144 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2  | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64
+
+; Though it is undefined, we want xor undef,undef to produce zero.
+define <4 x i32> @test1() nounwind {
+	%tmp = xor <4 x i32> undef, undef
+	ret <4 x i32> %tmp
+        
+; X32: test1:
+; X32:	xorps	%xmm0, %xmm0
+; X32:	ret
+}
+
+; Though it is undefined, we want xor undef,undef to produce zero.
+define i32 @test2() nounwind{
+	%tmp = xor i32 undef, undef
+	ret i32 %tmp
+; X32: test2:
+; X32:	xorl	%eax, %eax
+; X32:	ret
+}
+
+define i32 @test3(i32 %a, i32 %b) nounwind  {
+entry:
+        %tmp1not = xor i32 %b, -2
+	%tmp3 = and i32 %tmp1not, %a
+        %tmp4 = lshr i32 %tmp3, 1
+        ret i32 %tmp4
+        
+; X64: test3:
+; X64:	notl	%esi
+; X64:	andl	%edi, %esi
+; X64:	movl	%esi, %eax
+; X64:	shrl	%eax
+; X64:	ret
+
+; X32: test3:
+; X32: 	movl	8(%esp), %eax
+; X32: 	notl	%eax
+; X32: 	andl	4(%esp), %eax
+; X32: 	shrl	%eax
+; X32: 	ret
+}
+
+define i32 @test4(i32 %a, i32 %b) nounwind  {
+entry:
+        br label %bb
+bb:
+	%b_addr.0 = phi i32 [ %b, %entry ], [ %tmp8, %bb ]
+        %a_addr.0 = phi i32 [ %a, %entry ], [ %tmp3, %bb ]
+	%tmp3 = xor i32 %a_addr.0, %b_addr.0
+        %tmp4not = xor i32 %tmp3, 2147483647
+        %tmp6 = and i32 %tmp4not, %b_addr.0
+        %tmp8 = shl i32 %tmp6, 1
+        %tmp10 = icmp eq i32 %tmp8, 0
+	br i1 %tmp10, label %bb12, label %bb
+bb12:
+	ret i32 %tmp3
+        
+; X64: test4:
+; X64:    notl	[[REG:%[a-z]+]]
+; X64:    andl	{{.*}}[[REG]]
+; X32: test4:
+; X32:    notl	[[REG:%[a-z]+]]
+; X32:    andl	{{.*}}[[REG]]
+}
+
+define i16 @test5(i16 %a, i16 %b) nounwind  {
+entry:
+        br label %bb
+bb:
+	%b_addr.0 = phi i16 [ %b, %entry ], [ %tmp8, %bb ]
+        %a_addr.0 = phi i16 [ %a, %entry ], [ %tmp3, %bb ]
+	%tmp3 = xor i16 %a_addr.0, %b_addr.0
+        %tmp4not = xor i16 %tmp3, 32767
+        %tmp6 = and i16 %tmp4not, %b_addr.0
+        %tmp8 = shl i16 %tmp6, 1
+        %tmp10 = icmp eq i16 %tmp8, 0
+	br i1 %tmp10, label %bb12, label %bb
+bb12:
+	ret i16 %tmp3
+; X64: test5:
+; X64:    notw	[[REG:%[a-z]+]]
+; X64:    andw	{{.*}}[[REG]]
+; X32: test5:
+; X32:    notw	[[REG:%[a-z]+]]
+; X32:    andw	{{.*}}[[REG]]
+}
+
+define i8 @test6(i8 %a, i8 %b) nounwind  {
+entry:
+        br label %bb
+bb:
+	%b_addr.0 = phi i8 [ %b, %entry ], [ %tmp8, %bb ]
+        %a_addr.0 = phi i8 [ %a, %entry ], [ %tmp3, %bb ]
+	%tmp3 = xor i8 %a_addr.0, %b_addr.0
+        %tmp4not = xor i8 %tmp3, 127
+        %tmp6 = and i8 %tmp4not, %b_addr.0
+        %tmp8 = shl i8 %tmp6, 1
+        %tmp10 = icmp eq i8 %tmp8, 0
+	br i1 %tmp10, label %bb12, label %bb
+bb12:
+	ret i8 %tmp3
+; X64: test6:
+; X64:    notb	[[REG:%[a-z]+]]
+; X64:    andb	{{.*}}[[REG]]
+; X32: test6:
+; X32:    notb	[[REG:%[a-z]+]]
+; X32:    andb	{{.*}}[[REG]]
+}
+
+define i32 @test7(i32 %a, i32 %b) nounwind  {
+entry:
+        br label %bb
+bb:
+	%b_addr.0 = phi i32 [ %b, %entry ], [ %tmp8, %bb ]
+        %a_addr.0 = phi i32 [ %a, %entry ], [ %tmp3, %bb ]
+	%tmp3 = xor i32 %a_addr.0, %b_addr.0
+        %tmp4not = xor i32 %tmp3, 2147483646
+        %tmp6 = and i32 %tmp4not, %b_addr.0
+        %tmp8 = shl i32 %tmp6, 1
+        %tmp10 = icmp eq i32 %tmp8, 0
+	br i1 %tmp10, label %bb12, label %bb
+bb12:
+	ret i32 %tmp3
+; X64: test7:
+; X64:    xorl	$2147483646, [[REG:%[a-z]+]]
+; X64:    andl	{{.*}}[[REG]]
+; X32: test7:
+; X32:    xorl	$2147483646, [[REG:%[a-z]+]]
+; X32:    andl	{{.*}}[[REG]]
+}
+
+define i32 @test8(i32 %a) nounwind {
+; rdar://7553032
+entry:
+  %t1 = sub i32 0, %a
+  %t2 = add i32 %t1, -1
+  ret i32 %t2
+; X64: test8:
+; X64:   notl %eax
+; X32: test8:
+; X32:   notl %eax
+}
diff --git a/test/CodeGen/X86/zero-remat.ll b/test/CodeGen/X86/zero-remat.ll
new file mode 100644
index 0000000..3e3bb95
--- /dev/null
+++ b/test/CodeGen/X86/zero-remat.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK-64
+; RUN: llc < %s -march=x86-64 -stats  -info-output-file - | grep asm-printer  | grep 12
+; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=CHECK-32
+
+declare void @bar(double %x)
+declare void @barf(float %x)
+
+define double @foo() nounwind {
+
+  call void @bar(double 0.0)
+  ret double 0.0
+
+;CHECK-32: foo:
+;CHECK-32: call
+;CHECK-32: fldz
+;CHECK-32: ret
+
+;CHECK-64: foo:
+;CHECK-64: pxor
+;CHECK-64: call
+;CHECK-64: pxor
+;CHECK-64: ret
+}
+
+
+define float @foof() nounwind {
+  call void @barf(float 0.0)
+  ret float 0.0
+
+;CHECK-32: foof:
+;CHECK-32: call
+;CHECK-32: fldz
+;CHECK-32: ret
+
+;CHECK-64: foof:
+;CHECK-64: pxor
+;CHECK-64: call
+;CHECK-64: pxor
+;CHECK-64: ret
+}
diff --git a/test/CodeGen/X86/zext-inreg-0.ll b/test/CodeGen/X86/zext-inreg-0.ll
new file mode 100644
index 0000000..ae6221a
--- /dev/null
+++ b/test/CodeGen/X86/zext-inreg-0.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -march=x86 | not grep and
+; RUN: llc < %s -march=x86-64 > %t
+; RUN: not grep and %t
+; RUN: not grep movzbq %t
+; RUN: not grep movzwq %t
+; RUN: not grep movzlq %t
+
+; These should use movzbl instead of 'and 255'.
+; This is related to not having a ZERO_EXTEND_REG opcode.
+
+define i32 @a(i32 %d) nounwind  {
+        %e = add i32 %d, 1
+        %retval = and i32 %e, 255
+        ret i32 %retval
+}
+define i32 @b(float %d) nounwind  {
+        %tmp12 = fptoui float %d to i8
+        %retval = zext i8 %tmp12 to i32
+        ret i32 %retval
+}
+define i32 @c(i32 %d) nounwind  {
+        %e = add i32 %d, 1
+        %retval = and i32 %e, 65535
+        ret i32 %retval
+}
+define i64 @d(i64 %d) nounwind  {
+        %e = add i64 %d, 1
+        %retval = and i64 %e, 255
+        ret i64 %retval
+}
+define i64 @e(i64 %d) nounwind  {
+        %e = add i64 %d, 1
+        %retval = and i64 %e, 65535
+        ret i64 %retval
+}
+define i64 @f(i64 %d) nounwind  {
+        %e = add i64 %d, 1
+        %retval = and i64 %e, 4294967295
+        ret i64 %retval
+}
+
+define i32 @g(i8 %d) nounwind  {
+        %e = add i8 %d, 1
+        %retval = zext i8 %e to i32
+        ret i32 %retval
+}
+define i32 @h(i16 %d) nounwind  {
+        %e = add i16 %d, 1
+        %retval = zext i16 %e to i32
+        ret i32 %retval
+}
+define i64 @i(i8 %d) nounwind  {
+        %e = add i8 %d, 1
+        %retval = zext i8 %e to i64
+        ret i64 %retval
+}
+define i64 @j(i16 %d) nounwind  {
+        %e = add i16 %d, 1
+        %retval = zext i16 %e to i64
+        ret i64 %retval
+}
+define i64 @k(i32 %d) nounwind  {
+        %e = add i32 %d, 1
+        %retval = zext i32 %e to i64
+        ret i64 %retval
+}
diff --git a/test/CodeGen/X86/zext-inreg-1.ll b/test/CodeGen/X86/zext-inreg-1.ll
new file mode 100644
index 0000000..17fe374
--- /dev/null
+++ b/test/CodeGen/X86/zext-inreg-1.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=x86 | not grep and
+
+; These tests differ from the ones in zext-inreg-0.ll in that
+; on x86-64 they do require and instructions.
+
+; These should use movzbl instead of 'and 255'.
+; This is related to not having a ZERO_EXTEND_REG node.
+
+define i64 @l(i64 %d) nounwind  {
+        %e = add i64 %d, 1
+        %retval = and i64 %e, 1099511627775
+        ret i64 %retval
+}
+define i64 @m(i64 %d) nounwind  {
+        %e = add i64 %d, 1
+        %retval = and i64 %e, 281474976710655
+        ret i64 %retval
+}
diff --git a/test/CodeGen/X86/zext-shl.ll b/test/CodeGen/X86/zext-shl.ll
new file mode 100644
index 0000000..928848e
--- /dev/null
+++ b/test/CodeGen/X86/zext-shl.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+define i32 @t1(i8 zeroext %x) nounwind readnone ssp {
+entry:
+; CHECK: t1:
+; CHECK: shll
+; CHECK-NOT: movzwl
+; CHECK: ret
+  %0 = zext i8 %x to i16
+  %1 = shl i16 %0, 5
+  %2 = zext i16 %1 to i32
+  ret i32 %2
+}
+
+define i32 @t2(i8 zeroext %x) nounwind readnone ssp {
+entry:
+; CHECK: t2:
+; CHECK: shrl
+; CHECK-NOT: movzwl
+; CHECK: ret
+  %0 = zext i8 %x to i16
+  %1 = lshr i16 %0, 3
+  %2 = zext i16 %1 to i32
+  ret i32 %2
+}
diff --git a/test/CodeGen/X86/zext-trunc.ll b/test/CodeGen/X86/zext-trunc.ll
new file mode 100644
index 0000000..b9ffbe8
--- /dev/null
+++ b/test/CodeGen/X86/zext-trunc.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; rdar://7570931
+
+define i64 @foo(i64 %a, i64 %b) nounwind {
+; CHECK: foo:
+; CHECK: leal
+; CHECK-NOT: movl
+; CHECK: ret
+  %c = add i64 %a, %b
+  %d = trunc i64 %c to i32
+  %e = zext i32 %d to i64
+  ret i64 %e
+}