Revert "Codegen: Tail-duplicate during placement."

This reverts commit ff234efbe23528e4f4c80c78057b920a51f434b2.

The reverted commit was causing crashes on the AArch64 build.

llvm-svn: 283172
diff --git a/llvm/test/CodeGen/AArch64/arm64-extload-knownzero.ll b/llvm/test/CodeGen/AArch64/arm64-extload-knownzero.ll
index 5dd8cb2..92ce2a0 100644
--- a/llvm/test/CodeGen/AArch64/arm64-extload-knownzero.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-extload-knownzero.ll
@@ -12,6 +12,7 @@
   %tmp2 = load i16, i16* %ptr, align 2
   br label %bb2
 bb2:
+; CHECK: %bb2
 ; CHECK-NOT: and {{w[0-9]+}}, [[REG]], #0xffff
 ; CHECK: cmp [[REG]], #23
   %tmp3 = phi i16 [ 0, %entry ], [ %tmp2, %bb1 ]
diff --git a/llvm/test/CodeGen/AArch64/machine_cse.ll b/llvm/test/CodeGen/AArch64/machine_cse.ll
index e9fa680..032199e 100644
--- a/llvm/test/CodeGen/AArch64/machine_cse.ll
+++ b/llvm/test/CodeGen/AArch64/machine_cse.ll
@@ -1,8 +1,4 @@
-; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 -tail-dup-placement=0 | FileCheck %s
-; -tail-dup-placement causes tail duplication during layout. This breaks the
-; assumptions of the test case as written (specifically, it creates an
-; additional cmp instruction, creating a false positive), so we pass
-; -tail-dup-placement=0 to restore the original behavior
+; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 | FileCheck %s
 
 ; marked as external to prevent possible optimizations
 @a = external global i32
diff --git a/llvm/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll b/llvm/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll
index 83c7676..15b7e94 100644
--- a/llvm/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll
+++ b/llvm/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll
@@ -25,6 +25,7 @@
   br label %bb2
 
 bb2:                                              ; preds = %bb1, %entry
+; CHECK: bb2
 ; CHECK: cmp [[REG]], #0
 ; CHECK: ble
   %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
diff --git a/llvm/test/CodeGen/PowerPC/branch-opt.ll b/llvm/test/CodeGen/PowerPC/branch-opt.ll
index e714972..b3c0dba 100644
--- a/llvm/test/CodeGen/PowerPC/branch-opt.ll
+++ b/llvm/test/CodeGen/PowerPC/branch-opt.ll
@@ -1,21 +1,9 @@
-; RUN: llc -verify-machineinstrs < %s -march=ppc32 | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -march=ppc32 | \
+; RUN:   grep "b LBB.*" | count 4
 
 target datalayout = "E-p:32:32"
 target triple = "powerpc-apple-darwin8.7.0"
 
-;CHECK-LABEL: foo:
-; There are 4 inner loops (%bb, %bb12, %bb25, %bb38) that all exit to %cond_next48
-; The last (whichever it is) should have a fallthrough exit, and the other three
-; need an unconditional branch. No other block should have an unconditional
-; branch to cond_next48
-; One of the blocks ends up with a loop exit block that gets a tail-duplicated copy
-; of %cond_next48, so there should only be two unconditional branches.
-
-;CHECK: b LBB0_13
-;CHECK: b LBB0_13
-;CHECK-NOT: b LBB0_13
-;CHECK: LBB0_13: ; %cond_next48
-
 define void @foo(i32 %W, i32 %X, i32 %Y, i32 %Z) {
 entry:
 	%tmp1 = and i32 %W, 1		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/PowerPC/sjlj.ll b/llvm/test/CodeGen/PowerPC/sjlj.ll
index df4fb95..8c6682c 100644
--- a/llvm/test/CodeGen/PowerPC/sjlj.ll
+++ b/llvm/test/CodeGen/PowerPC/sjlj.ll
@@ -74,24 +74,24 @@
 ; CHECK-DAG: std [[REGA]], [[OFF:[0-9]+]](31)                  # 8-byte Folded Spill
 ; CHECK-DAG: std 1, 16([[REGA]])
 ; CHECK-DAG: std 2, 24([[REGA]])
-; CHECK: bcl 20, 31, .LBB1_3
+; CHECK: bcl 20, 31, .LBB1_5
 ; CHECK: li 3, 1
-; CHECK: #EH_SjLj_Setup	.LBB1_3
+; CHECK: #EH_SjLj_Setup	.LBB1_5
 ; CHECK: b .LBB1_1
 
-; CHECK: .LBB1_3:
-; CHECK: mflr [[REGL:[0-9]+]]
-; CHECK: ld [[REG2:[0-9]+]], [[OFF]](31)                   # 8-byte Folded Reload
-; CHECK: std [[REGL]], 8([[REG2]])
-; CHECK: li 3, 0
-
-; CHECK: .LBB1_5:
+; CHECK: .LBB1_4:
 
 ; CHECK: lfd
 ; CHECK: lvx
 ; CHECK: ld
 ; CHECK: blr
 
+; CHECK: .LBB1_5:
+; CHECK: mflr [[REGL:[0-9]+]]
+; CHECK: ld [[REG2:[0-9]+]], [[OFF]](31)                   # 8-byte Folded Reload
+; CHECK: std [[REGL]], 8([[REG2]])
+; CHECK: li 3, 0
+
 ; CHECK-NOAV: @main
 ; CHECK-NOAV-NOT: stvx
 ; CHECK-NOAV: bcl
diff --git a/llvm/test/CodeGen/PowerPC/tail-dup-layout.ll b/llvm/test/CodeGen/PowerPC/tail-dup-layout.ll
deleted file mode 100644
index 6790aa8..0000000
--- a/llvm/test/CodeGen/PowerPC/tail-dup-layout.ll
+++ /dev/null
@@ -1,100 +0,0 @@
-; RUN: llc -outline-optional-branches -O2 < %s | FileCheck %s
-target datalayout = "e-m:e-i64:64-n32:64"
-target triple = "powerpc64le-grtev4-linux-gnu"
-
-; Intended layout:
-; The outlining flag produces the layout
-; test1
-; test2
-; test3
-; test4
-; exit
-; optional1
-; optional2
-; optional3
-; optional4
-; Tail duplication puts test n+1 at the end of optional n
-; so optional1 includes a copy of test2 at the end, and branches
-; to test3 (at the top) or falls through to optional 2.
-; The CHECK statements check for the whole string of tests and exit block,
-; and then check that the correct test has been duplicated into the end of
-; the optional blocks and that the optional blocks are in the correct order.
-;CHECK-LABEL: f:
-; test1 may have been merged with entry
-;CHECK: mr [[TAGREG:[0-9]+]], 3
-;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
-;CHECK-NEXT: bc 12, 1, [[OPT1LABEL:[._0-9A-Za-z]+]]
-;CHECK-NEXT: [[TEST2LABEL:[._0-9A-Za-z]+]]: # %test2
-;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
-;CHECK-NEXT: bne 0, [[OPT2LABEL:[._0-9A-Za-z]+]]
-;CHECK-NEXT: [[TEST3LABEL:[._0-9A-Za-z]+]]: # %test3
-;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
-;CHECK-NEXT: bne 0, .[[OPT3LABEL:[._0-9A-Za-z]+]]
-;CHECK-NEXT: [[TEST4LABEL:[._0-9A-Za-z]+]]: # %test4
-;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
-;CHECK-NEXT: bne 0, .[[OPT4LABEL:[._0-9A-Za-z]+]]
-;CHECK-NEXT: [[EXITLABEL:[._0-9A-Za-z]+]]: # %exit
-;CHECK: blr
-;CHECK-NEXT: [[OPT1LABEL]]
-;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
-;CHECK-NEXT: beq 0, [[TEST3LABEL]]
-;CHECK-NEXT: [[OPT2LABEL]]
-;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
-;CHECK-NEXT: beq 0, [[TEST4LABEL]]
-;CHECK-NEXT: [[OPT3LABEL]]
-;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
-;CHECK-NEXT: beq 0, [[EXITLABEL]]
-;CHECK-NEXT: [[OPT4LABEL]]
-;CHECK: b [[EXITLABEL]]
-
-define void @f(i32 %tag) {
-entry:
-  br label %test1
-test1:
-  %tagbit1 = and i32 %tag, 1
-  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
-  br i1 %tagbit1eq0, label %test2, label %optional1
-optional1:
-  call void @a()
-  call void @a()
-  call void @a()
-  call void @a()
-  br label %test2
-test2:
-  %tagbit2 = and i32 %tag, 2
-  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
-  br i1 %tagbit2eq0, label %test3, label %optional2
-optional2:
-  call void @b()
-  call void @b()
-  call void @b()
-  call void @b()
-  br label %test3
-test3:
-  %tagbit3 = and i32 %tag, 4
-  %tagbit3eq0 = icmp eq i32 %tagbit3, 0
-  br i1 %tagbit3eq0, label %test4, label %optional3
-optional3:
-  call void @c()
-  call void @c()
-  call void @c()
-  call void @c()
-  br label %test4
-test4:
-  %tagbit4 = and i32 %tag, 8
-  %tagbit4eq0 = icmp eq i32 %tagbit4, 0
-  br i1 %tagbit4eq0, label %exit, label %optional4
-optional4:
-  call void @d()
-  call void @d()
-  call void @d()
-  call void @d()
-  br label %exit
-exit:
-  ret void
-}
-
-declare void @a()
-declare void @b()
-declare void @c()
-declare void @d()
diff --git a/llvm/test/CodeGen/WebAssembly/cfg-stackify.ll b/llvm/test/CodeGen/WebAssembly/cfg-stackify.ll
index 428095f..9a03622 100644
--- a/llvm/test/CodeGen/WebAssembly/cfg-stackify.ll
+++ b/llvm/test/CodeGen/WebAssembly/cfg-stackify.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false | FileCheck %s
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -tail-dup-placement=0 -verify-machineinstrs -fast-isel=false | FileCheck -check-prefix=OPT %s
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -verify-machineinstrs -fast-isel=false | FileCheck -check-prefix=OPT %s
 
 ; Test the CFG stackifier pass.
 
diff --git a/llvm/test/CodeGen/WebAssembly/mem-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/mem-intrinsics.ll
index 0ac1e1e..71787fe 100644
--- a/llvm/test/CodeGen/WebAssembly/mem-intrinsics.ll
+++ b/llvm/test/CodeGen/WebAssembly/mem-intrinsics.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -tail-dup-placement=0| FileCheck %s
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt | FileCheck %s
 
 ; Test memcpy, memmove, and memset intrinsics.
 
diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll
index 39e2964..b690316 100644
--- a/llvm/test/CodeGen/X86/block-placement.ll
+++ b/llvm/test/CodeGen/X86/block-placement.ll
@@ -177,12 +177,6 @@
   ret i32 %sum
 }
 
-; Tail duplication during layout can entirely remove body0 by duplicating it
-; into the entry block and into body1. This is a good thing but it isn't what
-; this test is looking for. So to make the blocks longer so they don't get
-; duplicated, we add some calls to dummy.
-declare void @dummy()
-
 define i32 @test_loop_rotate(i32 %i, i32* %a) {
 ; Check that we rotate conditional exits from the loop to the bottom of the
 ; loop, eliminating unconditional branches to the top.
@@ -200,8 +194,6 @@
   %base = phi i32 [ 0, %entry ], [ %sum, %body1 ]
   %next = add i32 %iv, 1
   %exitcond = icmp eq i32 %next, %i
-  call void @dummy()
-  call void @dummy()
   br i1 %exitcond, label %exit, label %body1
 
 body1:
@@ -953,7 +945,7 @@
 ; First rotated loop top.
 ; CHECK: .p2align
 ; CHECK: %while.end
-; %for.cond gets completely tail-duplicated away.
+; CHECK: %for.cond
 ; CHECK: %if.then
 ; CHECK: %if.else
 ; CHECK: %if.end10
diff --git a/llvm/test/CodeGen/X86/cmov-into-branch.ll b/llvm/test/CodeGen/X86/cmov-into-branch.ll
index 6e4762b..c0c6fc4 100644
--- a/llvm/test/CodeGen/X86/cmov-into-branch.ll
+++ b/llvm/test/CodeGen/X86/cmov-into-branch.ll
@@ -105,11 +105,9 @@
 ; CHECK-NEXT:    testl %edi, %edi
 ; CHECK-NEXT:    je [[LABEL_BB6:.*]]
 ; CHECK:         movl %edi, %eax
-; CHECK-NEXT:    retq
 ; CHECK:         [[LABEL_BB6]]
 ; CHECK-NEXT:    movl %esi, %edi
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    retq
+; CHECK-NEXT:    jmp
 ;
   %cmp = icmp ne i32 %a, 0
   %sel = select i1 %cmp, i32 %a, i32 %b, !prof !2
diff --git a/llvm/test/CodeGen/X86/fma-intrinsics-phi-213-to-231.ll b/llvm/test/CodeGen/X86/fma-intrinsics-phi-213-to-231.ll
index 78e7471..8d0318b 100644
--- a/llvm/test/CodeGen/X86/fma-intrinsics-phi-213-to-231.ll
+++ b/llvm/test/CodeGen/X86/fma-intrinsics-phi-213-to-231.ll
@@ -2,7 +2,7 @@
 
 ; CHECK-LABEL: fmaddsubpd_loop_128:
 ; CHECK:   vfmaddsub231pd %xmm1, %xmm0, %xmm2
-; CHECK:   vmovapd %xmm2, %xmm0
+; CHECK:   vmovaps %xmm2, %xmm0
 ; CHECK-NEXT: retq
 define <2 x double> @fmaddsubpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
 entry:
@@ -28,7 +28,7 @@
 
 ; CHECK-LABEL: fmsubaddpd_loop_128:
 ; CHECK:   vfmsubadd231pd %xmm1, %xmm0, %xmm2
-; CHECK:   vmovapd %xmm2, %xmm0
+; CHECK:   vmovaps %xmm2, %xmm0
 ; CHECK-NEXT: retq
 define <2 x double> @fmsubaddpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
 entry:
@@ -54,7 +54,7 @@
 
 ; CHECK-LABEL: fmaddpd_loop_128:
 ; CHECK:   vfmadd231pd %xmm1, %xmm0, %xmm2
-; CHECK:   vmovapd %xmm2, %xmm0
+; CHECK:   vmovaps %xmm2, %xmm0
 ; CHECK-NEXT: retq
 define <2 x double> @fmaddpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
 entry:
@@ -80,7 +80,7 @@
 
 ; CHECK-LABEL: fmsubpd_loop_128:
 ; CHECK:   vfmsub231pd %xmm1, %xmm0, %xmm2
-; CHECK:   vmovapd %xmm2, %xmm0
+; CHECK:   vmovaps %xmm2, %xmm0
 ; CHECK-NEXT: retq
 define <2 x double> @fmsubpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
 entry:
@@ -106,7 +106,7 @@
 
 ; CHECK-LABEL: fnmaddpd_loop_128:
 ; CHECK:   vfnmadd231pd %xmm1, %xmm0, %xmm2
-; CHECK:   vmovapd %xmm2, %xmm0
+; CHECK:   vmovaps %xmm2, %xmm0
 ; CHECK-NEXT: retq
 define <2 x double> @fnmaddpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
 entry:
@@ -132,7 +132,7 @@
 
 ; CHECK-LABEL: fnmsubpd_loop_128:
 ; CHECK:   vfnmsub231pd %xmm1, %xmm0, %xmm2
-; CHECK:   vmovapd %xmm2, %xmm0
+; CHECK:   vmovaps %xmm2, %xmm0
 ; CHECK-NEXT: retq
 define <2 x double> @fnmsubpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
 entry:
@@ -329,7 +329,7 @@
 
 ; CHECK-LABEL: fmaddsubpd_loop_256:
 ; CHECK:   vfmaddsub231pd %ymm1, %ymm0, %ymm2
-; CHECK:   vmovapd %ymm2, %ymm0
+; CHECK:   vmovaps %ymm2, %ymm0
 ; CHECK-NEXT: retq
 define <4 x double> @fmaddsubpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
 entry:
@@ -355,7 +355,7 @@
 
 ; CHECK-LABEL: fmsubaddpd_loop_256:
 ; CHECK:   vfmsubadd231pd %ymm1, %ymm0, %ymm2
-; CHECK:   vmovapd %ymm2, %ymm0
+; CHECK:   vmovaps %ymm2, %ymm0
 ; CHECK-NEXT: retq
 define <4 x double> @fmsubaddpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
 entry:
@@ -381,7 +381,7 @@
 
 ; CHECK-LABEL: fmaddpd_loop_256:
 ; CHECK:   vfmadd231pd %ymm1, %ymm0, %ymm2
-; CHECK:   vmovapd %ymm2, %ymm0
+; CHECK:   vmovaps %ymm2, %ymm0
 ; CHECK-NEXT: retq
 define <4 x double> @fmaddpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
 entry:
@@ -407,7 +407,7 @@
 
 ; CHECK-LABEL: fmsubpd_loop_256:
 ; CHECK:   vfmsub231pd %ymm1, %ymm0, %ymm2
-; CHECK:   vmovapd %ymm2, %ymm0
+; CHECK:   vmovaps %ymm2, %ymm0
 ; CHECK-NEXT: retq
 define <4 x double> @fmsubpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
 entry:
@@ -433,7 +433,7 @@
 
 ; CHECK-LABEL: fnmaddpd_loop_256:
 ; CHECK:   vfnmadd231pd %ymm1, %ymm0, %ymm2
-; CHECK:   vmovapd %ymm2, %ymm0
+; CHECK:   vmovaps %ymm2, %ymm0
 ; CHECK-NEXT: retq
 define <4 x double> @fnmaddpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
 entry:
@@ -459,7 +459,7 @@
 
 ; CHECK-LABEL: fnmsubpd_loop_256:
 ; CHECK:   vfnmsub231pd %ymm1, %ymm0, %ymm2
-; CHECK:   vmovapd %ymm2, %ymm0
+; CHECK:   vmovaps %ymm2, %ymm0
 ; CHECK-NEXT: retq
 define <4 x double> @fnmsubpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
 entry:
diff --git a/llvm/test/CodeGen/X86/fp-une-cmp.ll b/llvm/test/CodeGen/X86/fp-une-cmp.ll
index e3b2a04..6530400 100644
--- a/llvm/test/CodeGen/X86/fp-une-cmp.ll
+++ b/llvm/test/CodeGen/X86/fp-une-cmp.ll
@@ -56,11 +56,11 @@
 ; CHECK-NEXT:    ucomisd %xmm1, %xmm0
 ; CHECK-NEXT:    jne .LBB1_1
 ; CHECK-NEXT:    jp .LBB1_1
-; CHECK-NEXT:  # %bb2
+; CHECK-NEXT:  .LBB1_2: # %bb2
 ; CHECK-NEXT:    retq
 ; CHECK-NEXT:  .LBB1_1: # %bb1
 ; CHECK-NEXT:    addsd {{.*}}(%rip), %xmm0
-; CHECK-NEXT:    retq
+; CHECK-NEXT:    jmp .LBB1_2
 
 entry:
   %mul = fmul double %x, %y
diff --git a/llvm/test/CodeGen/X86/pr11202.ll b/llvm/test/CodeGen/X86/pr11202.ll
index cb1a749..13070d1 100644
--- a/llvm/test/CodeGen/X86/pr11202.ll
+++ b/llvm/test/CodeGen/X86/pr11202.ll
@@ -15,8 +15,5 @@
   br label %l1
 }
 
-; It is correct for either l1 or l2 to be removed.
-; If l2 is removed, the message should be "Address of block that was removed by CodeGen"
-; If l1 is removed, it should be "Block address taken."
-; CHECK: .Ltmp0:                                 # {{Address of block that was removed by CodeGen|Block address taken}}
+; CHECK: .Ltmp0:                                 # Address of block that was removed by CodeGen
 ; CHECK: .quad	.Ltmp0
diff --git a/llvm/test/CodeGen/X86/ragreedy-bug.ll b/llvm/test/CodeGen/X86/ragreedy-bug.ll
index bfeb041..e842631 100644
--- a/llvm/test/CodeGen/X86/ragreedy-bug.ll
+++ b/llvm/test/CodeGen/X86/ragreedy-bug.ll
@@ -3,34 +3,16 @@
 ; This testing case is reduced from 197.parser prune_match function.
 ; We make sure register copies are not generated on isupper.exit blocks.
 
-; isupper.exit and isupper.exit223 get tail-duplicated into all their
-; predecessors.
-; CHECK: cond.true.i.i
+; CHECK: isupper.exit
 ; CHECK-NEXT: in Loop
-; Mem-move
-; CHECK-NEXT: movl
-; CHECK-NEXT: andl
 ; CHECK-NEXT: testl
 ; CHECK-NEXT: jne
-; CHECK: cond.true.i.i217
+; CHECK: isupper.exit
 ; CHECK-NEXT: in Loop
-; Mem-move
-; CHECK-NEXT: movl
-; CHECK-NEXT: andl
 ; CHECK-NEXT: testl
 ; CHECK-NEXT: je
-; CHECK: cond.false.i.i
 ; CHECK: maskrune
-; CHECK-NEXT: movzbl
-; CHECK-NEXT: movzbl
-; CHECK-NEXT: testl
-; CHECK-NEXT: je
-; CHECK: cond.false.i.i219
 ; CHECK: maskrune
-; CHECK-NEXT: movzbl
-; CHECK-NEXT: movzbl
-; CHECK-NEXT: testl
-; CHECK-NEXT: jne
 
 %struct.List_o_links_struct = type { i32, i32, i32, %struct.List_o_links_struct* }
 %struct.Connector_struct = type { i16, i16, i8, i8, %struct.Connector_struct*, i8* }
diff --git a/llvm/test/CodeGen/X86/sse1.ll b/llvm/test/CodeGen/X86/sse1.ll
index beedb1d..3ac6ea6 100644
--- a/llvm/test/CodeGen/X86/sse1.ll
+++ b/llvm/test/CodeGen/X86/sse1.ll
@@ -58,23 +58,21 @@
 ; X32-NEXT:    je .LBB1_1
 ; X32-NEXT:  # BB#2: # %entry
 ; X32-NEXT:    xorps %xmm1, %xmm1
-; X32-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
-; X32-NEXT:    jne .LBB1_5
-; X32-NEXT:    jmp .LBB1_4
+; X32-NEXT:    jmp .LBB1_3
 ; X32-NEXT:  .LBB1_1:
 ; X32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT:  .LBB1_3: # %entry
 ; X32-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
 ; X32-NEXT:    je .LBB1_4
-; X32-NEXT:  .LBB1_5: # %entry
+; X32-NEXT:  # BB#5: # %entry
 ; X32-NEXT:    xorps %xmm2, %xmm2
-; X32-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
-; X32-NEXT:    jne .LBB1_8
-; X32-NEXT:    jmp .LBB1_7
+; X32-NEXT:    jmp .LBB1_6
 ; X32-NEXT:  .LBB1_4:
 ; X32-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32-NEXT:  .LBB1_6: # %entry
 ; X32-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
 ; X32-NEXT:    je .LBB1_7
-; X32-NEXT:  .LBB1_8: # %entry
+; X32-NEXT:  # BB#8: # %entry
 ; X32-NEXT:    xorps %xmm3, %xmm3
 ; X32-NEXT:    jmp .LBB1_9
 ; X32-NEXT:  .LBB1_7:
@@ -97,23 +95,21 @@
 ; X64-NEXT:    je .LBB1_1
 ; X64-NEXT:  # BB#2: # %entry
 ; X64-NEXT:    xorps %xmm1, %xmm1
-; X64-NEXT:    testl %edx, %edx
-; X64-NEXT:    jne .LBB1_5
-; X64-NEXT:    jmp .LBB1_4
+; X64-NEXT:    jmp .LBB1_3
 ; X64-NEXT:  .LBB1_1:
 ; X64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT:  .LBB1_3: # %entry
 ; X64-NEXT:    testl %edx, %edx
 ; X64-NEXT:    je .LBB1_4
-; X64-NEXT:  .LBB1_5: # %entry
+; X64-NEXT:  # BB#5: # %entry
 ; X64-NEXT:    xorps %xmm2, %xmm2
-; X64-NEXT:    testl %r8d, %r8d
-; X64-NEXT:    jne .LBB1_8
-; X64-NEXT:    jmp .LBB1_7
+; X64-NEXT:    jmp .LBB1_6
 ; X64-NEXT:  .LBB1_4:
 ; X64-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X64-NEXT:  .LBB1_6: # %entry
 ; X64-NEXT:    testl %r8d, %r8d
 ; X64-NEXT:    je .LBB1_7
-; X64-NEXT:  .LBB1_8: # %entry
+; X64-NEXT:  # BB#8: # %entry
 ; X64-NEXT:    xorps %xmm3, %xmm3
 ; X64-NEXT:    jmp .LBB1_9
 ; X64-NEXT:  .LBB1_7:
diff --git a/llvm/test/CodeGen/X86/tail-dup-repeat.ll b/llvm/test/CodeGen/X86/tail-dup-repeat.ll
deleted file mode 100644
index 21b48e1..0000000
--- a/llvm/test/CodeGen/X86/tail-dup-repeat.ll
+++ /dev/null
@@ -1,53 +0,0 @@
-; RUN: llc -O2 -tail-dup-placement-threshold=4 -o - %s | FileCheck %s
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-; Function Attrs: uwtable
-; When tail-duplicating during placement, we work backward from blocks with
-; multiple successors. In this case, the block dup1 gets duplicated into dup2
-; and if.then64, and then the block dup2 gets duplicated into land.lhs.true
-; and if.end70
-; CHECK-LABEL: repeated_tail_dup:
-define void @repeated_tail_dup(i1 %a1, i1 %a2, i32* %a4, i32* %a5, i8* %a6) #0 align 2 {
-entry:
-  br label %for.cond
-
-; CHECK: {{^}}.[[HEADER:LBB0_[1-9]]]: # %for.cond
-for.cond:                                         ; preds = %dup1, %entry
-  br i1 %a1, label %land.lhs.true, label %if.end56
-
-land.lhs.true:                                    ; preds = %for.cond
-  store i32 10, i32* %a4, align 8
-  br label %dup2
-
-if.end56:                                         ; preds = %for.cond
-  br i1 %a2, label %if.then64, label %if.end70
-
-if.then64:                                        ; preds = %if.end56
-  store i8 1, i8* %a6, align 1
-  br label %dup1
-
-; CHECK:      # %if.end70
-; CHECK-NEXT: # in Loop:
-; CHECK-NEXT: movl $12, (%rdx)
-; CHECK-NEXT: movl $2, (%rcx)
-; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: je .[[HEADER]]
-if.end70:                                         ; preds = %if.end56
-  store i32 12, i32* %a4, align 8
-  br label %dup2
-
-dup2:                                             ; preds = %if.end70, %land.lhs.true
-  store i32 2, i32* %a5, align 4
-  br label %dup1
-
-dup1:                                             ; preds = %dup2, %if.then64
-  %val = load i32, i32* %a4, align 8
-  %switch = icmp ult i32 undef, 1
-  br i1 %switch, label %for.cond, label %for.end
-
-for.end:                                          ; preds = %dup1
-  ret void
-}
-
-attributes #0 = { uwtable }
diff --git a/llvm/test/CodeGen/X86/update-terminator.mir b/llvm/test/CodeGen/X86/update-terminator.mir
index 2e8e85b..1e75c6a 100644
--- a/llvm/test/CodeGen/X86/update-terminator.mir
+++ b/llvm/test/CodeGen/X86/update-terminator.mir
@@ -5,30 +5,17 @@
   @a = external global i16
   @b = external global i32
 
-  declare void @dummy1()
-  declare void @dummy2()
-  declare void @dummy3()
-
   ; Function Attrs: nounwind
   define void @f2() {
     br i1 undef, label %bb1, label %bb3
 
   bb1:
-    call void @dummy1()
-    call void @dummy1()
-    call void @dummy1()
     br i1 undef, label %bb2, label %bb2
 
   bb2:
-    call void @dummy2()
-    call void @dummy2()
-    call void @dummy2()
     br label %bb4
 
   bb3:
-    call void @dummy3()
-    call void @dummy3()
-    call void @dummy3()
     br label %bb2
 
   bb4:
@@ -53,24 +40,15 @@
   bb.1:
     successors: %bb.2(100)
 
-    CALL64pcrel32 @dummy1, csr_64, implicit %rsp, implicit-def %rsp
-    CALL64pcrel32 @dummy1, csr_64, implicit %rsp, implicit-def %rsp
-    CALL64pcrel32 @dummy1, csr_64, implicit %rsp, implicit-def %rsp
     JNE_1 %bb.2, implicit %eflags
 
   bb.2:
     successors: %bb.4(100)
 
-    CALL64pcrel32 @dummy2, csr_64, implicit %rsp, implicit-def %rsp
-    CALL64pcrel32 @dummy2, csr_64, implicit %rsp, implicit-def %rsp
-    CALL64pcrel32 @dummy2, csr_64, implicit %rsp, implicit-def %rsp
     JMP_1 %bb.4
 
   bb.3:
     successors: %bb.2(100)
-    CALL64pcrel32 @dummy3, csr_64, implicit %rsp, implicit-def %rsp
-    CALL64pcrel32 @dummy3, csr_64, implicit %rsp, implicit-def %rsp
-    CALL64pcrel32 @dummy3, csr_64, implicit %rsp, implicit-def %rsp
     JMP_1 %bb.2
 
   bb.4: