AArch64: improve redundant copy elimination.

Mostly, this fixes the bug that if the CBZ guaranteed Xn to be zero but Wn was
used, we didn't sort out the use-def chain properly.

I've also made it check more than just the last instruction for a compatible
CBZ (so it can cope without fallthroughs). I'd have liked to do that
separately, but it helps with writing the test.

Finally, I removed some custom loops in favour of MachineInstr helpers and
refactored the control flow to flatten it and avoid possibly quadratic
iterations in blocks with many copies. NFC for these, just a general tidy-up.

llvm-svn: 261154
diff --git a/llvm/test/CodeGen/AArch64/machine-copy-remove.ll b/llvm/test/CodeGen/AArch64/machine-copy-remove.ll
index 7196d43..6a97ead 100644
--- a/llvm/test/CodeGen/AArch64/machine-copy-remove.ll
+++ b/llvm/test/CodeGen/AArch64/machine-copy-remove.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=cortex-a57 -verify-machineinstrs < %s | FileCheck %s
 
-; CHECK-LABEL: f_XX
+; CHECK-LABEL: f_XX:
 ; CHECK: cbz x[[REG:[0-9]+]], [[BB:.LBB.*]]
 ; CHECK: [[BB]]:
 ; CHECK-NOT: mov x[[REG]], xzr
@@ -18,7 +18,7 @@
   ret i64 %a.0
 }
 
-; CHECK-LABEL: f_WW
+; CHECK-LABEL: f_WW:
 ; CHECK: cbz w[[REG:[0-9]+]], [[BB:.LBB.*]]
 ; CHECK: [[BB]]:
 ; CHECK-NOT: mov w[[REG]], wzr
@@ -36,7 +36,7 @@
   ret i32 %a.0
 }
 
-; CHECK-LABEL: f_XW
+; CHECK-LABEL: f_XW:
 ; CHECK: cbz x[[REG:[0-9]+]], [[BB:.LBB.*]]
 ; CHECK: [[BB]]:
 ; CHECK-NOT: mov w[[REG]], wzr
@@ -54,7 +54,7 @@
   ret i32 %a.0
 }
 
-; CHECK-LABEL: f_WX
+; CHECK-LABEL: f_WX:
 ; CHECK: cbz w[[REG:[0-9]+]], [[BB:.LBB.*]]
 ; CHECK: [[BB]]:
 ; CHECK: mov x[[REG]], xzr
@@ -73,3 +73,22 @@
   %a.0 = phi i64 [ %0, %if.then ], [ 0, %entry ]
   ret i64 %a.0
 }
+
+; CHECK-LABEL: test_superreg:
+; CHECK:     cbz x[[REG:[0-9]+]], [[BB:.LBB.*]]
+; CHECK: [[BB]]:
+; CHECK:     str x[[REG]], [x1]
+; CHECK-NOT: mov w[[REG]], wzr
+; Previously, because we returned w0 but x0 was marked live-in to the block, we
+; didn't remove the <kill> on the str, leading to a verification failure.
+define i32 @test_superreg(i64 %in, i64* %dest) {
+  %tst = icmp eq i64 %in, 0
+  br i1 %tst, label %true, label %false
+
+false:
+  ret i32 42
+
+true:
+  store volatile i64 %in, i64* %dest
+  ret i32 0
+}
\ No newline at end of file