ARM scheduling fix: compute predicated implicit use properly.
Minor drive by fix to cleanup latency computation. Calling
getOperandLatency with a deliberately incorrect operand index does not
give you the latency you want.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158959 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/test/CodeGen/ARM/neon_div.ll b/test/CodeGen/ARM/neon_div.ll
index de48fee..4a82c36 100644
--- a/test/CodeGen/ARM/neon_div.ll
+++ b/test/CodeGen/ARM/neon_div.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -march=arm -mattr=+neon -pre-RA-sched=source | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -pre-RA-sched=source -disable-post-ra | FileCheck %s
define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vrecpe.f32
-;CHECK: vrecpe.f32
;CHECK: vmovn.i32
+;CHECK: vrecpe.f32
;CHECK: vmovn.i32
;CHECK: vmovn.i16
%tmp1 = load <8 x i8>* %A
@@ -15,10 +15,10 @@
define <8 x i8> @udivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vrecpe.f32
;CHECK: vrecps.f32
+;CHECK: vmovn.i32
;CHECK: vrecpe.f32
;CHECK: vrecps.f32
;CHECK: vmovn.i32
-;CHECK: vmovn.i32
;CHECK: vqmovun.s16
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B