[ARM] Cortex-M4 schedule additions
This is an attempt to fill in some of the missing instructions from the
Cortex-M4 schedule, and to make it easier to do the same for other ARM CPUs.
- Some instructions are marked as hasNoSchedulingInfo, as they are pseudos or
otherwise do not require scheduling info.
- A lot of features have been marked as not supported.
- Some WriteRes's have been added for cvt instructions.
- Some extra instruction latencies have been added, notably by relaxing the
regex for DSP instructions to catch more cases, and for some FP instructions.
This goes a long way towards getting the CompleteModel working for this CPU. It
does not go so far as to get all scheduling info for all output operands correct.
Differential Revision: https://reviews.llvm.org/D67957
llvm-svn: 373163
diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll b/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
index 66170aa..37e39a0 100644
--- a/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
+++ b/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
@@ -19,8 +19,8 @@
; CHECK-LE-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-LE-NEXT: ldr lr, [r3, #2]!
; CHECK-LE-NEXT: ldr r4, [r2, #2]!
-; CHECK-LE-NEXT: subs r0, #1
; CHECK-LE-NEXT: sxtah r1, r1, lr
+; CHECK-LE-NEXT: subs r0, #1
; CHECK-LE-NEXT: smlad r12, r4, lr, r12
; CHECK-LE-NEXT: bne .LBB0_2
; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup
@@ -47,13 +47,13 @@
; CHECK-BE-NEXT: .LBB0_2: @ %for.body
; CHECK-BE-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: ldrsh lr, [r3, #2]!
-; CHECK-BE-NEXT: ldrsh r4, [r2, #2]!
-; CHECK-BE-NEXT: ldrsh.w r5, [r3, #2]
+; CHECK-BE-NEXT: ldrsh r5, [r2, #2]!
+; CHECK-BE-NEXT: ldrsh.w r4, [r3, #2]
; CHECK-BE-NEXT: ldrsh.w r6, [r2, #2]
-; CHECK-BE-NEXT: smlabb r4, r4, lr, r12
-; CHECK-BE-NEXT: subs r0, #1
-; CHECK-BE-NEXT: smlabb r12, r6, r5, r4
+; CHECK-BE-NEXT: smlabb r5, r5, lr, r12
; CHECK-BE-NEXT: add r1, lr
+; CHECK-BE-NEXT: subs r0, #1
+; CHECK-BE-NEXT: smlabb r12, r6, r4, r5
; CHECK-BE-NEXT: bne .LBB0_2
; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-BE-NEXT: add.w r0, r12, r1
@@ -154,8 +154,8 @@
; CHECK-BE-NEXT: ldrsh.w r5, [r3, #2]
; CHECK-BE-NEXT: ldrsh.w r6, [r2, #2]
; CHECK-BE-NEXT: smlabb r4, r4, lr, r12
-; CHECK-BE-NEXT: subs r0, #1
; CHECK-BE-NEXT: smlabb r12, r6, r5, r4
+; CHECK-BE-NEXT: subs r0, #1
; CHECK-BE-NEXT: mul r1, lr, r1
; CHECK-BE-NEXT: bne .LBB1_2
; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup
@@ -215,17 +215,17 @@
; CHECK-LE-NEXT: cmp r0, #1
; CHECK-LE-NEXT: blt .LBB2_4
; CHECK-LE-NEXT: @ %bb.1: @ %for.body.preheader
-; CHECK-LE-NEXT: subs r2, #2
+; CHECK-LE-NEXT: sub.w lr, r2, #2
; CHECK-LE-NEXT: subs r3, #2
; CHECK-LE-NEXT: mov.w r12, #0
; CHECK-LE-NEXT: movs r1, #0
; CHECK-LE-NEXT: .p2align 2
; CHECK-LE-NEXT: .LBB2_2: @ %for.body
; CHECK-LE-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-LE-NEXT: ldr r4, [r2, #2]!
-; CHECK-LE-NEXT: ldr lr, [r3, #2]!
-; CHECK-LE-NEXT: asrs r5, r4, #16
-; CHECK-LE-NEXT: smlad r12, r4, lr, r12
+; CHECK-LE-NEXT: ldr r2, [lr, #2]!
+; CHECK-LE-NEXT: ldr r4, [r3, #2]!
+; CHECK-LE-NEXT: asrs r5, r2, #16
+; CHECK-LE-NEXT: smlad r12, r2, r4, r12
; CHECK-LE-NEXT: subs r0, #1
; CHECK-LE-NEXT: mul r1, r5, r1
; CHECK-LE-NEXT: bne .LBB2_2
@@ -257,8 +257,8 @@
; CHECK-BE-NEXT: ldrsh.w r5, [r3, #2]
; CHECK-BE-NEXT: ldrsh.w r6, [r2, #2]
; CHECK-BE-NEXT: smlabb r4, r4, lr, r12
-; CHECK-BE-NEXT: subs r0, #1
; CHECK-BE-NEXT: smlabb r12, r6, r5, r4
+; CHECK-BE-NEXT: subs r0, #1
; CHECK-BE-NEXT: mul r1, r6, r1
; CHECK-BE-NEXT: bne .LBB2_2
; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup
@@ -343,8 +343,8 @@
;
; CHECK-BE-LABEL: and_user:
; CHECK-BE: @ %bb.0: @ %entry
-; CHECK-BE-NEXT: .save {r4, r5, r6, lr}
-; CHECK-BE-NEXT: push {r4, r5, r6, lr}
+; CHECK-BE-NEXT: .save {r4, r5, r6, r7, lr}
+; CHECK-BE-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-BE-NEXT: cmp r0, #1
; CHECK-BE-NEXT: blt .LBB3_4
; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader
@@ -356,23 +356,23 @@
; CHECK-BE-NEXT: .LBB3_2: @ %for.body
; CHECK-BE-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: ldrsh lr, [r3, #2]!
-; CHECK-BE-NEXT: ldrsh r4, [r2, #2]!
-; CHECK-BE-NEXT: ldrsh.w r5, [r3, #2]
-; CHECK-BE-NEXT: ldrsh.w r6, [r2, #2]
-; CHECK-BE-NEXT: smlabb r4, r4, lr, r12
-; CHECK-BE-NEXT: uxth.w lr, lr
-; CHECK-BE-NEXT: smlabb r12, r6, r5, r4
+; CHECK-BE-NEXT: ldrsh r5, [r2, #2]!
+; CHECK-BE-NEXT: ldrsh.w r4, [r3, #2]
+; CHECK-BE-NEXT: ldrsh.w r7, [r2, #2]
+; CHECK-BE-NEXT: uxth.w r6, lr
+; CHECK-BE-NEXT: smlabb r5, r5, lr, r12
+; CHECK-BE-NEXT: smlabb r12, r7, r4, r5
; CHECK-BE-NEXT: subs r0, #1
-; CHECK-BE-NEXT: mul r1, lr, r1
+; CHECK-BE-NEXT: mul r1, r6, r1
; CHECK-BE-NEXT: bne .LBB3_2
; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-BE-NEXT: add.w r0, r12, r1
-; CHECK-BE-NEXT: pop {r4, r5, r6, pc}
+; CHECK-BE-NEXT: pop {r4, r5, r6, r7, pc}
; CHECK-BE-NEXT: .LBB3_4:
; CHECK-BE-NEXT: mov.w r12, #0
; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: add.w r0, r12, r1
-; CHECK-BE-NEXT: pop {r4, r5, r6, pc}
+; CHECK-BE-NEXT: pop {r4, r5, r6, r7, pc}
entry:
%cmp24 = icmp sgt i32 %arg, 0
br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
@@ -466,10 +466,10 @@
; CHECK-BE-NEXT: ldrsh.w r5, [r3, #2]
; CHECK-BE-NEXT: ldrsh.w r6, [r2, #2]
; CHECK-BE-NEXT: smlabb r4, r4, r1, r12
-; CHECK-BE-NEXT: subs r0, #1
; CHECK-BE-NEXT: smlabb r12, r6, r5, r4
; CHECK-BE-NEXT: eor.w r6, r1, lr
-; CHECK-BE-NEXT: mul r1, r6, r1
+; CHECK-BE-NEXT: muls r1, r6, r1
+; CHECK-BE-NEXT: subs r0, #1
; CHECK-BE-NEXT: lsl.w lr, r1, #16
; CHECK-BE-NEXT: bne .LBB4_2
; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup