[ARM] Cortex-M4 schedule additions

This is an attempt to fill in some of the instructions missing from the
Cortex-M4 schedule, and make it easier to do the same for other ARM CPUs.

- Some instructions are marked as hasNoSchedulingInfo as they are pseudos or
  otherwise do not require scheduling info.
- A lot of features have been marked as unsupported.
- Some WriteRes's have been added for cvt instructions.
- Some extra instruction latencies have been added, notably by relaxing the
  regex for DSP instructions to catch more cases, and for some FP
  instructions.

This goes a long way towards getting the CompleteModel working for this CPU.
It does not go far enough to get all scheduling info for all output operands
correct.

Differential Revision: https://reviews.llvm.org/D67957

llvm-svn: 373163
diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll b/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
index 66170aa..37e39a0 100644
--- a/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
+++ b/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
@@ -19,8 +19,8 @@
 ; CHECK-LE-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-LE-NEXT:    ldr lr, [r3, #2]!
 ; CHECK-LE-NEXT:    ldr r4, [r2, #2]!
-; CHECK-LE-NEXT:    subs r0, #1
 ; CHECK-LE-NEXT:    sxtah r1, r1, lr
+; CHECK-LE-NEXT:    subs r0, #1
 ; CHECK-LE-NEXT:    smlad r12, r4, lr, r12
 ; CHECK-LE-NEXT:    bne .LBB0_2
 ; CHECK-LE-NEXT:  @ %bb.3: @ %for.cond.cleanup
@@ -47,13 +47,13 @@
 ; CHECK-BE-NEXT:  .LBB0_2: @ %for.body
 ; CHECK-BE-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-BE-NEXT:    ldrsh lr, [r3, #2]!
-; CHECK-BE-NEXT:    ldrsh r4, [r2, #2]!
-; CHECK-BE-NEXT:    ldrsh.w r5, [r3, #2]
+; CHECK-BE-NEXT:    ldrsh r5, [r2, #2]!
+; CHECK-BE-NEXT:    ldrsh.w r4, [r3, #2]
 ; CHECK-BE-NEXT:    ldrsh.w r6, [r2, #2]
-; CHECK-BE-NEXT:    smlabb r4, r4, lr, r12
-; CHECK-BE-NEXT:    subs r0, #1
-; CHECK-BE-NEXT:    smlabb r12, r6, r5, r4
+; CHECK-BE-NEXT:    smlabb r5, r5, lr, r12
 ; CHECK-BE-NEXT:    add r1, lr
+; CHECK-BE-NEXT:    subs r0, #1
+; CHECK-BE-NEXT:    smlabb r12, r6, r4, r5
 ; CHECK-BE-NEXT:    bne .LBB0_2
 ; CHECK-BE-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-BE-NEXT:    add.w r0, r12, r1
@@ -154,8 +154,8 @@
 ; CHECK-BE-NEXT:    ldrsh.w r5, [r3, #2]
 ; CHECK-BE-NEXT:    ldrsh.w r6, [r2, #2]
 ; CHECK-BE-NEXT:    smlabb r4, r4, lr, r12
-; CHECK-BE-NEXT:    subs r0, #1
 ; CHECK-BE-NEXT:    smlabb r12, r6, r5, r4
+; CHECK-BE-NEXT:    subs r0, #1
 ; CHECK-BE-NEXT:    mul r1, lr, r1
 ; CHECK-BE-NEXT:    bne .LBB1_2
 ; CHECK-BE-NEXT:  @ %bb.3: @ %for.cond.cleanup
@@ -215,17 +215,17 @@
 ; CHECK-LE-NEXT:    cmp r0, #1
 ; CHECK-LE-NEXT:    blt .LBB2_4
 ; CHECK-LE-NEXT:  @ %bb.1: @ %for.body.preheader
-; CHECK-LE-NEXT:    subs r2, #2
+; CHECK-LE-NEXT:    sub.w lr, r2, #2
 ; CHECK-LE-NEXT:    subs r3, #2
 ; CHECK-LE-NEXT:    mov.w r12, #0
 ; CHECK-LE-NEXT:    movs r1, #0
 ; CHECK-LE-NEXT:    .p2align 2
 ; CHECK-LE-NEXT:  .LBB2_2: @ %for.body
 ; CHECK-LE-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-LE-NEXT:    ldr r4, [r2, #2]!
-; CHECK-LE-NEXT:    ldr lr, [r3, #2]!
-; CHECK-LE-NEXT:    asrs r5, r4, #16
-; CHECK-LE-NEXT:    smlad r12, r4, lr, r12
+; CHECK-LE-NEXT:    ldr r2, [lr, #2]!
+; CHECK-LE-NEXT:    ldr r4, [r3, #2]!
+; CHECK-LE-NEXT:    asrs r5, r2, #16
+; CHECK-LE-NEXT:    smlad r12, r2, r4, r12
 ; CHECK-LE-NEXT:    subs r0, #1
 ; CHECK-LE-NEXT:    mul r1, r5, r1
 ; CHECK-LE-NEXT:    bne .LBB2_2
@@ -257,8 +257,8 @@
 ; CHECK-BE-NEXT:    ldrsh.w r5, [r3, #2]
 ; CHECK-BE-NEXT:    ldrsh.w r6, [r2, #2]
 ; CHECK-BE-NEXT:    smlabb r4, r4, lr, r12
-; CHECK-BE-NEXT:    subs r0, #1
 ; CHECK-BE-NEXT:    smlabb r12, r6, r5, r4
+; CHECK-BE-NEXT:    subs r0, #1
 ; CHECK-BE-NEXT:    mul r1, r6, r1
 ; CHECK-BE-NEXT:    bne .LBB2_2
 ; CHECK-BE-NEXT:  @ %bb.3: @ %for.cond.cleanup
@@ -343,8 +343,8 @@
 ;
 ; CHECK-BE-LABEL: and_user:
 ; CHECK-BE:       @ %bb.0: @ %entry
-; CHECK-BE-NEXT:    .save {r4, r5, r6, lr}
-; CHECK-BE-NEXT:    push {r4, r5, r6, lr}
+; CHECK-BE-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-BE-NEXT:    push {r4, r5, r6, r7, lr}
 ; CHECK-BE-NEXT:    cmp r0, #1
 ; CHECK-BE-NEXT:    blt .LBB3_4
 ; CHECK-BE-NEXT:  @ %bb.1: @ %for.body.preheader
@@ -356,23 +356,23 @@
 ; CHECK-BE-NEXT:  .LBB3_2: @ %for.body
 ; CHECK-BE-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-BE-NEXT:    ldrsh lr, [r3, #2]!
-; CHECK-BE-NEXT:    ldrsh r4, [r2, #2]!
-; CHECK-BE-NEXT:    ldrsh.w r5, [r3, #2]
-; CHECK-BE-NEXT:    ldrsh.w r6, [r2, #2]
-; CHECK-BE-NEXT:    smlabb r4, r4, lr, r12
-; CHECK-BE-NEXT:    uxth.w lr, lr
-; CHECK-BE-NEXT:    smlabb r12, r6, r5, r4
+; CHECK-BE-NEXT:    ldrsh r5, [r2, #2]!
+; CHECK-BE-NEXT:    ldrsh.w r4, [r3, #2]
+; CHECK-BE-NEXT:    ldrsh.w r7, [r2, #2]
+; CHECK-BE-NEXT:    uxth.w r6, lr
+; CHECK-BE-NEXT:    smlabb r5, r5, lr, r12
+; CHECK-BE-NEXT:    smlabb r12, r7, r4, r5
 ; CHECK-BE-NEXT:    subs r0, #1
-; CHECK-BE-NEXT:    mul r1, lr, r1
+; CHECK-BE-NEXT:    mul r1, r6, r1
 ; CHECK-BE-NEXT:    bne .LBB3_2
 ; CHECK-BE-NEXT:  @ %bb.3: @ %for.cond.cleanup
 ; CHECK-BE-NEXT:    add.w r0, r12, r1
-; CHECK-BE-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-BE-NEXT:    pop {r4, r5, r6, r7, pc}
 ; CHECK-BE-NEXT:  .LBB3_4:
 ; CHECK-BE-NEXT:    mov.w r12, #0
 ; CHECK-BE-NEXT:    movs r1, #0
 ; CHECK-BE-NEXT:    add.w r0, r12, r1
-; CHECK-BE-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-BE-NEXT:    pop {r4, r5, r6, r7, pc}
 entry:
   %cmp24 = icmp sgt i32 %arg, 0
   br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
@@ -466,10 +466,10 @@
 ; CHECK-BE-NEXT:    ldrsh.w r5, [r3, #2]
 ; CHECK-BE-NEXT:    ldrsh.w r6, [r2, #2]
 ; CHECK-BE-NEXT:    smlabb r4, r4, r1, r12
-; CHECK-BE-NEXT:    subs r0, #1
 ; CHECK-BE-NEXT:    smlabb r12, r6, r5, r4
 ; CHECK-BE-NEXT:    eor.w r6, r1, lr
-; CHECK-BE-NEXT:    mul r1, r6, r1
+; CHECK-BE-NEXT:    muls r1, r6, r1
+; CHECK-BE-NEXT:    subs r0, #1
 ; CHECK-BE-NEXT:    lsl.w lr, r1, #16
 ; CHECK-BE-NEXT:    bne .LBB4_2
 ; CHECK-BE-NEXT:  @ %bb.3: @ %for.cond.cleanup