[X86][Broadwell] Added the instruction scheduling information for the Broadwell CPU.

Adding the scheduling information for the Broadwell (BDW) CPU target.

This patch adds the instruction scheduling information for the Broadwell (BDW) architecture target by adding the file X86SchedBroadwell.td located under the X86 Target.
We used the scheduling information retrieved from the Broadwell architects in order to create the file.
The scheduling information includes the latency, the number of micro-ops and the ports used by each BDW instruction.

The patch continues the scheduling replacement and insertion effort started with the SandyBridge (SNB) target in r310792, the Haswell (HSW) target in r311879, the SkylakeClient (SKL) target in rL313613 + rL315978 and the SkylakeServer (SKX) in rL315175.

Performance fluctuations may be expected due to code alignment effects.

Reviewers: zvi, RKSimon, craig.topper
Differential Revision: https://reviews.llvm.org/D39054

Change-Id: If6f799e5ff60e1091c8d43b05ea78c53581bae01
llvm-svn: 316492
diff --git a/llvm/test/CodeGen/X86/sse-schedule.ll b/llvm/test/CodeGen/X86/sse-schedule.ll
index 844fb7a..20e022a 100644
--- a/llvm/test/CodeGen/X86/sse-schedule.ll
+++ b/llvm/test/CodeGen/X86/sse-schedule.ll
@@ -45,8 +45,8 @@
 ; BROADWELL-LABEL: test_addps:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_addps:
 ; SKYLAKE:       # BB#0:
@@ -111,8 +111,8 @@
 ; BROADWELL-LABEL: test_addss:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_addss:
 ; SKYLAKE:       # BB#0:
@@ -181,8 +181,8 @@
 ; BROADWELL-LABEL: test_andps:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_andps:
 ; SKYLAKE:       # BB#0:
@@ -255,8 +255,8 @@
 ; BROADWELL-LABEL: test_andnotps:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_andnotps:
 ; SKYLAKE:       # BB#0:
@@ -332,9 +332,9 @@
 ; BROADWELL-LABEL: test_cmpps:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; BROADWELL-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
 ; BROADWELL-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_cmpps:
 ; SKYLAKE:       # BB#0:
@@ -407,8 +407,8 @@
 ; BROADWELL-LABEL: test_cmpss:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_cmpss:
 ; SKYLAKE:       # BB#0:
@@ -521,13 +521,13 @@
 ; BROADWELL-NEXT:    setnp %al # sched: [1:0.50]
 ; BROADWELL-NEXT:    sete %cl # sched: [1:0.50]
 ; BROADWELL-NEXT:    andb %al, %cl # sched: [1:0.25]
-; BROADWELL-NEXT:    vcomiss (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-NEXT:    vcomiss (%rdi), %xmm0 # sched: [8:1.00]
 ; BROADWELL-NEXT:    setnp %al # sched: [1:0.50]
 ; BROADWELL-NEXT:    sete %dl # sched: [1:0.50]
 ; BROADWELL-NEXT:    andb %al, %dl # sched: [1:0.25]
 ; BROADWELL-NEXT:    orb %cl, %dl # sched: [1:0.25]
 ; BROADWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_comiss:
 ; SKYLAKE:       # BB#0:
@@ -631,9 +631,9 @@
 ; BROADWELL-LABEL: test_cvtsi2ss:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-NEXT:    vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
+; BROADWELL-NEXT:    vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
 ; BROADWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_cvtsi2ss:
 ; SKYLAKE:       # BB#0:
@@ -708,9 +708,9 @@
 ; BROADWELL-LABEL: test_cvtsi2ssq:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
-; BROADWELL-NEXT:    vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
+; BROADWELL-NEXT:    vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
 ; BROADWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_cvtsi2ssq:
 ; SKYLAKE:       # BB#0:
@@ -785,9 +785,9 @@
 ; BROADWELL-LABEL: test_cvtss2si:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vcvtss2si %xmm0, %ecx # sched: [4:1.00]
-; BROADWELL-NEXT:    vcvtss2si (%rdi), %eax # sched: [4:1.00]
+; BROADWELL-NEXT:    vcvtss2si (%rdi), %eax # sched: [9:1.00]
 ; BROADWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_cvtss2si:
 ; SKYLAKE:       # BB#0:
@@ -865,9 +865,9 @@
 ; BROADWELL-LABEL: test_cvtss2siq:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vcvtss2si %xmm0, %rcx # sched: [4:1.00]
-; BROADWELL-NEXT:    vcvtss2si (%rdi), %rax # sched: [4:1.00]
+; BROADWELL-NEXT:    vcvtss2si (%rdi), %rax # sched: [9:1.00]
 ; BROADWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_cvtss2siq:
 ; SKYLAKE:       # BB#0:
@@ -945,9 +945,9 @@
 ; BROADWELL-LABEL: test_cvttss2si:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vcvttss2si %xmm0, %ecx # sched: [4:1.00]
-; BROADWELL-NEXT:    vcvttss2si (%rdi), %eax # sched: [4:1.00]
+; BROADWELL-NEXT:    vcvttss2si (%rdi), %eax # sched: [9:1.00]
 ; BROADWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_cvttss2si:
 ; SKYLAKE:       # BB#0:
@@ -1022,9 +1022,9 @@
 ; BROADWELL-LABEL: test_cvttss2siq:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vcvttss2si %xmm0, %rcx # sched: [4:1.00]
-; BROADWELL-NEXT:    vcvttss2si (%rdi), %rax # sched: [4:1.00]
+; BROADWELL-NEXT:    vcvttss2si (%rdi), %rax # sched: [9:1.00]
 ; BROADWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_cvttss2siq:
 ; SKYLAKE:       # BB#0:
@@ -1093,9 +1093,9 @@
 ;
 ; BROADWELL-LABEL: test_divps:
 ; BROADWELL:       # BB#0:
-; BROADWELL-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # sched: [13:1.00]
-; BROADWELL-NEXT:    vdivps (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
+; BROADWELL-NEXT:    vdivps (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_divps:
 ; SKYLAKE:       # BB#0:
@@ -1159,9 +1159,9 @@
 ;
 ; BROADWELL-LABEL: test_divss:
 ; BROADWELL:       # BB#0:
-; BROADWELL-NEXT:    vdivss %xmm1, %xmm0, %xmm0 # sched: [13:1.00]
-; BROADWELL-NEXT:    vdivss (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    vdivss %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
+; BROADWELL-NEXT:    vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_divss:
 ; SKYLAKE:       # BB#0:
@@ -1226,8 +1226,8 @@
 ; BROADWELL-LABEL: test_ldmxcsr:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; BROADWELL-NEXT:    vldmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_ldmxcsr:
 ; SKYLAKE:       # BB#0:
@@ -1294,8 +1294,8 @@
 ; BROADWELL-LABEL: test_maxps:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vmaxps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    vmaxps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_maxps:
 ; SKYLAKE:       # BB#0:
@@ -1361,8 +1361,8 @@
 ; BROADWELL-LABEL: test_maxss:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vmaxss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_maxss:
 ; SKYLAKE:       # BB#0:
@@ -1428,8 +1428,8 @@
 ; BROADWELL-LABEL: test_minps:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vminps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    vminps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_minps:
 ; SKYLAKE:       # BB#0:
@@ -1495,8 +1495,8 @@
 ; BROADWELL-LABEL: test_minss:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vminss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_minss:
 ; SKYLAKE:       # BB#0:
@@ -1566,10 +1566,10 @@
 ;
 ; BROADWELL-LABEL: test_movaps:
 ; BROADWELL:       # BB#0:
-; BROADWELL-NEXT:    vmovaps (%rdi), %xmm0 # sched: [1:0.50]
+; BROADWELL-NEXT:    vmovaps (%rdi), %xmm0 # sched: [5:0.50]
 ; BROADWELL-NEXT:    vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    vmovaps %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_movaps:
 ; SKYLAKE:       # BB#0:
@@ -1641,7 +1641,7 @@
 ; BROADWELL-LABEL: test_movhlps:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_movhlps:
 ; SKYLAKE:       # BB#0:
@@ -1708,10 +1708,10 @@
 ;
 ; BROADWELL-LABEL: test_movhps:
 ; BROADWELL:       # BB#0:
-; BROADWELL-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
+; BROADWELL-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_movhps:
 ; SKYLAKE:       # BB#0:
@@ -1787,7 +1787,7 @@
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
 ; BROADWELL-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_movlhps:
 ; SKYLAKE:       # BB#0:
@@ -1855,10 +1855,10 @@
 ;
 ; BROADWELL-LABEL: test_movlps:
 ; BROADWELL:       # BB#0:
-; BROADWELL-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
+; BROADWELL-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    vmovlps %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_movlps:
 ; SKYLAKE:       # BB#0:
@@ -1928,7 +1928,7 @@
 ; BROADWELL-LABEL: test_movmskps:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vmovmskps %xmm0, %eax # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_movmskps:
 ; SKYLAKE:       # BB#0:
@@ -1989,7 +1989,7 @@
 ; BROADWELL-LABEL: test_movntps:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vmovntps %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_movntps:
 ; SKYLAKE:       # BB#0:
@@ -2052,10 +2052,10 @@
 ;
 ; BROADWELL-LABEL: test_movss_mem:
 ; BROADWELL:       # BB#0:
-; BROADWELL-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50]
+; BROADWELL-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
 ; BROADWELL-NEXT:    vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    vmovss %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_movss_mem:
 ; SKYLAKE:       # BB#0:
@@ -2125,7 +2125,7 @@
 ; BROADWELL-LABEL: test_movss_reg:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_movss_reg:
 ; SKYLAKE:       # BB#0:
@@ -2188,10 +2188,10 @@
 ;
 ; BROADWELL-LABEL: test_movups:
 ; BROADWELL:       # BB#0:
-; BROADWELL-NEXT:    vmovups (%rdi), %xmm0 # sched: [1:0.50]
+; BROADWELL-NEXT:    vmovups (%rdi), %xmm0 # sched: [5:0.50]
 ; BROADWELL-NEXT:    vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
 ; BROADWELL-NEXT:    vmovups %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_movups:
 ; SKYLAKE:       # BB#0:
@@ -2259,9 +2259,9 @@
 ;
 ; BROADWELL-LABEL: test_mulps:
 ; BROADWELL:       # BB#0:
-; BROADWELL-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT:    vmulps (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
+; BROADWELL-NEXT:    vmulps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_mulps:
 ; SKYLAKE:       # BB#0:
@@ -2325,9 +2325,9 @@
 ;
 ; BROADWELL-LABEL: test_mulss:
 ; BROADWELL:       # BB#0:
-; BROADWELL-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT:    vmulss (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
+; BROADWELL-NEXT:    vmulss (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_mulss:
 ; SKYLAKE:       # BB#0:
@@ -2396,8 +2396,8 @@
 ; BROADWELL-LABEL: test_orps:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    vorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    vorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_orps:
 ; SKYLAKE:       # BB#0:
@@ -2466,8 +2466,8 @@
 ;
 ; BROADWELL-LABEL: test_prefetchnta:
 ; BROADWELL:       # BB#0:
-; BROADWELL-NEXT:    prefetchnta (%rdi) # sched: [1:0.50]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    prefetchnta (%rdi) # sched: [5:0.50]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_prefetchnta:
 ; SKYLAKE:       # BB#0:
@@ -2534,9 +2534,9 @@
 ; BROADWELL-LABEL: test_rcpps:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vrcpps %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT:    vrcpps (%rdi), %xmm1 # sched: [5:1.00]
+; BROADWELL-NEXT:    vrcpps (%rdi), %xmm1 # sched: [10:1.00]
 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_rcpps:
 ; SKYLAKE:       # BB#0:
@@ -2619,10 +2619,10 @@
 ; BROADWELL-LABEL: test_rcpss:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
+; BROADWELL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
 ; BROADWELL-NEXT:    vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_rcpss:
 ; SKYLAKE:       # BB#0:
@@ -2706,9 +2706,9 @@
 ; BROADWELL-LABEL: test_rsqrtps:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT:    vrsqrtps (%rdi), %xmm1 # sched: [5:1.00]
+; BROADWELL-NEXT:    vrsqrtps (%rdi), %xmm1 # sched: [10:1.00]
 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_rsqrtps:
 ; SKYLAKE:       # BB#0:
@@ -2791,10 +2791,10 @@
 ; BROADWELL-LABEL: test_rsqrtss:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
+; BROADWELL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
 ; BROADWELL-NEXT:    vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_rsqrtss:
 ; SKYLAKE:       # BB#0:
@@ -2871,8 +2871,8 @@
 ;
 ; BROADWELL-LABEL: test_sfence:
 ; BROADWELL:       # BB#0:
-; BROADWELL-NEXT:    sfence # sched: [1:0.33]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    sfence # sched: [2:0.33]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_sfence:
 ; SKYLAKE:       # BB#0:
@@ -2936,8 +2936,8 @@
 ; BROADWELL-LABEL: test_shufps:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
-; BROADWELL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [6:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_shufps:
 ; SKYLAKE:       # BB#0:
@@ -3008,9 +3008,9 @@
 ; BROADWELL-LABEL: test_sqrtps:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vsqrtps %xmm0, %xmm0 # sched: [14:1.00]
-; BROADWELL-NEXT:    vsqrtps (%rdi), %xmm1 # sched: [14:1.00]
+; BROADWELL-NEXT:    vsqrtps (%rdi), %xmm1 # sched: [19:1.00]
 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_sqrtps:
 ; SKYLAKE:       # BB#0:
@@ -3093,10 +3093,10 @@
 ; BROADWELL-LABEL: test_sqrtss:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00]
-; BROADWELL-NEXT:    vmovaps (%rdi), %xmm1 # sched: [1:0.50]
+; BROADWELL-NEXT:    vmovaps (%rdi), %xmm1 # sched: [5:0.50]
 ; BROADWELL-NEXT:    vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:1.00]
 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_sqrtss:
 ; SKYLAKE:       # BB#0:
@@ -3170,9 +3170,9 @@
 ;
 ; BROADWELL-LABEL: test_stmxcsr:
 ; BROADWELL:       # BB#0:
-; BROADWELL-NEXT:    vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; BROADWELL-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [1:0.50]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
+; BROADWELL-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_stmxcsr:
 ; SKYLAKE:       # BB#0:
@@ -3239,8 +3239,8 @@
 ; BROADWELL-LABEL: test_subps:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    vsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_subps:
 ; SKYLAKE:       # BB#0:
@@ -3305,8 +3305,8 @@
 ; BROADWELL-LABEL: test_subss:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    vsubss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_subss:
 ; SKYLAKE:       # BB#0:
@@ -3414,13 +3414,13 @@
 ; BROADWELL-NEXT:    setnp %al # sched: [1:0.50]
 ; BROADWELL-NEXT:    sete %cl # sched: [1:0.50]
 ; BROADWELL-NEXT:    andb %al, %cl # sched: [1:0.25]
-; BROADWELL-NEXT:    vucomiss (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-NEXT:    vucomiss (%rdi), %xmm0 # sched: [8:1.00]
 ; BROADWELL-NEXT:    setnp %al # sched: [1:0.50]
 ; BROADWELL-NEXT:    sete %dl # sched: [1:0.50]
 ; BROADWELL-NEXT:    andb %al, %dl # sched: [1:0.25]
 ; BROADWELL-NEXT:    orb %cl, %dl # sched: [1:0.25]
 ; BROADWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_ucomiss:
 ; SKYLAKE:       # BB#0:
@@ -3523,8 +3523,8 @@
 ; BROADWELL-LABEL: test_unpckhps:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; BROADWELL-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_unpckhps:
 ; SKYLAKE:       # BB#0:
@@ -3593,8 +3593,8 @@
 ; BROADWELL-LABEL: test_unpcklps:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; BROADWELL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_unpcklps:
 ; SKYLAKE:       # BB#0:
@@ -3663,8 +3663,8 @@
 ; BROADWELL-LABEL: test_xorps:
 ; BROADWELL:       # BB#0:
 ; BROADWELL-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT:    retq # sched: [2:1.00]
+; BROADWELL-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_xorps:
 ; SKYLAKE:       # BB#0: