[X86][Broadwell] Added the instruction scheduling information for the Broadwell CPU.
Adding the scheduling information for the Broadwell (BDW) CPU target.
This patch adds the instruction scheduling information for the Broadwell (BDW) architecture target by adding the file X86SchedBroadwell.td located under the X86 Target.
We used the scheduling information retrieved from the Broadwell architects in order to create the file.
The scheduling information includes the latency, the number of micro-ops, and the ports used by each BDW instruction.
The patch continues the scheduling replacement and insertion effort started with the SandyBridge (SNB) target in r310792, the Haswell (HSW) target in r311879, the SkylakeClient (SKL) target in rL313613 + rL315978 and the SkylakeServer (SKX) target in rL315175.
Performance fluctuations may be expected due to code alignment effects.
Reviewers: zvi, RKSimon, craig.topper
Differential Revision: https://reviews.llvm.org/D39054
Change-Id: If6f799e5ff60e1091c8d43b05ea78c53581bae01
llvm-svn: 316492
diff --git a/llvm/test/CodeGen/X86/sse-schedule.ll b/llvm/test/CodeGen/X86/sse-schedule.ll
index 844fb7a..20e022a 100644
--- a/llvm/test/CodeGen/X86/sse-schedule.ll
+++ b/llvm/test/CodeGen/X86/sse-schedule.ll
@@ -45,8 +45,8 @@
; BROADWELL-LABEL: test_addps:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_addps:
; SKYLAKE: # BB#0:
@@ -111,8 +111,8 @@
; BROADWELL-LABEL: test_addss:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_addss:
; SKYLAKE: # BB#0:
@@ -181,8 +181,8 @@
; BROADWELL-LABEL: test_andps:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_andps:
; SKYLAKE: # BB#0:
@@ -255,8 +255,8 @@
; BROADWELL-LABEL: test_andnotps:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_andnotps:
; SKYLAKE: # BB#0:
@@ -332,9 +332,9 @@
; BROADWELL-LABEL: test_cmpps:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
+; BROADWELL-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cmpps:
; SKYLAKE: # BB#0:
@@ -407,8 +407,8 @@
; BROADWELL-LABEL: test_cmpss:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cmpss:
; SKYLAKE: # BB#0:
@@ -521,13 +521,13 @@
; BROADWELL-NEXT: setnp %al # sched: [1:0.50]
; BROADWELL-NEXT: sete %cl # sched: [1:0.50]
; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25]
-; BROADWELL-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: setnp %al # sched: [1:0.50]
; BROADWELL-NEXT: sete %dl # sched: [1:0.50]
; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25]
; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_comiss:
; SKYLAKE: # BB#0:
@@ -631,9 +631,9 @@
; BROADWELL-LABEL: test_cvtsi2ss:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
+; BROADWELL-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cvtsi2ss:
; SKYLAKE: # BB#0:
@@ -708,9 +708,9 @@
; BROADWELL-LABEL: test_cvtsi2ssq:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
-; BROADWELL-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
+; BROADWELL-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cvtsi2ssq:
; SKYLAKE: # BB#0:
@@ -785,9 +785,9 @@
; BROADWELL-LABEL: test_cvtss2si:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vcvtss2si %xmm0, %ecx # sched: [4:1.00]
-; BROADWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [4:1.00]
+; BROADWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [9:1.00]
; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cvtss2si:
; SKYLAKE: # BB#0:
@@ -865,9 +865,9 @@
; BROADWELL-LABEL: test_cvtss2siq:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vcvtss2si %xmm0, %rcx # sched: [4:1.00]
-; BROADWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [4:1.00]
+; BROADWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [9:1.00]
; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cvtss2siq:
; SKYLAKE: # BB#0:
@@ -945,9 +945,9 @@
; BROADWELL-LABEL: test_cvttss2si:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vcvttss2si %xmm0, %ecx # sched: [4:1.00]
-; BROADWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [4:1.00]
+; BROADWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [9:1.00]
; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cvttss2si:
; SKYLAKE: # BB#0:
@@ -1022,9 +1022,9 @@
; BROADWELL-LABEL: test_cvttss2siq:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00]
-; BROADWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [4:1.00]
+; BROADWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [9:1.00]
; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cvttss2siq:
; SKYLAKE: # BB#0:
@@ -1093,9 +1093,9 @@
;
; BROADWELL-LABEL: test_divps:
; BROADWELL: # BB#0:
-; BROADWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [13:1.00]
-; BROADWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
+; BROADWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_divps:
; SKYLAKE: # BB#0:
@@ -1159,9 +1159,9 @@
;
; BROADWELL-LABEL: test_divss:
; BROADWELL: # BB#0:
-; BROADWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [13:1.00]
-; BROADWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
+; BROADWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_divss:
; SKYLAKE: # BB#0:
@@ -1226,8 +1226,8 @@
; BROADWELL-LABEL: test_ldmxcsr:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; BROADWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_ldmxcsr:
; SKYLAKE: # BB#0:
@@ -1294,8 +1294,8 @@
; BROADWELL-LABEL: test_maxps:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_maxps:
; SKYLAKE: # BB#0:
@@ -1361,8 +1361,8 @@
; BROADWELL-LABEL: test_maxss:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_maxss:
; SKYLAKE: # BB#0:
@@ -1428,8 +1428,8 @@
; BROADWELL-LABEL: test_minps:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_minps:
; SKYLAKE: # BB#0:
@@ -1495,8 +1495,8 @@
; BROADWELL-LABEL: test_minss:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_minss:
; SKYLAKE: # BB#0:
@@ -1566,10 +1566,10 @@
;
; BROADWELL-LABEL: test_movaps:
; BROADWELL: # BB#0:
-; BROADWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [1:0.50]
+; BROADWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [5:0.50]
; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_movaps:
; SKYLAKE: # BB#0:
@@ -1641,7 +1641,7 @@
; BROADWELL-LABEL: test_movhlps:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_movhlps:
; SKYLAKE: # BB#0:
@@ -1708,10 +1708,10 @@
;
; BROADWELL-LABEL: test_movhps:
; BROADWELL: # BB#0:
-; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
+; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_movhps:
; SKYLAKE: # BB#0:
@@ -1787,7 +1787,7 @@
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
; BROADWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_movlhps:
; SKYLAKE: # BB#0:
@@ -1855,10 +1855,10 @@
;
; BROADWELL-LABEL: test_movlps:
; BROADWELL: # BB#0:
-; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
+; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_movlps:
; SKYLAKE: # BB#0:
@@ -1928,7 +1928,7 @@
; BROADWELL-LABEL: test_movmskps:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_movmskps:
; SKYLAKE: # BB#0:
@@ -1989,7 +1989,7 @@
; BROADWELL-LABEL: test_movntps:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_movntps:
; SKYLAKE: # BB#0:
@@ -2052,10 +2052,10 @@
;
; BROADWELL-LABEL: test_movss_mem:
; BROADWELL: # BB#0:
-; BROADWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50]
+; BROADWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
; BROADWELL-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_movss_mem:
; SKYLAKE: # BB#0:
@@ -2125,7 +2125,7 @@
; BROADWELL-LABEL: test_movss_reg:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_movss_reg:
; SKYLAKE: # BB#0:
@@ -2188,10 +2188,10 @@
;
; BROADWELL-LABEL: test_movups:
; BROADWELL: # BB#0:
-; BROADWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [1:0.50]
+; BROADWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [5:0.50]
; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_movups:
; SKYLAKE: # BB#0:
@@ -2259,9 +2259,9 @@
;
; BROADWELL-LABEL: test_mulps:
; BROADWELL: # BB#0:
-; BROADWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
+; BROADWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_mulps:
; SKYLAKE: # BB#0:
@@ -2325,9 +2325,9 @@
;
; BROADWELL-LABEL: test_mulss:
; BROADWELL: # BB#0:
-; BROADWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
+; BROADWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_mulss:
; SKYLAKE: # BB#0:
@@ -2396,8 +2396,8 @@
; BROADWELL-LABEL: test_orps:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_orps:
; SKYLAKE: # BB#0:
@@ -2466,8 +2466,8 @@
;
; BROADWELL-LABEL: test_prefetchnta:
; BROADWELL: # BB#0:
-; BROADWELL-NEXT: prefetchnta (%rdi) # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_prefetchnta:
; SKYLAKE: # BB#0:
@@ -2534,9 +2534,9 @@
; BROADWELL-LABEL: test_rcpps:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [5:1.00]
+; BROADWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [10:1.00]
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_rcpps:
; SKYLAKE: # BB#0:
@@ -2619,10 +2619,10 @@
; BROADWELL-LABEL: test_rcpss:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
+; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
; BROADWELL-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_rcpss:
; SKYLAKE: # BB#0:
@@ -2706,9 +2706,9 @@
; BROADWELL-LABEL: test_rsqrtps:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [5:1.00]
+; BROADWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [10:1.00]
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_rsqrtps:
; SKYLAKE: # BB#0:
@@ -2791,10 +2791,10 @@
; BROADWELL-LABEL: test_rsqrtss:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
+; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
; BROADWELL-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_rsqrtss:
; SKYLAKE: # BB#0:
@@ -2871,8 +2871,8 @@
;
; BROADWELL-LABEL: test_sfence:
; BROADWELL: # BB#0:
-; BROADWELL-NEXT: sfence # sched: [1:0.33]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: sfence # sched: [2:0.33]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_sfence:
; SKYLAKE: # BB#0:
@@ -2936,8 +2936,8 @@
; BROADWELL-LABEL: test_shufps:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
-; BROADWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [6:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_shufps:
; SKYLAKE: # BB#0:
@@ -3008,9 +3008,9 @@
; BROADWELL-LABEL: test_sqrtps:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00]
-; BROADWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [14:1.00]
+; BROADWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [19:1.00]
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_sqrtps:
; SKYLAKE: # BB#0:
@@ -3093,10 +3093,10 @@
; BROADWELL-LABEL: test_sqrtss:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00]
-; BROADWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [1:0.50]
+; BROADWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [5:0.50]
; BROADWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:1.00]
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_sqrtss:
; SKYLAKE: # BB#0:
@@ -3170,9 +3170,9 @@
;
; BROADWELL-LABEL: test_stmxcsr:
; BROADWELL: # BB#0:
-; BROADWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; BROADWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
+; BROADWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_stmxcsr:
; SKYLAKE: # BB#0:
@@ -3239,8 +3239,8 @@
; BROADWELL-LABEL: test_subps:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_subps:
; SKYLAKE: # BB#0:
@@ -3305,8 +3305,8 @@
; BROADWELL-LABEL: test_subss:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_subss:
; SKYLAKE: # BB#0:
@@ -3414,13 +3414,13 @@
; BROADWELL-NEXT: setnp %al # sched: [1:0.50]
; BROADWELL-NEXT: sete %cl # sched: [1:0.50]
; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25]
-; BROADWELL-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: setnp %al # sched: [1:0.50]
; BROADWELL-NEXT: sete %dl # sched: [1:0.50]
; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25]
; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_ucomiss:
; SKYLAKE: # BB#0:
@@ -3523,8 +3523,8 @@
; BROADWELL-LABEL: test_unpckhps:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; BROADWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_unpckhps:
; SKYLAKE: # BB#0:
@@ -3593,8 +3593,8 @@
; BROADWELL-LABEL: test_unpcklps:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; BROADWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_unpcklps:
; SKYLAKE: # BB#0:
@@ -3663,8 +3663,8 @@
; BROADWELL-LABEL: test_xorps:
; BROADWELL: # BB#0:
; BROADWELL-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [2:1.00]
+; BROADWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
+; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_xorps:
; SKYLAKE: # BB#0: