[X86] Add IMUL scheduling info on sandybridge, fix it on >=haswell.
Summary:
Only IMUL16rri uses an extra P0156. IMUL32* and IMUL16rr only use
P1.
This was computed using https://github.com/google/EXEgesis/blob/master/exegesis/tools/compute_itineraries.cc
This can easily be validated by running perf on the following code:
```
int main(int argc, char**argv) {
int a = argc;
int b = argc;
int c = argc;
int d = argc;
for (int i = 0; i < LOOP_ITERATIONS; ++i) {
asm volatile(
R"(
.rept 10000
imull $0x2, %%edx, %%eax
imull $0x2, %%ecx, %%ebx
imull $0x2, %%eax, %%edx
imull $0x2, %%ebx, %%ecx
.endr
)"
: "+a"(a), "+b"(b), "+c"(c), "+d"(d)
:
:);
}
return a+b+c+d;
}
```
-> test.cc
perf stat -x, -e cycles --pfm-events=uops_executed_port:port_0:u,uops_executed_port:port_1:u,uops_executed_port:port_2:u,uops_executed_port:port_3:u,uops_executed_port:port_4:u,uops_executed_port:port_5:u,uops_executed_port:port_6:u,uops_executed_port:port_7:u test
Reviewers: craig.topper, RKSimon, gadi.haber
Subscribers: llvm-commits, gchatelet, chandlerc
Differential Revision: https://reviews.llvm.org/D43460
llvm-svn: 326877
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index 79deb4b..6441fd5 100755
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -1217,7 +1217,7 @@
def: InstRW<[BWWriteResGroup27], (instregex "CVTDQ2PSrr")>;
def: InstRW<[BWWriteResGroup27], (instregex "CVTPS2DQrr")>;
def: InstRW<[BWWriteResGroup27], (instregex "CVTTPS2DQrr")>;
-def: InstRW<[BWWriteResGroup27], (instrs IMUL32rr, IMUL32rri, IMUL32rri8, IMUL64rr, IMUL64rri32, IMUL64rri8)>;
+def: InstRW<[BWWriteResGroup27], (instrs IMUL16rr, IMUL32rr, IMUL32rri, IMUL32rri8, IMUL64rr, IMUL64rri32, IMUL64rri8)>;
def: InstRW<[BWWriteResGroup27], (instrs IMUL8r)>;
def: InstRW<[BWWriteResGroup27], (instregex "LZCNT(16|32|64)rr")>;
def: InstRW<[BWWriteResGroup27], (instregex "MAX(C?)PDrr")>;
@@ -1298,7 +1298,7 @@
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup27_16], (instrs IMUL16rr, IMUL16rri, IMUL16rri8)>;
+def: InstRW<[BWWriteResGroup27_16], (instrs IMUL16rri, IMUL16rri8)>;
def BWWriteResGroup28 : SchedWriteRes<[BWPort5]> {
let Latency = 3;