Add a late SSEDomainFix pass that twiddles SSE instructions to avoid domain crossings.
On Nehalem and newer CPUs there is a 2 cycle latency penalty on using a register
in a different domain than where it was defined. Some instructions have
equvivalents for different domains, like por/orps/orpd.
The SSEDomainFix pass tries to minimize the number of domain crossings by
changing between equvivalent opcodes where possible.
This is a work in progress, in particular the pass doesn't do anything yet. SSE
instructions are tagged with their execution domain in TableGen using the last
two bits of TSFlags. Note that not all instructions are tagged correctly. Life
just isn't that simple.
The SSE execution domain issue is very similar to the ARM NEON/VFP pipeline
issue handled by NEONMoveFixPass. This pass may become target independent to
handle both.
llvm-svn: 99524
diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td
index c06b81b..a811638 100644
--- a/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/llvm/lib/Target/X86/X86InstrFormats.td
@@ -68,6 +68,16 @@
def CondMovFP : FPFormat<6>;
def SpecialFP : FPFormat<7>;
+// Class specifying the SSE execution domain, used by the SSEDomainFix pass.
+// Instruction execution domain.
+class Domain<bits<2> val> {
+ bits<2> Value = val;
+}
+def GenericDomain : Domain<0>;
+def SSEPackedInt : Domain<1>;
+def SSEPackedSingle : Domain<2>;
+def SSEPackedDouble : Domain<3>;
+
// Prefix byte classes which are used to indicate to the ad-hoc machine code
// emitter that various prefix bytes are required.
class OpSize { bit hasOpSizePrefix = 1; }
@@ -93,7 +103,7 @@
class TF { bits<4> Prefix = 15; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
- string AsmStr>
+ string AsmStr, Domain d = GenericDomain>
: Instruction {
let Namespace = "X86";
@@ -119,16 +129,19 @@
bits<3> FPFormBits = 0;
bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
bits<2> SegOvrBits = 0; // Segment override prefix.
+ Domain Dom = d;
+ bits<2> DomainBits = Dom.Value;
}
-class I<bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern>
- : X86Inst<o, f, NoImm, outs, ins, asm> {
+class I<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern, Domain d = GenericDomain>
+ : X86Inst<o, f, NoImm, outs, ins, asm, d> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern>
- : X86Inst<o, f, Imm8 , outs, ins, asm> {
+ list<dag> pattern, Domain d = GenericDomain>
+ : X86Inst<o, f, Imm8, outs, ins, asm, d> {
let Pattern = pattern;
let CodeSize = 3;
}
@@ -196,14 +209,16 @@
class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
-class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
class PSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasSSE1]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
+ Requires<[HasSSE1]>;
class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, TB, Requires<[HasSSE1]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
+ Requires<[HasSSE1]>;
// SSE2 Instruction Templates:
//
@@ -222,10 +237,12 @@
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+ Requires<[HasSSE2]>;
class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+ Requires<[HasSSE2]>;
// SSE3 Instruction Templates:
//
@@ -235,12 +252,15 @@
class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE3]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, XS,
+ Requires<[HasSSE3]>;
class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE3]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, XD,
+ Requires<[HasSSE3]>;
class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE3]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+ Requires<[HasSSE3]>;
// SSSE3 Instruction Templates:
@@ -254,10 +274,12 @@
class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSSE3]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ Requires<[HasSSSE3]>;
class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSSE3]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ Requires<[HasSSSE3]>;
// SSE4.1 Instruction Templates:
//
@@ -266,17 +288,20 @@
//
class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE41]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ Requires<[HasSSE41]>;
class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE41]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ Requires<[HasSSE41]>;
// SSE4.2 Instruction Templates:
//
// SS428I - SSE 4.2 instructions with T8 prefix.
class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE42]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ Requires<[HasSSE42]>;
// SS42FI - SSE 4.2 instructions with TF prefix.
class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
@@ -286,7 +311,8 @@
// SS42AI = SSE 4.2 instructions with TA prefix
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE42]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ Requires<[HasSSE42]>;
// X86-64 Instruction templates...
//