Added separate alias instructions for SSE logical ops that operate on non-packed types.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@26297 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 345f3e0..58f003f 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -356,8 +356,11 @@
def extloadi8i1 : PatFrag<(ops node:$ptr), (i8 (extload node:$ptr, i1))>;
def extloadf64f32 : PatFrag<(ops node:$ptr), (f64 (extload node:$ptr, f32))>;
-def X86loadpf32 : PatFrag<(ops node:$ptr), (f32 (X86loadp node:$ptr))>;
-def X86loadpf64 : PatFrag<(ops node:$ptr), (f64 (X86loadp node:$ptr))>;
+def X86loadpf32 : PatFrag<(ops node:$ptr), (f32 (X86loadp node:$ptr))>;
+def X86loadpf64 : PatFrag<(ops node:$ptr), (f64 (X86loadp node:$ptr))>;
+
+def X86loadpv4f32 : PatFrag<(ops node:$ptr), (v4f32 (X86loadp node:$ptr))>;
+def X86loadpv2f64 : PatFrag<(ops node:$ptr), (v2f64 (X86loadp node:$ptr))>;
//===----------------------------------------------------------------------===//
// Instruction templates...
@@ -705,18 +708,6 @@
"mov{l} {$src, $dst|$dst, $src}",
[(store R32:$src, addr:$dst)]>;
-// Pseudo-instructions that map movr0 to xor.
-// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
-def MOV8r0 : I<0x30, MRMInitReg, (ops R8 :$dst),
- "xor{b} $dst, $dst",
- [(set R8:$dst, 0)]>;
-def MOV16r0 : I<0x31, MRMInitReg, (ops R16:$dst),
- "xor{w} $dst, $dst",
- [(set R16:$dst, 0)]>, OpSize;
-def MOV32r0 : I<0x31, MRMInitReg, (ops R32:$dst),
- "xor{l} $dst, $dst",
- [(set R32:$dst, 0)]>;
-
//===----------------------------------------------------------------------===//
// Fixed-Register Multiplication and Division Instructions...
//
@@ -2485,15 +2476,6 @@
[(X86cmp FR64:$src1, (loadf64 addr:$src2))]>,
Requires<[HasSSE2]>, TB, OpSize;
-// Pseudo-instructions that map fld0 to pxor for sse.
-// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
-def FLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst),
- "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
- Requires<[HasSSE1]>, TB, OpSize;
-def FLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
- "pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
- Requires<[HasSSE2]>, TB, OpSize;
-
let isTwoAddress = 1 in {
// SSE Scalar Arithmetic
let isCommutable = 1 in {
@@ -2583,71 +2565,6 @@
(ops FR64:$dst, FR64:$src1, f64mem:$src, SSECC:$cc),
"cmp${cc}sd {$src, $dst|$dst, $src}", []>,
Requires<[HasSSE2]>, XD;
-
-// SSE Logical - these all operate on packed values
-let isCommutable = 1 in {
-def ANDPSrr : I<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
- "andps {$src2, $dst|$dst, $src2}",
- [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>,
- Requires<[HasSSE1]>, TB;
-def ANDPDrr : I<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
- "andpd {$src2, $dst|$dst, $src2}",
- [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>,
- Requires<[HasSSE2]>, TB, OpSize;
-def ORPSrr : I<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
- "orps {$src2, $dst|$dst, $src2}", []>,
- Requires<[HasSSE1]>, TB;
-def ORPDrr : I<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
- "orpd {$src2, $dst|$dst, $src2}", []>,
- Requires<[HasSSE2]>, TB, OpSize;
-def XORPSrr : I<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
- "xorps {$src2, $dst|$dst, $src2}",
- [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>,
- Requires<[HasSSE1]>, TB;
-def XORPDrr : I<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
- "xorpd {$src2, $dst|$dst, $src2}",
- [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>,
- Requires<[HasSSE2]>, TB, OpSize;
-}
-def ANDPSrm : I<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
- "andps {$src2, $dst|$dst, $src2}",
- [(set FR32:$dst, (X86fand FR32:$src1,
- (X86loadpf32 addr:$src2)))]>,
- Requires<[HasSSE1]>, TB;
-def ANDPDrm : I<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
- "andpd {$src2, $dst|$dst, $src2}",
- [(set FR64:$dst, (X86fand FR64:$src1,
- (X86loadpf64 addr:$src2)))]>,
- Requires<[HasSSE2]>, TB, OpSize;
-def ORPSrm : I<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
- "orps {$src2, $dst|$dst, $src2}", []>,
- Requires<[HasSSE1]>, TB;
-def ORPDrm : I<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
- "orpd {$src2, $dst|$dst, $src2}", []>,
- Requires<[HasSSE2]>, TB, OpSize;
-def XORPSrm : I<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
- "xorps {$src2, $dst|$dst, $src2}",
- [(set FR32:$dst, (X86fxor FR32:$src1,
- (X86loadpf32 addr:$src2)))]>,
- Requires<[HasSSE1]>, TB;
-def XORPDrm : I<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
- "xorpd {$src2, $dst|$dst, $src2}",
- [(set FR64:$dst, (X86fxor FR64:$src1,
- (X86loadpf64 addr:$src2)))]>,
- Requires<[HasSSE2]>, TB, OpSize;
-
-def ANDNPSrr : I<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
- "andnps {$src2, $dst|$dst, $src2}", []>,
- Requires<[HasSSE1]>, TB;
-def ANDNPSrm : I<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
- "andnps {$src2, $dst|$dst, $src2}", []>,
- Requires<[HasSSE1]>, TB;
-def ANDNPDrr : I<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
- "andnpd {$src2, $dst|$dst, $src2}", []>,
- Requires<[HasSSE2]>, TB, OpSize;
-def ANDNPDrm : I<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
- "andnpd {$src2, $dst|$dst, $src2}", []>,
- Requires<[HasSSE2]>, TB, OpSize;
}
//===----------------------------------------------------------------------===//
@@ -3076,7 +2993,7 @@
"movaps {$src, $dst|$dst, $src}", []>,
Requires<[HasSSE1]>, TB;
def MOVAPDrr : I<0x28, MRMSrcReg, (ops V2F64:$dst, V2F64:$src),
- "movapd {$src, $dst|$dst, $src}", []>,
+ "movapd {$src, $dst|$dst, $src}", []>,
Requires<[HasSSE2]>, TB, OpSize;
def MOVAPSrm : I<0x28, MRMSrcMem, (ops V4F32:$dst, f128mem:$src),
@@ -3092,6 +3009,106 @@
"movapd {$src, $dst|$dst, $src}",[]>,
Requires<[HasSSE2]>, TB, OpSize;
+// Logical
+let isTwoAddress = 1 in {
+let isCommutable = 1 in {
+def ANDPSrr : I<0x54, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
+ "andps {$src2, $dst|$dst, $src2}",
+ [(set V4F32:$dst, (X86fand V4F32:$src1, V4F32:$src2))]>,
+ Requires<[HasSSE1]>, TB;
+def ANDPDrr : I<0x54, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
+ "andpd {$src2, $dst|$dst, $src2}",
+ [(set V2F64:$dst, (X86fand V2F64:$src1, V2F64:$src2))]>,
+ Requires<[HasSSE2]>, TB, OpSize;
+def ORPSrr : I<0x56, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
+ "orps {$src2, $dst|$dst, $src2}", []>,
+ Requires<[HasSSE1]>, TB;
+def ORPDrr : I<0x56, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
+ "orpd {$src2, $dst|$dst, $src2}", []>,
+ Requires<[HasSSE2]>, TB, OpSize;
+def XORPSrr : I<0x57, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
+ "xorps {$src2, $dst|$dst, $src2}",
+ [(set V4F32:$dst, (X86fxor V4F32:$src1, V4F32:$src2))]>,
+ Requires<[HasSSE1]>, TB;
+def XORPDrr : I<0x57, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
+ "xorpd {$src2, $dst|$dst, $src2}",
+ [(set V2F64:$dst, (X86fxor V2F64:$src1, V2F64:$src2))]>,
+ Requires<[HasSSE2]>, TB, OpSize;
+}
+def ANDPSrm : I<0x54, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
+ "andps {$src2, $dst|$dst, $src2}",
+ [(set V4F32:$dst, (X86fand V4F32:$src1,
+ (X86loadpv4f32 addr:$src2)))]>,
+ Requires<[HasSSE1]>, TB;
+def ANDPDrm : I<0x54, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
+ "andpd {$src2, $dst|$dst, $src2}",
+ [(set V2F64:$dst, (X86fand V2F64:$src1,
+ (X86loadpv2f64 addr:$src2)))]>,
+ Requires<[HasSSE2]>, TB, OpSize;
+def ORPSrm : I<0x56, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
+ "orps {$src2, $dst|$dst, $src2}", []>,
+ Requires<[HasSSE1]>, TB;
+def ORPDrm : I<0x56, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
+ "orpd {$src2, $dst|$dst, $src2}", []>,
+ Requires<[HasSSE2]>, TB, OpSize;
+def XORPSrm : I<0x57, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
+ "xorps {$src2, $dst|$dst, $src2}",
+ [(set V4F32:$dst, (X86fxor V4F32:$src1,
+ (X86loadpv4f32 addr:$src2)))]>,
+ Requires<[HasSSE1]>, TB;
+def XORPDrm : I<0x57, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
+ "xorpd {$src2, $dst|$dst, $src2}",
+ [(set V2F64:$dst, (X86fxor V2F64:$src1,
+ (X86loadpv2f64 addr:$src2)))]>,
+ Requires<[HasSSE2]>, TB, OpSize;
+
+def ANDNPSrr : I<0x55, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
+ "andnps {$src2, $dst|$dst, $src2}", []>,
+ Requires<[HasSSE1]>, TB;
+def ANDNPSrm : I<0x55, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
+ "andnps {$src2, $dst|$dst, $src2}", []>,
+ Requires<[HasSSE1]>, TB;
+def ANDNPDrr : I<0x55, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
+ "andnpd {$src2, $dst|$dst, $src2}", []>,
+ Requires<[HasSSE2]>, TB, OpSize;
+def ANDNPDrm : I<0x55, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
+ "andnpd {$src2, $dst|$dst, $src2}", []>,
+ Requires<[HasSSE2]>, TB, OpSize;
+}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions
+//===----------------------------------------------------------------------===//
+
+def RDTSC : I<0x31, RawFrm, (ops), "rdtsc", [(X86rdtsc)]>,
+ TB, Imp<[],[EAX,EDX]>;
+
+
+//===----------------------------------------------------------------------===//
+// Alias Instructions
+//===----------------------------------------------------------------------===//
+
+// Alias instructions that map movr0 to xor.
+// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
+def MOV8r0 : I<0x30, MRMInitReg, (ops R8 :$dst),
+ "xor{b} $dst, $dst",
+ [(set R8:$dst, 0)]>;
+def MOV16r0 : I<0x31, MRMInitReg, (ops R16:$dst),
+ "xor{w} $dst, $dst",
+ [(set R16:$dst, 0)]>, OpSize;
+def MOV32r0 : I<0x31, MRMInitReg, (ops R32:$dst),
+ "xor{l} $dst, $dst",
+ [(set R32:$dst, 0)]>;
+
+// Alias instructions that map fld0 to pxor for sse.
+// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
+def FLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst),
+ "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
+ Requires<[HasSSE1]>, TB, OpSize;
+def FLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst),
+ "pxor $dst, $dst", [(set FR64:$dst, fp64imm0)]>,
+ Requires<[HasSSE2]>, TB, OpSize;
+
// Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
// Upper bits are disregarded.
def FsMOVAPSrr : I<0x28, MRMSrcReg, (ops V4F32:$dst, V4F32:$src),
@@ -3112,14 +3129,72 @@
[(set FR64:$dst, (X86loadpf64 addr:$src))]>,
Requires<[HasSSE2]>, TB, OpSize;
+// Alias bitwise logical operations using SSE logical ops on packed FP values.
+let isTwoAddress = 1 in {
+let isCommutable = 1 in {
+def FsANDPSrr : I<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+ "andps {$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>,
+ Requires<[HasSSE1]>, TB;
+def FsANDPDrr : I<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+ "andpd {$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>,
+ Requires<[HasSSE2]>, TB, OpSize;
+def FsORPSrr : I<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+ "orps {$src2, $dst|$dst, $src2}", []>,
+ Requires<[HasSSE1]>, TB;
+def FsORPDrr : I<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+ "orpd {$src2, $dst|$dst, $src2}", []>,
+ Requires<[HasSSE2]>, TB, OpSize;
+def FsXORPSrr : I<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+ "xorps {$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>,
+ Requires<[HasSSE1]>, TB;
+def FsXORPDrr : I<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+ "xorpd {$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>,
+ Requires<[HasSSE2]>, TB, OpSize;
+}
+def FsANDPSrm : I<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
+ "andps {$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86fand FR32:$src1,
+ (X86loadpf32 addr:$src2)))]>,
+ Requires<[HasSSE1]>, TB;
+def FsANDPDrm : I<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
+ "andpd {$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86fand FR64:$src1,
+ (X86loadpf64 addr:$src2)))]>,
+ Requires<[HasSSE2]>, TB, OpSize;
+def FsORPSrm : I<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
+ "orps {$src2, $dst|$dst, $src2}", []>,
+ Requires<[HasSSE1]>, TB;
+def FsORPDrm : I<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
+ "orpd {$src2, $dst|$dst, $src2}", []>,
+ Requires<[HasSSE2]>, TB, OpSize;
+def FsXORPSrm : I<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
+ "xorps {$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86fxor FR32:$src1,
+ (X86loadpf32 addr:$src2)))]>,
+ Requires<[HasSSE1]>, TB;
+def FsXORPDrm : I<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
+ "xorpd {$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86fxor FR64:$src1,
+ (X86loadpf64 addr:$src2)))]>,
+ Requires<[HasSSE2]>, TB, OpSize;
-//===----------------------------------------------------------------------===//
-// Miscellaneous Instructions
-//===----------------------------------------------------------------------===//
-
-def RDTSC : I<0x31, RawFrm, (ops), "rdtsc", [(X86rdtsc)]>,
- TB, Imp<[],[EAX,EDX]>;
-
+def FsANDNPSrr : I<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+ "andnps {$src2, $dst|$dst, $src2}", []>,
+ Requires<[HasSSE1]>, TB;
+def FsANDNPSrm : I<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
+ "andnps {$src2, $dst|$dst, $src2}", []>,
+ Requires<[HasSSE1]>, TB;
+def FsANDNPDrr : I<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+ "andnpd {$src2, $dst|$dst, $src2}", []>,
+ Requires<[HasSSE2]>, TB, OpSize;
+def FsANDNPDrm : I<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
+ "andnpd {$src2, $dst|$dst, $src2}", []>,
+ Requires<[HasSSE2]>, TB, OpSize;
+}
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns