Fix a long standing deficiency in the X86 backend: we would sometimes emit "zero" and "all one" vectors multiple times, for example: _test2: pcmpeqd %mm0, %mm0 movq %mm0, _M1 pcmpeqd %mm0, %mm0 movq %mm0, _M2 ret instead of: _test2: pcmpeqd %mm0, %mm0 movq %mm0, _M1 movq %mm0, _M2 ret This patch fixes this by always arranging for zero/one vectors to be defined as v4i32 or v2i32 (SSE/MMX) instead of letting them be any random type. This ensures they get trivially CSE'd on the dag. This fix is also important for LegalizeDAGTypes, as it gets unhappy when the x86 backend wants BUILD_VECTOR(i64 0) to be legal even when 'i64' isn't legal. This patch makes the following changes: 1) X86TargetLowering::LowerBUILD_VECTOR now lowers 0/1 vectors into their canonical types. 2) The now-dead patterns are removed from the SSE/MMX .td files. 3) All the patterns in the .td file that referred to immAllOnesV or immAllZerosV in the wrong form now use *_bc to match them with a bitcast wrapped around them. 4) X86DAGToDAGISel::SelectScalarSSELoad is generalized to handle bitcast'd zero vectors, which simplifies the code actually. 5) getShuffleVectorZeroOrUndef is updated to generate a shuffle that is legal, instead of generating one that is illegal and expecting a later legalize pass to clean it up. 6) isZeroShuffle is generalized to handle bitcast of zeros. 7) several other minor tweaks. This patch is definite goodness, but has the potential to cause random code quality regressions. Please be on the lookout for these and let me know if they happen. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@44310 91177308-0d34-0410-b5e6-96231b3b80d8

commit: e6aa38653c51a75b08e5e8722ffba0f4bf51225c [log] [tgz]
author: Chris Lattner <sabre@nondot.org> Sun Nov 25 00:24:49 2007 +0000
committer: Chris Lattner <sabre@nondot.org> Sun Nov 25 00:24:49 2007 +0000
tree: 0a057bf1570635f71a197d90d1753c542b24e6df
parent: 1b66f82d6e76174231832f513f1721cbddf972ca [diff] [blame]
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 2c86e8d..da23ccb 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td

@@ -939,11 +939,10 @@
                   "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>;
 
 // Alias instructions that map zero vector to pxor / xorp* for sse.
-// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
 let isReMaterializable = 1 in
 def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins),
                  "xorps\t$dst, $dst",
-                 [(set VR128:$dst, (v4f32 immAllZerosV))]>;
+                 [(set VR128:$dst, (v4i32 immAllZerosV))]>;
 
 // FR32 to 128-bit vector conversion.
 def MOVSS2PSrr : SSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR32:$src),
@@ -991,7 +990,7 @@
 let AddedComplexity = 20 in
 def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
                       "movss\t{$src, $dst|$dst, $src}",
-                      [(set VR128:$dst, (v4f32 (vector_shuffle immAllZerosV,
+                      [(set VR128:$dst, (v4f32 (vector_shuffle immAllZerosV_bc,
                                  (v4f32 (scalar_to_vector (loadf32 addr:$src))),
                                                 MOVL_shuffle_mask)))]>;
 
@@ -2119,11 +2118,10 @@
 
 
 // Alias instructions that map zero vector to pxor / xorp* for sse.
-// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
 let isReMaterializable = 1 in
   def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins),
                          "pcmpeqd\t$dst, $dst",
-                         [(set VR128:$dst, (v2f64 immAllOnesV))]>;
+                         [(set VR128:$dst, (v4i32 immAllOnesV))]>;
 
 // FR64 to 128-bit vector conversion.
 def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR64:$src),
@@ -2220,7 +2218,7 @@
   def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                         "movsd\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
-                          (v2f64 (vector_shuffle immAllZerosV,
+                          (v2f64 (vector_shuffle immAllZerosV_bc,
                                   (v2f64 (scalar_to_vector
                                           (loadf64 addr:$src))),
                                   MOVL_shuffle_mask)))]>;
@@ -2692,21 +2690,6 @@
 def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
 def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
 
-// 128-bit vector all zero's.
-def : Pat<(v16i8 immAllZerosV), (V_SET0)>, Requires<[HasSSE2]>;
-def : Pat<(v8i16 immAllZerosV), (V_SET0)>, Requires<[HasSSE2]>;
-def : Pat<(v4i32 immAllZerosV), (V_SET0)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 immAllZerosV), (V_SET0)>, Requires<[HasSSE2]>;
-def : Pat<(v2f64 immAllZerosV), (V_SET0)>, Requires<[HasSSE2]>;
-
-// 128-bit vector all one's.
-def : Pat<(v16i8 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE2]>;
-def : Pat<(v8i16 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE2]>;
-def : Pat<(v4i32 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE2]>;
-def : Pat<(v4f32 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE1]>;
-
-
 // Scalar to v8i16 / v16i8. The source may be a GR32, but only the lower 8 or
 // 16-bits matter.
 def : Pat<(v8i16 (X86s2vec GR32:$src)), (MOVDI2PDIrr GR32:$src)>,
@@ -2751,17 +2734,17 @@
 // Move scalar to XMM zero-extended
 // movd to XMM register zero-extends
 let AddedComplexity = 15 in {
-def : Pat<(v8i16 (vector_shuffle immAllZerosV,
+def : Pat<(v8i16 (vector_shuffle immAllZerosV_bc,
                   (v8i16 (X86s2vec GR32:$src)), MOVL_shuffle_mask)),
           (MOVZDI2PDIrr GR32:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v16i8 (vector_shuffle immAllZerosV,
+def : Pat<(v16i8 (vector_shuffle immAllZerosV_bc,
                   (v16i8 (X86s2vec GR32:$src)), MOVL_shuffle_mask)),
           (MOVZDI2PDIrr GR32:$src)>, Requires<[HasSSE2]>;
 // Zeroing a VR128 then do a MOVS{S|D} to the lower bits.
-def : Pat<(v2f64 (vector_shuffle immAllZerosV,
+def : Pat<(v2f64 (vector_shuffle immAllZerosV_bc,
                   (v2f64 (scalar_to_vector FR64:$src)), MOVL_shuffle_mask)),
           (MOVLSD2PDrr (V_SET0), FR64:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v4f32 (vector_shuffle immAllZerosV,
+def : Pat<(v4f32 (vector_shuffle immAllZerosV_bc,
                   (v4f32 (scalar_to_vector FR32:$src)), MOVL_shuffle_mask)),
           (MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE2]>;
 }
@@ -2911,7 +2894,7 @@
 
 // Set lowest element and zero upper elements.
 let AddedComplexity = 20 in
-def : Pat<(bc_v2i64 (vector_shuffle immAllZerosV,
+def : Pat<(bc_v2i64 (vector_shuffle immAllZerosV_bc,
                      (v2f64 (scalar_to_vector (loadf64 addr:$src))),
                      MOVL_shuffle_mask)),
           (MOVZQI2PQIrm addr:$src)>, Requires<[HasSSE2]>;
commit	e6aa38653c51a75b08e5e8722ffba0f4bf51225c	[log] [tgz]
author	Chris Lattner <sabre@nondot.org>	Sun Nov 25 00:24:49 2007 +0000
committer	Chris Lattner <sabre@nondot.org>	Sun Nov 25 00:24:49 2007 +0000
tree	0a057bf1570635f71a197d90d1753c542b24e6df
parent	1b66f82d6e76174231832f513f1721cbddf972ca [diff] [blame]