blob: c7731c1dfbe9c324cdc65270a5920a964e960fc6 [file] [log] [blame]
//====- X86InstrSSE.td - Describe the X86 Instruction Set --*- tablegen -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Evan Cheng and is distributed under the University
6// of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file describes the X86 SSE instruction set, defining the instructions,
11// and properties of the instructions which are needed for code generation,
12// machine code emission, and analysis.
13//
14//===----------------------------------------------------------------------===//
15
16
17//===----------------------------------------------------------------------===//
18// SSE specific DAG Nodes.
19//===----------------------------------------------------------------------===//
20
// Type profile with one result and two operands: the result and operand 1
// have the same floating-point type, and operand 2 is an integer
// (presumably the shift amount -- confirm against the X86ISD lowering).
def SDTX86FPShiftOp : SDTypeProfile<1, 2, [
  SDTCisSameAs<0, 1>,
  SDTCisFP<0>,
  SDTCisInt<2>
]>;
23
// FP min / max.
def X86fmin   : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
def X86fmax   : SDNode<"X86ISD::FMAX", SDTFPBinOp>;

// Bitwise logical operations on FP values. All three are marked both
// commutative and associative.
def X86fand   : SDNode<"X86ISD::FAND", SDTFPBinOp,
                       [SDNPCommutative, SDNPAssociative]>;
def X86for    : SDNode<"X86ISD::FOR", SDTFPBinOp,
                       [SDNPCommutative, SDNPAssociative]>;
def X86fxor   : SDNode<"X86ISD::FXOR", SDTFPBinOp,
                       [SDNPCommutative, SDNPAssociative]>;

// Unary FP nodes.
def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
def X86frcp   : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;

// FP shift, typed by SDTX86FPShiftOp.
def X86fsrl   : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;

// Flag-setting compares.
// NOTE(review): COMI carries SDNPHasChain while UCOMI does not -- confirm
// that this asymmetry matches the C++ lowering code.
def X86comi   : SDNode<"X86ISD::COMI", SDTX86CmpTest,
                       [SDNPHasChain]>;
def X86ucomi  : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;

// Nodes whose type profiles carry no constraints.
def X86s2vec  : SDNode<"X86ISD::S2VEC", SDTypeProfile<1, 1, []>, []>;
def X86pextrw : SDNode<"X86ISD::PEXTRW", SDTypeProfile<1, 2, []>, []>;
def X86pinsrw : SDNode<"X86ISD::PINSRW", SDTypeProfile<1, 3, []>, []>;
41
42//===----------------------------------------------------------------------===//
43// SSE 'Special' Instructions
44//===----------------------------------------------------------------------===//
45
// Pseudo instructions that select an 'undef' value into a register of the
// given class. They take no inputs and print only as an assembly comment.

// 128-bit vector register (matched as v4f32 undef); needs SSE1.
def IMPLICIT_DEF_VR128 : I<0, Pseudo, (outs VR128:$dst), (ins),
                           "#IMPLICIT_DEF $dst",
                           [(set VR128:$dst, (v4f32 (undef)))]>,
                         Requires<[HasSSE1]>;

// Scalar single-precision register; needs SSE1.
def IMPLICIT_DEF_FR32 : I<0, Pseudo, (outs FR32:$dst), (ins),
                          "#IMPLICIT_DEF $dst",
                          [(set FR32:$dst, (undef))]>,
                        Requires<[HasSSE1]>;

// Scalar double-precision register; needs SSE2.
def IMPLICIT_DEF_FR64 : I<0, Pseudo, (outs FR64:$dst), (ins),
                          "#IMPLICIT_DEF $dst",
                          [(set FR64:$dst, (undef))]>,
                        Requires<[HasSSE2]>;
56
57//===----------------------------------------------------------------------===//
58// SSE Complex Patterns
59//===----------------------------------------------------------------------===//
60
// Complex patterns for 'extloads' that place a scalar in the low element of
// a vector and zero the remaining (top) elements. They feed the SSE 'ss' and
// 'sd' instruction forms. Both are matched by SelectScalarSSELoad, produce
// four operands, and carry a chain.
def sse_load_f32 : ComplexPattern<v4f32, 4, "SelectScalarSSELoad", [],
                                  [SDNPHasChain]>;
def sse_load_f64 : ComplexPattern<v2f64, 4, "SelectScalarSSELoad", [],
                                  [SDNPHasChain]>;
68
// Memory operand typed as v4f32 but printed as a 32-bit FP memory reference.
// The four sub-operands are (ptr_rc, i8imm, ptr_rc, i32imm).
def ssmem : Operand<v4f32> {
  let PrintMethod   = "printf32mem";
  let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm);
}

// Memory operand typed as v2f64 but printed as a 64-bit FP memory reference.
// Same sub-operand layout as ssmem.
def sdmem : Operand<v2f64> {
  let PrintMethod   = "printf64mem";
  let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm);
}
77
78//===----------------------------------------------------------------------===//
79// SSE pattern fragments
80//===----------------------------------------------------------------------===//
81
// Plain 'load' fragments, each pinned to one 128-bit vector result type.
def loadv4f32 : PatFrag<(ops node:$ptr),
                        (v4f32 (load node:$ptr))>;
def loadv2f64 : PatFrag<(ops node:$ptr),
                        (v2f64 (load node:$ptr))>;
def loadv4i32 : PatFrag<(ops node:$ptr),
                        (v4i32 (load node:$ptr))>;
def loadv2i64 : PatFrag<(ops node:$ptr),
                        (v2i64 (load node:$ptr))>;
86
// Like 'store', but always requires vector alignment: the node must be a
// non-truncating, unindexed store whose alignment is at least 16.
def alignedstore : PatFrag<(ops node:$val, node:$ptr),
                           (st node:$val, node:$ptr), [{
  StoreSDNode *ST = dyn_cast<StoreSDNode>(N);
  if (!ST)
    return false;
  return !ST->isTruncatingStore() &&
         ST->getAddressingMode() == ISD::UNINDEXED &&
         ST->getAlignment() >= 16;
}]>;
96
// Like 'load', but always requires vector alignment: the node must be a
// non-extending, unindexed load whose alignment is at least 16.
def alignedload : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
  if (!LD)
    return false;
  return LD->getExtensionType() == ISD::NON_EXTLOAD &&
         LD->getAddressingMode() == ISD::UNINDEXED &&
         LD->getAlignment() >= 16;
}]>;
105
// Typed wrappers around 'alignedload'.

// Scalar result types.
def alignedloadfsf32 : PatFrag<(ops node:$ptr),
                               (f32 (alignedload node:$ptr))>;
def alignedloadfsf64 : PatFrag<(ops node:$ptr),
                               (f64 (alignedload node:$ptr))>;

// 128-bit vector result types.
def alignedloadv4f32 : PatFrag<(ops node:$ptr),
                               (v4f32 (alignedload node:$ptr))>;
def alignedloadv2f64 : PatFrag<(ops node:$ptr),
                               (v2f64 (alignedload node:$ptr))>;
def alignedloadv4i32 : PatFrag<(ops node:$ptr),
                               (v4i32 (alignedload node:$ptr))>;
def alignedloadv2i64 : PatFrag<(ops node:$ptr),
                               (v2i64 (alignedload node:$ptr))>;
112
// Like 'load', but with the alignment check appropriate for memory operands
// of most SSE instructions. Such operands must be naturally aligned on some
// targets but not on others.
// FIXME: Actually implement support for targets that don't require the
// alignment. This probably wants a subtarget predicate. Until then the
// predicate is the same as 'alignedload': non-extending, unindexed, and
// alignment of at least 16.
def memop : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
  if (!LD)
    return false;
  return LD->getExtensionType() == ISD::NON_EXTLOAD &&
         LD->getAddressingMode() == ISD::UNINDEXED &&
         LD->getAlignment() >= 16;
}]>;
125
// Typed wrappers around 'memop'.

// Scalar result types.
def memopfsf32 : PatFrag<(ops node:$ptr),
                         (f32 (memop node:$ptr))>;
def memopfsf64 : PatFrag<(ops node:$ptr),
                         (f64 (memop node:$ptr))>;

// 128-bit vector result types.
def memopv4f32 : PatFrag<(ops node:$ptr),
                         (v4f32 (memop node:$ptr))>;
def memopv2f64 : PatFrag<(ops node:$ptr),
                         (v2f64 (memop node:$ptr))>;
def memopv4i32 : PatFrag<(ops node:$ptr),
                         (v4i32 (memop node:$ptr))>;
def memopv2i64 : PatFrag<(ops node:$ptr),
                         (v2i64 (memop node:$ptr))>;
132
// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
// 16-byte boundary, so this variant of 'memop' accepts a non-extending,
// unindexed load whose alignment is at least 8.
def memop64 : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
  if (!LD)
    return false;
  return LD->getExtensionType() == ISD::NON_EXTLOAD &&
         LD->getAddressingMode() == ISD::UNINDEXED &&
         LD->getAlignment() >= 8;
}]>;
142
// 64-bit (MMX-sized) vector types: an 8-byte-aligned load is sufficient, so
// these are built on memop64.
def memopv8i8  : PatFrag<(ops node:$ptr), (v8i8  (memop64 node:$ptr))>;
def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>;
def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>;

// 128-bit vector types: these must use the 16-byte 'memop' check, like the
// other 128-bit memop* fragments (memopv4f32, memopv2i64, ...). Building
// them on memop64 would allow a load with only 8-byte alignment to be folded
// into a 128-bit SSE memory operand.
def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>;
148
// 'bitconvert' fragments, each pinned to one 128-bit vector result type.

// Floating-point element types.
def bc_v4f32 : PatFrag<(ops node:$in),
                       (v4f32 (bitconvert node:$in))>;
def bc_v2f64 : PatFrag<(ops node:$in),
                       (v2f64 (bitconvert node:$in))>;

// Integer element types.
def bc_v16i8 : PatFrag<(ops node:$in),
                       (v16i8 (bitconvert node:$in))>;
def bc_v8i16 : PatFrag<(ops node:$in),
                       (v8i16 (bitconvert node:$in))>;
def bc_v4i32 : PatFrag<(ops node:$in),
                       (v4i32 (bitconvert node:$in))>;
def bc_v2i64 : PatFrag<(ops node:$in),
                       (v2i64 (bitconvert node:$in))>;
155
// Matches an f32 FP immediate that is exactly +0.0.
def fp32imm0 : PatLeaf<(f32 fpimm),
                       [{ return N->isExactlyValue(+0.0); }]>;
159
// Immediate transform: yields the matched immediate shifted right by three
// (i.e. divided by 8) as an i32 immediate.
def PSxLDQ_imm : SDNodeXForm<imm,
                             [{ return getI32Imm(N->getValue() >> 3); }]>;
164
// Transform functions that turn a vector_shuffle mask (a build_vector node)
// into the 8-bit immediate expected by the corresponding instruction.

// Mask -> PSHUF*, SHUFP* etc. immediate.
def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector,
  [{ return getI8Imm(X86::getShuffleSHUFImmediate(N)); }]>;

// Mask -> PSHUFHW immediate.
def SHUFFLE_get_pshufhw_imm : SDNodeXForm<build_vector,
  [{ return getI8Imm(X86::getShufflePSHUFHWImmediate(N)); }]>;

// Mask -> PSHUFLW immediate.
def SHUFFLE_get_pshuflw_imm : SDNodeXForm<build_vector,
  [{ return getI8Imm(X86::getShufflePSHUFLWImmediate(N)); }]>;
182
// Shuffle-mask leaves. Each one matches a build_vector node accepted by the
// named X86:: predicate. Where a third argument is given, the matched mask
// is rewritten to an immediate by that transform.

// Splat masks.
def SSE_splat_mask : PatLeaf<(build_vector),
                             [{ return X86::isSplatMask(N); }],
                             SHUFFLE_get_shuf_imm>;
def SSE_splat_lo_mask : PatLeaf<(build_vector),
                                [{ return X86::isSplatLoMask(N); }]>;

// MOVHLPS / MOVHP / MOVLP / MOVL masks.
def MOVHLPS_shuffle_mask : PatLeaf<(build_vector),
                                   [{ return X86::isMOVHLPSMask(N); }]>;
def MOVHLPS_v_undef_shuffle_mask : PatLeaf<(build_vector),
  [{ return X86::isMOVHLPS_v_undef_Mask(N); }]>;
def MOVHP_shuffle_mask : PatLeaf<(build_vector),
                                 [{ return X86::isMOVHPMask(N); }]>;
def MOVLP_shuffle_mask : PatLeaf<(build_vector),
                                 [{ return X86::isMOVLPMask(N); }]>;
def MOVL_shuffle_mask : PatLeaf<(build_vector),
                                [{ return X86::isMOVLMask(N); }]>;

// MOVSHDUP / MOVSLDUP masks.
def MOVSHDUP_shuffle_mask : PatLeaf<(build_vector),
                                    [{ return X86::isMOVSHDUPMask(N); }]>;
def MOVSLDUP_shuffle_mask : PatLeaf<(build_vector),
                                    [{ return X86::isMOVSLDUPMask(N); }]>;

// UNPCKL / UNPCKH masks, plus the variants with a "v, undef" predicate.
def UNPCKL_shuffle_mask : PatLeaf<(build_vector),
                                  [{ return X86::isUNPCKLMask(N); }]>;
def UNPCKH_shuffle_mask : PatLeaf<(build_vector),
                                  [{ return X86::isUNPCKHMask(N); }]>;
def UNPCKL_v_undef_shuffle_mask : PatLeaf<(build_vector),
  [{ return X86::isUNPCKL_v_undef_Mask(N); }]>;
def UNPCKH_v_undef_shuffle_mask : PatLeaf<(build_vector),
  [{ return X86::isUNPCKH_v_undef_Mask(N); }]>;

// PSHUFD / PSHUFHW / PSHUFLW masks, each with its own immediate transform.
def PSHUFD_shuffle_mask : PatLeaf<(build_vector),
                                  [{ return X86::isPSHUFDMask(N); }],
                                  SHUFFLE_get_shuf_imm>;
def PSHUFHW_shuffle_mask : PatLeaf<(build_vector),
                                   [{ return X86::isPSHUFHWMask(N); }],
                                   SHUFFLE_get_pshufhw_imm>;
def PSHUFLW_shuffle_mask : PatLeaf<(build_vector),
                                   [{ return X86::isPSHUFLWMask(N); }],
                                   SHUFFLE_get_pshuflw_imm>;

// Unary SHUFP mask: deliberately checked with the PSHUFD predicate.
def SHUFP_unary_shuffle_mask : PatLeaf<(build_vector),
                                       [{ return X86::isPSHUFDMask(N); }],
                                       SHUFFLE_get_shuf_imm>;

// Binary SHUFP mask.
def SHUFP_shuffle_mask : PatLeaf<(build_vector),
                                 [{ return X86::isSHUFPMask(N); }],
                                 SHUFFLE_get_shuf_imm>;

// Binary PSHUFD mask: deliberately checked with the SHUFP predicate.
def PSHUFD_binary_shuffle_mask : PatLeaf<(build_vector),
                                         [{ return X86::isSHUFPMask(N); }],
                                         SHUFFLE_get_shuf_imm>;
258
259//===----------------------------------------------------------------------===//
260// SSE scalar FP Instructions
261//===----------------------------------------------------------------------===//
262
// CMOV* - Pseudo instructions used to implement the SSE SELECT DAG
// operation. They are expanded by the scheduler into a branch sequence
// (usesCustomDAGSchedInserter), and they read EFLAGS.
let Uses = [EFLAGS], usesCustomDAGSchedInserter = 1 in {
  // Scalar f32 select.
  def CMOV_FR32 : I<0, Pseudo,
                    (outs FR32:$dst), (ins FR32:$t, FR32:$f, i8imm:$cond),
                    "#CMOV_FR32 PSEUDO!",
                    [(set FR32:$dst,
                      (X86cmov FR32:$t, FR32:$f, imm:$cond, EFLAGS))]>;

  // Scalar f64 select.
  def CMOV_FR64 : I<0, Pseudo,
                    (outs FR64:$dst), (ins FR64:$t, FR64:$f, i8imm:$cond),
                    "#CMOV_FR64 PSEUDO!",
                    [(set FR64:$dst,
                      (X86cmov FR64:$t, FR64:$f, imm:$cond, EFLAGS))]>;

  // 128-bit vector selects, one per matched result type.
  def CMOV_V4F32 : I<0, Pseudo,
                     (outs VR128:$dst),
                     (ins VR128:$t, VR128:$f, i8imm:$cond),
                     "#CMOV_V4F32 PSEUDO!",
                     [(set VR128:$dst,
                       (v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond,
                                       EFLAGS)))]>;
  def CMOV_V2F64 : I<0, Pseudo,
                     (outs VR128:$dst),
                     (ins VR128:$t, VR128:$f, i8imm:$cond),
                     "#CMOV_V2F64 PSEUDO!",
                     [(set VR128:$dst,
                       (v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
                                       EFLAGS)))]>;
  def CMOV_V2I64 : I<0, Pseudo,
                     (outs VR128:$dst),
                     (ins VR128:$t, VR128:$f, i8imm:$cond),
                     "#CMOV_V2I64 PSEUDO!",
                     [(set VR128:$dst,
                       (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
                                       EFLAGS)))]>;
}
296
297//===----------------------------------------------------------------------===//
298// SSE1 Instructions
299//===----------------------------------------------------------------------===//
300
// Move Instructions

// Register-to-register movss; no selection pattern is attached.
def MOVSSrr : SSI<0x10, MRMSrcReg,
                  (outs FR32:$dst), (ins FR32:$src),
                  "movss\t{$src, $dst|$dst, $src}", []>;

// Scalar f32 load; flagged as a load and as rematerializable.
let isLoad = 1, isReMaterializable = 1 in {
  def MOVSSrm : SSI<0x10, MRMSrcMem,
                    (outs FR32:$dst), (ins f32mem:$src),
                    "movss\t{$src, $dst|$dst, $src}",
                    [(set FR32:$dst, (loadf32 addr:$src))]>;
}

// Scalar f32 store.
def MOVSSmr : SSI<0x11, MRMDestMem,
                  (outs), (ins f32mem:$dst, FR32:$src),
                  "movss\t{$src, $dst|$dst, $src}",
                  [(store FR32:$src, addr:$dst)]>;
311
// Conversion instructions

// f32 -> signed i32 (fp_to_sint), from a register or from memory.
def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg,
                      (outs GR32:$dst), (ins FR32:$src),
                      "cvttss2si\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (fp_to_sint FR32:$src))]>;
def CVTTSS2SIrm : SSI<0x2C, MRMSrcMem,
                      (outs GR32:$dst), (ins f32mem:$src),
                      "cvttss2si\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst,
                        (fp_to_sint (loadf32 addr:$src)))]>;

// Signed i32 -> f32 (sint_to_fp), from a register or from memory.
def CVTSI2SSrr : SSI<0x2A, MRMSrcReg,
                     (outs FR32:$dst), (ins GR32:$src),
                     "cvtsi2ss\t{$src, $dst|$dst, $src}",
                     [(set FR32:$dst, (sint_to_fp GR32:$src))]>;
def CVTSI2SSrm : SSI<0x2A, MRMSrcMem,
                     (outs FR32:$dst), (ins i32mem:$src),
                     "cvtsi2ss\t{$src, $dst|$dst, $src}",
                     [(set FR32:$dst,
                       (sint_to_fp (loadi32 addr:$src)))]>;
325
// Intrinsic forms of the scalar conversions. These take XMM (VR128)
// operand(s) rather than FR32.

// int_x86_sse_cvtss2si, from a register or from memory.
def Int_CVTSS2SIrr : SSI<0x2D, MRMSrcReg,
                         (outs GR32:$dst), (ins VR128:$src),
                         "cvtss2si\t{$src, $dst|$dst, $src}",
                         [(set GR32:$dst,
                           (int_x86_sse_cvtss2si VR128:$src))]>;
def Int_CVTSS2SIrm : SSI<0x2D, MRMSrcMem,
                         (outs GR32:$dst), (ins f32mem:$src),
                         "cvtss2si\t{$src, $dst|$dst, $src}",
                         [(set GR32:$dst,
                           (int_x86_sse_cvtss2si (load addr:$src)))]>;

// int_x86_sse_cvttss2si: same opcode (0x2C) and mnemonic as CVTTSS2SI*, but
// matched from the intrinsic.
def Int_CVTTSS2SIrr : SSI<0x2C, MRMSrcReg,
                          (outs GR32:$dst), (ins VR128:$src),
                          "cvttss2si\t{$src, $dst|$dst, $src}",
                          [(set GR32:$dst,
                            (int_x86_sse_cvttss2si VR128:$src))]>;
def Int_CVTTSS2SIrm : SSI<0x2C, MRMSrcMem,
                          (outs GR32:$dst), (ins f32mem:$src),
                          "cvttss2si\t{$src, $dst|$dst, $src}",
                          [(set GR32:$dst,
                            (int_x86_sse_cvttss2si (load addr:$src)))]>;

// int_x86_sse_cvtsi2ss: two-address, so $dst is tied to $src1 and only
// $src2 appears in the assembly string.
let isTwoAddress = 1 in {
  def Int_CVTSI2SSrr : SSI<0x2A, MRMSrcReg,
                           (outs VR128:$dst),
                           (ins VR128:$src1, GR32:$src2),
                           "cvtsi2ss\t{$src2, $dst|$dst, $src2}",
                           [(set VR128:$dst,
                             (int_x86_sse_cvtsi2ss VR128:$src1,
                                                   GR32:$src2))]>;
  def Int_CVTSI2SSrm : SSI<0x2A, MRMSrcMem,
                           (outs VR128:$dst),
                           (ins VR128:$src1, i32mem:$src2),
                           "cvtsi2ss\t{$src2, $dst|$dst, $src2}",
                           [(set VR128:$dst,
                             (int_x86_sse_cvtsi2ss VR128:$src1,
                                                   (loadi32 addr:$src2)))]>;
}
357
// Comparison instructions

// cmp<cc>ss on FR32: two-address, with the condition code spliced into the
// mnemonic through ${cc}. No selection patterns are attached.
let isTwoAddress = 1 in {
  def CMPSSrr : SSI<0xC2, MRMSrcReg,
                    (outs FR32:$dst),
                    (ins FR32:$src1, FR32:$src, SSECC:$cc),
                    "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
  def CMPSSrm : SSI<0xC2, MRMSrcMem,
                    (outs FR32:$dst),
                    (ins FR32:$src1, f32mem:$src, SSECC:$cc),
                    "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
}

// ucomiss on FR32: selected for X86cmp; produces no register result and
// defines EFLAGS.
let Defs = [EFLAGS] in {
  def UCOMISSrr : PSI<0x2E, MRMSrcReg,
                      (outs), (ins FR32:$src1, FR32:$src2),
                      "ucomiss\t{$src2, $src1|$src1, $src2}",
                      [(X86cmp FR32:$src1, FR32:$src2),
                       (implicit EFLAGS)]>;
  def UCOMISSrm : PSI<0x2E, MRMSrcMem,
                      (outs), (ins FR32:$src1, f32mem:$src2),
                      "ucomiss\t{$src2, $src1|$src1, $src2}",
                      [(X86cmp FR32:$src1, (loadf32 addr:$src2)),
                       (implicit EFLAGS)]>;
} // Defs = [EFLAGS]
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000377
// Aliases of the comparison instructions that match intrinsics / nodes
// expecting XMM (VR128) operand(s).

// int_x86_sse_cmp_ss: two-address, condition code passed as an immediate.
let isTwoAddress = 1 in {
  def Int_CMPSSrr : SSI<0xC2, MRMSrcReg,
                        (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src, SSECC:$cc),
                        "cmp${cc}ss\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse_cmp_ss VR128:$src1, VR128:$src,
                                              imm:$cc))]>;
  def Int_CMPSSrm : SSI<0xC2, MRMSrcMem,
                        (outs VR128:$dst),
                        (ins VR128:$src1, f32mem:$src, SSECC:$cc),
                        "cmp${cc}ss\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse_cmp_ss VR128:$src1, (load addr:$src),
                                              imm:$cc))]>;
}

// ucomiss / comiss on VR128: selected for X86ucomi / X86comi; no register
// result, EFLAGS is defined.
let Defs = [EFLAGS] in {
  def Int_UCOMISSrr : PSI<0x2E, MRMSrcReg,
                          (outs), (ins VR128:$src1, VR128:$src2),
                          "ucomiss\t{$src2, $src1|$src1, $src2}",
                          [(X86ucomi (v4f32 VR128:$src1), VR128:$src2),
                           (implicit EFLAGS)]>;
  def Int_UCOMISSrm : PSI<0x2E, MRMSrcMem,
                          (outs), (ins VR128:$src1, f128mem:$src2),
                          "ucomiss\t{$src2, $src1|$src1, $src2}",
                          [(X86ucomi (v4f32 VR128:$src1),
                                     (load addr:$src2)),
                           (implicit EFLAGS)]>;

  def Int_COMISSrr : PSI<0x2F, MRMSrcReg,
                         (outs), (ins VR128:$src1, VR128:$src2),
                         "comiss\t{$src2, $src1|$src1, $src2}",
                         [(X86comi (v4f32 VR128:$src1), VR128:$src2),
                          (implicit EFLAGS)]>;
  def Int_COMISSrm : PSI<0x2F, MRMSrcMem,
                         (outs), (ins VR128:$src1, f128mem:$src2),
                         "comiss\t{$src2, $src1|$src1, $src2}",
                         [(X86comi (v4f32 VR128:$src1),
                                   (load addr:$src2)),
                          (implicit EFLAGS)]>;
} // Defs = [EFLAGS]
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000415
// Aliases of packed SSE1 instructions for scalar use. These all have names
// that start with 'Fs'.

// Materializes f32 +0.0 (fp32imm0) by emitting "pxor $dst, $dst"; this is
// the SSE mapping of fld0. Rematerializable.
// NOTE(review): pxor on an XMM register with the operand-size prefix looks
// like an SSE2 encoding, yet this def only requires HasSSE1 -- confirm.
let isReMaterializable = 1 in {
  def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins),
                   "pxor\t$dst, $dst",
                   [(set FR32:$dst, fp32imm0)]>,
                 Requires<[HasSSE1]>, TB, OpSize;
}

// FR32 reg-to-reg copy done with movaps. Upper bits are disregarded.
def FsMOVAPSrr : PSI<0x28, MRMSrcReg,
                     (outs FR32:$dst), (ins FR32:$src),
                     "movaps\t{$src, $dst|$dst, $src}", []>;

// FR32 load from f128mem done with movaps. Upper bits are disregarded.
// Only loads accepted by alignedloadfsf32 are matched.
let isLoad = 1 in {
  def FsMOVAPSrm : PSI<0x28, MRMSrcMem,
                       (outs FR32:$dst), (ins f128mem:$src),
                       "movaps\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
}
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000436
// Alias bitwise logical operations using SSE logical ops on packed FP values.
// All of these are two-address: $dst is tied to $src1, so only $src2 shows
// up in the assembly strings.

// Register-register and / or / xor: additionally commutable.
let isTwoAddress = 1, isCommutable = 1 in {
  def FsANDPSrr : PSI<0x54, MRMSrcReg,
                      (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
                      "andps\t{$src2, $dst|$dst, $src2}",
                      [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>;
  def FsORPSrr  : PSI<0x56, MRMSrcReg,
                      (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
                      "orps\t{$src2, $dst|$dst, $src2}",
                      [(set FR32:$dst, (X86for FR32:$src1, FR32:$src2))]>;
  def FsXORPSrr : PSI<0x57, MRMSrcReg,
                      (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
                      "xorps\t{$src2, $dst|$dst, $src2}",
                      [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
}

let isTwoAddress = 1 in {
  // Register-memory and / or / xor: the memory operand is a full f128mem
  // and is only folded when the load satisfies memopfsf32.
  def FsANDPSrm : PSI<0x54, MRMSrcMem,
                      (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
                      "andps\t{$src2, $dst|$dst, $src2}",
                      [(set FR32:$dst,
                        (X86fand FR32:$src1, (memopfsf32 addr:$src2)))]>;
  def FsORPSrm  : PSI<0x56, MRMSrcMem,
                      (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
                      "orps\t{$src2, $dst|$dst, $src2}",
                      [(set FR32:$dst,
                        (X86for FR32:$src1, (memopfsf32 addr:$src2)))]>;
  def FsXORPSrm : PSI<0x57, MRMSrcMem,
                      (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
                      "xorps\t{$src2, $dst|$dst, $src2}",
                      [(set FR32:$dst,
                        (X86fxor FR32:$src1, (memopfsf32 addr:$src2)))]>;

  // andnps: no selection patterns attached, and not marked commutable.
  def FsANDNPSrr : PSI<0x55, MRMSrcReg,
                       (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
                       "andnps\t{$src2, $dst|$dst, $src2}", []>;
  def FsANDNPSrm : PSI<0x55, MRMSrcMem,
                       (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
                       "andnps\t{$src2, $dst|$dst, $src2}", []>;
}
471
/// basic_sse1_fp_binop_rm - SSE1 binops come in both scalar and vector forms.
///
/// Besides the plain scalar ("ss" on FR32) and vector ("ps" on VR128) forms,
/// there is a special variant of the scalar form that represents the
/// associated intrinsic operation. Unlike the plain scalar form, it takes an
/// entire vector (instead of a scalar) and leaves the top elements undefined.
///
/// Each of the three forms exists as reg+reg and reg+mem, for a total of six
/// "instructions" per defm: SSrr, SSrm, PSrr, PSrm, SSrr_Int, SSrm_Int.
///
/// Parameters:
///   opc         - opcode byte shared by all six forms.
///   OpcodeStr   - mnemonic stem; "ss" or "ps" is appended.
///   OpNode      - DAG node matched by the plain scalar and vector forms.
///   F32Int      - intrinsic matched by the *_Int forms.
///   Commutable  - sets isCommutable on the three reg+reg forms (default 0).
///
let isTwoAddress = 1 in {
multiclass basic_sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, Intrinsic F32Int,
                                  bit Commutable = 0> {
  // Scalar operation, reg+reg.
  def SSrr : SSI<opc, MRMSrcReg,
                 (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
                 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                 [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> {
    let isCommutable = Commutable;
  }

  // Scalar operation, reg+mem.
  def SSrm : SSI<opc, MRMSrcMem,
                 (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
                 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                 [(set FR32:$dst,
                   (OpNode FR32:$src1, (load addr:$src2)))]>;

  // Vector operation, reg+reg.
  def PSrr : PSI<opc, MRMSrcReg,
                 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                 !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst,
                   (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
    let isCommutable = Commutable;
  }

  // Vector operation, reg+mem. Only loads accepted by memopv4f32 fold here.
  def PSrm : PSI<opc, MRMSrcMem,
                 (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                 !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst,
                   (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;

  // Intrinsic operation, reg+reg.
  def SSrr_Int : SSI<opc, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                     !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (F32Int VR128:$src1, VR128:$src2))]> {
    let isCommutable = Commutable;
  }

  // Intrinsic operation, reg+mem. The memory operand is matched through the
  // sse_load_f32 complex pattern.
  def SSrm_Int : SSI<opc, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
                     !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (F32Int VR128:$src1, sse_load_f32:$src2))]>;
}
}
524
// Arithmetic instructions

// add and mul pass Commutable = 1.
defm ADD : basic_sse1_fp_binop_rm<0x58, "add", fadd,
                                  int_x86_sse_add_ss, 1>;
defm MUL : basic_sse1_fp_binop_rm<0x59, "mul", fmul,
                                  int_x86_sse_mul_ss, 1>;

// sub and div keep the default Commutable = 0.
defm SUB : basic_sse1_fp_binop_rm<0x5C, "sub", fsub,
                                  int_x86_sse_sub_ss>;
defm DIV : basic_sse1_fp_binop_rm<0x5E, "div", fdiv,
                                  int_x86_sse_div_ss>;
530
/// sse1_fp_binop_rm - Other SSE1 binops
///
/// This multiclass is like basic_sse1_fp_binop_rm, with the addition of
/// instructions for a full-vector intrinsic form. Operations that map
/// onto C operators don't use this form since they just use the plain
/// vector form instead of having a separate vector intrinsic form.
///
/// This provides a total of eight "instructions": SSrr, SSrm, PSrr, PSrm,
/// SSrr_Int, SSrm_Int, PSrr_Int, PSrm_Int.
///
/// Parameters:
///   opc         - opcode byte shared by all eight forms.
///   OpcodeStr   - mnemonic stem; "ss" or "ps" is appended.
///   OpNode      - DAG node matched by the plain scalar and vector forms.
///   F32Int      - intrinsic matched by the scalar *_Int forms.
///   V4F32Int    - intrinsic matched by the vector *_Int forms.
///   Commutable  - sets isCommutable on the reg+reg forms (default 0).
///
let isTwoAddress = 1 in {
multiclass sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
                            SDNode OpNode,
                            Intrinsic F32Int,
                            Intrinsic V4F32Int,
                            bit Commutable = 0> {

  // Scalar operation, reg+reg.
  def SSrr : SSI<opc, MRMSrcReg,
                 (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
                 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                 [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> {
    let isCommutable = Commutable;
  }

  // Scalar operation, reg+mem.
  def SSrm : SSI<opc, MRMSrcMem,
                 (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
                 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                 [(set FR32:$dst,
                   (OpNode FR32:$src1, (load addr:$src2)))]>;

  // Vector operation, reg+reg.
  def PSrr : PSI<opc, MRMSrcReg,
                 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                 !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst,
                   (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
    let isCommutable = Commutable;
  }

  // Vector operation, reg+mem. Only loads accepted by memopv4f32 fold here.
  def PSrm : PSI<opc, MRMSrcMem,
                 (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                 !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst,
                   (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;

  // Intrinsic operation, reg+reg.
  def SSrr_Int : SSI<opc, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                     !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (F32Int VR128:$src1, VR128:$src2))]> {
    let isCommutable = Commutable;
  }

  // Intrinsic operation, reg+mem. The memory operand is matched through the
  // sse_load_f32 complex pattern.
  def SSrm_Int : SSI<opc, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
                     !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (F32Int VR128:$src1, sse_load_f32:$src2))]>;

  // Vector intrinsic operation, reg+reg.
  def PSrr_Int : PSI<opc, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                     !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (V4F32Int VR128:$src1, VR128:$src2))]> {
    let isCommutable = Commutable;
  }

  // Vector intrinsic operation, reg+mem.
  // The load is matched with memopv4f32, exactly like PSrm above, rather than
  // a plain 'load': the packed "ps" memory operand is subject to the same
  // alignment requirement regardless of whether the operation came from
  // OpNode or from the intrinsic, so an under-aligned load must not be
  // folded here.
  def PSrm_Int : PSI<opc, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                     !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (V4F32Int VR128:$src1,
                                 (memopv4f32 addr:$src2)))]>;
}
}
597
// Max (0x5F) and min (0x5D): each expands to the eight sse1_fp_binop_rm forms.
// Commutable is left at its default of 0.
defm MAX : sse1_fp_binop_rm<0x5F, "max", X86fmax,
                            int_x86_sse_max_ss,
                            int_x86_sse_max_ps>;
defm MIN : sse1_fp_binop_rm<0x5D, "min", X86fmin,
                            int_x86_sse_min_ss,
                            int_x86_sse_min_ps>;
602
603//===----------------------------------------------------------------------===//
604// SSE packed FP Instructions
605
606// Move Instructions
// movaps reg <- reg; no selection pattern attached.
def MOVAPSrr : PSI<0x28, MRMSrcReg,
                   (outs VR128:$dst), (ins VR128:$src),
                   "movaps\t{$src, $dst|$dst, $src}", []>;

// movaps reg <- mem; matched for aligned v4f32 loads and marked
// rematerializable.
let isLoad = 1, isReMaterializable = 1 in
def MOVAPSrm : PSI<0x28, MRMSrcMem,
                   (outs VR128:$dst), (ins f128mem:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (alignedloadv4f32 addr:$src))]>;

// movaps mem <- reg; matched for aligned v4f32 stores.
def MOVAPSmr : PSI<0x29, MRMDestMem,
                   (outs), (ins f128mem:$dst, VR128:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000617
// movups reg <- reg; no selection pattern attached.
def MOVUPSrr : PSI<0x10, MRMSrcReg,
                   (outs VR128:$dst), (ins VR128:$src),
                   "movups\t{$src, $dst|$dst, $src}", []>;

// movups reg <- mem; matched for any v4f32 load.
let isLoad = 1 in
def MOVUPSrm : PSI<0x10, MRMSrcMem,
                   (outs VR128:$dst), (ins f128mem:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (loadv4f32 addr:$src))]>;

// movups mem <- reg; matched for any v4f32 store.
def MOVUPSmr : PSI<0x11, MRMDestMem,
                   (outs), (ins f128mem:$dst, VR128:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)]>;
627
// Intrinsic forms of the MOVUPS load and store: same encodings as
// MOVUPSrm / MOVUPSmr, but selected from the loadu_ps / storeu_ps intrinsics.
let isLoad = 1 in
def MOVUPSrm_Int : PSI<0x10, MRMSrcMem,
                       (outs VR128:$dst), (ins f128mem:$src),
                       "movups\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>;
def MOVUPSmr_Int : PSI<0x11, MRMDestMem,
                       (outs), (ins f128mem:$dst, VR128:$src),
                       "movups\t{$src, $dst|$dst, $src}",
                       [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000636
// movlps / movhps from memory: shuffle a 64-bit load (viewed as v4f32) into
// $src1 using the MOVLP / MOVHP shuffle masks. Two-address, with
// AddedComplexity raised to 20.
let isTwoAddress = 1 in {
  let AddedComplexity = 20 in {
    def MOVLPSrm : PSI<0x12, MRMSrcMem,
                       (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                       "movlps\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                         (v4f32 (vector_shuffle
                                 VR128:$src1,
                                 (bc_v4f32 (v2f64 (scalar_to_vector
                                                   (loadf64 addr:$src2)))),
                                 MOVLP_shuffle_mask)))]>;
    def MOVHPSrm : PSI<0x16, MRMSrcMem,
                       (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                       "movhps\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                         (v4f32 (vector_shuffle
                                 VR128:$src1,
                                 (bc_v4f32 (v2f64 (scalar_to_vector
                                                   (loadf64 addr:$src2)))),
                                 MOVHP_shuffle_mask)))]>;
  } // AddedComplexity
} // isTwoAddress
655
// movlps to memory: store element 0 of the register viewed as v2f64.
def MOVLPSmr : PSI<0x13, MRMDestMem,
                   (outs), (ins f64mem:$dst, VR128:$src),
                   "movlps\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (bc_v2f64 (v4f32 VR128:$src)),
                                 (iPTR 0))),
                           addr:$dst)]>;
660
// v2f64 extract of element 1 is always custom lowered to "unpack high to low,
// then extract element 0", so the non-store version isn't too horrible. The
// pattern below matches that lowered shape feeding a store.
def MOVHPSmr : PSI<0x17, MRMDestMem,
                   (outs), (ins f64mem:$dst, VR128:$src),
                   "movhps\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (v2f64 (vector_shuffle
                                         (bc_v2f64 (v4f32 VR128:$src)),
                                         (undef),
                                         UNPCKH_shuffle_mask)),
                                 (iPTR 0))),
                           addr:$dst)]>;
670
// Register-register movlhps / movhlps, selected from v4f32 shuffles with the
// MOVHP and MOVHLPS masks respectively. Two-address, AddedComplexity 15.
let isTwoAddress = 1 in {
let AddedComplexity = 15 in {
def MOVLHPSrr : PSI<0x16, MRMSrcReg,
                    (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                    "movlhps\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
                                             MOVHP_shuffle_mask)))]>;

def MOVHLPSrr : PSI<0x12, MRMSrcReg,
                    (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                    "movhlps\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
                                             MOVHLPS_shuffle_mask)))]>;
} // AddedComplexity
} // isTwoAddress
686
687
688
689// Arithmetic
690
/// sse1_fp_unop_rm - SSE1 unops come in both scalar and vector forms.
///
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation. This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a
/// scalar) and leaves the top elements undefined.
///
/// And, we have a special variant form for a full-vector intrinsic form.
///
/// These four forms can each have a reg or a mem operand, so there are a
/// total of eight "instructions".
///
/// Template arguments:
///   opc        - opcode byte shared by all eight forms.
///   OpcodeStr  - mnemonic stem; "ss" or "ps" is appended.
///   OpNode     - DAG node matched by the non-intrinsic forms.
///   F32Int     - intrinsic matched by the scalar *_Int forms.
///   V4F32Int   - intrinsic matched by the packed *_Int forms.
///   Commutable - copied into isCommutable on the register forms.
///
multiclass sse1_fp_unop_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode,
                           Intrinsic F32Int,
                           Intrinsic V4F32Int,
                           bit Commutable = 0> {
  // Scalar operation, reg.
  def SSr : SSI<opc, MRMSrcReg,
                (outs FR32:$dst), (ins FR32:$src),
                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                [(set FR32:$dst, (OpNode FR32:$src))]> {
    let isCommutable = Commutable;
  }

  // Scalar operation, mem.
  def SSm : SSI<opc, MRMSrcMem,
                (outs FR32:$dst), (ins f32mem:$src),
                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                [(set FR32:$dst, (OpNode (load addr:$src)))]>;

  // Vector operation, reg.
  def PSr : PSI<opc, MRMSrcReg,
                (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]> {
    let isCommutable = Commutable;
  }

  // Vector operation, mem.
  def PSm : PSI<opc, MRMSrcMem,
                (outs VR128:$dst), (ins f128mem:$src),
                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>;

  // Intrinsic operation, reg.
  def SSr_Int : SSI<opc, MRMSrcReg,
                    (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F32Int VR128:$src))]> {
    let isCommutable = Commutable;
  }

  // Intrinsic operation, mem.
  def SSm_Int : SSI<opc, MRMSrcMem,
                    (outs VR128:$dst), (ins ssmem:$src),
                    !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;

  // Vector intrinsic operation, reg
  def PSr_Int : PSI<opc, MRMSrcReg,
                    (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (V4F32Int VR128:$src))]> {
    let isCommutable = Commutable;
  }

  // Vector intrinsic operation, mem
  // Uses the same memopv4f32 fragment as PSm rather than a plain `load`, so
  // the packed memory form is only selected for loads that fragment accepts.
  // NOTE(review): memopv4f32 is defined elsewhere in this file; confirm it is
  // the alignment-checking load fragment.
  def PSm_Int : PSI<opc, MRMSrcMem,
                    (outs VR128:$dst), (ins f128mem:$src),
                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
}
756
// Square root.
defm SQRT  : sse1_fp_unop_rm<0x51, "sqrt", fsqrt,
                             int_x86_sse_sqrt_ss,
                             int_x86_sse_sqrt_ps>;

// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
defm RSQRT : sse1_fp_unop_rm<0x52, "rsqrt", X86frsqrt,
                             int_x86_sse_rsqrt_ss,
                             int_x86_sse_rsqrt_ps>;
defm RCP   : sse1_fp_unop_rm<0x53, "rcp", X86frcp,
                             int_x86_sse_rcp_ss,
                             int_x86_sse_rcp_ps>;
767
// Logical
// Bitwise and / or / xor / andn on whole 128-bit registers. The patterns are
// written on v2i64. Only the reg+reg and/or/xor forms are marked commutable.
let isTwoAddress = 1 in {
  let isCommutable = 1 in {
    def ANDPSrr : PSI<0x54, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "andps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v2i64 (and VR128:$src1, VR128:$src2)))]>;
    def ORPSrr  : PSI<0x56, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "orps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v2i64 (or VR128:$src1, VR128:$src2)))]>;
    def XORPSrr : PSI<0x57, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "xorps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v2i64 (xor VR128:$src1, VR128:$src2)))]>;
  }

  def ANDPSrm : PSI<0x54, MRMSrcMem,
                    (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                    "andps\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (and (bc_v2i64 (v4f32 VR128:$src1)),
                           (memopv2i64 addr:$src2)))]>;
  def ORPSrm  : PSI<0x56, MRMSrcMem,
                    (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                    "orps\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (or (bc_v2i64 (v4f32 VR128:$src1)),
                          (memopv2i64 addr:$src2)))]>;
  def XORPSrm : PSI<0x57, MRMSrcMem,
                    (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                    "xorps\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (xor (bc_v2i64 (v4f32 VR128:$src1)),
                           (memopv2i64 addr:$src2)))]>;

  // andnps computes (~$src1) & $src2; the NOT is written as an xor with an
  // all-ones vector.
  def ANDNPSrr : PSI<0x55, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                     "andnps\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (v2i64 (and (xor VR128:$src1,
                                        (bc_v2i64 (v4i32 immAllOnesV))),
                                   VR128:$src2)))]>;
  def ANDNPSrm : PSI<0x55, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                     "andnps\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)),
                                        (bc_v2i64 (v4i32 immAllOnesV))),
                                   (memopv2i64 addr:$src2))))]>;
}
818
// Packed single-precision compare, selected from the cmp_ps intrinsic. The
// SSECC operand supplies the condition-code text spliced into the mnemonic
// via ${cc}.
let isTwoAddress = 1 in {
  def CMPPSrri : PSIi8<0xC2, MRMSrcReg,
                       (outs VR128:$dst),
                       (ins VR128:$src1, VR128:$src, SSECC:$cc),
                       "cmp${cc}ps\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse_cmp_ps VR128:$src1,
                                             VR128:$src, imm:$cc))]>;
  // The memory form uses memopv4f32, the same fragment the other packed
  // reg+mem patterns in this file use, instead of a plain `load`.
  // NOTE(review): memopv4f32 is defined elsewhere in this file; confirm it is
  // the alignment-checking load fragment.
  def CMPPSrmi : PSIi8<0xC2, MRMSrcMem,
                       (outs VR128:$dst),
                       (ins VR128:$src1, f128mem:$src, SSECC:$cc),
                       "cmp${cc}ps\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse_cmp_ps VR128:$src1,
                                             (memopv4f32 addr:$src),
                                             imm:$cc))]>;
}
831
// Shuffle and unpack instructions
// All are two-address and selected from v4f32 vector_shuffle nodes with the
// matching shuffle-mask predicate.
let isTwoAddress = 1 in {
  let isConvertibleToThreeAddress = 1 in // Convert to pshufd
    def SHUFPSrri : PSIi8<0xC6, MRMSrcReg,
                          (outs VR128:$dst),
                          (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
                          "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                          [(set VR128:$dst,
                            (v4f32 (vector_shuffle
                                    VR128:$src1, VR128:$src2,
                                    SHUFP_shuffle_mask:$src3)))]>;
  def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem,
                        (outs VR128:$dst),
                        (ins VR128:$src1, f128mem:$src2, i32i8imm:$src3),
                        "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                        [(set VR128:$dst,
                          (v4f32 (vector_shuffle
                                  VR128:$src1, (memopv4f32 addr:$src2),
                                  SHUFP_shuffle_mask:$src3)))]>;

  let AddedComplexity = 10 in {
    def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                         "unpckhps\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v4f32 (vector_shuffle
                                   VR128:$src1, VR128:$src2,
                                   UNPCKH_shuffle_mask)))]>;
    def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
                         (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                         "unpckhps\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v4f32 (vector_shuffle
                                   VR128:$src1, (memopv4f32 addr:$src2),
                                   UNPCKH_shuffle_mask)))]>;

    def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                         "unpcklps\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v4f32 (vector_shuffle
                                   VR128:$src1, VR128:$src2,
                                   UNPCKL_shuffle_mask)))]>;
    def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
                         (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                         "unpcklps\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v4f32 (vector_shuffle
                                   VR128:$src1, (memopv4f32 addr:$src2),
                                   UNPCKL_shuffle_mask)))]>;
  } // AddedComplexity
} // isTwoAddress
884
// Mask creation
// movmskps / movmskpd put a sign-bit mask of the source vector into a GR32.
def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
                     "movmskps\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (int_x86_sse_movmsk_ps VR128:$src))]>;
// movmskpd is an SSE2 packed-double instruction (encoding 66 0F 50 /r), so it
// is declared with PDI, the class the other packed-double instructions in
// this file (ucomisd, comisd, movapd) use. With PSI it would share the
// movmskps declaration class despite the different mnemonic and intrinsic.
def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
                     "movmskpd\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (int_x86_sse2_movmsk_pd VR128:$src))]>;
892
// Prefetching loads.
// All four share opcode 0x18 and differ only in the MRM{0..3}m form. None has
// a selection pattern.
// TODO: no intrinsics for these?
def PREFETCHT0  : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
                      "prefetcht0\t$src", []>;
def PREFETCHT1  : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
                      "prefetcht1\t$src", []>;
def PREFETCHT2  : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
                      "prefetcht2\t$src", []>;
def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
                      "prefetchnta\t$src", []>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000899
// Non-temporal stores
def MOVNTPSmr : PSI<0x2B, MRMDestMem,
                    (outs), (ins i128mem:$dst, VR128:$src),
                    "movntps\t{$src, $dst|$dst, $src}",
                    [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;

// Store fence (the only fence defined in the SSE1 section).
def SFENCE : PSI<0xAE, MRM7m, (outs), (ins),
                 "sfence", [(int_x86_sse_sfence)]>;

// MXCSR register: load from / store to a 32-bit memory location.
def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
                  "ldmxcsr\t$src",
                  [(int_x86_sse_ldmxcsr addr:$src)]>;
def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
                  "stmxcsr\t$dst",
                  [(int_x86_sse_stmxcsr addr:$dst)]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000913
// Alias instructions that map zero vector to pxor / xorp* for sse.
// V_SET0 takes no inputs and is printed as "xorps $dst, $dst"; it is marked
// rematerializable.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
let isReMaterializable = 1 in
def V_SET0 : PSI<0x57, MRMInitReg,
                 (outs VR128:$dst), (ins),
                 "xorps\t$dst, $dst",
                 [(set VR128:$dst, (v4f32 immAllZerosV))]>;
920
// FR32 to 128-bit vector conversion.
def MOVSS2PSrr : SSI<0x10, MRMSrcReg,
                     (outs VR128:$dst), (ins FR32:$src),
                     "movss\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v4f32 (scalar_to_vector FR32:$src)))]>;
def MOVSS2PSrm : SSI<0x10, MRMSrcMem,
                     (outs VR128:$dst), (ins f32mem:$src),
                     "movss\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;

// FIXME: we may not be able to eliminate this movss with coalescing, because
// the src and dest register classes are different. We really want to write
// this pattern like this:
// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
//           (f32 FR32:$src)>;
def MOVPS2SSrr : SSI<0x10, MRMSrcReg,
                     (outs FR32:$dst), (ins VR128:$src),
                     "movss\t{$src, $dst|$dst, $src}",
                     [(set FR32:$dst,
                       (vector_extract (v4f32 VR128:$src), (iPTR 0)))]>;
def MOVPS2SSmr : SSI<0x11, MRMDestMem,
                     (outs), (ins f32mem:$dst, VR128:$src),
                     "movss\t{$src, $dst|$dst, $src}",
                     [(store (f32 (vector_extract (v4f32 VR128:$src),
                                                  (iPTR 0))),
                             addr:$dst)]>;
944
945
// Move to lower bits of a VR128, leaving upper bits alone.
// Three operand (but two address) aliases.
let isTwoAddress = 1 in {
  // FR32 source; no selection pattern.
  def MOVLSS2PSrr : SSI<0x10, MRMSrcReg,
                        (outs VR128:$dst), (ins VR128:$src1, FR32:$src2),
                        "movss\t{$src2, $dst|$dst, $src2}", []>;

  // VR128 source; selected from a v4f32 shuffle with the MOVL mask.
  let AddedComplexity = 15 in
    def MOVLPSrr : SSI<0x10, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                       "movss\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                         (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
                                                MOVL_shuffle_mask)))]>;
}
961
// Move to the lower bits of a VR128 and zero the upper bits.
// Loading from memory automatically zeroes the upper bits, so the pattern is
// a MOVL shuffle of an all-zeros vector with the loaded scalar.
let AddedComplexity = 20 in
def MOVZSS2PSrm : SSI<0x10, MRMSrcMem,
                      (outs VR128:$dst), (ins f32mem:$src),
                      "movss\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4f32 (vector_shuffle
                                immAllZerosV,
                                (v4f32 (scalar_to_vector
                                        (loadf32 addr:$src))),
                                MOVL_shuffle_mask)))]>;
970
971
972//===----------------------------------------------------------------------===//
973// SSE2 Instructions
974//===----------------------------------------------------------------------===//
975
// Move Instructions
// Scalar double moves on FR64: reg copy (no pattern), rematerializable load,
// and store.
def MOVSDrr : SDI<0x10, MRMSrcReg,
                  (outs FR64:$dst), (ins FR64:$src),
                  "movsd\t{$src, $dst|$dst, $src}", []>;
let isLoad = 1, isReMaterializable = 1 in
def MOVSDrm : SDI<0x10, MRMSrcMem,
                  (outs FR64:$dst), (ins f64mem:$src),
                  "movsd\t{$src, $dst|$dst, $src}",
                  [(set FR64:$dst, (loadf64 addr:$src))]>;
def MOVSDmr : SDI<0x11, MRMDestMem,
                  (outs), (ins f64mem:$dst, FR64:$src),
                  "movsd\t{$src, $dst|$dst, $src}",
                  [(store FR64:$src, addr:$dst)]>;
986
// Conversion instructions
// f64 -> i32 (fp_to_sint).
def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg,
                      (outs GR32:$dst), (ins FR64:$src),
                      "cvttsd2si\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (fp_to_sint FR64:$src))]>;
def CVTTSD2SIrm : SDI<0x2C, MRMSrcMem,
                      (outs GR32:$dst), (ins f64mem:$src),
                      "cvttsd2si\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
// f64 -> f32 (fround).
def CVTSD2SSrr  : SDI<0x5A, MRMSrcReg,
                      (outs FR32:$dst), (ins FR64:$src),
                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
                      [(set FR32:$dst, (fround FR64:$src))]>;
def CVTSD2SSrm  : SDI<0x5A, MRMSrcMem,
                      (outs FR32:$dst), (ins f64mem:$src),
                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
                      [(set FR32:$dst, (fround (loadf64 addr:$src)))]>;
// i32 -> f64 (sint_to_fp).
def CVTSI2SDrr  : SDI<0x2A, MRMSrcReg,
                      (outs FR64:$dst), (ins GR32:$src),
                      "cvtsi2sd\t{$src, $dst|$dst, $src}",
                      [(set FR64:$dst, (sint_to_fp GR32:$src))]>;
def CVTSI2SDrm  : SDI<0x2A, MRMSrcMem,
                      (outs FR64:$dst), (ins i32mem:$src),
                      "cvtsi2sd\t{$src, $dst|$dst, $src}",
                      [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
1006
// SSE2 instructions with XS prefix
// f32 -> f64 extension. These are declared on the plain I class, with the XS
// prefix and the HasSSE2 predicate attached explicitly.
def CVTSS2SDrr : I<0x5A, MRMSrcReg,
                   (outs FR64:$dst), (ins FR32:$src),
                   "cvtss2sd\t{$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (fextend FR32:$src))]>,
                 XS, Requires<[HasSSE2]>;
def CVTSS2SDrm : I<0x5A, MRMSrcMem,
                   (outs FR64:$dst), (ins f32mem:$src),
                   "cvtss2sd\t{$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (extloadf32 addr:$src))]>,
                 XS, Requires<[HasSSE2]>;
1016
// Match intrinsics which expect XMM operand(s).
def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg,
                         (outs GR32:$dst), (ins VR128:$src),
                         "cvtsd2si\t{$src, $dst|$dst, $src}",
                         [(set GR32:$dst,
                           (int_x86_sse2_cvtsd2si VR128:$src))]>;
def Int_CVTSD2SIrm : SDI<0x2D, MRMSrcMem,
                         (outs GR32:$dst), (ins f128mem:$src),
                         "cvtsd2si\t{$src, $dst|$dst, $src}",
                         [(set GR32:$dst,
                           (int_x86_sse2_cvtsd2si (load addr:$src)))]>;

// Aliases for intrinsics
// Same encodings as CVTTSD2SIrr/rm, but taking VR128 / f128mem operands and
// selected from the cvttsd2si intrinsic.
def Int_CVTTSD2SIrr : SDI<0x2C, MRMSrcReg,
                          (outs GR32:$dst), (ins VR128:$src),
                          "cvttsd2si\t{$src, $dst|$dst, $src}",
                          [(set GR32:$dst,
                            (int_x86_sse2_cvttsd2si VR128:$src))]>;
def Int_CVTTSD2SIrm : SDI<0x2C, MRMSrcMem,
                          (outs GR32:$dst), (ins f128mem:$src),
                          "cvttsd2si\t{$src, $dst|$dst, $src}",
                          [(set GR32:$dst,
                            (int_x86_sse2_cvttsd2si (load addr:$src)))]>;
1035
// Comparison instructions
// Scalar double compares on FR64. Neither form has a selection pattern; the
// SSECC operand supplies the condition-code text spliced in via ${cc}.
let isTwoAddress = 1 in {
  def CMPSDrr : SDI<0xC2, MRMSrcReg,
                    (outs FR64:$dst),
                    (ins FR64:$src1, FR64:$src, SSECC:$cc),
                    "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
  def CMPSDrm : SDI<0xC2, MRMSrcMem,
                    (outs FR64:$dst),
                    (ins FR64:$src1, f64mem:$src, SSECC:$cc),
                    "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
}
1045
// ucomisd on FR64: matches X86cmp and implicitly defines EFLAGS.
let Defs = [EFLAGS] in {
def UCOMISDrr: PDI<0x2E, MRMSrcReg,
                   (outs), (ins FR64:$src1, FR64:$src2),
                   "ucomisd\t{$src2, $src1|$src1, $src2}",
                   [(X86cmp FR64:$src1, FR64:$src2),
                    (implicit EFLAGS)]>;
def UCOMISDrm: PDI<0x2E, MRMSrcMem,
                   (outs), (ins FR64:$src1, f64mem:$src2),
                   "ucomisd\t{$src2, $src1|$src1, $src2}",
                   [(X86cmp FR64:$src1, (loadf64 addr:$src2)),
                    (implicit EFLAGS)]>;
}
1055
// Aliases to match intrinsics which expect XMM operand(s).
// Same encoding as CMPSDrr/rm, but on VR128 and selected from the cmp_sd
// intrinsic.
let isTwoAddress = 1 in {
  def Int_CMPSDrr : SDI<0xC2, MRMSrcReg,
                        (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src, SSECC:$cc),
                        "cmp${cc}sd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse2_cmp_sd VR128:$src1,
                                               VR128:$src, imm:$cc))]>;
  def Int_CMPSDrm : SDI<0xC2, MRMSrcMem,
                        (outs VR128:$dst),
                        (ins VR128:$src1, f64mem:$src, SSECC:$cc),
                        "cmp${cc}sd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse2_cmp_sd VR128:$src1,
                                               (load addr:$src), imm:$cc))]>;
}
1069
// Intrinsic (VR128) forms of ucomisd / comisd: match the X86ucomi / X86comi
// nodes on v2f64 and implicitly define EFLAGS.
let Defs = [EFLAGS] in {
def Int_UCOMISDrr: PDI<0x2E, MRMSrcReg,
                       (outs), (ins VR128:$src1, VR128:$src2),
                       "ucomisd\t{$src2, $src1|$src1, $src2}",
                       [(X86ucomi (v2f64 VR128:$src1),
                                  (v2f64 VR128:$src2)),
                        (implicit EFLAGS)]>;
def Int_UCOMISDrm: PDI<0x2E, MRMSrcMem,
                       (outs), (ins VR128:$src1, f128mem:$src2),
                       "ucomisd\t{$src2, $src1|$src1, $src2}",
                       [(X86ucomi (v2f64 VR128:$src1),
                                  (load addr:$src2)),
                        (implicit EFLAGS)]>;

def Int_COMISDrr: PDI<0x2F, MRMSrcReg,
                      (outs), (ins VR128:$src1, VR128:$src2),
                      "comisd\t{$src2, $src1|$src1, $src2}",
                      [(X86comi (v2f64 VR128:$src1),
                                (v2f64 VR128:$src2)),
                       (implicit EFLAGS)]>;
def Int_COMISDrm: PDI<0x2F, MRMSrcMem,
                      (outs), (ins VR128:$src1, f128mem:$src2),
                      "comisd\t{$src2, $src1|$src1, $src2}",
                      [(X86comi (v2f64 VR128:$src1),
                                (load addr:$src2)),
                       (implicit EFLAGS)]>;
} // Defs = [EFLAGS]
1089
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001090// Aliases of packed SSE2 instructions for scalar use. These all have names that
1091// start with 'Fs'.
1092
1093// Alias instructions that map fld0 to pxor for sse.
// Takes no inputs ((ins) is empty) and sets an FR64 result from the fpimm0
// pattern by emitting "pxor $dst, $dst". Marked isReMaterializable and gated
// on HasSSE2; built from the generic I class with the TB and OpSize modifiers.
Dan Gohman8aef09b2007-09-07 21:32:51 +00001094let isReMaterializable = 1 in
Evan Chengb783fa32007-07-19 01:14:50 +00001095def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins),
Dan Gohman91888f02007-07-31 20:11:57 +00001096 "pxor\t$dst, $dst", [(set FR64:$dst, fpimm0)]>,
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001097 Requires<[HasSSE2]>, TB, OpSize;
1098
1099// Alias instruction to do FR64 reg-to-reg copy using movapd. Upper bits are
1100// disregarded.
// The pattern list is empty ([]), so this record carries no selection pattern
// of its own.
Evan Chengb783fa32007-07-19 01:14:50 +00001101def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001102 "movapd\t{$src, $dst|$dst, $src}", []>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001103
1104// Alias instruction to load FR64 from f128mem using movapd. Upper bits are
1105// disregarded.
// Matches alignedloadfsf64 of addr:$src and is flagged isLoad = 1. The memory
// operand is declared f128mem even though the result register class is FR64.
Evan Cheng4e84e452007-08-30 05:49:43 +00001106let isLoad = 1 in
Evan Chengb783fa32007-07-19 01:14:50 +00001107def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001108 "movapd\t{$src, $dst|$dst, $src}",
Dan Gohman11821702007-07-27 17:16:43 +00001109 [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001110
1111// Alias bitwise logical operations using SSE logical ops on packed FP values.
// All records here operate on FR64 registers but print the packed mnemonics
// (andpd/orpd/xorpd/andnpd). $src1 is an input that is not printed in the
// assembly string.
1112let isTwoAddress = 1 in {
// Register-register AND/OR/XOR, matched from X86fand/X86for/X86fxor and
// marked commutable.
1113let isCommutable = 1 in {
Evan Chengb783fa32007-07-19 01:14:50 +00001114 def FsANDPDrr : PDI<0x54, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001115 "andpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001116 [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001117 def FsORPDrr : PDI<0x56, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001118 "orpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001119 [(set FR64:$dst, (X86for FR64:$src1, FR64:$src2))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001120 def FsXORPDrr : PDI<0x57, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001121 "xorpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001122 [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>;
1123}
1124
// Register-memory AND/OR/XOR: the second operand is matched through
// memopfsf64 on an f128mem operand.
Evan Chengb783fa32007-07-19 01:14:50 +00001125def FsANDPDrm : PDI<0x54, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001126 "andpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001127 [(set FR64:$dst, (X86fand FR64:$src1,
Dan Gohman11821702007-07-27 17:16:43 +00001128 (memopfsf64 addr:$src2)))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001129def FsORPDrm : PDI<0x56, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001130 "orpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001131 [(set FR64:$dst, (X86for FR64:$src1,
Dan Gohman11821702007-07-27 17:16:43 +00001132 (memopfsf64 addr:$src2)))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001133def FsXORPDrm : PDI<0x57, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001134 "xorpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001135 [(set FR64:$dst, (X86fxor FR64:$src1,
Dan Gohman11821702007-07-27 17:16:43 +00001136 (memopfsf64 addr:$src2)))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001137
// AND-NOT forms: defined with empty pattern lists ([]), so they carry no
// selection pattern of their own.
1138def FsANDNPDrr : PDI<0x55, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001139 (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001140 "andnpd\t{$src2, $dst|$dst, $src2}", []>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001141def FsANDNPDrm : PDI<0x55, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001142 (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001143 "andnpd\t{$src2, $dst|$dst, $src2}", []>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001144}
1145
1146/// basic_sse2_fp_binop_rm - SSE2 binops come in both scalar and vector forms.
1147///
1148/// In addition, we also have a special variant of the scalar form here to
1149/// represent the associated intrinsic operation. This form is unlike the
1150/// plain scalar form, in that it takes an entire vector (instead of a scalar)
1151/// and leaves the top elements undefined.
1152///
1153/// These three forms can each be reg+reg or reg+mem, so there are a total of
1154/// six "instructions".
1155///
/// Parameters:
///   opc        - opcode byte shared by all six records.
///   OpcodeStr  - mnemonic stem; "sd" or "pd" is appended with !strconcat.
///   OpNode     - node matched by the plain scalar and vector forms.
///   F64Int     - intrinsic matched by the scalar *_Int forms.
///   Commutable - copied into isCommutable on the three reg+reg records only
///                (defaults to 0).
///
1156let isTwoAddress = 1 in {
1157multiclass basic_sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
1158 SDNode OpNode, Intrinsic F64Int,
1159 bit Commutable = 0> {
1160 // Scalar operation, reg+reg.
Evan Chengb783fa32007-07-19 01:14:50 +00001161 def SDrr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001162 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001163 [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> {
1164 let isCommutable = Commutable;
1165 }
1166
1167 // Scalar operation, reg+mem.
Evan Chengb783fa32007-07-19 01:14:50 +00001168 def SDrm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001169 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001170 [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
1171
1172 // Vector operation, reg+reg.
Evan Chengb783fa32007-07-19 01:14:50 +00001173 def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001174 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001175 [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
1176 let isCommutable = Commutable;
1177 }
1178
1179 // Vector operation, reg+mem.
Evan Chengb783fa32007-07-19 01:14:50 +00001180 def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001181 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
Dan Gohman4a4f1512007-07-18 20:23:34 +00001182 [(set VR128:$dst, (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001183
1184 // Intrinsic operation, reg+reg.
Evan Chengb783fa32007-07-19 01:14:50 +00001185 def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001186 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001187 [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]> {
1188 let isCommutable = Commutable;
1189 }
1190
1191 // Intrinsic operation, reg+mem.
Evan Chengb783fa32007-07-19 01:14:50 +00001192 def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001193 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001194 [(set VR128:$dst, (F64Int VR128:$src1,
1195 sse_load_f64:$src2))]>;
1196}
1197}
1198
1199// Arithmetic instructions
// Each defm instantiates basic_sse2_fp_binop_rm with an opcode byte, mnemonic
// stem, generic node and scalar intrinsic. ADD and MUL pass Commutable = 1;
// SUB and DIV omit it and so take the multiclass default.
1200defm ADD : basic_sse2_fp_binop_rm<0x58, "add", fadd, int_x86_sse2_add_sd, 1>;
1201defm MUL : basic_sse2_fp_binop_rm<0x59, "mul", fmul, int_x86_sse2_mul_sd, 1>;
1202defm SUB : basic_sse2_fp_binop_rm<0x5C, "sub", fsub, int_x86_sse2_sub_sd>;
1203defm DIV : basic_sse2_fp_binop_rm<0x5E, "div", fdiv, int_x86_sse2_div_sd>;
1204
1204
1205/// sse2_fp_binop_rm - Other SSE2 binops
1206///
1207/// This multiclass is like basic_sse2_fp_binop_rm, with the addition of
1208/// instructions for a full-vector intrinsic form. Operations that map
1209/// onto C operators don't use this form since they just use the plain
1210/// vector form instead of having a separate vector intrinsic form.
1211///
1212/// This provides a total of eight "instructions".
1213///
/// Parameters:
///   opc        - opcode byte shared by all eight records.
///   OpcodeStr  - mnemonic stem; "sd" or "pd" is appended with !strconcat.
///   OpNode     - node matched by the plain scalar and vector forms.
///   F64Int     - intrinsic matched by the scalar *_Int forms.
///   V2F64Int   - intrinsic matched by the full-vector *_Int forms.
///   Commutable - copied into isCommutable on the reg+reg records only
///                (defaults to 0).
///
1214let isTwoAddress = 1 in {
1215multiclass sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
1216 SDNode OpNode,
1217 Intrinsic F64Int,
1218 Intrinsic V2F64Int,
1219 bit Commutable = 0> {
1220
1221 // Scalar operation, reg+reg.
Evan Chengb783fa32007-07-19 01:14:50 +00001222 def SDrr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001223 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001224 [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> {
1225 let isCommutable = Commutable;
1226 }
1227
1228 // Scalar operation, reg+mem.
Evan Chengb783fa32007-07-19 01:14:50 +00001229 def SDrm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001230 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001231 [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
1232
1233 // Vector operation, reg+reg.
Evan Chengb783fa32007-07-19 01:14:50 +00001234 def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001235 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001236 [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
1237 let isCommutable = Commutable;
1238 }
1239
1240 // Vector operation, reg+mem.
Evan Chengb783fa32007-07-19 01:14:50 +00001241 def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001242 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
Dan Gohman4a4f1512007-07-18 20:23:34 +00001243 [(set VR128:$dst, (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001244
1245 // Intrinsic operation, reg+reg.
Evan Chengb783fa32007-07-19 01:14:50 +00001246 def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001247 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001248 [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]> {
1249 let isCommutable = Commutable;
1250 }
1251
1252 // Intrinsic operation, reg+mem.
Evan Chengb783fa32007-07-19 01:14:50 +00001253 def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001254 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001255 [(set VR128:$dst, (F64Int VR128:$src1,
1256 sse_load_f64:$src2))]>;
1257
1258 // Vector intrinsic operation, reg+reg.
Evan Chengb783fa32007-07-19 01:14:50 +00001259 def PDrr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001260 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001261 [(set VR128:$dst, (V2F64Int VR128:$src1, VR128:$src2))]> {
1262 let isCommutable = Commutable;
1263 }
1264
1265 // Vector intrinsic operation, reg+mem.
 // Uses memopv2f64, the same load fragment as PDrm above, instead of a plain
 // `load`, so both packed reg+mem forms fold memory operands under the same
 // conditions.
Dan Gohmanc747be52007-08-02 21:06:40 +00001266 def PDrm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001267 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001268 [(set VR128:$dst, (V2F64Int VR128:$src1, (memopv2f64 addr:$src2)))]>;
1269}
1270}
1271
// MAX/MIN instantiate sse2_fp_binop_rm with the X86fmax/X86fmin nodes and the
// scalar and packed max/min intrinsics. Commutable is not passed, so the
// multiclass default applies.
1272defm MAX : sse2_fp_binop_rm<0x5F, "max", X86fmax,
1273 int_x86_sse2_max_sd, int_x86_sse2_max_pd>;
1274defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin,
1275 int_x86_sse2_min_sd, int_x86_sse2_min_pd>;
1276
1277//===----------------------------------------------------------------------===//
1278// SSE packed FP Instructions
1279
1280// Move Instructions
// movapd (0x28 load / 0x29 store): the reg-reg form has an empty pattern list;
// the load form matches alignedloadv2f64 and is flagged isLoad and
// isReMaterializable; the store form matches alignedstore of a v2f64 value.
Evan Chengb783fa32007-07-19 01:14:50 +00001281def MOVAPDrr : PDI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001282 "movapd\t{$src, $dst|$dst, $src}", []>;
Evan Cheng4e84e452007-08-30 05:49:43 +00001283let isLoad = 1, isReMaterializable = 1 in
Evan Chengb783fa32007-07-19 01:14:50 +00001284def MOVAPDrm : PDI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001285 "movapd\t{$src, $dst|$dst, $src}",
Dan Gohman4a4f1512007-07-18 20:23:34 +00001286 [(set VR128:$dst, (alignedloadv2f64 addr:$src))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001287
Evan Chengb783fa32007-07-19 01:14:50 +00001288def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001289 "movapd\t{$src, $dst|$dst, $src}",
Dan Gohman4a4f1512007-07-18 20:23:34 +00001290 [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001291
// movupd (0x10 load / 0x11 store): same three shapes, but the load and store
// patterns use loadv2f64 and store rather than the aligned* fragments, and
// the load form is flagged isLoad only.
Evan Chengb783fa32007-07-19 01:14:50 +00001292def MOVUPDrr : PDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001293 "movupd\t{$src, $dst|$dst, $src}", []>;
Evan Cheng4e84e452007-08-30 05:49:43 +00001294let isLoad = 1 in
Evan Chengb783fa32007-07-19 01:14:50 +00001295def MOVUPDrm : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001296 "movupd\t{$src, $dst|$dst, $src}",
Dan Gohman4a4f1512007-07-18 20:23:34 +00001297 [(set VR128:$dst, (loadv2f64 addr:$src))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001298def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001299 "movupd\t{$src, $dst|$dst, $src}",
Dan Gohman4a4f1512007-07-18 20:23:34 +00001300 [(store (v2f64 VR128:$src), addr:$dst)]>;
1301
1302// Intrinsic forms of MOVUPD load and store
// Same opcodes and assembly strings as the plain movupd load/store records,
// but matched from int_x86_sse2_loadu_pd / int_x86_sse2_storeu_pd, which take
// the address operand directly.
Evan Chengb783fa32007-07-19 01:14:50 +00001303def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001304 "movupd\t{$src, $dst|$dst, $src}",
Dan Gohman4a4f1512007-07-18 20:23:34 +00001305 [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001306def MOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001307 "movupd\t{$src, $dst|$dst, $src}",
Dan Gohman4a4f1512007-07-18 20:23:34 +00001308 [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001309
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001309
// movlpd/movhpd loads. Each pattern loads an f64 from addr:$src2, wraps it
// with scalar_to_vector, and shuffles it with $src1 under MOVLP_shuffle_mask
// or MOVHP_shuffle_mask respectively. Both records get AddedComplexity = 20.
1310let isTwoAddress = 1 in {
1311 let AddedComplexity = 20 in {
1312 def MOVLPDrm : PDI<0x12, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001313 (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001314 "movlpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001315 [(set VR128:$dst,
1316 (v2f64 (vector_shuffle VR128:$src1,
1317 (scalar_to_vector (loadf64 addr:$src2)),
1318 MOVLP_shuffle_mask)))]>;
1319 def MOVHPDrm : PDI<0x16, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001320 (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001321 "movhpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001322 [(set VR128:$dst,
1323 (v2f64 (vector_shuffle VR128:$src1,
1324 (scalar_to_vector (loadf64 addr:$src2)),
1325 MOVHP_shuffle_mask)))]>;
1326 } // AddedComplexity
1327} // isTwoAddress
1328
1328
// movlpd store: matches a store to addr:$dst of element 0 (iPTR 0) extracted
// from the v2f64 source as an f64.
Evan Chengb783fa32007-07-19 01:14:50 +00001329def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001330 "movlpd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001331 [(store (f64 (vector_extract (v2f64 VR128:$src),
1332 (iPTR 0))), addr:$dst)]>;
1333
1334// v2f64 extract element 1 is always custom lowered to unpack high to low
1335// and extract element 0 so the non-store version isn't too horrible.
// Accordingly, the pattern matches a store of element 0 (iPTR 0) of a
// vector_shuffle of $src with undef under UNPCKH_shuffle_mask.
Evan Chengb783fa32007-07-19 01:14:50 +00001336def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001337 "movhpd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001338 [(store (f64 (vector_extract
1339 (v2f64 (vector_shuffle VR128:$src, (undef),
1340 UNPCKH_shuffle_mask)), (iPTR 0))),
1341 addr:$dst)]>;
1342
1343// SSE2 instructions without OpSize prefix
// cvtdq2ps (0x5B): built from the generic I class with TB and an explicit
// Requires<[HasSSE2]>. The memory form matches a bitconvert of a memopv2i64
// load from an i128mem operand.
Evan Chengb783fa32007-07-19 01:14:50 +00001344def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001345 "cvtdq2ps\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001346 [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
1347 TB, Requires<[HasSSE2]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001348def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001349 "cvtdq2ps\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001350 [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
Dan Gohman4a4f1512007-07-18 20:23:34 +00001351 (bitconvert (memopv2i64 addr:$src))))]>,
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001352 TB, Requires<[HasSSE2]>;
1353
1353
1354// SSE2 instructions with XS prefix
// cvtdq2pd (0xE6): built from the generic I class with XS and an explicit
// Requires<[HasSSE2]>.
// NOTE(review): the memory form declares an i64mem operand but its pattern
// matches memopv2i64 — confirm the intended access width.
Evan Chengb783fa32007-07-19 01:14:50 +00001355def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001356 "cvtdq2pd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001357 [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
1358 XS, Requires<[HasSSE2]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001359def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001360 "cvtdq2pd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001361 [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
Dan Gohman4a4f1512007-07-18 20:23:34 +00001362 (bitconvert (memopv2i64 addr:$src))))]>,
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001363 XS, Requires<[HasSSE2]>;
1364
1364
// cvtps2dq (0x5B, PDI class): selects int_x86_sse2_cvtps2dq.
// NOTE(review): the memory form matches a plain `load` on an f128mem operand,
// whereas other 128-bit reg+mem patterns in this file use memop* fragments —
// confirm whether that difference is intended.
Evan Chengb783fa32007-07-19 01:14:50 +00001365def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001366 "cvtps2dq\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001367 [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001368def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001369 "cvtps2dq\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001370 [(set VR128:$dst, (int_x86_sse2_cvtps2dq
1371 (load addr:$src)))]>;
1372// SSE2 packed instructions with XS prefix
// cvttps2dq (0x5B): built from the generic I class with XS and an explicit
// Requires<[HasSSE2]>; selects int_x86_sse2_cvttps2dq.
// NOTE(review): the memory form matches a plain `load` on an f128mem operand —
// confirm whether a memop* fragment was intended.
Evan Chengb783fa32007-07-19 01:14:50 +00001373def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001374 "cvttps2dq\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001375 [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))]>,
1376 XS, Requires<[HasSSE2]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001377def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001378 "cvttps2dq\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001379 [(set VR128:$dst, (int_x86_sse2_cvttps2dq
1380 (load addr:$src)))]>,
1381 XS, Requires<[HasSSE2]>;
1382
1382
1383// SSE2 packed instructions with XD prefix
// cvtpd2dq (0xE6): built from the generic I class with XD and an explicit
// Requires<[HasSSE2]>; selects int_x86_sse2_cvtpd2dq.
// NOTE(review): the memory form matches a plain `load` on an f128mem operand —
// confirm whether a memop* fragment was intended.
Evan Chengb783fa32007-07-19 01:14:50 +00001384def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001385 "cvtpd2dq\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001386 [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
1387 XD, Requires<[HasSSE2]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001388def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001389 "cvtpd2dq\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001390 [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
1391 (load addr:$src)))]>,
1392 XD, Requires<[HasSSE2]>;
1393
1393
// cvttpd2dq (0xE6, PDI class): selects int_x86_sse2_cvttpd2dq.
// NOTE(review): the memory form matches a plain `load` on an f128mem operand —
// confirm whether a memop* fragment was intended.
Evan Chengb783fa32007-07-19 01:14:50 +00001394def Int_CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001395 "cvttpd2dq\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001396 [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001397def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001398 "cvttpd2dq\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001399 [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
1400 (load addr:$src)))]>;
1401
1402// SSE2 instructions without OpSize prefix
// cvtps2pd (0x5A): built from the generic I class with TB and an explicit
// Requires<[HasSSE2]>; selects int_x86_sse2_cvtps2pd.
Evan Chengb783fa32007-07-19 01:14:50 +00001403def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001404 "cvtps2pd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001405 [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
1406 TB, Requires<[HasSSE2]>;
// The memory form takes an f64mem source, so it uses the MRMSrcMem format
// like every other reg+mem record in this file (it was declared MRMSrcReg).
Evan Chengb783fa32007-07-19 01:14:50 +00001407def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001408 "cvtps2pd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001409 [(set VR128:$dst, (int_x86_sse2_cvtps2pd
1410 (load addr:$src)))]>,
1411 TB, Requires<[HasSSE2]>;
1412
// cvtpd2ps (0x5A, PDI class): selects int_x86_sse2_cvtpd2ps.
Evan Chengb783fa32007-07-19 01:14:50 +00001413def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001414 "cvtpd2ps\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001415 [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
// The memory form takes an f128mem source, so it uses the MRMSrcMem format
// like every other reg+mem record in this file (it was declared MRMSrcReg).
// NOTE(review): the pattern matches a plain `load`; confirm whether a memop*
// fragment was intended for this 128-bit operand.
Evan Chengb783fa32007-07-19 01:14:50 +00001416def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001417 "cvtpd2ps\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001418 [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
1419 (load addr:$src)))]>;
1420
1421// Match intrinsics which expect XMM operand(s).
1422// Aliases for intrinsics
// Scalar conversion intrinsics on VR128 values. Each record takes VR128:$src1
// as the first intrinsic operand (not printed in the assembly string) and a
// register or memory $src2 as the second.
1423let isTwoAddress = 1 in {
// cvtsi2sd (0x2A): second operand is a GR32 register or a loadi32.
1424def Int_CVTSI2SDrr: SDI<0x2A, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001425 (outs VR128:$dst), (ins VR128:$src1, GR32:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001426 "cvtsi2sd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001427 [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1,
1428 GR32:$src2))]>;
1429def Int_CVTSI2SDrm: SDI<0x2A, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001430 (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001431 "cvtsi2sd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001432 [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1,
1433 (loadi32 addr:$src2)))]>;
// cvtsd2ss (0x5A, SDI class): second operand is VR128 or a load from f64mem.
1434def Int_CVTSD2SSrr: SDI<0x5A, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001435 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001436 "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001437 [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
1438 VR128:$src2))]>;
1439def Int_CVTSD2SSrm: SDI<0x5A, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001440 (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001441 "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001442 [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
1443 (load addr:$src2)))]>;
// cvtss2sd (0x5A): built from the generic I class with XS and an explicit
// Requires<[HasSSE2]>; second operand is VR128 or a load from f32mem.
1444def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001445 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001446 "cvtss2sd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001447 [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
1448 VR128:$src2))]>, XS,
1449 Requires<[HasSSE2]>;
1450def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001451 (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001452 "cvtss2sd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001453 [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
1454 (load addr:$src2)))]>, XS,
1455 Requires<[HasSSE2]>;
1456}
1457
1458// Arithmetic
1459
1460/// sse2_fp_unop_rm - SSE2 unops come in both scalar and vector forms.
1461///
1462/// In addition, we also have a special variant of the scalar form here to
1463/// represent the associated intrinsic operation. This form is unlike the
1464/// plain scalar form, in that it takes an entire vector (instead of a
1465/// scalar) and leaves the top elements undefined.
1466///
1467/// And, we have a special variant form for a full-vector intrinsic form.
1468///
1469/// These four forms can each have a reg or a mem operand, so there are a
1470/// total of eight "instructions".
1471///
/// Parameters:
///   opc        - opcode byte shared by all eight records.
///   OpcodeStr  - mnemonic stem; "sd" or "pd" is appended with !strconcat.
///   OpNode     - node matched by the plain scalar and vector forms.
///   F64Int     - intrinsic matched by the scalar *_Int forms.
///   V2F64Int   - intrinsic matched by the full-vector *_Int forms.
///   Commutable - copied into isCommutable on the register-operand records
///                (defaults to 0).
///                NOTE(review): these records have a single input operand;
///                confirm whether this parameter serves a purpose here.
///
1472multiclass sse2_fp_unop_rm<bits<8> opc, string OpcodeStr,
1473 SDNode OpNode,
1474 Intrinsic F64Int,
1475 Intrinsic V2F64Int,
1476 bit Commutable = 0> {
1477 // Scalar operation, reg.
Evan Chengb783fa32007-07-19 01:14:50 +00001478 def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001479 !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001480 [(set FR64:$dst, (OpNode FR64:$src))]> {
1481 let isCommutable = Commutable;
1482 }
1483
1484 // Scalar operation, mem.
Evan Chengb783fa32007-07-19 01:14:50 +00001485 def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001486 !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001487 [(set FR64:$dst, (OpNode (load addr:$src)))]>;
1488
1489 // Vector operation, reg.
Evan Chengb783fa32007-07-19 01:14:50 +00001490 def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001491 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001492 [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]> {
1493 let isCommutable = Commutable;
1494 }
1495
1496 // Vector operation, mem.
Evan Chengb783fa32007-07-19 01:14:50 +00001497 def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001498 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
Dan Gohman4a4f1512007-07-18 20:23:34 +00001499 [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001500
1501 // Intrinsic operation, reg.
Evan Chengb783fa32007-07-19 01:14:50 +00001502 def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001503 !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001504 [(set VR128:$dst, (F64Int VR128:$src))]> {
1505 let isCommutable = Commutable;
1506 }
1507
1508 // Intrinsic operation, mem.
Evan Chengb783fa32007-07-19 01:14:50 +00001509 def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001510 !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001511 [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
1512
1513 // Vector intrinsic operation, reg
Evan Chengb783fa32007-07-19 01:14:50 +00001514 def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001515 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001516 [(set VR128:$dst, (V2F64Int VR128:$src))]> {
1517 let isCommutable = Commutable;
1518 }
1519
1520 // Vector intrinsic operation, mem
 // Uses memopv2f64, the same load fragment as PDm above, instead of a plain
 // `load`, so both packed memory forms fold memory operands under the same
 // conditions.
Dan Gohmanc747be52007-08-02 21:06:40 +00001521 def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001522 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001523 [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>;
1524}
1525
1526// Square root.
// Instantiates sse2_fp_unop_rm with opcode 0x51, the fsqrt node, and the
// scalar and packed sqrt intrinsics; Commutable is left at its default.
1527defm SQRT : sse2_fp_unop_rm<0x51, "sqrt", fsqrt,
1528 int_x86_sse2_sqrt_sd, int_x86_sse2_sqrt_pd>;
1529
1530// There is no f64 version of the reciprocal approximation instructions.
1531
1532// Logical
// Packed-double bitwise operations on VR128. The patterns are written on
// integer nodes (and/or/xor): register operands are typed v2f64 and viewed
// through bc_v2i64, and memory operands are matched with memopv2i64.
1533let isTwoAddress = 1 in {
 // Register-register AND/OR/XOR, marked commutable.
1534 let isCommutable = 1 in {
1535 def ANDPDrr : PDI<0x54, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001536 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001537 "andpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001538 [(set VR128:$dst,
1539 (and (bc_v2i64 (v2f64 VR128:$src1)),
1540 (bc_v2i64 (v2f64 VR128:$src2))))]>;
1541 def ORPDrr : PDI<0x56, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001542 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001543 "orpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001544 [(set VR128:$dst,
1545 (or (bc_v2i64 (v2f64 VR128:$src1)),
1546 (bc_v2i64 (v2f64 VR128:$src2))))]>;
1547 def XORPDrr : PDI<0x57, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001548 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001549 "xorpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001550 [(set VR128:$dst,
1551 (xor (bc_v2i64 (v2f64 VR128:$src1)),
1552 (bc_v2i64 (v2f64 VR128:$src2))))]>;
1553 }
1554
 // Register-memory AND/OR/XOR.
1555 def ANDPDrm : PDI<0x54, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001556 (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001557 "andpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001558 [(set VR128:$dst,
1559 (and (bc_v2i64 (v2f64 VR128:$src1)),
Evan Cheng8e92cd12007-07-19 23:34:10 +00001560 (memopv2i64 addr:$src2)))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001561 def ORPDrm : PDI<0x56, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001562 (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001563 "orpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001564 [(set VR128:$dst,
1565 (or (bc_v2i64 (v2f64 VR128:$src1)),
Evan Cheng8e92cd12007-07-19 23:34:10 +00001566 (memopv2i64 addr:$src2)))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001567 def XORPDrm : PDI<0x57, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001568 (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001569 "xorpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001570 [(set VR128:$dst,
1571 (xor (bc_v2i64 (v2f64 VR128:$src1)),
Evan Cheng8e92cd12007-07-19 23:34:10 +00001572 (memopv2i64 addr:$src2)))]>;
 // AND-NOT: the pattern applies vnot to $src1 before the and, so these are
 // outside the commutable group.
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001573 def ANDNPDrr : PDI<0x55, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001574 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001575 "andnpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001576 [(set VR128:$dst,
1577 (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
1578 (bc_v2i64 (v2f64 VR128:$src2))))]>;
1579 def ANDNPDrm : PDI<0x55, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001580 (outs VR128:$dst), (ins VR128:$src1,f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001581 "andnpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001582 [(set VR128:$dst,
1583 (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
Evan Cheng8e92cd12007-07-19 23:34:10 +00001584 (memopv2i64 addr:$src2)))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001585}
1586
1586
// Packed double-precision compare. The comparison predicate is the SSECC
// operand $cc, which is spliced into the mnemonic through ${cc}.
let isTwoAddress = 1 in {
  def CMPPDrri : PDIi8<0xC2, MRMSrcReg,
      (outs VR128:$dst),
      (ins VR128:$src1, VR128:$src, SSECC:$cc),
      "cmp${cc}pd\t{$src, $dst|$dst, $src}",
      [(set VR128:$dst,
            (int_x86_sse2_cmp_pd VR128:$src1, VR128:$src, imm:$cc))]>;

  // The memory operand is matched with memopv2f64 instead of a plain load so
  // that this form folds loads under the same fragment every other packed
  // memory form in this section uses.
  // NOTE(review): presumably memop* restricts folding to suitably aligned
  // loads -- confirm against the PatFrag definition.
  def CMPPDrmi : PDIi8<0xC2, MRMSrcMem,
      (outs VR128:$dst),
      (ins VR128:$src1, f128mem:$src, SSECC:$cc),
      "cmp${cc}pd\t{$src, $dst|$dst, $src}",
      [(set VR128:$dst,
            (int_x86_sse2_cmp_pd VR128:$src1, (memopv2f64 addr:$src),
                                 imm:$cc))]>;
}
1599
// Shuffle and unpack instructions
// All definitions in this group are marked isTwoAddress; their asm strings
// print only $dst and the second source (plus the immediate for shufpd).
let isTwoAddress = 1 in {
  // shufpd: v2f64 vector_shuffle whose mask operand is the immediate $src3.
  def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
      (outs VR128:$dst),
      (ins VR128:$src1, VR128:$src2, i8imm:$src3),
      "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
      [(set VR128:$dst,
            (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
                                   SHUFP_shuffle_mask:$src3)))]>;
  def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
      (outs VR128:$dst),
      (ins VR128:$src1, f128mem:$src2, i8imm:$src3),
      "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
      [(set VR128:$dst,
            (v2f64 (vector_shuffle VR128:$src1, (memopv2f64 addr:$src2),
                                   SHUFP_shuffle_mask:$src3)))]>;

  // The unpack forms carry AddedComplexity = 10.
  let AddedComplexity = 10 in {
    // unpckhpd: v2f64 shuffle matching UNPCKH_shuffle_mask.
    def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
        (outs VR128:$dst),
        (ins VR128:$src1, VR128:$src2),
        "unpckhpd\t{$src2, $dst|$dst, $src2}",
        [(set VR128:$dst,
              (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
                                     UNPCKH_shuffle_mask)))]>;
    def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
        (outs VR128:$dst),
        (ins VR128:$src1, f128mem:$src2),
        "unpckhpd\t{$src2, $dst|$dst, $src2}",
        [(set VR128:$dst,
              (v2f64 (vector_shuffle VR128:$src1, (memopv2f64 addr:$src2),
                                     UNPCKH_shuffle_mask)))]>;

    // unpcklpd: v2f64 shuffle matching UNPCKL_shuffle_mask.
    def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
        (outs VR128:$dst),
        (ins VR128:$src1, VR128:$src2),
        "unpcklpd\t{$src2, $dst|$dst, $src2}",
        [(set VR128:$dst,
              (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
                                     UNPCKL_shuffle_mask)))]>;
    def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
        (outs VR128:$dst),
        (ins VR128:$src1, f128mem:$src2),
        "unpcklpd\t{$src2, $dst|$dst, $src2}",
        [(set VR128:$dst,
              (v2f64 (vector_shuffle VR128:$src1, (memopv2f64 addr:$src2),
                                     UNPCKL_shuffle_mask)))]>;
  } // AddedComplexity
} // isTwoAddress
1649
1650
1651//===----------------------------------------------------------------------===//
1652// SSE integer instructions
1653
// Move Instructions
// movdqa / movdqu register and memory forms. None of these definitions has an
// active selection pattern: the memory forms keep theirs commented out.
def MOVDQArr : PDI<0x6F, MRMSrcReg,
    (outs VR128:$dst), (ins VR128:$src),
    "movdqa\t{$src, $dst|$dst, $src}",
    []>;

let isLoad = 1 in
def MOVDQArm : PDI<0x6F, MRMSrcMem,
    (outs VR128:$dst), (ins i128mem:$src),
    "movdqa\t{$src, $dst|$dst, $src}",
    [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;

def MOVDQAmr : PDI<0x7F, MRMDestMem,
    (outs), (ins i128mem:$dst, VR128:$src),
    "movdqa\t{$src, $dst|$dst, $src}",
    [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;

// The movdqu forms use the generic I class with an explicit XS prefix and an
// explicit HasSSE2 requirement.
let isLoad = 1 in
def MOVDQUrm : I<0x6F, MRMSrcMem,
    (outs VR128:$dst), (ins i128mem:$src),
    "movdqu\t{$src, $dst|$dst, $src}",
    [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
    XS, Requires<[HasSSE2]>;

def MOVDQUmr : I<0x7F, MRMDestMem,
    (outs), (ins i128mem:$dst, VR128:$src),
    "movdqu\t{$src, $dst|$dst, $src}",
    [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
    XS, Requires<[HasSSE2]>;
1673
// Intrinsic forms of MOVDQU load and store
// Same encodings as MOVDQUrm / MOVDQUmr, but selected from the
// int_x86_sse2_loadu_dq / int_x86_sse2_storeu_dq intrinsics.
let isLoad = 1 in
def MOVDQUrm_Int : I<0x6F, MRMSrcMem,
    (outs VR128:$dst), (ins i128mem:$src),
    "movdqu\t{$src, $dst|$dst, $src}",
    [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>,
    XS, Requires<[HasSSE2]>;

def MOVDQUmr_Int : I<0x7F, MRMDestMem,
    (outs), (ins i128mem:$dst, VR128:$src),
    "movdqu\t{$src, $dst|$dst, $src}",
    [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
    XS, Requires<[HasSSE2]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001684
let isTwoAddress = 1 in {

/// PDI_binop_rm_int - SSE2 binary operator selected from an intrinsic.
/// Produces an `rr` (register) and an `rm` (memory) form; only `rr` receives
/// the Commutable flag. The memory operand is loaded as v2i64 and bitconverted
/// to whatever type the intrinsic expects.
multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
                            bit Commutable = 0> {
  def rr : PDI<opc, MRMSrcReg,
      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
      [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]> {
    let isCommutable = Commutable;
  }
  def rm : PDI<opc, MRMSrcMem,
      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
      [(set VR128:$dst,
            (IntId VR128:$src1,
                   (bitconvert (memopv2i64 addr:$src2))))]>;
}

/// PDI_binop_rmi_int - Intrinsic-selected operator with three forms: `rr` and
/// `rm` are encoded with `opc`, while the immediate form `ri` is encoded with
/// `opc2` and the format `ImmForm`. The immediate is wrapped in
/// scalar_to_vector to form the intrinsic's second operand.
multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
                             string OpcodeStr, Intrinsic IntId> {
  def rr : PDI<opc, MRMSrcReg,
      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
      [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
  def rm : PDI<opc, MRMSrcMem,
      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
      [(set VR128:$dst,
            (IntId VR128:$src1,
                   (bitconvert (memopv2i64 addr:$src2))))]>;
  def ri : PDIi8<opc2, ImmForm,
      (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
      [(set VR128:$dst,
            (IntId VR128:$src1,
                   (scalar_to_vector (i32 imm:$src2))))]>;
}


/// PDI_binop_rm - Simple SSE2 binary operator.
/// The result is typed as OpVT; the memory operand is loaded as v2i64 and
/// bitconverted. Only `rr` receives the Commutable flag.
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        ValueType OpVT, bit Commutable = 0> {
  def rr : PDI<opc, MRMSrcReg,
      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
      [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]> {
    let isCommutable = Commutable;
  }
  def rm : PDI<opc, MRMSrcMem,
      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
      [(set VR128:$dst,
            (OpVT (OpNode VR128:$src1,
                          (bitconvert (memopv2i64 addr:$src2)))))]>;
}

/// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64.
///
/// FIXME: we could eliminate this and use PDI_binop_rm instead if tblgen knew
/// to collapse (bitconvert VT to VT) into its operand.
///
multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              bit Commutable = 0> {
  def rr : PDI<opc, MRMSrcReg,
      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
      [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]> {
    let isCommutable = Commutable;
  }
  def rm : PDI<opc, MRMSrcMem,
      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
      [(set VR128:$dst, (OpNode VR128:$src1, (memopv2i64 addr:$src2)))]>;
}

} // isTwoAddress
1748
// 128-bit Integer Arithmetic

// Adds selected from the generic `add` node; trailing 1 marks rr commutable.
defm PADDB   : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>;
defm PADDW   : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>;
defm PADDD   : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>;
defm PADDQ   : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>;

// Adds selected from the padds / paddus intrinsics (commutable).
defm PADDSB  : PDI_binop_rm_int<0xEC, "paddsb",  int_x86_sse2_padds_b,  1>;
defm PADDSW  : PDI_binop_rm_int<0xED, "paddsw",  int_x86_sse2_padds_w,  1>;
defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>;
defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>;

// Subtracts selected from the generic `sub` node; Commutable keeps its
// default of 0.
defm PSUBB   : PDI_binop_rm<0xF8, "psubb", sub, v16i8>;
defm PSUBW   : PDI_binop_rm<0xF9, "psubw", sub, v8i16>;
defm PSUBD   : PDI_binop_rm<0xFA, "psubd", sub, v4i32>;
defm PSUBQ   : PDI_binop_rm_v2i64<0xFB, "psubq", sub>;

// Subtracts selected from the psubs / psubus intrinsics.
defm PSUBSB  : PDI_binop_rm_int<0xE8, "psubsb",  int_x86_sse2_psubs_b>;
defm PSUBSW  : PDI_binop_rm_int<0xE9, "psubsw",  int_x86_sse2_psubs_w>;
defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b>;
defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w>;

// Multiplies: pmullw comes from the generic `mul` node, the rest from
// intrinsics. All are marked commutable.
defm PMULLW  : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>;

defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>;
defm PMULHW  : PDI_binop_rm_int<0xE5, "pmulhw",  int_x86_sse2_pmulh_w,  1>;
defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>;

defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>;

// Averages (commutable).
defm PAVGB   : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>;
defm PAVGW   : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>;
1781
1782
// Min / max and sum-of-absolute-differences, all selected from intrinsics
// and marked commutable.
defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>;
defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>;
defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>;
defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;
// psadbw is encoded as 66 0F F6. 0xE0 is pavgb's opcode, so using it here
// would emit the wrong instruction.
defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
1788
1789
// Element-wise shifts. For each instruction the first opcode encodes the
// rr/rm forms; the second opcode together with the MRMnr format encodes the
// immediate (ri) form.

// Shift left.
defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
                               int_x86_sse2_psll_w>;
defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
                               int_x86_sse2_psll_d>;
defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
                               int_x86_sse2_psll_q>;

// Shift right (psrl*).
defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
                               int_x86_sse2_psrl_w>;
defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
                               int_x86_sse2_psrl_d>;
defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
                               int_x86_sse2_psrl_q>;

// Shift right (psra*).
defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
                               int_x86_sse2_psra_w>;
defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
                               int_x86_sse2_psra_d>;
// PSRAQ doesn't exist in SSE[1-3].
1801
// 128-bit logical shifts.
// The instructions themselves carry no pattern; selection goes through the
// Pat records that follow, which rewrite the shift amount with PSxLDQ_imm.
let isTwoAddress = 1 in {
  def PSLLDQri : PDIi8<0x73, MRM7r,
      (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
      "pslldq\t{$src2, $dst|$dst, $src2}",
      []>;
  def PSRLDQri : PDIi8<0x73, MRM3r,
      (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
      "psrldq\t{$src2, $dst|$dst, $src2}",
      []>;
  // PSRADQri doesn't exist in SSE[1-3].
}

let Predicates = [HasSSE2] in {
  // Whole-register shift intrinsics.
  def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
            (v2i64 (PSLLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
  def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
            (v2i64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
  // X86fsrl on v2f64 is also lowered to psrldq.
  def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
            (v2f64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
}
1821
// Logical
// and / or / xor are typed v2i64 and marked commutable.
defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
defm POR  : PDI_binop_rm_v2i64<0xEB, "por",  or,  1>;
defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;

// pandn matches (and (vnot $src1), $src2); the first operand is the one that
// is inverted.
let isTwoAddress = 1 in {
  def PANDNrr : PDI<0xDF, MRMSrcReg,
      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
      "pandn\t{$src2, $dst|$dst, $src2}",
      [(set VR128:$dst,
            (v2i64 (and (vnot VR128:$src1), VR128:$src2)))]>;

  def PANDNrm : PDI<0xDF, MRMSrcMem,
      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
      "pandn\t{$src2, $dst|$dst, $src2}",
      [(set VR128:$dst,
            (v2i64 (and (vnot VR128:$src1),
                        (memopv2i64 addr:$src2))))]>;
}
1840
// SSE2 Integer comparison
// Selected from intrinsics; Commutable is left at its default of 0.
defm PCMPEQB  : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b>;
defm PCMPEQW  : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w>;
defm PCMPEQD  : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d>;
defm PCMPGTB  : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>;
defm PCMPGTW  : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
defm PCMPGTD  : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;

// Pack instructions
defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb",
                                 int_x86_sse2_packsswb_128>;
defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw",
                                 int_x86_sse2_packssdw_128>;
defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb",
                                 int_x86_sse2_packuswb_128>;
1853
// Shuffle and unpack instructions
// The pshuf* forms are single-source shuffles: the second vector_shuffle
// operand is (undef) and the mask operand is the immediate $src2. They are
// not two-address; the asm string prints $src1 and $dst separately.
def PSHUFDri : PDIi8<0x70, MRMSrcReg,
    (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
    "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
    [(set VR128:$dst,
          (v4i32 (vector_shuffle VR128:$src1, (undef),
                                 PSHUFD_shuffle_mask:$src2)))]>;
def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
    (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
    "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
    [(set VR128:$dst,
          (v4i32 (vector_shuffle (bc_v4i32(memopv2i64 addr:$src1)),
                                 (undef),
                                 PSHUFD_shuffle_mask:$src2)))]>;

// SSE2 with ImmT == Imm8 and XS prefix.
def PSHUFHWri : Ii8<0x70, MRMSrcReg,
    (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
    "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
    [(set VR128:$dst,
          (v8i16 (vector_shuffle VR128:$src1, (undef),
                                 PSHUFHW_shuffle_mask:$src2)))]>,
    XS, Requires<[HasSSE2]>;
def PSHUFHWmi : Ii8<0x70, MRMSrcMem,
    (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
    "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
    [(set VR128:$dst,
          (v8i16 (vector_shuffle (bc_v8i16 (memopv2i64 addr:$src1)),
                                 (undef),
                                 PSHUFHW_shuffle_mask:$src2)))]>,
    XS, Requires<[HasSSE2]>;

// SSE2 with ImmT == Imm8 and XD prefix.
// NOTE(review): these use i32i8imm for the immediate while the pshufd and
// pshufhw forms above use i8imm -- confirm whether the difference is intended.
def PSHUFLWri : Ii8<0x70, MRMSrcReg,
    (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
    "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
    [(set VR128:$dst,
          (v8i16 (vector_shuffle VR128:$src1, (undef),
                                 PSHUFLW_shuffle_mask:$src2)))]>,
    XD, Requires<[HasSSE2]>;
def PSHUFLWmi : Ii8<0x70, MRMSrcMem,
    (outs VR128:$dst), (ins i128mem:$src1, i32i8imm:$src2),
    "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
    [(set VR128:$dst,
          (v8i16 (vector_shuffle (bc_v8i16 (memopv2i64 addr:$src1)),
                                 (undef),
                                 PSHUFLW_shuffle_mask:$src2)))]>,
    XD, Requires<[HasSSE2]>;
1902
1903
// Integer unpack instructions. Each element width has an rr and an rm form;
// the rm forms load the memory operand as v2i64 and bitconvert it to the
// element type (the qdq forms use the v2i64 load directly).
let isTwoAddress = 1 in {
  // --- Unpack low: shuffles matching UNPCKL_shuffle_mask ---
  def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
      "punpcklbw\t{$src2, $dst|$dst, $src2}",
      [(set VR128:$dst,
            (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
                                   UNPCKL_shuffle_mask)))]>;
  def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
      "punpcklbw\t{$src2, $dst|$dst, $src2}",
      [(set VR128:$dst,
            (v16i8 (vector_shuffle VR128:$src1,
                                   (bc_v16i8 (memopv2i64 addr:$src2)),
                                   UNPCKL_shuffle_mask)))]>;
  def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
      "punpcklwd\t{$src2, $dst|$dst, $src2}",
      [(set VR128:$dst,
            (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
                                   UNPCKL_shuffle_mask)))]>;
  def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
      "punpcklwd\t{$src2, $dst|$dst, $src2}",
      [(set VR128:$dst,
            (v8i16 (vector_shuffle VR128:$src1,
                                   (bc_v8i16 (memopv2i64 addr:$src2)),
                                   UNPCKL_shuffle_mask)))]>;
  def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
      "punpckldq\t{$src2, $dst|$dst, $src2}",
      [(set VR128:$dst,
            (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
                                   UNPCKL_shuffle_mask)))]>;
  def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
      "punpckldq\t{$src2, $dst|$dst, $src2}",
      [(set VR128:$dst,
            (v4i32 (vector_shuffle VR128:$src1,
                                   (bc_v4i32 (memopv2i64 addr:$src2)),
                                   UNPCKL_shuffle_mask)))]>;
  def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
      "punpcklqdq\t{$src2, $dst|$dst, $src2}",
      [(set VR128:$dst,
            (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
                                   UNPCKL_shuffle_mask)))]>;
  def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
      "punpcklqdq\t{$src2, $dst|$dst, $src2}",
      [(set VR128:$dst,
            (v2i64 (vector_shuffle VR128:$src1,
                                   (memopv2i64 addr:$src2),
                                   UNPCKL_shuffle_mask)))]>;

  // --- Unpack high: shuffles matching UNPCKH_shuffle_mask ---
  def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
      "punpckhbw\t{$src2, $dst|$dst, $src2}",
      [(set VR128:$dst,
            (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
                                   UNPCKH_shuffle_mask)))]>;
  def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
      "punpckhbw\t{$src2, $dst|$dst, $src2}",
      [(set VR128:$dst,
            (v16i8 (vector_shuffle VR128:$src1,
                                   (bc_v16i8 (memopv2i64 addr:$src2)),
                                   UNPCKH_shuffle_mask)))]>;
  def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
      "punpckhwd\t{$src2, $dst|$dst, $src2}",
      [(set VR128:$dst,
            (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
                                   UNPCKH_shuffle_mask)))]>;
  def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
      "punpckhwd\t{$src2, $dst|$dst, $src2}",
      [(set VR128:$dst,
            (v8i16 (vector_shuffle VR128:$src1,
                                   (bc_v8i16 (memopv2i64 addr:$src2)),
                                   UNPCKH_shuffle_mask)))]>;
  def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
      "punpckhdq\t{$src2, $dst|$dst, $src2}",
      [(set VR128:$dst,
            (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
                                   UNPCKH_shuffle_mask)))]>;
  def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
      "punpckhdq\t{$src2, $dst|$dst, $src2}",
      [(set VR128:$dst,
            (v4i32 (vector_shuffle VR128:$src1,
                                   (bc_v4i32 (memopv2i64 addr:$src2)),
                                   UNPCKH_shuffle_mask)))]>;
  def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
      "punpckhqdq\t{$src2, $dst|$dst, $src2}",
      [(set VR128:$dst,
            (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
                                   UNPCKH_shuffle_mask)))]>;
  def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
      "punpckhqdq\t{$src2, $dst|$dst, $src2}",
      [(set VR128:$dst,
            (v2i64 (vector_shuffle VR128:$src1,
                                   (memopv2i64 addr:$src2),
                                   UNPCKH_shuffle_mask)))]>;
}
2011
// Extract / Insert
// pextrw produces a GR32 from a v8i16 source and an immediate index.
def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
    (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
    "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
    [(set GR32:$dst,
          (X86pextrw (v8i16 VR128:$src1), (iPTR imm:$src2)))]>;

// pinsrw is two-address. The value to insert comes from a GR32 or from an
// i16 load that is any-extended to i32.
let isTwoAddress = 1 in {
  def PINSRWrri : PDIi8<0xC4, MRMSrcReg,
      (outs VR128:$dst),
      (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
      "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
      [(set VR128:$dst,
            (v8i16 (X86pinsrw (v8i16 VR128:$src1),
                              GR32:$src2, (iPTR imm:$src3))))]>;
  def PINSRWrmi : PDIi8<0xC4, MRMSrcMem,
      (outs VR128:$dst),
      (ins VR128:$src1, i16mem:$src2, i32i8imm:$src3),
      "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
      [(set VR128:$dst,
            (v8i16 (X86pinsrw (v8i16 VR128:$src1),
                              (i32 (anyext (loadi16 addr:$src2))),
                              (iPTR imm:$src3))))]>;
}
2035
// Mask creation
def PMOVMSKBrr : PDI<0xD7, MRMSrcReg,
    (outs GR32:$dst), (ins VR128:$src),
    "pmovmskb\t{$src, $dst|$dst, $src}",
    [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;

// Conditional store
// EDI is declared as an implicit use and is also passed as the third operand
// of the intrinsic in the pattern.
let Uses = [EDI] in
def MASKMOVDQU : PDI<0xF7, MRMSrcReg,
    (outs), (ins VR128:$src, VR128:$mask),
    "maskmovdqu\t{$mask, $src|$src, $mask}",
    [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002046
// Non-temporal stores
// NOTE(review): movntpd takes i128mem while movntdq takes f128mem, which
// looks swapped relative to the data each one stores -- confirm before
// changing.
def MOVNTPDmr : PDI<0x2B, MRMDestMem,
    (outs), (ins i128mem:$dst, VR128:$src),
    "movntpd\t{$src, $dst|$dst, $src}",
    [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
def MOVNTDQmr : PDI<0xE7, MRMDestMem,
    (outs), (ins f128mem:$dst, VR128:$src),
    "movntdq\t{$src, $dst|$dst, $src}",
    [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
// movnti stores a GR32; it uses the generic I class with TB and an explicit
// HasSSE2 requirement.
def MOVNTImr : I<0xC3, MRMDestMem,
    (outs), (ins i32mem:$dst, GR32:$src),
    "movnti\t{$src, $dst|$dst, $src}",
    [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
    TB, Requires<[HasSSE2]>;
2058
// Flush cache
def CLFLUSH : I<0xAE, MRM7m,
    (outs), (ins i8mem:$src),
    "clflush\t$src",
    [(int_x86_sse2_clflush addr:$src)]>,
    TB, Requires<[HasSSE2]>;

// Load, store, and memory fence
// Both fences share opcode 0xAE with clflush and differ only in the MRMnm
// format; neither takes operands.
def LFENCE : I<0xAE, MRM5m,
    (outs), (ins),
    "lfence",
    [(int_x86_sse2_lfence)]>,
    TB, Requires<[HasSSE2]>;
def MFENCE : I<0xAE, MRM6m,
    (outs), (ins),
    "mfence",
    [(int_x86_sse2_mfence)]>,
    TB, Requires<[HasSSE2]>;
2069
2070
// Alias instruction that maps an all-ones vector to pcmpeqd.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
// Matches immAllOnesV (typed v2f64 here) and prints "pcmpeqd $dst, $dst".
// Takes no input operands and is flagged rematerializable.
let isReMaterializable = 1 in
def V_SETALLONES : PDI<0x76, MRMInitReg,
    (outs VR128:$dst), (ins),
    "pcmpeqd\t$dst, $dst",
    [(set VR128:$dst, (v2f64 immAllOnesV))]>;
2077
// FR64 to 128-bit vector conversion.
// scalar_to_vector of an FR64 register or an f64 load, typed v2f64.
def MOVSD2PDrr : SDI<0x10, MRMSrcReg,
    (outs VR128:$dst), (ins FR64:$src),
    "movsd\t{$src, $dst|$dst, $src}",
    [(set VR128:$dst, (v2f64 (scalar_to_vector FR64:$src)))]>;
def MOVSD2PDrm : SDI<0x10, MRMSrcMem,
    (outs VR128:$dst), (ins f64mem:$src),
    "movsd\t{$src, $dst|$dst, $src}",
    [(set VR128:$dst,
          (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;

// movd from a GR32 / i32 load into a vector register, typed v4i32.
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg,
    (outs VR128:$dst), (ins GR32:$src),
    "movd\t{$src, $dst|$dst, $src}",
    [(set VR128:$dst, (v4i32 (scalar_to_vector GR32:$src)))]>;
def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem,
    (outs VR128:$dst), (ins i32mem:$src),
    "movd\t{$src, $dst|$dst, $src}",
    [(set VR128:$dst,
          (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;

// movd used as a bitconvert from a GR32 / i32 load into FR32.
def MOVDI2SSrr : PDI<0x6E, MRMSrcReg,
    (outs FR32:$dst), (ins GR32:$src),
    "movd\t{$src, $dst|$dst, $src}",
    [(set FR32:$dst, (bitconvert GR32:$src))]>;

def MOVDI2SSrm : PDI<0x6E, MRMSrcMem,
    (outs FR32:$dst), (ins i32mem:$src),
    "movd\t{$src, $dst|$dst, $src}",
    [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
2104
// SSE2 instructions with XS prefix
// movq load: scalar_to_vector of an i64 loaded from memory.
def MOVQI2PQIrm : I<0x7E, MRMSrcMem,
                    (outs VR128:$dst), (ins i64mem:$src),
                    "movq\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst,
                      (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                  XS, Requires<[HasSSE2]>;

// movq store: write element 0 of a v2i64 to memory.
def MOVPQI2QImr : PDI<0xD6, MRMDestMem,
                      (outs), (ins i64mem:$dst, VR128:$src),
                      "movq\t{$src, $dst|$dst, $src}",
                      [(store (i64 (vector_extract (v2i64 VR128:$src),
                                                   (iPTR 0))),
                              addr:$dst)]>;
2115
// FIXME: coalescing may not be able to eliminate this movsd because the
// source and destination register classes are different. We really want to
// write this pattern like this:
// def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
//           (f64 FR64:$src)>;
def MOVPD2SDrr : SDI<0x10, MRMSrcReg,
                     (outs FR64:$dst), (ins VR128:$src),
                     "movsd\t{$src, $dst|$dst, $src}",
                     [(set FR64:$dst,
                       (vector_extract (v2f64 VR128:$src), (iPTR 0)))]>;
def MOVPD2SDmr : SDI<0x11, MRMDestMem,
                     (outs), (ins f64mem:$dst, VR128:$src),
                     "movsd\t{$src, $dst|$dst, $src}",
                     [(store (f64 (vector_extract (v2f64 VR128:$src),
                                                  (iPTR 0))),
                             addr:$dst)]>;

// Extract element 0 of a v4i32 into a GR32 or store it to memory.
def MOVPDI2DIrr : PDI<0x7E, MRMDestReg,
                      (outs GR32:$dst), (ins VR128:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst,
                        (vector_extract (v4i32 VR128:$src), (iPTR 0)))]>;
def MOVPDI2DImr : PDI<0x7E, MRMDestMem,
                      (outs), (ins i32mem:$dst, VR128:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(store (i32 (vector_extract (v4i32 VR128:$src),
                                                   (iPTR 0))),
                              addr:$dst)]>;

// Bitconvert an FR32 to a 32-bit integer, into a GR32 or into memory.
def MOVSS2DIrr : PDI<0x7E, MRMDestReg,
                     (outs GR32:$dst), (ins FR32:$src),
                     "movd\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (bitconvert FR32:$src))]>;
def MOVSS2DImr : PDI<0x7E, MRMDestMem,
                     (outs), (ins i32mem:$dst, FR32:$src),
                     "movd\t{$src, $dst|$dst, $src}",
                     [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
2144
2145
// Move to lower bits of a VR128, leaving upper bits alone.
// Three operand (but two address) aliases.
let isTwoAddress = 1 in {
  // No selection pattern of its own; referenced explicitly by Pat<> records.
  def MOVLSD2PDrr : SDI<0x10, MRMSrcReg,
                        (outs VR128:$dst),
                        (ins VR128:$src1, FR64:$src2),
                        "movsd\t{$src2, $dst|$dst, $src2}",
                        []>;

  // v2f64 shuffle of $src1 / $src2 under MOVL_shuffle_mask.
  let AddedComplexity = 15 in {
    def MOVLPDrr : SDI<0x10, MRMSrcReg,
                       (outs VR128:$dst),
                       (ins VR128:$src1, VR128:$src2),
                       "movsd\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                         (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
                                                MOVL_shuffle_mask)))]>;
  }
}
2161
// Store / copy lower 64-bits of a XMM register (int_x86_sse2_storel_dq).
def MOVLQ128mr : PDI<0xD6, MRMDestMem,
                     (outs), (ins i64mem:$dst, VR128:$src),
                     "movq\t{$src, $dst|$dst, $src}",
                     [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
2166
// Move to lower bits of a VR128 and zeroing upper bits.
// Loading from memory automatically zeroing upper bits.
let AddedComplexity = 20 in {
  def MOVZSD2PDrm : SDI<0x10, MRMSrcMem,
                        (outs VR128:$dst), (ins f64mem:$src),
                        "movsd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (v2f64 (vector_shuffle
                                   immAllZerosV,
                                   (v2f64 (scalar_to_vector
                                            (loadf64 addr:$src))),
                                   MOVL_shuffle_mask)))]>;
}

// movd / movq to XMM register zero-extends
let AddedComplexity = 15 in {
  def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg,
                         (outs VR128:$dst), (ins GR32:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (v4i32 (vector_shuffle
                                    immAllZerosV,
                                    (v4i32 (scalar_to_vector GR32:$src)),
                                    MOVL_shuffle_mask)))]>;
}
let AddedComplexity = 20 in {
  def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem,
                         (outs VR128:$dst), (ins i32mem:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (v4i32 (vector_shuffle
                                    immAllZerosV,
                                    (v4i32 (scalar_to_vector
                                             (loadi32 addr:$src))),
                                    MOVL_shuffle_mask)))]>;
}

// Moving from XMM to XMM but still clear upper 64 bits.
let AddedComplexity = 15 in {
  def MOVZQI2PQIrr : I<0x7E, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src),
                       "movq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_movl_dq VR128:$src))]>,
                     XS, Requires<[HasSSE2]>;
}
let AddedComplexity = 20 in {
  def MOVZQI2PQIrm : I<0x7E, MRMSrcMem,
                       (outs VR128:$dst), (ins i64mem:$src),
                       "movq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_movl_dq
                           (bitconvert (memopv2i64 addr:$src))))]>,
                     XS, Requires<[HasSSE2]>;
}
2206
2207
2208//===----------------------------------------------------------------------===//
2209// SSE3 Instructions
2210//===----------------------------------------------------------------------===//
2211
// Move Instructions

// movshdup: v4f32 single-input shuffle under MOVSHDUP_shuffle_mask.
def MOVSHDUPrr : S3SI<0x16, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src),
                      "movshdup\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4f32 (vector_shuffle VR128:$src, (undef),
                                               MOVSHDUP_shuffle_mask)))]>;
def MOVSHDUPrm : S3SI<0x16, MRMSrcMem,
                      (outs VR128:$dst), (ins f128mem:$src),
                      "movshdup\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4f32 (vector_shuffle (memopv4f32 addr:$src), (undef),
                                               MOVSHDUP_shuffle_mask)))]>;

// movsldup: v4f32 single-input shuffle under MOVSLDUP_shuffle_mask.
def MOVSLDUPrr : S3SI<0x12, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src),
                      "movsldup\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4f32 (vector_shuffle VR128:$src, (undef),
                                               MOVSLDUP_shuffle_mask)))]>;
def MOVSLDUPrm : S3SI<0x12, MRMSrcMem,
                      (outs VR128:$dst), (ins f128mem:$src),
                      "movsldup\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4f32 (vector_shuffle (memopv4f32 addr:$src), (undef),
                                               MOVSLDUP_shuffle_mask)))]>;

// movddup: v2f64 single-input shuffle under SSE_splat_lo_mask. The memory
// form reads only an f64 and goes through scalar_to_vector.
def MOVDDUPrr : S3DI<0x12, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src),
                     "movddup\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v2f64 (vector_shuffle VR128:$src, (undef),
                                              SSE_splat_lo_mask)))]>;
def MOVDDUPrm : S3DI<0x12, MRMSrcMem,
                     (outs VR128:$dst), (ins f64mem:$src),
                     "movddup\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v2f64 (vector_shuffle
                                (scalar_to_vector (loadf64 addr:$src)),
                                (undef),
                                SSE_splat_lo_mask)))]>;
2247
// Arithmetic
// addsubps / addsubpd, selected for the int_x86_sse3_addsub_* intrinsics.
// Two-address: $src1 is tied to $dst.
//
// The memory forms fold the load through memopv4f32 / memopv2f64 instead of a
// plain "load": the 128-bit memory operand of these instructions must be
// 16-byte aligned, so an arbitrary (possibly under-aligned) load must not be
// folded into them.
let isTwoAddress = 1 in {
  def ADDSUBPSrr : S3DI<0xD0, MRMSrcReg,
                        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                        "addsubps\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1,
                                           VR128:$src2))]>;
  def ADDSUBPSrm : S3DI<0xD0, MRMSrcMem,
                        (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                        "addsubps\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1,
                                           (memopv4f32 addr:$src2)))]>;
  def ADDSUBPDrr : S3I<0xD0, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                       "addsubpd\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1,
                                          VR128:$src2))]>;
  def ADDSUBPDrm : S3I<0xD0, MRMSrcMem,
                       (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                       "addsubpd\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1,
                                          (memopv2f64 addr:$src2)))]>;
}
2271
// lddqu: 128-bit load selected for the int_x86_sse3_ldu_dq intrinsic.
def LDDQUrm : S3DI<0xF0, MRMSrcMem,
                   (outs VR128:$dst), (ins i128mem:$src),
                   "lddqu\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
2275
// Horizontal ops
//
// Helper classes for intrinsic-selected SSE3 binary operations:
//   S3D_Intrr / S3D_Intrm - v4f32 result, built on S3DI (reg / mem source).
//   S3_Intrr  / S3_Intrm  - v2f64 result, built on S3I  (reg / mem source).
//
// The memory forms fold the load through memopv4f32 / memopv2f64 instead of a
// plain "load": the 128-bit memory operand of these instructions must be
// 16-byte aligned, so an arbitrary (possibly under-aligned) load must not be
// folded into them.
class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
  : S3DI<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         [(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>;
class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
  : S3DI<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         [(set VR128:$dst, (v4f32 (IntId VR128:$src1,
                                   (memopv4f32 addr:$src2))))]>;
class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
  : S3I<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
        [(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>;
class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
  : S3I<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
        [(set VR128:$dst, (v2f64 (IntId VR128:$src1,
                                  (memopv2f64 addr:$src2))))]>;

// Two-address: $src1 is tied to $dst.
let isTwoAddress = 1 in {
  def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>;
  def HADDPSrm : S3D_Intrm<0x7C, "haddps", int_x86_sse3_hadd_ps>;
  def HADDPDrr : S3_Intrr <0x7C, "haddpd", int_x86_sse3_hadd_pd>;
  def HADDPDrm : S3_Intrm <0x7C, "haddpd", int_x86_sse3_hadd_pd>;
  def HSUBPSrr : S3D_Intrr<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
  def HSUBPSrm : S3D_Intrm<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
  def HSUBPDrr : S3_Intrr <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
  def HSUBPDrm : S3_Intrm <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
}
2304
// Thread synchronization
// The patterns name the physical registers EAX / ECX / EDX directly.
// NOTE(review): architecturally monitor / mwait are 0F 01 C8 / 0F 01 C9;
// confirm that RawFrm + TB with opcode bytes 0xC8 / 0xC9 yields that encoding.
def MONITOR : I<0xC8, RawFrm, (outs), (ins),
                "monitor",
                [(int_x86_sse3_monitor EAX, ECX, EDX)]>,
              TB, Requires<[HasSSE3]>;
def MWAIT   : I<0xC9, RawFrm, (outs), (ins),
                "mwait",
                [(int_x86_sse3_mwait ECX, EAX)]>,
              TB, Requires<[HasSSE3]>;
2310
// vector_shuffle v1, <undef> <1, 1, 3, 3>
// Integer (v4i32) flavour of the shuffle, mapped onto MOVSHDUP.
let AddedComplexity = 15 in {
  def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
                                   MOVSHDUP_shuffle_mask)),
            (MOVSHDUPrr VR128:$src)>,
        Requires<[HasSSE3]>;
}
let AddedComplexity = 20 in {
  def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (memopv2i64 addr:$src)), (undef),
                                   MOVSHDUP_shuffle_mask)),
            (MOVSHDUPrm addr:$src)>,
        Requires<[HasSSE3]>;
}

// vector_shuffle v1, <undef> <0, 0, 2, 2>
// Integer (v4i32) flavour of the shuffle, mapped onto MOVSLDUP.
let AddedComplexity = 15 in {
  def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
                                   MOVSLDUP_shuffle_mask)),
            (MOVSLDUPrr VR128:$src)>,
        Requires<[HasSSE3]>;
}
let AddedComplexity = 20 in {
  def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (memopv2i64 addr:$src)), (undef),
                                   MOVSLDUP_shuffle_mask)),
            (MOVSLDUPrm addr:$src)>,
        Requires<[HasSSE3]>;
}
2330
2331//===----------------------------------------------------------------------===//
2332// SSSE3 Instructions
2333//===----------------------------------------------------------------------===//
2334
// SSSE3 Instruction Templates:
//
//   SS38I - SSSE3 instructions with T8 prefix.
//   SS3AI - SSSE3 instructions with TA prefix.
//
// Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit
// version uses the MMX registers. We put those instructions here because they
// fit better into the SSSE3 instruction category than into the MMX category.

class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSSE3]>;

class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSSE3]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002350
/// SS3I_unop_rm_int_8 - Simple SSSE3 unary operator whose type is v*i8.
///
/// Produces four records per instantiation: rr64 / rm64 on VR64 (MMX
/// registers) and rr128 / rm128 on VR128 (with the OpSize prefix).
///
/// Deliberately NOT wrapped in "let isTwoAddress = 1": these operators have a
/// single source, so there is no first source to tie to $dst. Marking them
/// two-address would tie $dst to the only input, which in the rm forms is a
/// memory operand.
multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr,
                              Intrinsic IntId64, Intrinsic IntId128,
                              bit Commutable = 0> {
  def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR64:$dst, (IntId64 VR64:$src))]> {
    let isCommutable = Commutable;
  }
  def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR64:$dst,
                     (IntId64 (bitconvert (memopv8i8 addr:$src))))]>;

  def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (IntId128 VR128:$src))]>,
                    OpSize {
    let isCommutable = Commutable;
  }
  def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins i128mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                      (IntId128
                       (bitconvert (memopv16i8 addr:$src))))]>, OpSize;
}
2381
/// SS3I_unop_rm_int_16 - Simple SSSE3 unary operator whose type is v*i16.
///
/// Produces four records per instantiation: rr64 / rm64 on VR64 (MMX
/// registers) and rr128 / rm128 on VR128 (with the OpSize prefix).
///
/// Deliberately NOT wrapped in "let isTwoAddress = 1": these operators have a
/// single source, so there is no first source to tie to $dst. Marking them
/// two-address would tie $dst to the only input, which in the rm forms is a
/// memory operand.
multiclass SS3I_unop_rm_int_16<bits<8> opc, string OpcodeStr,
                               Intrinsic IntId64, Intrinsic IntId128,
                               bit Commutable = 0> {
  def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
                   (ins VR64:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR64:$dst, (IntId64 VR64:$src))]> {
    let isCommutable = Commutable;
  }
  def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
                   (ins i64mem:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR64:$dst,
                     (IntId64
                      (bitconvert (memopv4i16 addr:$src))))]>;

  def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (IntId128 VR128:$src))]>,
                    OpSize {
    let isCommutable = Commutable;
  }
  def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins i128mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                      (IntId128
                       (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
}
2415
/// SS3I_unop_rm_int_32 - Simple SSSE3 unary operator whose type is v*i32.
///
/// Produces four records per instantiation: rr64 / rm64 on VR64 (MMX
/// registers) and rr128 / rm128 on VR128 (with the OpSize prefix).
///
/// Deliberately NOT wrapped in "let isTwoAddress = 1": these operators have a
/// single source, so there is no first source to tie to $dst. Marking them
/// two-address would tie $dst to the only input, which in the rm forms is a
/// memory operand.
multiclass SS3I_unop_rm_int_32<bits<8> opc, string OpcodeStr,
                               Intrinsic IntId64, Intrinsic IntId128,
                               bit Commutable = 0> {
  def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
                   (ins VR64:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR64:$dst, (IntId64 VR64:$src))]> {
    let isCommutable = Commutable;
  }
  def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
                   (ins i64mem:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR64:$dst,
                     (IntId64
                      (bitconvert (memopv2i32 addr:$src))))]>;

  def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (IntId128 VR128:$src))]>,
                    OpSize {
    let isCommutable = Commutable;
  }
  def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins i128mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                      (IntId128
                       (bitconvert (memopv4i32 addr:$src))))]>, OpSize;
}
2449
// pabsb / pabsw / pabsd, one unary multiclass instantiation per element
// width; each takes the 64-bit intrinsic followed by the 128-bit one.
defm PABSB : SS3I_unop_rm_int_8 <0x1C, "pabsb", int_x86_ssse3_pabs_b,
                                 int_x86_ssse3_pabs_b_128>;
defm PABSW : SS3I_unop_rm_int_16<0x1D, "pabsw", int_x86_ssse3_pabs_w,
                                 int_x86_ssse3_pabs_w_128>;
defm PABSD : SS3I_unop_rm_int_32<0x1E, "pabsd", int_x86_ssse3_pabs_d,
                                 int_x86_ssse3_pabs_d_128>;
2459
/// SS3I_binop_rm_int_8 - Simple SSSE3 binary operator whose type is v*i8.
///
/// Emits rr64 / rm64 (VR64) and rr128 / rm128 (VR128, OpSize). All four are
/// two-address ($src1 tied to $dst); only the register-register forms take
/// the Commutable flag.
let isTwoAddress = 1 in {
  multiclass SS3I_binop_rm_int_8<bits<8> opc, string OpcodeStr,
                                 Intrinsic IntId64, Intrinsic IntId128,
                                 bit Commutable = 0> {
    // 64-bit (MMX register) forms.
    def rr64 : SS38I<opc, MRMSrcReg,
                     (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     [(set VR64:$dst,
                       (IntId64 VR64:$src1, VR64:$src2))]> {
      let isCommutable = Commutable;
    }
    def rm64 : SS38I<opc, MRMSrcMem,
                     (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     [(set VR64:$dst,
                       (IntId64 VR64:$src1,
                                (bitconvert (memopv8i8 addr:$src2))))]>;

    // 128-bit (XMM register) forms.
    def rr128 : SS38I<opc, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst,
                        (IntId128 VR128:$src1, VR128:$src2))]>,
                OpSize {
      let isCommutable = Commutable;
    }
    def rm128 : SS38I<opc, MRMSrcMem,
                      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst,
                        (IntId128 VR128:$src1,
                                  (bitconvert (memopv16i8 addr:$src2))))]>,
                OpSize;
  }
}
2493
/// SS3I_binop_rm_int_16 - Simple SSSE3 binary operator whose type is v*i16.
///
/// Emits rr64 / rm64 (VR64) and rr128 / rm128 (VR128, OpSize). All four are
/// two-address ($src1 tied to $dst); only the register-register forms take
/// the Commutable flag.
let isTwoAddress = 1 in {
  multiclass SS3I_binop_rm_int_16<bits<8> opc, string OpcodeStr,
                                  Intrinsic IntId64, Intrinsic IntId128,
                                  bit Commutable = 0> {
    // 64-bit (MMX register) forms.
    def rr64 : SS38I<opc, MRMSrcReg,
                     (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     [(set VR64:$dst,
                       (IntId64 VR64:$src1, VR64:$src2))]> {
      let isCommutable = Commutable;
    }
    def rm64 : SS38I<opc, MRMSrcMem,
                     (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     [(set VR64:$dst,
                       (IntId64 VR64:$src1,
                                (bitconvert (memopv4i16 addr:$src2))))]>;

    // 128-bit (XMM register) forms.
    def rr128 : SS38I<opc, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst,
                        (IntId128 VR128:$src1, VR128:$src2))]>,
                OpSize {
      let isCommutable = Commutable;
    }
    def rm128 : SS38I<opc, MRMSrcMem,
                      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst,
                        (IntId128 VR128:$src1,
                                  (bitconvert (memopv8i16 addr:$src2))))]>,
                OpSize;
  }
}
2527
/// SS3I_binop_rm_int_32 - Simple SSSE3 binary operator whose type is v*i32.
///
/// Emits rr64 / rm64 (VR64) and rr128 / rm128 (VR128, OpSize). All four are
/// two-address ($src1 tied to $dst); only the register-register forms take
/// the Commutable flag.
let isTwoAddress = 1 in {
  multiclass SS3I_binop_rm_int_32<bits<8> opc, string OpcodeStr,
                                  Intrinsic IntId64, Intrinsic IntId128,
                                  bit Commutable = 0> {
    // 64-bit (MMX register) forms.
    def rr64 : SS38I<opc, MRMSrcReg,
                     (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     [(set VR64:$dst,
                       (IntId64 VR64:$src1, VR64:$src2))]> {
      let isCommutable = Commutable;
    }
    def rm64 : SS38I<opc, MRMSrcMem,
                     (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     [(set VR64:$dst,
                       (IntId64 VR64:$src1,
                                (bitconvert (memopv2i32 addr:$src2))))]>;

    // 128-bit (XMM register) forms.
    def rr128 : SS38I<opc, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst,
                        (IntId128 VR128:$src1, VR128:$src2))]>,
                OpSize {
      let isCommutable = Commutable;
    }
    def rm128 : SS38I<opc, MRMSrcMem,
                      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst,
                        (IntId128 VR128:$src1,
                                  (bitconvert (memopv4i32 addr:$src2))))]>,
                OpSize;
  }
}
2561
// SSSE3 binary operators. Each defm takes the 0F 38 opcode byte, the
// mnemonic, the 64-bit intrinsic and the 128-bit intrinsic; a trailing 1
// marks the register-register forms commutable.
//
// Horizontal add/subtract are NOT commutable: the low half of the result
// comes from the first operand and the high half from the second, so swapping
// the operands changes the result.
defm PHADDW      : SS3I_binop_rm_int_16<0x01, "phaddw",
                                        int_x86_ssse3_phadd_w,
                                        int_x86_ssse3_phadd_w_128>;
defm PHADDD      : SS3I_binop_rm_int_32<0x02, "phaddd",
                                        int_x86_ssse3_phadd_d,
                                        int_x86_ssse3_phadd_d_128>;
defm PHADDSW     : SS3I_binop_rm_int_16<0x03, "phaddsw",
                                        int_x86_ssse3_phadd_sw,
                                        int_x86_ssse3_phadd_sw_128>;
defm PHSUBW      : SS3I_binop_rm_int_16<0x05, "phsubw",
                                        int_x86_ssse3_phsub_w,
                                        int_x86_ssse3_phsub_w_128>;
defm PHSUBD      : SS3I_binop_rm_int_32<0x06, "phsubd",
                                        int_x86_ssse3_phsub_d,
                                        int_x86_ssse3_phsub_d_128>;
defm PHSUBSW     : SS3I_binop_rm_int_16<0x07, "phsubsw",
                                        int_x86_ssse3_phsub_sw,
                                        int_x86_ssse3_phsub_sw_128>;
// pmaddubsw is NOT commutable: one operand is read as unsigned bytes and the
// other as signed bytes.
defm PMADDUBSW   : SS3I_binop_rm_int_8 <0x04, "pmaddubsw",
                                        int_x86_ssse3_pmadd_ub_sw,
                                        int_x86_ssse3_pmadd_ub_sw_128>;
// pmulhrsw treats both operands identically, so it is commutable.
defm PMULHRSW    : SS3I_binop_rm_int_16<0x0B, "pmulhrsw",
                                        int_x86_ssse3_pmul_hr_sw,
                                        int_x86_ssse3_pmul_hr_sw_128, 1>;
defm PSHUFB      : SS3I_binop_rm_int_8 <0x00, "pshufb",
                                        int_x86_ssse3_pshuf_b,
                                        int_x86_ssse3_pshuf_b_128>;
// psignb / psignw / psignd occupy consecutive opcodes 0x08 / 0x09 / 0x0A.
defm PSIGNB      : SS3I_binop_rm_int_8 <0x08, "psignb",
                                        int_x86_ssse3_psign_b,
                                        int_x86_ssse3_psign_b_128>;
defm PSIGNW      : SS3I_binop_rm_int_16<0x09, "psignw",
                                        int_x86_ssse3_psign_w,
                                        int_x86_ssse3_psign_w_128>;
defm PSIGND      : SS3I_binop_rm_int_32<0x0A, "psignd",
                                        int_x86_ssse3_psign_d,
                                        int_x86_ssse3_psign_d_128>;
2598
// palignr, selected for the int_x86_ssse3_palign_r{,_128} intrinsics.
// Two-address: $src1 is tied to $dst. $src3 is the immediate operand and is
// printed as part of the assembly string.
//
// The rm forms take a memory operand for $src2 and therefore use MRMSrcMem.
//
// NOTE(review): palignr carries an 8-bit immediate in its encoding; confirm
// that SS3AI (built on I<>) actually emits the immediate byte, and whether
// the i16imm / i32imm operand types should become i8imm once the intrinsic
// signatures allow it.
let isTwoAddress = 1 in {
  def PALIGNR64rr  : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
                           (ins VR64:$src1, VR64:$src2, i16imm:$src3),
                           "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                           [(set VR64:$dst,
                             (int_x86_ssse3_palign_r
                              VR64:$src1, VR64:$src2,
                              imm:$src3))]>;
  def PALIGNR64rm  : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
                           (ins VR64:$src1, i64mem:$src2, i16imm:$src3),
                           "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                           [(set VR64:$dst,
                             (int_x86_ssse3_palign_r
                              VR64:$src1,
                              (bitconvert (memopv2i32 addr:$src2)),
                              imm:$src3))]>;

  def PALIGNR128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
                           (ins VR128:$src1, VR128:$src2, i32imm:$src3),
                           "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                           [(set VR128:$dst,
                             (int_x86_ssse3_palign_r_128
                              VR128:$src1, VR128:$src2,
                              imm:$src3))]>, OpSize;
  def PALIGNR128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
                           (ins VR128:$src1, i128mem:$src2, i32imm:$src3),
                           "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                           [(set VR128:$dst,
                             (int_x86_ssse3_palign_r_128
                              VR128:$src1,
                              (bitconvert (memopv4i32 addr:$src2)),
                              imm:$src3))]>, OpSize;
}
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002632
2633//===----------------------------------------------------------------------===//
2634// Non-Instruction Patterns
2635//===----------------------------------------------------------------------===//
2636
// 128-bit vector undef's.
def : Pat<(v4f32 (undef)), (IMPLICIT_DEF_VR128)>,
      Requires<[HasSSE2]>;
def : Pat<(v2f64 (undef)), (IMPLICIT_DEF_VR128)>,
      Requires<[HasSSE2]>;
def : Pat<(v16i8 (undef)), (IMPLICIT_DEF_VR128)>,
      Requires<[HasSSE2]>;
def : Pat<(v8i16 (undef)), (IMPLICIT_DEF_VR128)>,
      Requires<[HasSSE2]>;
def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>,
      Requires<[HasSSE2]>;
def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>,
      Requires<[HasSSE2]>;

// 128-bit vector all zero's.
def : Pat<(v16i8 immAllZerosV), (V_SET0)>,
      Requires<[HasSSE2]>;
def : Pat<(v8i16 immAllZerosV), (V_SET0)>,
      Requires<[HasSSE2]>;
def : Pat<(v4i32 immAllZerosV), (V_SET0)>,
      Requires<[HasSSE2]>;
def : Pat<(v2i64 immAllZerosV), (V_SET0)>,
      Requires<[HasSSE2]>;
def : Pat<(v2f64 immAllZerosV), (V_SET0)>,
      Requires<[HasSSE2]>;

// 128-bit vector all one's.
def : Pat<(v16i8 immAllOnesV), (V_SETALLONES)>,
      Requires<[HasSSE2]>;
def : Pat<(v8i16 immAllOnesV), (V_SETALLONES)>,
      Requires<[HasSSE2]>;
def : Pat<(v4i32 immAllOnesV), (V_SETALLONES)>,
      Requires<[HasSSE2]>;
def : Pat<(v2i64 immAllOnesV), (V_SETALLONES)>,
      Requires<[HasSSE2]>;
// NOTE(review): V_SETALLONES expands to pcmpeqd on an XMM register, which is
// an SSE2 instruction, yet this pattern is gated on HasSSE1 only - confirm
// whether the predicate should be HasSSE2.
def : Pat<(v4f32 immAllOnesV), (V_SETALLONES)>,
      Requires<[HasSSE1]>;
2658
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002659
// Scalar to v8i16 / v16i8. The source may be a GR32, but only the lower 8 or
// 16-bits matter. Both element widths reuse the 32-bit movd.
def : Pat<(v8i16 (X86s2vec GR32:$src)),
          (MOVDI2PDIrr GR32:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v16i8 (X86s2vec GR32:$src)),
          (MOVDI2PDIrr GR32:$src)>,
      Requires<[HasSSE2]>;
2666
// bit_convert
// Every 128-bit vector type lives in VR128, so a bitconvert between any two
// of them selects to the source register itself with the new type.
let Predicates = [HasSSE2] in {
  // -> v2i64
  def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))),
            (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))),
            (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))),
            (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))),
            (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))),
            (v2i64 VR128:$src)>;

  // -> v4i32
  def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))),
            (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))),
            (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))),
            (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))),
            (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))),
            (v4i32 VR128:$src)>;

  // -> v8i16
  def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))),
            (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))),
            (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))),
            (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))),
            (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))),
            (v8i16 VR128:$src)>;

  // -> v16i8
  def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))),
            (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))),
            (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))),
            (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))),
            (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))),
            (v16i8 VR128:$src)>;

  // -> v4f32
  def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))),
            (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))),
            (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))),
            (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))),
            (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))),
            (v4f32 VR128:$src)>;

  // -> v2f64
  def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))),
            (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))),
            (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))),
            (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))),
            (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))),
            (v2f64 VR128:$src)>;
}
2700
// Move scalar to XMM zero-extended.
// Every pattern here matches a MOVL_shuffle_mask shuffle whose first operand
// is an all-zeros vector and whose second operand is a scalar placed into a
// vector.  All of them are gated on SSE2 and share AddedComplexity = 15.
let AddedComplexity = 15, Predicates = [HasSSE2] in {
// movd to XMM register zero-extends, so a GR32 source maps straight onto
// MOVZDI2PDIrr.
def : Pat<(v8i16 (vector_shuffle immAllZerosV,
                  (v8i16 (X86s2vec GR32:$src)), MOVL_shuffle_mask)),
          (MOVZDI2PDIrr GR32:$src)>;
def : Pat<(v16i8 (vector_shuffle immAllZerosV,
                  (v16i8 (X86s2vec GR32:$src)), MOVL_shuffle_mask)),
          (MOVZDI2PDIrr GR32:$src)>;

// Zeroing a VR128 then do a MOVS{S|D} to the lower bits.
def : Pat<(v2f64 (vector_shuffle immAllZerosV,
                  (v2f64 (scalar_to_vector FR64:$src)), MOVL_shuffle_mask)),
          (MOVLSD2PDrr (V_SET0), FR64:$src)>;
def : Pat<(v4f32 (vector_shuffle immAllZerosV,
                  (v4f32 (scalar_to_vector FR32:$src)), MOVL_shuffle_mask)),
          (MOVLSS2PSrr (V_SET0), FR32:$src)>;
}
2718
// Splat v2f64 / v2i64
// A unary shuffle (second operand undef) with a splat-low or unpack-high mask
// is selected to an unpack instruction that takes the source register as both
// operands.
let AddedComplexity = 10, Predicates = [HasSSE2] in {
// v2f64
def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), SSE_splat_lo_mask:$sm),
          (UNPCKLPDrr VR128:$src, VR128:$src)>;
def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), UNPCKH_shuffle_mask:$sm),
          (UNPCKHPDrr VR128:$src, VR128:$src)>;

// v2i64
def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), SSE_splat_lo_mask:$sm),
          (PUNPCKLQDQrr VR128:$src, VR128:$src)>;
def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), UNPCKH_shuffle_mask:$sm),
          (PUNPCKHQDQrr VR128:$src, VR128:$src)>;
}
2730
// Splat v4f32
// The matched splat mask is passed through as the SHUFPS immediate operand,
// and the source register is used for both inputs.
let Predicates = [HasSSE1] in
def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SSE_splat_mask:$sm),
          (SHUFPSrri VR128:$src, VR128:$src, SSE_splat_mask:$sm)>;
2735
// Special unary SHUFPSrri case.
// FIXME: when we want non two-address code, then we should use PSHUFD?
let Predicates = [HasSSE1] in
def : Pat<(vector_shuffle (v4f32 VR128:$src1), (undef),
           SHUFP_unary_shuffle_mask:$sm),
          (SHUFPSrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>;

let Predicates = [HasSSE2] in {
// Special unary SHUFPDrri case.
def : Pat<(vector_shuffle (v2f64 VR128:$src1), (undef),
           SHUFP_unary_shuffle_mask:$sm),
          (SHUFPDrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>;

// Unary v4f32 shuffle with PSHUF* in order to fold a load.
def : Pat<(vector_shuffle (memopv4f32 addr:$src1), (undef),
           SHUFP_unary_shuffle_mask:$sm),
          (PSHUFDmi addr:$src1, SHUFP_unary_shuffle_mask:$sm)>;

// Special binary v4i32 shuffle cases with SHUFPS (register and folded-load
// forms of the second operand).
def : Pat<(vector_shuffle (v4i32 VR128:$src1), (v4i32 VR128:$src2),
           PSHUFD_binary_shuffle_mask:$sm),
          (SHUFPSrri VR128:$src1, VR128:$src2, PSHUFD_binary_shuffle_mask:$sm)>;
def : Pat<(vector_shuffle (v4i32 VR128:$src1),
           (bc_v4i32 (memopv2i64 addr:$src2)), PSHUFD_binary_shuffle_mask:$sm),
          (SHUFPSrmi VR128:$src1, addr:$src2, PSHUFD_binary_shuffle_mask:$sm)>;
}
2761
// vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
// A unary shuffle with the UNPCKL_v_undef mask is selected to the
// unpack-low instruction for the element type, with the source register
// supplied as both operands.
//
// Predicates follow the instruction set that provides each instruction:
// UNPCKLPS is an SSE1 instruction, whereas the XMM forms of PUNPCKLBW /
// PUNPCKLWD / PUNPCKLDQ are SSE2.  (The v4f32 and v4i32 predicates were
// previously swapped, which allowed PUNPCKLDQ to be selected without SSE2
// and needlessly withheld UNPCKLPS from SSE1-only targets.)
let AddedComplexity = 10 in {
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
                  UNPCKL_v_undef_shuffle_mask)),
          (UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef),
                  UNPCKL_v_undef_shuffle_mask)),
          (PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef),
                  UNPCKL_v_undef_shuffle_mask)),
          (PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
                  UNPCKL_v_undef_shuffle_mask)),
          (PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
}
2777
// vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
// A unary shuffle with the UNPCKH_v_undef mask is selected to the
// unpack-high instruction for the element type, with the source register
// supplied as both operands.
//
// Predicates follow the instruction set that provides each instruction:
// UNPCKHPS is an SSE1 instruction, whereas the XMM forms of PUNPCKHBW /
// PUNPCKHWD / PUNPCKHDQ are SSE2.  (The v4f32 and v4i32 predicates were
// previously swapped, which allowed PUNPCKHDQ to be selected without SSE2
// and needlessly withheld UNPCKHPS from SSE1-only targets.)
let AddedComplexity = 10 in {
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
                  UNPCKH_v_undef_shuffle_mask)),
          (UNPCKHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef),
                  UNPCKH_v_undef_shuffle_mask)),
          (PUNPCKHBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef),
                  UNPCKH_v_undef_shuffle_mask)),
          (PUNPCKHWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
                  UNPCKH_v_undef_shuffle_mask)),
          (PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
}
2793
// Register-register MOVLHPS / MOVHLPS selections.  These patterns carry no
// subtarget predicate of their own.
let AddedComplexity = 15 in {

// vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, MOVHP_shuffle_mask)),
          (MOVLHPSrr VR128:$src1, VR128:$src2)>;

// vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
                  MOVHLPS_shuffle_mask)),
          (MOVHLPSrr VR128:$src1, VR128:$src2)>;

// vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS
// Unary form: the single source register is used for both operands.
def : Pat<(v4f32 (vector_shuffle VR128:$src1, (undef),
                  MOVHLPS_v_undef_shuffle_mask)),
          (MOVHLPSrr VR128:$src1, VR128:$src1)>;
def : Pat<(v4i32 (vector_shuffle VR128:$src1, (undef),
                  MOVHLPS_v_undef_shuffle_mask)),
          (MOVHLPSrr VR128:$src1, VR128:$src1)>;

}
2813
// Shuffles whose second operand is a load are folded into the memory forms
// of MOVLP{S|D} / MOVHP{S|D}.
let AddedComplexity = 20 in {
// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
// vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS
def : Pat<(v4f32 (vector_shuffle VR128:$src1, (memopv4f32 addr:$src2),
                  MOVLP_shuffle_mask)),
          (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
def : Pat<(v2f64 (vector_shuffle VR128:$src1, (memopv2f64 addr:$src2),
                  MOVLP_shuffle_mask)),
          (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v4f32 (vector_shuffle VR128:$src1, (memopv4f32 addr:$src2),
                  MOVHP_shuffle_mask)),
          (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
def : Pat<(v2f64 (vector_shuffle VR128:$src1, (memopv2f64 addr:$src2),
                  MOVHP_shuffle_mask)),
          (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;

// Integer-typed counterparts of the four patterns above (v4i32 loads are
// matched through a bitcast of a v2i64 memop).
def : Pat<(v4i32 (vector_shuffle VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)),
                  MOVLP_shuffle_mask)),
          (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (vector_shuffle VR128:$src1, (memopv2i64 addr:$src2),
                  MOVLP_shuffle_mask)),
          (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)),
                  MOVHP_shuffle_mask)),
          (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
// This entry used to be a verbatim copy of the v2i64 MOVLP_shuffle_mask ->
// MOVLPDrm pattern two entries up, leaving the v2i64 high-half load with no
// pattern.  It now mirrors the v2f64 MOVHP_shuffle_mask -> MOVHPDrm pattern.
def : Pat<(v2i64 (vector_shuffle VR128:$src1, (memopv2i64 addr:$src2),
                  MOVHP_shuffle_mask)),
          (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
}
2843
// Register-register MOVL / MOVLP shuffles on integer (and v4f32) vectors,
// selected to MOVLPSrr / MOVLPDrr.  All four patterns are gated on SSE2.
let AddedComplexity = 15, Predicates = [HasSSE2] in {
// Setting the lowest element in the vector.
def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, MOVL_shuffle_mask)),
          (MOVLPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2, MOVL_shuffle_mask)),
          (MOVLPDrr VR128:$src1, VR128:$src2)>;

// vector_shuffle v1, v2 <4, 5, 2, 3> using MOVLPDrr (movsd)
def : Pat<(v4f32 (vector_shuffle VR128:$src1, VR128:$src2, MOVLP_shuffle_mask)),
          (MOVLPDrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, MOVLP_shuffle_mask)),
          (MOVLPDrr VR128:$src1, VR128:$src2)>;
}
2861
// Set lowest element and zero upper elements.
// An f64 load placed into the low element of an all-zeros v2f64 and then
// viewed as v2i64 is selected to the single memory-form MOVZQI2PQIrm.
let AddedComplexity = 20, Predicates = [HasSSE2] in
def : Pat<(bc_v2i64 (vector_shuffle immAllZerosV,
                     (v2f64 (scalar_to_vector (loadf64 addr:$src))),
                     MOVL_shuffle_mask)),
          (MOVZQI2PQIrm addr:$src)>;
2868
// FIXME: Temporary workaround since 2-wide shuffle is broken.
// Each SSE2 two-element intrinsic is mapped directly onto its instruction,
// in a register form and (where the intrinsic takes a vector second operand)
// a folded-load form.  Every result is given an explicit vector type; the
// final punpckl_qdq load pattern previously omitted it, unlike its siblings.
def : Pat<(int_x86_sse2_movs_d VR128:$src1, VR128:$src2),
          (v2f64 (MOVLPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_loadh_pd VR128:$src1, addr:$src2),
          (v2f64 (MOVHPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_loadl_pd VR128:$src1, addr:$src2),
          (v2f64 (MOVLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_shuf_pd VR128:$src1, VR128:$src2, imm:$src3),
          (v2f64 (SHUFPDrri VR128:$src1, VR128:$src2, imm:$src3))>,
      Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_shuf_pd VR128:$src1, (load addr:$src2), imm:$src3),
          (v2f64 (SHUFPDrmi VR128:$src1, addr:$src2, imm:$src3))>,
      Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_unpckh_pd VR128:$src1, VR128:$src2),
          (v2f64 (UNPCKHPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_unpckh_pd VR128:$src1, (load addr:$src2)),
          (v2f64 (UNPCKHPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_unpckl_pd VR128:$src1, VR128:$src2),
          (v2f64 (UNPCKLPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_unpckl_pd VR128:$src1, (load addr:$src2)),
          (v2f64 (UNPCKLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_punpckh_qdq VR128:$src1, VR128:$src2),
          (v2i64 (PUNPCKHQDQrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_punpckh_qdq VR128:$src1, (load addr:$src2)),
          (v2i64 (PUNPCKHQDQrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_punpckl_qdq VR128:$src1, VR128:$src2),
          (v2i64 (PUNPCKLQDQrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_punpckl_qdq VR128:$src1, (load addr:$src2)),
          (v2i64 (PUNPCKLQDQrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
2898
// Some special case pandn patterns.
// (and (xor x, all-ones), y) is selected to PANDN.  The all-ones vector may
// have been built as v4i32, v8i16 or v16i8 before being bitcast to v2i64, so
// each of those spellings gets its own pattern.
let Predicates = [HasSSE2] in {
// Register second operand.
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
                  VR128:$src2)),
          (PANDNrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
                  VR128:$src2)),
          (PANDNrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
                  VR128:$src2)),
          (PANDNrr VR128:$src1, VR128:$src2)>;

// Second operand folded from memory.
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
                  (memopv2i64 addr:$src2))),
          (PANDNrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
                  (memopv2i64 addr:$src2))),
          (PANDNrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
                  (memopv2i64 addr:$src2))),
          (PANDNrm VR128:$src1, addr:$src2)>;
}
2919
// Use movaps / movups for SSE integer load / store (one byte shorter).
// Aligned accesses select MOVAPS, all others MOVUPS.

// Loads.
let Predicates = [HasSSE1] in {
def : Pat<(alignedloadv4i32 addr:$src), (MOVAPSrm addr:$src)>;
def : Pat<(loadv4i32 addr:$src),        (MOVUPSrm addr:$src)>;
}
let Predicates = [HasSSE2] in {
def : Pat<(alignedloadv2i64 addr:$src), (MOVAPSrm addr:$src)>;
def : Pat<(loadv2i64 addr:$src),        (MOVUPSrm addr:$src)>;
}

// Stores, one pattern per integer vector type.
let Predicates = [HasSSE2] in {
def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
          (MOVAPSmr addr:$dst, VR128:$src)>;
def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
          (MOVAPSmr addr:$dst, VR128:$src)>;
def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
          (MOVAPSmr addr:$dst, VR128:$src)>;
def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
          (MOVAPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (v2i64 VR128:$src), addr:$dst),
          (MOVUPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (v4i32 VR128:$src), addr:$dst),
          (MOVUPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (v8i16 VR128:$src), addr:$dst),
          (MOVUPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
          (MOVUPSmr addr:$dst, VR128:$src)>;
}
Evan Cheng86ab7d32007-07-31 08:04:03 +00002946
// (vextract (v4i32 bc (v4f32 s2v (f32 load $addr))), 0) -> (i32 load $addr)
// Extracting element 0 of a scalar FP load that was placed in a vector and
// bitcast to an integer vector is selected to a plain integer load from the
// same address.
let Predicates = [HasSSE2] in
def : Pat<(vector_extract
           (bc_v4i32 (v4f32 (scalar_to_vector (loadf32 addr:$src)))),
           (iPTR 0)),
          (MOV32rm addr:$src)>;

// Same idea for an f64 load viewed as v2i64; additionally gated on
// In64BitMode.
let Predicates = [HasSSE2, In64BitMode] in
def : Pat<(vector_extract
           (bc_v2i64 (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
           (iPTR 0)),
          (MOV64rm addr:$src)>;