blob: d6ba5ded98e5c42a4dcb889891d67b680aa5fd12 [file] [log] [blame]
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001//====- X86InstrSSE.td - Describe the X86 Instruction Set -------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Evan Cheng and is distributed under the University
6// of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file describes the X86 SSE instruction set, defining the instructions,
11// and properties of the instructions which are needed for code generation,
12// machine code emission, and analysis.
13//
14//===----------------------------------------------------------------------===//
15
16
17//===----------------------------------------------------------------------===//
18// SSE specific DAG Nodes.
19//===----------------------------------------------------------------------===//
20
// Type profile for the SSE FP shift node: one result, two operands. The
// result and operand 1 share the same floating-point type; operand 2 is an
// integer (presumably the shift amount).
def SDTX86FPShiftOp : SDTypeProfile<1, 2, [
  SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>
]>;
23
// Binary FP min/max nodes.
def X86fmin   : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
def X86fmax   : SDNode<"X86ISD::FMAX", SDTFPBinOp>;

// Binary FP logical nodes; each is marked commutative and associative.
def X86fand   : SDNode<"X86ISD::FAND", SDTFPBinOp,
                       [SDNPCommutative, SDNPAssociative]>;
def X86for    : SDNode<"X86ISD::FOR", SDTFPBinOp,
                       [SDNPCommutative, SDNPAssociative]>;
def X86fxor   : SDNode<"X86ISD::FXOR", SDTFPBinOp,
                       [SDNPCommutative, SDNPAssociative]>;

// Unary FP nodes.
def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
def X86frcp   : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;

// FP shift node, typed by SDTX86FPShiftOp.
def X86fsrl   : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;

// Compare nodes; both carry a chain and produce an output flag.
def X86comi   : SDNode<"X86ISD::COMI", SDTX86CmpTest,
                       [SDNPHasChain, SDNPOutFlag]>;
def X86ucomi  : SDNode<"X86ISD::UCOMI", SDTX86CmpTest,
                       [SDNPHasChain, SDNPOutFlag]>;

// Vector helper nodes with unconstrained type profiles
// (1 result; 1, 2 and 3 operands respectively).
def X86s2vec  : SDNode<"X86ISD::S2VEC", SDTypeProfile<1, 1, []>, []>;
def X86pextrw : SDNode<"X86ISD::PEXTRW", SDTypeProfile<1, 2, []>, []>;
def X86pinsrw : SDNode<"X86ISD::PINSRW", SDTypeProfile<1, 3, []>, []>;
42
43//===----------------------------------------------------------------------===//
44// SSE 'Special' Instructions
45//===----------------------------------------------------------------------===//
46
// Pseudo-format instructions whose patterns match an `undef` value for each
// of the SSE register classes. The asm string is only a '#' marker.
def IMPLICIT_DEF_VR128 : I<0, Pseudo, (outs VR128:$dst), (ins),
                           "#IMPLICIT_DEF $dst",
                           [(set VR128:$dst, (v4f32 (undef)))]>,
                         Requires<[HasSSE1]>;

// FR32 is gated on SSE1, matching the other FR32 definitions in this file
// (e.g. FsFLD0SS). Any target with SSE2 also satisfies HasSSE1, so this only
// widens availability.
def IMPLICIT_DEF_FR32  : I<0, Pseudo, (outs FR32:$dst), (ins),
                           "#IMPLICIT_DEF $dst",
                           [(set FR32:$dst, (undef))]>,
                         Requires<[HasSSE1]>;

def IMPLICIT_DEF_FR64  : I<0, Pseudo, (outs FR64:$dst), (ins),
                           "#IMPLICIT_DEF $dst",
                           [(set FR64:$dst, (undef))]>,
                         Requires<[HasSSE2]>;
57
58//===----------------------------------------------------------------------===//
59// SSE Complex Patterns
60//===----------------------------------------------------------------------===//
61
62// These are 'extloads' from a scalar to the low element of a vector, zeroing
63// the top elements. These are used for the SSE 'ss' and 'sd' instruction
64// forms.
// Complex patterns for the scalar-to-vector 'extloads' used by the 'ss' and
// 'sd' instruction forms. Both are selected by SelectScalarSSELoad, yield
// four operands, and carry a chain.
def sse_load_f32 : ComplexPattern<v4f32, 4, "SelectScalarSSELoad",
                                  [], [SDNPHasChain]>;
def sse_load_f64 : ComplexPattern<v2f64, 4, "SelectScalarSSELoad",
                                  [], [SDNPHasChain]>;
69
// Memory operands paired with the sse_load_* complex patterns. Each expands
// to four machine operands (presumably base, scale, index, displacement --
// confirm against the X86 address operand layout) and differs only in the
// printer used.
def ssmem : Operand<v4f32> {
  let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm);
  let PrintMethod   = "printf32mem";
}
def sdmem : Operand<v2f64> {
  let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm);
  let PrintMethod   = "printf64mem";
}
78
79//===----------------------------------------------------------------------===//
80// SSE pattern fragments
81//===----------------------------------------------------------------------===//
82
// Plain 'load' fragments constrained to each 128-bit vector type. No
// alignment predicate is attached here.
def loadv4f32 : PatFrag<(ops node:$ptr),
                        (v4f32 (load node:$ptr))>;
def loadv2f64 : PatFrag<(ops node:$ptr),
                        (v2f64 (load node:$ptr))>;
def loadv4i32 : PatFrag<(ops node:$ptr),
                        (v4i32 (load node:$ptr))>;
def loadv2i64 : PatFrag<(ops node:$ptr),
                        (v2i64 (load node:$ptr))>;
87
// Store fragment that insists on vector alignment: the node must be a
// StoreSDNode that is non-truncating, unindexed, and reports an alignment of
// at least 16. Anything else is rejected.
def alignedstore : PatFrag<(ops node:$val, node:$ptr),
                           (st node:$val, node:$ptr), [{
  StoreSDNode *Store = dyn_cast<StoreSDNode>(N);
  if (!Store)
    return false;
  return !Store->isTruncatingStore() &&
         Store->getAddressingMode() == ISD::UNINDEXED &&
         Store->getAlignment() >= 16;
}]>;
97
// Load fragment that insists on vector alignment: the node must be a
// LoadSDNode that is a non-extending, unindexed load reporting an alignment
// of at least 16. Anything else is rejected.
def alignedload : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
  LoadSDNode *Load = dyn_cast<LoadSDNode>(N);
  if (!Load)
    return false;
  return Load->getExtensionType() == ISD::NON_EXTLOAD &&
         Load->getAddressingMode() == ISD::UNINDEXED &&
         Load->getAlignment() >= 16;
}]>;
106
// Typed wrappers around 'alignedload': two scalar FP forms followed by the
// four 128-bit vector forms.
def alignedloadfsf32 : PatFrag<(ops node:$ptr),
                               (f32 (alignedload node:$ptr))>;
def alignedloadfsf64 : PatFrag<(ops node:$ptr),
                               (f64 (alignedload node:$ptr))>;
def alignedloadv4f32 : PatFrag<(ops node:$ptr),
                               (v4f32 (alignedload node:$ptr))>;
def alignedloadv2f64 : PatFrag<(ops node:$ptr),
                               (v2f64 (alignedload node:$ptr))>;
def alignedloadv4i32 : PatFrag<(ops node:$ptr),
                               (v4i32 (alignedload node:$ptr))>;
def alignedloadv2i64 : PatFrag<(ops node:$ptr),
                               (v2i64 (alignedload node:$ptr))>;
113
// Load fragment for memory operands folded into most SSE instructions. Some
// targets require such operands to be naturally aligned and others do not;
// at present the predicate always demands it (non-extending, unindexed,
// alignment of at least 16).
// FIXME: Actually implement support for targets that don't require the
//        alignment.  This probably wants a subtarget predicate.
def memop : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
  LoadSDNode *Load = dyn_cast<LoadSDNode>(N);
  if (!Load)
    return false;
  return Load->getExtensionType() == ISD::NON_EXTLOAD &&
         Load->getAddressingMode() == ISD::UNINDEXED &&
         Load->getAlignment() >= 16;
}]>;
126
// Typed wrappers around 'memop': two scalar FP forms followed by the four
// 128-bit vector forms.
def memopfsf32 : PatFrag<(ops node:$ptr),
                         (f32 (memop node:$ptr))>;
def memopfsf64 : PatFrag<(ops node:$ptr),
                         (f64 (memop node:$ptr))>;
def memopv4f32 : PatFrag<(ops node:$ptr),
                         (v4f32 (memop node:$ptr))>;
def memopv2f64 : PatFrag<(ops node:$ptr),
                         (v2f64 (memop node:$ptr))>;
def memopv4i32 : PatFrag<(ops node:$ptr),
                         (v4i32 (memop node:$ptr))>;
def memopv2i64 : PatFrag<(ops node:$ptr),
                         (v2i64 (memop node:$ptr))>;
133
// Bitconvert fragments: reinterpret the input as the named 128-bit vector
// type.
def bc_v4f32 : PatFrag<(ops node:$in),
                       (v4f32 (bitconvert node:$in))>;
def bc_v2f64 : PatFrag<(ops node:$in),
                       (v2f64 (bitconvert node:$in))>;
def bc_v16i8 : PatFrag<(ops node:$in),
                       (v16i8 (bitconvert node:$in))>;
def bc_v8i16 : PatFrag<(ops node:$in),
                       (v8i16 (bitconvert node:$in))>;
def bc_v4i32 : PatFrag<(ops node:$in),
                       (v4i32 (bitconvert node:$in))>;
def bc_v2i64 : PatFrag<(ops node:$in),
                       (v2i64 (bitconvert node:$in))>;
140
// Matches an f32 immediate that is exactly +0.0.
def fp32imm0 : PatLeaf<(f32 fpimm), [{ return N->isExactlyValue(+0.0); }]>;
144
// Immediate transform: rewrites the matched immediate as (imm >> 3), emitted
// as an i32 immediate.
def PSxLDQ_imm : SDNodeXForm<imm, [{
  return getI32Imm(N->getValue() >> 3);
}]>;
149
// Transforms that turn a vector_shuffle mask (a build_vector) into the 8-bit
// immediate expected by the corresponding shuffle instruction.

// Immediate for PSHUF*, SHUFP* and similar.
def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector,
  [{ return getI8Imm(X86::getShuffleSHUFImmediate(N)); }]>;

// Immediate for PSHUFHW.
def SHUFFLE_get_pshufhw_imm : SDNodeXForm<build_vector,
  [{ return getI8Imm(X86::getShufflePSHUFHWImmediate(N)); }]>;

// Immediate for PSHUFLW.
def SHUFFLE_get_pshuflw_imm : SDNodeXForm<build_vector,
  [{ return getI8Imm(X86::getShufflePSHUFLWImmediate(N)); }]>;
167
// Shuffle-mask leaves. Each matches a build_vector accepted by the named
// X86:: predicate; leaves that list a third argument also convert the mask
// to an immediate through that transform.

// Splat masks.
def SSE_splat_mask : PatLeaf<(build_vector),
  [{ return X86::isSplatMask(N); }], SHUFFLE_get_shuf_imm>;
def SSE_splat_lo_mask : PatLeaf<(build_vector),
  [{ return X86::isSplatLoMask(N); }]>;

// MOVHLPS / MOVHP / MOVLP / MOVL masks.
def MOVHLPS_shuffle_mask : PatLeaf<(build_vector),
  [{ return X86::isMOVHLPSMask(N); }]>;
def MOVHLPS_v_undef_shuffle_mask : PatLeaf<(build_vector),
  [{ return X86::isMOVHLPS_v_undef_Mask(N); }]>;
def MOVHP_shuffle_mask : PatLeaf<(build_vector),
  [{ return X86::isMOVHPMask(N); }]>;
def MOVLP_shuffle_mask : PatLeaf<(build_vector),
  [{ return X86::isMOVLPMask(N); }]>;
def MOVL_shuffle_mask : PatLeaf<(build_vector),
  [{ return X86::isMOVLMask(N); }]>;

// MOVSHDUP / MOVSLDUP masks.
def MOVSHDUP_shuffle_mask : PatLeaf<(build_vector),
  [{ return X86::isMOVSHDUPMask(N); }]>;
def MOVSLDUP_shuffle_mask : PatLeaf<(build_vector),
  [{ return X86::isMOVSLDUPMask(N); }]>;

// Unpack masks, including the variants whose second input is undef.
def UNPCKL_shuffle_mask : PatLeaf<(build_vector),
  [{ return X86::isUNPCKLMask(N); }]>;
def UNPCKH_shuffle_mask : PatLeaf<(build_vector),
  [{ return X86::isUNPCKHMask(N); }]>;
def UNPCKL_v_undef_shuffle_mask : PatLeaf<(build_vector),
  [{ return X86::isUNPCKL_v_undef_Mask(N); }]>;
def UNPCKH_v_undef_shuffle_mask : PatLeaf<(build_vector),
  [{ return X86::isUNPCKH_v_undef_Mask(N); }]>;

// PSHUF* masks, each paired with its immediate transform.
def PSHUFD_shuffle_mask : PatLeaf<(build_vector),
  [{ return X86::isPSHUFDMask(N); }], SHUFFLE_get_shuf_imm>;
def PSHUFHW_shuffle_mask : PatLeaf<(build_vector),
  [{ return X86::isPSHUFHWMask(N); }], SHUFFLE_get_pshufhw_imm>;
def PSHUFLW_shuffle_mask : PatLeaf<(build_vector),
  [{ return X86::isPSHUFLWMask(N); }], SHUFFLE_get_pshuflw_imm>;

// SHUFP masks. The unary SHUFP leaf reuses the PSHUFD predicate and the
// binary PSHUFD leaf reuses the SHUFP predicate.
def SHUFP_unary_shuffle_mask : PatLeaf<(build_vector),
  [{ return X86::isPSHUFDMask(N); }], SHUFFLE_get_shuf_imm>;
def SHUFP_shuffle_mask : PatLeaf<(build_vector),
  [{ return X86::isSHUFPMask(N); }], SHUFFLE_get_shuf_imm>;
def PSHUFD_binary_shuffle_mask : PatLeaf<(build_vector),
  [{ return X86::isSHUFPMask(N); }], SHUFFLE_get_shuf_imm>;
243
244//===----------------------------------------------------------------------===//
245// SSE scalar FP Instructions
246//===----------------------------------------------------------------------===//
247
// CMOV* pseudos implement the SSE SELECT DAG operation. They are flagged
// usesCustomDAGSchedInserter, so the scheduler expands each one into a branch
// sequence. Operands: $t and $f are the two candidate values and $cond is an
// 8-bit condition immediate.
let usesCustomDAGSchedInserter = 1 in {
  // Scalar forms.
  def CMOV_FR32 : I<0, Pseudo, (outs FR32:$dst),
                    (ins FR32:$t, FR32:$f, i8imm:$cond),
                    "#CMOV_FR32 PSEUDO!",
                    [(set FR32:$dst,
                      (X86cmov FR32:$t, FR32:$f, imm:$cond))]>;
  def CMOV_FR64 : I<0, Pseudo, (outs FR64:$dst),
                    (ins FR64:$t, FR64:$f, i8imm:$cond),
                    "#CMOV_FR64 PSEUDO!",
                    [(set FR64:$dst,
                      (X86cmov FR64:$t, FR64:$f, imm:$cond))]>;

  // 128-bit vector forms, one per result type.
  def CMOV_V4F32 : I<0, Pseudo, (outs VR128:$dst),
                     (ins VR128:$t, VR128:$f, i8imm:$cond),
                     "#CMOV_V4F32 PSEUDO!",
                     [(set VR128:$dst,
                       (v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond)))]>;
  def CMOV_V2F64 : I<0, Pseudo, (outs VR128:$dst),
                     (ins VR128:$t, VR128:$f, i8imm:$cond),
                     "#CMOV_V2F64 PSEUDO!",
                     [(set VR128:$dst,
                       (v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond)))]>;
  def CMOV_V2I64 : I<0, Pseudo, (outs VR128:$dst),
                     (ins VR128:$t, VR128:$f, i8imm:$cond),
                     "#CMOV_V2I64 PSEUDO!",
                     [(set VR128:$dst,
                       (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond)))]>;
}
275
276//===----------------------------------------------------------------------===//
277// SSE1 Instructions
278//===----------------------------------------------------------------------===//
279
// Scalar single-precision moves: register copy (no selection pattern), load
// from memory, and store to memory.
def MOVSSrr : SSI<0x10, MRMSrcReg,
                  (outs FR32:$dst), (ins FR32:$src),
                  "movss {$src, $dst|$dst, $src}", []>;
def MOVSSrm : SSI<0x10, MRMSrcMem,
                  (outs FR32:$dst), (ins f32mem:$src),
                  "movss {$src, $dst|$dst, $src}",
                  [(set FR32:$dst, (loadf32 addr:$src))]>;
def MOVSSmr : SSI<0x11, MRMDestMem,
                  (outs), (ins f32mem:$dst, FR32:$src),
                  "movss {$src, $dst|$dst, $src}",
                  [(store FR32:$src, addr:$dst)]>;
289
// Scalar conversions between f32 and i32.

// f32 -> signed i32 (selected for fp_to_sint), register and memory sources.
def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg,
                      (outs GR32:$dst), (ins FR32:$src),
                      "cvttss2si {$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (fp_to_sint FR32:$src))]>;
def CVTTSS2SIrm : SSI<0x2C, MRMSrcMem,
                      (outs GR32:$dst), (ins f32mem:$src),
                      "cvttss2si {$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;

// Signed i32 -> f32 (selected for sint_to_fp), register and memory sources.
def CVTSI2SSrr : SSI<0x2A, MRMSrcReg,
                     (outs FR32:$dst), (ins GR32:$src),
                     "cvtsi2ss {$src, $dst|$dst, $src}",
                     [(set FR32:$dst, (sint_to_fp GR32:$src))]>;
def CVTSI2SSrm : SSI<0x2A, MRMSrcMem,
                     (outs FR32:$dst), (ins i32mem:$src),
                     "cvtsi2ss {$src, $dst|$dst, $src}",
                     [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
303
// Intrinsic forms of the f32 -> i32 conversions. These match the
// int_x86_sse_* intrinsics, which take XMM (VR128) operands rather than FR32.
def Int_CVTSS2SIrr : SSI<0x2D, MRMSrcReg,
                         (outs GR32:$dst), (ins VR128:$src),
                         "cvtss2si {$src, $dst|$dst, $src}",
                         [(set GR32:$dst,
                           (int_x86_sse_cvtss2si VR128:$src))]>;
def Int_CVTSS2SIrm : SSI<0x2D, MRMSrcMem,
                         (outs GR32:$dst), (ins f32mem:$src),
                         "cvtss2si {$src, $dst|$dst, $src}",
                         [(set GR32:$dst,
                           (int_x86_sse_cvtss2si (load addr:$src)))]>;

// Same opcode as CVTTSS2SI, matched through the intrinsic instead.
def Int_CVTTSS2SIrr : SSI<0x2C, MRMSrcReg,
                          (outs GR32:$dst), (ins VR128:$src),
                          "cvttss2si {$src, $dst|$dst, $src}",
                          [(set GR32:$dst,
                            (int_x86_sse_cvttss2si VR128:$src))]>;
def Int_CVTTSS2SIrm : SSI<0x2C, MRMSrcMem,
                          (outs GR32:$dst), (ins f32mem:$src),
                          "cvttss2si {$src, $dst|$dst, $src}",
                          [(set GR32:$dst,
                            (int_x86_sse_cvttss2si (load addr:$src)))]>;
322
// Intrinsic form of cvtsi2ss. Two-address: $src1 is tied to $dst, and only
// $src2 appears in the assembly string.
let isTwoAddress = 1 in {
  def Int_CVTSI2SSrr : SSI<0x2A, MRMSrcReg, (outs VR128:$dst),
                           (ins VR128:$src1, GR32:$src2),
                           "cvtsi2ss {$src2, $dst|$dst, $src2}",
                           [(set VR128:$dst,
                             (int_x86_sse_cvtsi2ss VR128:$src1,
                                                   GR32:$src2))]>;
  def Int_CVTSI2SSrm : SSI<0x2A, MRMSrcMem, (outs VR128:$dst),
                           (ins VR128:$src1, i32mem:$src2),
                           "cvtsi2ss {$src2, $dst|$dst, $src2}",
                           [(set VR128:$dst,
                             (int_x86_sse_cvtsi2ss VR128:$src1,
                                                   (loadi32 addr:$src2)))]>;
}
335
// Scalar compare with a condition-code operand. Two-address, and defined
// without selection patterns.
let isTwoAddress = 1 in {
  def CMPSSrr : SSI<0xC2, MRMSrcReg, (outs FR32:$dst),
                    (ins FR32:$src1, FR32:$src, SSECC:$cc),
                    "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
  def CMPSSrm : SSI<0xC2, MRMSrcMem, (outs FR32:$dst),
                    (ins FR32:$src1, f32mem:$src, SSECC:$cc),
                    "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
}
345
// ucomiss on scalar FR32 operands; selected for X86cmp. No register outputs
// are declared.
def UCOMISSrr : PSI<0x2E, MRMSrcReg,
                    (outs), (ins FR32:$src1, FR32:$src2),
                    "ucomiss {$src2, $src1|$src1, $src2}",
                    [(X86cmp FR32:$src1, FR32:$src2)]>;
def UCOMISSrm : PSI<0x2E, MRMSrcMem,
                    (outs), (ins FR32:$src1, f32mem:$src2),
                    "ucomiss {$src2, $src1|$src1, $src2}",
                    [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>;
352
// Intrinsic form of the scalar compare, operating on XMM (VR128) operands and
// matched through int_x86_sse_cmp_ss. Two-address.
let isTwoAddress = 1 in {
  def Int_CMPSSrr : SSI<0xC2, MRMSrcReg, (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src, SSECC:$cc),
                        "cmp${cc}ss {$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse_cmp_ss VR128:$src1, VR128:$src,
                                              imm:$cc))]>;
  def Int_CMPSSrm : SSI<0xC2, MRMSrcMem, (outs VR128:$dst),
                        (ins VR128:$src1, f32mem:$src, SSECC:$cc),
                        "cmp${cc}ss {$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse_cmp_ss VR128:$src1, (load addr:$src),
                                              imm:$cc))]>;
}
366
// XMM-operand forms of ucomiss, selected for the X86ucomi node.
def Int_UCOMISSrr : PSI<0x2E, MRMSrcReg,
                        (outs), (ins VR128:$src1, VR128:$src2),
                        "ucomiss {$src2, $src1|$src1, $src2}",
                        [(X86ucomi (v4f32 VR128:$src1), VR128:$src2)]>;
def Int_UCOMISSrm : PSI<0x2E, MRMSrcMem,
                        (outs), (ins VR128:$src1, f128mem:$src2),
                        "ucomiss {$src2, $src1|$src1, $src2}",
                        [(X86ucomi (v4f32 VR128:$src1),
                                   (load addr:$src2))]>;

// XMM-operand forms of comiss, selected for the X86comi node.
def Int_COMISSrr : PSI<0x2F, MRMSrcReg,
                       (outs), (ins VR128:$src1, VR128:$src2),
                       "comiss {$src2, $src1|$src1, $src2}",
                       [(X86comi (v4f32 VR128:$src1), VR128:$src2)]>;
def Int_COMISSrm : PSI<0x2F, MRMSrcMem,
                       (outs), (ins VR128:$src1, f128mem:$src2),
                       "comiss {$src2, $src1|$src1, $src2}",
                       [(X86comi (v4f32 VR128:$src1),
                                 (load addr:$src2))]>;
380
381// Aliases of packed SSE1 instructions for scalar use. These all have names that
382// start with 'Fs'.
383
// Materializes the f32 constant +0.0 (fp32imm0) in an FR32 register by
// emitting a pxor of the destination with itself.
def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins),
                 "pxor $dst, $dst",
                 [(set FR32:$dst, fp32imm0)]>,
               Requires<[HasSSE1]>, TB, OpSize;
387 Requires<[HasSSE1]>, TB, OpSize;
388
// FR32 register-to-register copy implemented with movaps; the upper bits of
// the register are disregarded. Has no selection pattern.
def FsMOVAPSrr : PSI<0x28, MRMSrcReg,
                     (outs FR32:$dst), (ins FR32:$src),
                     "movaps {$src, $dst|$dst, $src}", []>;

// FR32 load from a 128-bit memory operand implemented with movaps; the upper
// bits are disregarded. Only matches loads accepted by alignedloadfsf32.
def FsMOVAPSrm : PSI<0x28, MRMSrcMem,
                     (outs FR32:$dst), (ins f128mem:$src),
                     "movaps {$src, $dst|$dst, $src}",
                     [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000399
// Scalar FR32 bitwise logic implemented with the packed SSE logical
// instructions. Every definition in this group is two-address.
let isTwoAddress = 1 in {
  // Register-register AND/OR/XOR; these three are commutable.
  let isCommutable = 1 in {
    def FsANDPSrr : PSI<0x54, MRMSrcReg, (outs FR32:$dst),
                        (ins FR32:$src1, FR32:$src2),
                        "andps {$src2, $dst|$dst, $src2}",
                        [(set FR32:$dst,
                          (X86fand FR32:$src1, FR32:$src2))]>;
    def FsORPSrr  : PSI<0x56, MRMSrcReg, (outs FR32:$dst),
                        (ins FR32:$src1, FR32:$src2),
                        "orps {$src2, $dst|$dst, $src2}",
                        [(set FR32:$dst,
                          (X86for FR32:$src1, FR32:$src2))]>;
    def FsXORPSrr : PSI<0x57, MRMSrcReg, (outs FR32:$dst),
                        (ins FR32:$src1, FR32:$src2),
                        "xorps {$src2, $dst|$dst, $src2}",
                        [(set FR32:$dst,
                          (X86fxor FR32:$src1, FR32:$src2))]>;
  }

  // Register-memory AND/OR/XOR. The memory operand is 128 bits wide and is
  // only folded when the load satisfies memopfsf32.
  def FsANDPSrm : PSI<0x54, MRMSrcMem, (outs FR32:$dst),
                      (ins FR32:$src1, f128mem:$src2),
                      "andps {$src2, $dst|$dst, $src2}",
                      [(set FR32:$dst,
                        (X86fand FR32:$src1, (memopfsf32 addr:$src2)))]>;
  def FsORPSrm  : PSI<0x56, MRMSrcMem, (outs FR32:$dst),
                      (ins FR32:$src1, f128mem:$src2),
                      "orps {$src2, $dst|$dst, $src2}",
                      [(set FR32:$dst,
                        (X86for FR32:$src1, (memopfsf32 addr:$src2)))]>;
  def FsXORPSrm : PSI<0x57, MRMSrcMem, (outs FR32:$dst),
                      (ins FR32:$src1, f128mem:$src2),
                      "xorps {$src2, $dst|$dst, $src2}",
                      [(set FR32:$dst,
                        (X86fxor FR32:$src1, (memopfsf32 addr:$src2)))]>;

  // AND-NOT forms; defined without selection patterns.
  def FsANDNPSrr : PSI<0x55, MRMSrcReg, (outs FR32:$dst),
                       (ins FR32:$src1, FR32:$src2),
                       "andnps {$src2, $dst|$dst, $src2}", []>;
  def FsANDNPSrm : PSI<0x55, MRMSrcMem, (outs FR32:$dst),
                       (ins FR32:$src1, f128mem:$src2),
                       "andnps {$src2, $dst|$dst, $src2}", []>;
}
434
/// basic_sse1_fp_binop_rm - Two-address SSE1 binary FP operations.
///
/// Each instantiation yields three flavours of the operation:
///   * SS      - scalar, on FR32, selected from OpNode;
///   * PS      - packed, on VR128 as v4f32, selected from OpNode;
///   * SS_Int  - scalar intrinsic, selected from F32Int; unlike the plain
///               scalar form it takes whole vectors and leaves the top
///               elements undefined.
/// Every flavour has a reg+reg (rr) and a reg+mem (rm) variant, giving six
/// "instructions" in total. The Commutable bit is applied to the rr variants
/// only.
///
let isTwoAddress = 1 in {
multiclass basic_sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, Intrinsic F32Int,
                                  bit Commutable = 0> {
  // --- Scalar, selected from OpNode ---
  def SSrr : SSI<opc, MRMSrcReg, (outs FR32:$dst),
                 (ins FR32:$src1, FR32:$src2),
                 !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
                 [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> {
    let isCommutable = Commutable;
  }
  def SSrm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
                 (ins FR32:$src1, f32mem:$src2),
                 !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
                 [(set FR32:$dst,
                   (OpNode FR32:$src1, (load addr:$src2)))]>;

  // --- Packed, selected from OpNode ---
  def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
                 (ins VR128:$src1, VR128:$src2),
                 !strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst,
                   (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
    let isCommutable = Commutable;
  }
  def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst),
                 (ins VR128:$src1, f128mem:$src2),
                 !strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst,
                   (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;

  // --- Scalar intrinsic, selected from F32Int ---
  def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
                     (ins VR128:$src1, VR128:$src2),
                     !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (F32Int VR128:$src1, VR128:$src2))]> {
    let isCommutable = Commutable;
  }
  def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
                     (ins VR128:$src1, ssmem:$src2),
                     !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (F32Int VR128:$src1, sse_load_f32:$src2))]>;
}
}
487
// Arithmetic instructions. ADD and MUL pass Commutable = 1; SUB and DIV rely
// on the default of 0.
defm ADD : basic_sse1_fp_binop_rm<0x58, "add", fadd,
                                  int_x86_sse_add_ss, 1>;
defm MUL : basic_sse1_fp_binop_rm<0x59, "mul", fmul,
                                  int_x86_sse_mul_ss, 1>;
defm SUB : basic_sse1_fp_binop_rm<0x5C, "sub", fsub,
                                  int_x86_sse_sub_ss>;
defm DIV : basic_sse1_fp_binop_rm<0x5E, "div", fdiv,
                                  int_x86_sse_div_ss>;
493
/// sse1_fp_binop_rm - Other SSE1 binops
///
/// This multiclass is like basic_sse1_fp_binop_rm, with the addition of
/// instructions for a full-vector intrinsic form.  Operations that map
/// onto C operators don't use this form since they just use the plain
/// vector form instead of having a separate vector intrinsic form.
///
/// Parameters:
///   opc        - opcode byte shared by all eight definitions.
///   OpcodeStr  - mnemonic stem; "ss" or "ps" is appended per form.
///   OpNode     - DAG node selected by the plain scalar and vector forms.
///   F32Int     - intrinsic selected by the scalar intrinsic forms.
///   V4F32Int   - intrinsic selected by the vector intrinsic forms.
///   Commutable - applied to the reg+reg variants only (default 0).
///
/// This provides a total of eight "instructions".
///
let isTwoAddress = 1 in {
multiclass sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
                            SDNode OpNode,
                            Intrinsic F32Int,
                            Intrinsic V4F32Int,
                            bit Commutable = 0> {

  // Scalar operation, reg+reg.
  def SSrr : SSI<opc, MRMSrcReg, (outs FR32:$dst),
                 (ins FR32:$src1, FR32:$src2),
                 !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
                 [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> {
    let isCommutable = Commutable;
  }

  // Scalar operation, reg+mem.
  def SSrm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
                 (ins FR32:$src1, f32mem:$src2),
                 !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
                 [(set FR32:$dst,
                   (OpNode FR32:$src1, (load addr:$src2)))]>;

  // Vector operation, reg+reg.
  def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
                 (ins VR128:$src1, VR128:$src2),
                 !strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst,
                   (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
    let isCommutable = Commutable;
  }

  // Vector operation, reg+mem.
  def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst),
                 (ins VR128:$src1, f128mem:$src2),
                 !strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst,
                   (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;

  // Intrinsic operation, reg+reg.
  def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
                     (ins VR128:$src1, VR128:$src2),
                     !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (F32Int VR128:$src1, VR128:$src2))]> {
    let isCommutable = Commutable;
  }

  // Intrinsic operation, reg+mem.
  def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
                     (ins VR128:$src1, ssmem:$src2),
                     !strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (F32Int VR128:$src1, sse_load_f32:$src2))]>;

  // Vector intrinsic operation, reg+reg.
  def PSrr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst),
                     (ins VR128:$src1, VR128:$src2),
                     !strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (V4F32Int VR128:$src1, VR128:$src2))]> {
    let isCommutable = Commutable;
  }

  // Vector intrinsic operation, reg+mem.
  // This is a packed ("ps") form, so the memory operand is a full 128-bit
  // vector: declare it as f128mem and fold only loads accepted by memopv4f32,
  // exactly as the plain PSrm form above does. A 32-bit operand with an
  // unchecked 'load' would mis-describe the operand size and could fold an
  // unaligned vector load into the instruction.
  def PSrm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst),
                     (ins VR128:$src1, f128mem:$src2),
                     !strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (V4F32Int VR128:$src1, (memopv4f32 addr:$src2)))]>;
}
}
560
// MAX/MIN instantiate the eight-form multiclass with both the scalar and the
// packed intrinsic. Commutable is left at its default of 0.
defm MAX : sse1_fp_binop_rm<0x5F, "max", X86fmax,
                            int_x86_sse_max_ss,
                            int_x86_sse_max_ps>;
defm MIN : sse1_fp_binop_rm<0x5D, "min", X86fmin,
                            int_x86_sse_min_ss,
                            int_x86_sse_min_ps>;
565
566//===----------------------------------------------------------------------===//
567// SSE packed FP Instructions
568
// Move Instructions

// movaps: register copy (no pattern), plus load and store forms that only
// match accesses satisfying alignedloadv4f32 / alignedstore.
def MOVAPSrr : PSI<0x28, MRMSrcReg,
                   (outs VR128:$dst), (ins VR128:$src),
                   "movaps {$src, $dst|$dst, $src}", []>;
def MOVAPSrm : PSI<0x28, MRMSrcMem,
                   (outs VR128:$dst), (ins f128mem:$src),
                   "movaps {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (alignedloadv4f32 addr:$src))]>;
def MOVAPSmr : PSI<0x29, MRMDestMem,
                   (outs), (ins f128mem:$dst, VR128:$src),
                   "movaps {$src, $dst|$dst, $src}",
                   [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;

// movups: register copy (no pattern), plus load and store forms that match
// the plain v4f32 load/store with no alignment predicate.
def MOVUPSrr : PSI<0x10, MRMSrcReg,
                   (outs VR128:$dst), (ins VR128:$src),
                   "movups {$src, $dst|$dst, $src}", []>;
def MOVUPSrm : PSI<0x10, MRMSrcMem,
                   (outs VR128:$dst), (ins f128mem:$src),
                   "movups {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (loadv4f32 addr:$src))]>;
def MOVUPSmr : PSI<0x11, MRMDestMem,
                   (outs), (ins f128mem:$dst, VR128:$src),
                   "movups {$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)]>;
588
// movups load/store selected directly from the int_x86_sse_loadu_ps and
// int_x86_sse_storeu_ps intrinsics.
def MOVUPSrm_Int : PSI<0x10, MRMSrcMem,
                       (outs VR128:$dst), (ins f128mem:$src),
                       "movups {$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse_loadu_ps addr:$src))]>;
def MOVUPSmr_Int : PSI<0x11, MRMDestMem,
                       (outs), (ins f128mem:$dst, VR128:$src),
                       "movups {$src, $dst|$dst, $src}",
                       [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000596
// movlps / movhps loads: shuffle an f64 loaded from memory (bitcast to
// v4f32) into $src1.  Both are two-address and carry AddedComplexity = 20.
let isTwoAddress = 1, AddedComplexity = 20 in {
  def MOVLPSrm : PSI<0x12, MRMSrcMem,
                     (outs VR128:$dst),
                     (ins VR128:$src1, f64mem:$src2),
                     "movlps {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (v4f32 (vector_shuffle
                               VR128:$src1,
                               (bc_v4f32
                                (v2f64 (scalar_to_vector
                                        (loadf64 addr:$src2)))),
                               MOVLP_shuffle_mask)))]>;

  def MOVHPSrm : PSI<0x16, MRMSrcMem,
                     (outs VR128:$dst),
                     (ins VR128:$src1, f64mem:$src2),
                     "movhps {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (v4f32 (vector_shuffle
                               VR128:$src1,
                               (bc_v4f32
                                (v2f64 (scalar_to_vector
                                        (loadf64 addr:$src2)))),
                               MOVHP_shuffle_mask)))]>;
} // isTwoAddress, AddedComplexity
615
// movlps store: writes element 0 of the source, viewed as v2f64, to memory.
def MOVLPSmr : PSI<0x13, MRMDestMem,
                   (outs),
                   (ins f64mem:$dst, VR128:$src),
                   "movlps {$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (bc_v2f64 (v4f32 VR128:$src)),
                                 (iPTR 0))),
                           addr:$dst)]>;

// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
def MOVHPSmr : PSI<0x17, MRMDestMem,
                   (outs),
                   (ins f64mem:$dst, VR128:$src),
                   "movhps {$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (v2f64 (vector_shuffle
                                         (bc_v2f64 (v4f32 VR128:$src)),
                                         (undef),
                                         UNPCKH_shuffle_mask)),
                                 (iPTR 0))),
                           addr:$dst)]>;
630
// Register-register movlhps / movhlps, selected from v4f32 shuffles of
// $src1 and $src2.  Both are two-address and carry AddedComplexity = 15.
let isTwoAddress = 1, AddedComplexity = 15 in {
  def MOVLHPSrr : PSI<0x16, MRMSrcReg,
                      (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      "movlhps {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
                                MOVHP_shuffle_mask)))]>;

  def MOVHLPSrr : PSI<0x12, MRMSrcReg,
                      (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      "movhlps {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
                                MOVHLPS_shuffle_mask)))]>;
} // isTwoAddress, AddedComplexity
646
647
648
649// Arithmetic
650
/// sse1_fp_unop_rm - SSE1 unops come in both scalar and vector forms.
///
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation.  This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a
/// scalar) and leaves the top elements undefined.
///
/// And, we have a special variant form for a full-vector intrinsic form.
///
/// These four forms can each have a reg or a mem operand, so there are a
/// total of eight "instructions".
///
/// Parameters:
///   opc         - opcode byte shared by all eight forms.
///   OpcodeStr   - mnemonic stem; "ss" or "ps" is appended per form.
///   OpNode      - DAG node matched by the plain scalar and vector forms.
///   F32Int      - intrinsic matched by the scalar intrinsic forms.
///   V4F32Int    - intrinsic matched by the full-vector intrinsic forms.
///   Commutable  - value assigned to isCommutable on the register forms.
///
multiclass sse1_fp_unop_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode,
                           Intrinsic F32Int,
                           Intrinsic V4F32Int,
                           bit Commutable = 0> {
  // Scalar operation, reg.
  def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
                !strconcat(OpcodeStr, "ss {$src, $dst|$dst, $src}"),
                [(set FR32:$dst, (OpNode FR32:$src))]> {
    let isCommutable = Commutable;
  }

  // Scalar operation, mem.
  def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
                !strconcat(OpcodeStr, "ss {$src, $dst|$dst, $src}"),
                [(set FR32:$dst, (OpNode (load addr:$src)))]>;

  // Vector operation, reg.
  def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "ps {$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]> {
    let isCommutable = Commutable;
  }

  // Vector operation, mem.
  def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                !strconcat(OpcodeStr, "ps {$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>;

  // Intrinsic operation, reg.
  def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "ss {$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F32Int VR128:$src))]> {
    let isCommutable = Commutable;
  }

  // Intrinsic operation, mem.
  def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
                    !strconcat(OpcodeStr, "ss {$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;

  // Vector intrinsic operation, reg
  def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "ps {$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (V4F32Int VR128:$src))]> {
    let isCommutable = Commutable;
  }

  // Vector intrinsic operation, mem.
  // The folded operand is a whole v4f32 vector, so it is declared as f128mem
  // (not f32mem) and matched with memopv4f32, exactly like the plain PSm
  // form above; a load that does not satisfy memopv4f32 is left to the
  // register form instead of being folded here.
  def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                    !strconcat(OpcodeStr, "ps {$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
}
716
// Square root.
defm SQRT  : sse1_fp_unop_rm<0x51, "sqrt",
                             fsqrt,
                             int_x86_sse_sqrt_ss,
                             int_x86_sse_sqrt_ps>;

// Reciprocal approximations.  Note that these typically require refinement
// in order to obtain suitable precision.
defm RSQRT : sse1_fp_unop_rm<0x52, "rsqrt",
                             X86frsqrt,
                             int_x86_sse_rsqrt_ss,
                             int_x86_sse_rsqrt_ps>;
defm RCP   : sse1_fp_unop_rm<0x53, "rcp",
                             X86frcp,
                             int_x86_sse_rcp_ss,
                             int_x86_sse_rcp_ps>;
727
// Logical
// Packed-single bitwise operations.  The patterns are written on v2i64
// and/or/xor nodes; all forms are two-address, and only the reg-reg
// and/or/xor forms are marked commutable.
let isTwoAddress = 1 in {
  let isCommutable = 1 in {
    def ANDPSrr : PSI<0x54, MRMSrcReg,
                      (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      "andps {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v2i64 (and VR128:$src1, VR128:$src2)))]>;
    def ORPSrr  : PSI<0x56, MRMSrcReg,
                      (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      "orps {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v2i64 (or VR128:$src1, VR128:$src2)))]>;
    def XORPSrr : PSI<0x57, MRMSrcReg,
                      (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      "xorps {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v2i64 (xor VR128:$src1, VR128:$src2)))]>;
  } // isCommutable

  // Reg+mem forms: $src1 is bitcast from v4f32, $src2 is a memopv2i64 load.
  def ANDPSrm : PSI<0x54, MRMSrcMem,
                    (outs VR128:$dst),
                    (ins VR128:$src1, f128mem:$src2),
                    "andps {$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (and (bc_v2i64 (v4f32 VR128:$src1)),
                           (memopv2i64 addr:$src2)))]>;
  def ORPSrm  : PSI<0x56, MRMSrcMem,
                    (outs VR128:$dst),
                    (ins VR128:$src1, f128mem:$src2),
                    "orps {$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (or (bc_v2i64 (v4f32 VR128:$src1)),
                          (memopv2i64 addr:$src2)))]>;
  def XORPSrm : PSI<0x57, MRMSrcMem,
                    (outs VR128:$dst),
                    (ins VR128:$src1, f128mem:$src2),
                    "xorps {$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (xor (bc_v2i64 (v4f32 VR128:$src1)),
                           (memopv2i64 addr:$src2)))]>;

  // and-not: ($src1 xor all-ones) and $src2.
  def ANDNPSrr : PSI<0x55, MRMSrcReg,
                     (outs VR128:$dst),
                     (ins VR128:$src1, VR128:$src2),
                     "andnps {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (v2i64 (and (xor VR128:$src1,
                                        (bc_v2i64 (v4i32 immAllOnesV))),
                                   VR128:$src2)))]>;
  def ANDNPSrm : PSI<0x55, MRMSrcMem,
                     (outs VR128:$dst),
                     (ins VR128:$src1,f128mem:$src2),
                     "andnps {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)),
                                        (bc_v2i64 (v4i32 immAllOnesV))),
                                   (memopv2i64 addr:$src2))))]>;
} // isTwoAddress
778
// Packed single-precision compare, selected from int_x86_sse_cmp_ps.  The
// condition code operand $cc is spliced into the mnemonic ("cmp${cc}ps").
let isTwoAddress = 1 in {
  def CMPPSrri : PSIi8<0xC2, MRMSrcReg,
                       (outs VR128:$dst),
                       (ins VR128:$src1, VR128:$src, SSECC:$cc),
                       "cmp${cc}ps {$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse_cmp_ps VR128:$src1,
                                             VR128:$src, imm:$cc))]>;

  // The folded 128-bit operand is matched with memopv4f32 rather than a
  // plain load, consistent with the other packed reg+mem forms in this file
  // (e.g. sse1_fp_unop_rm's PSm); loads that do not satisfy memopv4f32 are
  // left to the register form.
  def CMPPSrmi : PSIi8<0xC2, MRMSrcMem,
                       (outs VR128:$dst),
                       (ins VR128:$src1, f128mem:$src, SSECC:$cc),
                       "cmp${cc}ps {$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse_cmp_ps VR128:$src1,
                                             (memopv4f32 addr:$src),
                                             imm:$cc))]>;
}
791
// Shuffle and unpack instructions
//
// The reg+mem forms match their 128-bit memory operand with memopv4f32
// rather than a plain load, consistent with the other packed reg+mem forms
// in this file; loads that do not satisfy memopv4f32 are left to the
// register forms.
let isTwoAddress = 1 in {
  let isConvertibleToThreeAddress = 1 in // Convert to pshufd
    def SHUFPSrri : PSIi8<0xC6, MRMSrcReg,
                          (outs VR128:$dst),
                          (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
                          "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
                          [(set VR128:$dst,
                            (v4f32 (vector_shuffle
                                    VR128:$src1, VR128:$src2,
                                    SHUFP_shuffle_mask:$src3)))]>;
  def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem,
                        (outs VR128:$dst),
                        (ins VR128:$src1, f128mem:$src2, i32i8imm:$src3),
                        "shufps {$src3, $src2, $dst|$dst, $src2, $src3}",
                        [(set VR128:$dst,
                          (v4f32 (vector_shuffle
                                  VR128:$src1, (memopv4f32 addr:$src2),
                                  SHUFP_shuffle_mask:$src3)))]>;

  let AddedComplexity = 10 in {
    def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
                         (outs VR128:$dst),
                         (ins VR128:$src1, VR128:$src2),
                         "unpckhps {$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v4f32 (vector_shuffle
                                   VR128:$src1, VR128:$src2,
                                   UNPCKH_shuffle_mask)))]>;
    def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
                         (outs VR128:$dst),
                         (ins VR128:$src1, f128mem:$src2),
                         "unpckhps {$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v4f32 (vector_shuffle
                                   VR128:$src1, (memopv4f32 addr:$src2),
                                   UNPCKH_shuffle_mask)))]>;

    def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
                         (outs VR128:$dst),
                         (ins VR128:$src1, VR128:$src2),
                         "unpcklps {$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v4f32 (vector_shuffle
                                   VR128:$src1, VR128:$src2,
                                   UNPCKL_shuffle_mask)))]>;
    def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
                         (outs VR128:$dst),
                         (ins VR128:$src1, f128mem:$src2),
                         "unpcklps {$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v4f32 (vector_shuffle
                                   VR128:$src1, (memopv4f32 addr:$src2),
                                   UNPCKL_shuffle_mask)))]>;
  } // AddedComplexity
} // isTwoAddress
844
// Mask creation
def MOVMSKPSrr : PSI<0x50, MRMSrcReg,
                     (outs GR32:$dst), (ins VR128:$src),
                     "movmskps {$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (int_x86_sse_movmsk_ps VR128:$src))]>;

// movmskpd is the packed-double variant and is selected from an SSE2
// intrinsic, so it uses the PDI format class (as the other packed-double
// instructions in this file do) instead of PSI; with PSI it would be
// described identically to movmskps apart from its name.
def MOVMSKPDrr : PDI<0x50, MRMSrcReg,
                     (outs GR32:$dst), (ins VR128:$src),
                     "movmskpd {$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (int_x86_sse2_movmsk_pd VR128:$src))]>;
852
// Prefetching loads.
// All four share opcode 0x18 and differ only in the MRM<n>m form.  None has
// a selection pattern.
// TODO: no intrinsics for these?
def PREFETCHT0  : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
                      "prefetcht0 $src", []>;
def PREFETCHT1  : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
                      "prefetcht1 $src", []>;
def PREFETCHT2  : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
                      "prefetcht2 $src", []>;
def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
                      "prefetchnta $src", []>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000859
// Non-temporal stores
def MOVNTPSmr : PSI<0x2B, MRMDestMem,
                    (outs), (ins i128mem:$dst, VR128:$src),
                    "movntps {$src, $dst|$dst, $src}",
                    [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;

// Load, store, and memory fence
// NOTE(review): this def uses a memory ModRM form (MRM7m) but declares no
// operands; confirm the code emitter produces the intended sfence encoding.
def SFENCE : PSI<0xAE, MRM7m,
                 (outs), (ins),
                 "sfence",
                 [(int_x86_sse_sfence)]>;

// MXCSR register
def LDMXCSR : PSI<0xAE, MRM2m,
                  (outs), (ins i32mem:$src),
                  "ldmxcsr $src",
                  [(int_x86_sse_ldmxcsr addr:$src)]>;
def STMXCSR : PSI<0xAE, MRM3m,
                  (outs), (ins i32mem:$dst),
                  "stmxcsr $dst",
                  [(int_x86_sse_stmxcsr addr:$dst)]>;
873
// Alias instructions that map zero vector to pxor / xorp* for sse.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
let isReMaterializable = 1 in
  def V_SET0 : PSI<0x57, MRMInitReg,
                   (outs VR128:$dst), (ins),
                   "xorps $dst, $dst",
                   [(set VR128:$dst, (v4f32 immAllZerosV))]>;
880
// FR32 to 128-bit vector conversion.
// Both forms are selected from scalar_to_vector producing a v4f32.
def MOVSS2PSrr : SSI<0x10, MRMSrcReg,
                     (outs VR128:$dst), (ins FR32:$src),
                     "movss {$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v4f32 (scalar_to_vector FR32:$src)))]>;
def MOVSS2PSrm : SSI<0x10, MRMSrcMem,
                     (outs VR128:$dst), (ins f32mem:$src),
                     "movss {$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
890
// FIXME: coalescing may not be able to eliminate this movss, because the
// source and destination register classes are different.  We really want to
// write this pattern like this:
// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
//           (f32 FR32:$src)>;
def MOVPS2SSrr : SSI<0x10, MRMSrcReg,
                     (outs FR32:$dst), (ins VR128:$src),
                     "movss {$src, $dst|$dst, $src}",
                     [(set FR32:$dst,
                       (vector_extract (v4f32 VR128:$src), (iPTR 0)))]>;

// Store of element 0 of a v4f32 register.
def MOVPS2SSmr : SSI<0x11, MRMDestMem,
                     (outs), (ins f32mem:$dst, VR128:$src),
                     "movss {$src, $dst|$dst, $src}",
                     [(store (f32 (vector_extract (v4f32 VR128:$src),
                                                  (iPTR 0))),
                             addr:$dst)]>;
904
905
// Move to lower bits of a VR128, leaving upper bits alone.
// Three operand (but two address) aliases.
let isTwoAddress = 1 in {
  // FR32 source; carries no selection pattern.
  def MOVLSS2PSrr : SSI<0x10, MRMSrcReg,
                        (outs VR128:$dst),
                        (ins VR128:$src1, FR32:$src2),
                        "movss {$src2, $dst|$dst, $src2}",
                        []>;

  // VR128 source; selected from a v4f32 shuffle with MOVL_shuffle_mask.
  let AddedComplexity = 15 in
    def MOVLPSrr : SSI<0x10, MRMSrcReg,
                       (outs VR128:$dst),
                       (ins VR128:$src1, VR128:$src2),
                       "movss {$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                         (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
                                 MOVL_shuffle_mask)))]>;
}
921
// Move to the lower bits of a VR128 while zeroing the upper bits.
// Loading from memory automatically zeroes the upper bits.
let AddedComplexity = 20 in
  def MOVZSS2PSrm : SSI<0x10, MRMSrcMem,
                        (outs VR128:$dst), (ins f32mem:$src),
                        "movss {$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (v4f32 (vector_shuffle
                                  immAllZerosV,
                                  (v4f32 (scalar_to_vector
                                          (loadf32 addr:$src))),
                                  MOVL_shuffle_mask)))]>;
930
931
932//===----------------------------------------------------------------------===//
933// SSE2 Instructions
934//===----------------------------------------------------------------------===//
935
// Move Instructions
// Scalar double-precision moves on FR64.  The reg-reg form carries no
// selection pattern; the memory forms match loadf64 / store.
def MOVSDrr : SDI<0x10, MRMSrcReg,
                  (outs FR64:$dst), (ins FR64:$src),
                  "movsd {$src, $dst|$dst, $src}",
                  []>;
def MOVSDrm : SDI<0x10, MRMSrcMem,
                  (outs FR64:$dst), (ins f64mem:$src),
                  "movsd {$src, $dst|$dst, $src}",
                  [(set FR64:$dst, (loadf64 addr:$src))]>;
def MOVSDmr : SDI<0x11, MRMDestMem,
                  (outs), (ins f64mem:$dst, FR64:$src),
                  "movsd {$src, $dst|$dst, $src}",
                  [(store FR64:$src, addr:$dst)]>;
945
// Conversion instructions

// f64 -> i32, matched from fp_to_sint.
def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg,
                      (outs GR32:$dst), (ins FR64:$src),
                      "cvttsd2si {$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (fp_to_sint FR64:$src))]>;
def CVTTSD2SIrm : SDI<0x2C, MRMSrcMem,
                      (outs GR32:$dst), (ins f64mem:$src),
                      "cvttsd2si {$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;

// f64 -> f32, matched from fround.
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg,
                     (outs FR32:$dst), (ins FR64:$src),
                     "cvtsd2ss {$src, $dst|$dst, $src}",
                     [(set FR32:$dst, (fround FR64:$src))]>;
def CVTSD2SSrm : SDI<0x5A, MRMSrcMem,
                     (outs FR32:$dst), (ins f64mem:$src),
                     "cvtsd2ss {$src, $dst|$dst, $src}",
                     [(set FR32:$dst, (fround (loadf64 addr:$src)))]>;

// i32 -> f64, matched from sint_to_fp.
def CVTSI2SDrr : SDI<0x2A, MRMSrcReg,
                     (outs FR64:$dst), (ins GR32:$src),
                     "cvtsi2sd {$src, $dst|$dst, $src}",
                     [(set FR64:$dst, (sint_to_fp GR32:$src))]>;
def CVTSI2SDrm : SDI<0x2A, MRMSrcMem,
                     (outs FR64:$dst), (ins i32mem:$src),
                     "cvtsi2sd {$src, $dst|$dst, $src}",
                     [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
965
// SSE2 instructions with XS prefix
// f32 -> f64 extension.  These are built on the generic I class with the XS
// prefix and an explicit HasSSE2 requirement.
def CVTSS2SDrr : I<0x5A, MRMSrcReg,
                   (outs FR64:$dst), (ins FR32:$src),
                   "cvtss2sd {$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (fextend FR32:$src))]>,
                 XS, Requires<[HasSSE2]>;
def CVTSS2SDrm : I<0x5A, MRMSrcMem,
                   (outs FR64:$dst), (ins f32mem:$src),
                   "cvtss2sd {$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (extloadf32 addr:$src))]>,
                 XS, Requires<[HasSSE2]>;
975
// Match intrinsics which expect XMM operand(s).
def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg,
                         (outs GR32:$dst), (ins VR128:$src),
                         "cvtsd2si {$src, $dst|$dst, $src}",
                         [(set GR32:$dst,
                           (int_x86_sse2_cvtsd2si VR128:$src))]>;
def Int_CVTSD2SIrm : SDI<0x2D, MRMSrcMem,
                         (outs GR32:$dst), (ins f128mem:$src),
                         "cvtsd2si {$src, $dst|$dst, $src}",
                         [(set GR32:$dst,
                           (int_x86_sse2_cvtsd2si (load addr:$src)))]>;

// Aliases for intrinsics
def Int_CVTTSD2SIrr : SDI<0x2C, MRMSrcReg,
                          (outs GR32:$dst), (ins VR128:$src),
                          "cvttsd2si {$src, $dst|$dst, $src}",
                          [(set GR32:$dst,
                            (int_x86_sse2_cvttsd2si VR128:$src))]>;
def Int_CVTTSD2SIrm : SDI<0x2C, MRMSrcMem,
                          (outs GR32:$dst), (ins f128mem:$src),
                          "cvttsd2si {$src, $dst|$dst, $src}",
                          [(set GR32:$dst,
                            (int_x86_sse2_cvttsd2si (load addr:$src)))]>;
994
// Comparison instructions
// Scalar-double compare with a condition-code operand spliced into the
// mnemonic.  Neither form carries a selection pattern.
let isTwoAddress = 1 in {
  def CMPSDrr : SDI<0xC2, MRMSrcReg,
                    (outs FR64:$dst),
                    (ins FR64:$src1, FR64:$src, SSECC:$cc),
                    "cmp${cc}sd {$src, $dst|$dst, $src}",
                    []>;
  def CMPSDrm : SDI<0xC2, MRMSrcMem,
                    (outs FR64:$dst),
                    (ins FR64:$src1, f64mem:$src, SSECC:$cc),
                    "cmp${cc}sd {$src, $dst|$dst, $src}",
                    []>;
}

// ucomisd on FR64 operands, selected from X86cmp.
def UCOMISDrr : PDI<0x2E, MRMSrcReg,
                    (outs), (ins FR64:$src1, FR64:$src2),
                    "ucomisd {$src2, $src1|$src1, $src2}",
                    [(X86cmp FR64:$src1, FR64:$src2)]>;
def UCOMISDrm : PDI<0x2E, MRMSrcMem,
                    (outs), (ins FR64:$src1, f64mem:$src2),
                    "ucomisd {$src2, $src1|$src1, $src2}",
                    [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>;
1011
// Aliases to match intrinsics which expect XMM operand(s).
let isTwoAddress = 1 in {
  def Int_CMPSDrr : SDI<0xC2, MRMSrcReg,
                        (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src, SSECC:$cc),
                        "cmp${cc}sd {$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse2_cmp_sd VR128:$src1,
                                               VR128:$src, imm:$cc))]>;
  def Int_CMPSDrm : SDI<0xC2, MRMSrcMem,
                        (outs VR128:$dst),
                        (ins VR128:$src1, f64mem:$src, SSECC:$cc),
                        "cmp${cc}sd {$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse2_cmp_sd VR128:$src1,
                                               (load addr:$src), imm:$cc))]>;
}

// ucomisd on VR128 (v2f64) operands, selected from X86ucomi.
def Int_UCOMISDrr : PDI<0x2E, MRMSrcReg,
                        (outs), (ins VR128:$src1, VR128:$src2),
                        "ucomisd {$src2, $src1|$src1, $src2}",
                        [(X86ucomi (v2f64 VR128:$src1),
                                   (v2f64 VR128:$src2))]>;
def Int_UCOMISDrm : PDI<0x2E, MRMSrcMem,
                        (outs), (ins VR128:$src1, f128mem:$src2),
                        "ucomisd {$src2, $src1|$src1, $src2}",
                        [(X86ucomi (v2f64 VR128:$src1),
                                   (load addr:$src2))]>;

// comisd on VR128 (v2f64) operands, selected from X86comi.
def Int_COMISDrr : PDI<0x2F, MRMSrcReg,
                       (outs), (ins VR128:$src1, VR128:$src2),
                       "comisd {$src2, $src1|$src1, $src2}",
                       [(X86comi (v2f64 VR128:$src1),
                                 (v2f64 VR128:$src2))]>;
def Int_COMISDrm : PDI<0x2F, MRMSrcMem,
                       (outs), (ins VR128:$src1, f128mem:$src2),
                       "comisd {$src2, $src1|$src1, $src2}",
                       [(X86comi (v2f64 VR128:$src1),
                                 (load addr:$src2))]>;
1039
// Aliases of packed SSE2 instructions for scalar use. These all have names that
// start with 'Fs'.

// Alias instructions that map fld0 to pxor for sse.
def FsFLD0SD : I<0xEF, MRMInitReg,
                 (outs FR64:$dst), (ins),
                 "pxor $dst, $dst",
                 [(set FR64:$dst, fpimm0)]>,
               Requires<[HasSSE2]>, TB, OpSize;

// Alias instruction to do FR64 reg-to-reg copy using movapd. Upper bits are
// disregarded.
def FsMOVAPDrr : PDI<0x28, MRMSrcReg,
                     (outs FR64:$dst), (ins FR64:$src),
                     "movapd {$src, $dst|$dst, $src}",
                     []>;

// Alias instruction to load FR64 from f128mem using movapd. Upper bits are
// disregarded.
def FsMOVAPDrm : PDI<0x28, MRMSrcMem,
                     (outs FR64:$dst), (ins f128mem:$src),
                     "movapd {$src, $dst|$dst, $src}",
                     [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001058
// Alias bitwise logical operations using SSE logical ops on packed FP values.
// All forms are two-address; only the reg-reg and/or/xor forms are marked
// commutable.
let isTwoAddress = 1 in {
  let isCommutable = 1 in {
    def FsANDPDrr : PDI<0x54, MRMSrcReg,
                        (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
                        "andpd {$src2, $dst|$dst, $src2}",
                        [(set FR64:$dst,
                          (X86fand FR64:$src1, FR64:$src2))]>;
    def FsORPDrr  : PDI<0x56, MRMSrcReg,
                        (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
                        "orpd {$src2, $dst|$dst, $src2}",
                        [(set FR64:$dst,
                          (X86for FR64:$src1, FR64:$src2))]>;
    def FsXORPDrr : PDI<0x57, MRMSrcReg,
                        (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
                        "xorpd {$src2, $dst|$dst, $src2}",
                        [(set FR64:$dst,
                          (X86fxor FR64:$src1, FR64:$src2))]>;
  } // isCommutable

  // Reg+mem forms: the memory operand is f128mem matched via memopfsf64.
  def FsANDPDrm : PDI<0x54, MRMSrcMem,
                      (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
                      "andpd {$src2, $dst|$dst, $src2}",
                      [(set FR64:$dst,
                        (X86fand FR64:$src1, (memopfsf64 addr:$src2)))]>;
  def FsORPDrm  : PDI<0x56, MRMSrcMem,
                      (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
                      "orpd {$src2, $dst|$dst, $src2}",
                      [(set FR64:$dst,
                        (X86for FR64:$src1, (memopfsf64 addr:$src2)))]>;
  def FsXORPDrm : PDI<0x57, MRMSrcMem,
                      (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
                      "xorpd {$src2, $dst|$dst, $src2}",
                      [(set FR64:$dst,
                        (X86fxor FR64:$src1, (memopfsf64 addr:$src2)))]>;

  // and-not forms; neither carries a selection pattern.
  def FsANDNPDrr : PDI<0x55, MRMSrcReg,
                       (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
                       "andnpd {$src2, $dst|$dst, $src2}",
                       []>;
  def FsANDNPDrm : PDI<0x55, MRMSrcMem,
                       (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
                       "andnpd {$src2, $dst|$dst, $src2}",
                       []>;
} // isTwoAddress
1093
/// basic_sse2_fp_binop_rm - SSE2 binops come in both scalar and vector forms.
///
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation.  This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a scalar)
/// and leaves the top elements undefined.
///
/// These three forms can each be reg+reg or reg+mem, so there are a total of
/// six "instructions".
///
/// Parameters:
///   opc         - opcode byte shared by all six forms.
///   OpcodeStr   - mnemonic stem; "sd" or "pd" is appended per form.
///   OpNode      - DAG node matched by the plain scalar and vector forms.
///   F64Int      - intrinsic matched by the scalar intrinsic forms.
///   Commutable  - value assigned to isCommutable on the reg+reg forms.
///
let isTwoAddress = 1 in {
multiclass basic_sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, Intrinsic F64Int,
                                  bit Commutable = 0> {
  // Scalar operation, reg+reg.
  def SDrr : SDI<opc, MRMSrcReg,
                 (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
                 !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
                 [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> {
    let isCommutable = Commutable;
  }

  // Scalar operation, reg+mem.
  def SDrm : SDI<opc, MRMSrcMem,
                 (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2),
                 !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
                 [(set FR64:$dst,
                   (OpNode FR64:$src1, (load addr:$src2)))]>;

  // Vector operation, reg+reg.
  def PDrr : PDI<opc, MRMSrcReg,
                 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                 !strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst,
                   (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
    let isCommutable = Commutable;
  }

  // Vector operation, reg+mem.
  def PDrm : PDI<opc, MRMSrcMem,
                 (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                 !strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst,
                   (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;

  // Intrinsic operation, reg+reg.
  def SDrr_Int : SDI<opc, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                     !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (F64Int VR128:$src1, VR128:$src2))]> {
    let isCommutable = Commutable;
  }

  // Intrinsic operation, reg+mem.
  def SDrm_Int : SDI<opc, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                     !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (F64Int VR128:$src1, sse_load_f64:$src2))]>;
}
}
1146
// Arithmetic instructions
// ADD and MUL pass Commutable = 1; SUB and DIV rely on the default of 0.
defm ADD : basic_sse2_fp_binop_rm<0x58, "add",
                                  fadd, int_x86_sse2_add_sd, 1>;
defm MUL : basic_sse2_fp_binop_rm<0x59, "mul",
                                  fmul, int_x86_sse2_mul_sd, 1>;
defm SUB : basic_sse2_fp_binop_rm<0x5C, "sub",
                                  fsub, int_x86_sse2_sub_sd>;
defm DIV : basic_sse2_fp_binop_rm<0x5E, "div",
                                  fdiv, int_x86_sse2_div_sd>;
1152
/// sse2_fp_binop_rm - Other SSE2 binops
///
/// This multiclass is like basic_sse2_fp_binop_rm, with the addition of
/// instructions for a full-vector intrinsic form. Operations that map
/// onto C operators don't use this form since they just use the plain
/// vector form instead of having a separate vector intrinsic form.
///
/// Parameters:
///   opc        - opcode byte shared by every form.
///   OpcodeStr  - mnemonic stem; "sd" or "pd" is appended per form.
///   OpNode     - DAG node matched by the plain scalar and vector forms.
///   F64Int     - intrinsic matched by the scalar *_Int forms.
///   V2F64Int   - intrinsic matched by the full-vector *_Int forms.
///   Commutable - copied into isCommutable on the reg+reg forms.
///
/// This provides a total of eight "instructions".
///
let isTwoAddress = 1 in {
multiclass sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
                            SDNode OpNode,
                            Intrinsic F64Int,
                            Intrinsic V2F64Int,
                            bit Commutable = 0> {

  // Scalar operation, reg+reg.
  def SDrr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
                 !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
                 [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> {
    let isCommutable = Commutable;
  }

  // Scalar operation, reg+mem.
  def SDrm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2),
                 !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
                 [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;

  // Vector operation, reg+reg.
  def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                 !strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
    let isCommutable = Commutable;
  }

  // Vector operation, reg+mem.
  def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                 !strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst, (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;

  // Intrinsic operation, reg+reg.
  def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                     !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]> {
    let isCommutable = Commutable;
  }

  // Intrinsic operation, reg+mem.
  def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                     !strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst, (F64Int VR128:$src1,
                                               sse_load_f64:$src2))]>;

  // Vector intrinsic operation, reg+reg.
  def PDrr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                     !strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst, (V2F64Int VR128:$src1, VR128:$src2))]> {
    let isCommutable = Commutable;
  }

  // Vector intrinsic operation, reg+mem.
  // The packed form reads a full 128-bit vector, so the memory operand is
  // f128mem (not f64mem) and the load is matched with memopv2f64, exactly as
  // in PDrm above.
  def PDrm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                     !strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst, (V2F64Int VR128:$src1,
                                                 (memopv2f64 addr:$src2)))]>;
}
}
1219
// MAX and MIN instantiate sse2_fp_binop_rm; neither passes the optional
// Commutable argument.
defm MAX : sse2_fp_binop_rm<0x5F, "max", X86fmax, int_x86_sse2_max_sd,
                            int_x86_sse2_max_pd>;
defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin, int_x86_sse2_min_sd,
                            int_x86_sse2_min_pd>;
1224
1225//===----------------------------------------------------------------------===//
1226// SSE packed FP Instructions
1227
// Move Instructions

// MOVAPD: the load/store forms match the aligned load/store fragments; the
// reg-reg form carries no selection pattern.
def MOVAPDrr : PDI<0x28, MRMSrcReg,
                   (outs VR128:$dst), (ins VR128:$src),
                   "movapd {$src, $dst|$dst, $src}",
                   []>;
def MOVAPDrm : PDI<0x28, MRMSrcMem,
                   (outs VR128:$dst), (ins f128mem:$src),
                   "movapd {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (alignedloadv2f64 addr:$src))]>;
def MOVAPDmr : PDI<0x29, MRMDestMem,
                   (outs), (ins f128mem:$dst, VR128:$src),
                   "movapd {$src, $dst|$dst, $src}",
                   [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;

// MOVUPD: the load/store forms match the plain v2f64 load and store.
def MOVUPDrr : PDI<0x10, MRMSrcReg,
                   (outs VR128:$dst), (ins VR128:$src),
                   "movupd {$src, $dst|$dst, $src}",
                   []>;
def MOVUPDrm : PDI<0x10, MRMSrcMem,
                   (outs VR128:$dst), (ins f128mem:$src),
                   "movupd {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (loadv2f64 addr:$src))]>;
def MOVUPDmr : PDI<0x11, MRMDestMem,
                   (outs), (ins f128mem:$dst, VR128:$src),
                   "movupd {$src, $dst|$dst, $src}",
                   [(store (v2f64 VR128:$src), addr:$dst)]>;
1247
// Intrinsic forms of MOVUPD load and store: same opcodes as MOVUPDrm/MOVUPDmr,
// but matched from the loadu_pd / storeu_pd intrinsics.
def MOVUPDrm_Int : PDI<0x10, MRMSrcMem,
                       (outs VR128:$dst), (ins f128mem:$src),
                       "movupd {$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>;
def MOVUPDmr_Int : PDI<0x11, MRMDestMem,
                       (outs), (ins f128mem:$dst, VR128:$src),
                       "movupd {$src, $dst|$dst, $src}",
                       [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>;

Dan Gohmanf17a25c2007-07-18 16:29:46 +00001255
// MOVLPD / MOVHPD loads: shuffle $src1 with an f64 loaded from memory and
// promoted to a vector, selected by the MOVLP / MOVHP shuffle masks.
let isTwoAddress = 1, AddedComplexity = 20 in {
  def MOVLPDrm : PDI<0x12, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                     "movlpd {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (v2f64 (vector_shuffle
                               VR128:$src1,
                               (scalar_to_vector (loadf64 addr:$src2)),
                               MOVLP_shuffle_mask)))]>;
  def MOVHPDrm : PDI<0x16, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                     "movhpd {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (v2f64 (vector_shuffle
                               VR128:$src1,
                               (scalar_to_vector (loadf64 addr:$src2)),
                               MOVHP_shuffle_mask)))]>;
} // isTwoAddress, AddedComplexity
1274
// Store element 0 of a v2f64 register.
def MOVLPDmr : PDI<0x13, MRMDestMem,
                   (outs), (ins f64mem:$dst, VR128:$src),
                   "movlpd {$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
                           addr:$dst)]>;

// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
def MOVHPDmr : PDI<0x17, MRMDestMem,
                   (outs), (ins f64mem:$dst, VR128:$src),
                   "movhpd {$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (v2f64 (vector_shuffle VR128:$src, (undef),
                                         UNPCKH_shuffle_mask)),
                                 (iPTR 0))),
                           addr:$dst)]>;
1288
// SSE2 instructions without OpSize prefix
def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src),
                       "cvtdq2ps {$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
                     TB, Requires<[HasSSE2]>;
def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem,
                       (outs VR128:$dst), (ins i128mem:$src),
                       "cvtdq2ps {$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtdq2ps
                          (bitconvert (memopv2i64 addr:$src))))]>,
                     TB, Requires<[HasSSE2]>;

// SSE2 instructions with XS prefix
def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src),
                       "cvtdq2pd {$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
                     XS, Requires<[HasSSE2]>;
def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem,
                       (outs VR128:$dst), (ins i64mem:$src),
                       "cvtdq2pd {$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtdq2pd
                          (bitconvert (memopv2i64 addr:$src))))]>,
                     XS, Requires<[HasSSE2]>;
1310
// Packed float -> packed int conversion (PDI form).
def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg,
                         (outs VR128:$dst), (ins VR128:$src),
                         "cvtps2dq {$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (int_x86_sse2_cvtps2dq VR128:$src))]>;
def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem,
                         (outs VR128:$dst), (ins f128mem:$src),
                         "cvtps2dq {$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (int_x86_sse2_cvtps2dq (load addr:$src)))]>;

// SSE2 packed instructions with XS prefix
def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg,
                        (outs VR128:$dst), (ins VR128:$src),
                        "cvttps2dq {$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse2_cvttps2dq VR128:$src))]>,
                      XS, Requires<[HasSSE2]>;
def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem,
                        (outs VR128:$dst), (ins f128mem:$src),
                        "cvttps2dq {$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse2_cvttps2dq (load addr:$src)))]>,
                      XS, Requires<[HasSSE2]>;

// SSE2 packed instructions with XD prefix
def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src),
                       "cvtpd2dq {$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtpd2dq VR128:$src))]>,
                     XD, Requires<[HasSSE2]>;
def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem,
                       (outs VR128:$dst), (ins f128mem:$src),
                       "cvtpd2dq {$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtpd2dq (load addr:$src)))]>,
                     XD, Requires<[HasSSE2]>;

// Truncating packed double -> packed int conversion (PDI form).
def Int_CVTTPD2DQrr : PDI<0xE6, MRMSrcReg,
                          (outs VR128:$dst), (ins VR128:$src),
                          "cvttpd2dq {$src, $dst|$dst, $src}",
                          [(set VR128:$dst,
                            (int_x86_sse2_cvttpd2dq VR128:$src))]>;
def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem,
                          (outs VR128:$dst), (ins f128mem:$src),
                          "cvttpd2dq {$src, $dst|$dst, $src}",
                          [(set VR128:$dst,
                            (int_x86_sse2_cvttpd2dq (load addr:$src)))]>;

1347
// SSE2 instructions without OpSize prefix
def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtps2pd {$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
                     TB, Requires<[HasSSE2]>;
// The source is a memory operand, so the encoding format must be MRMSrcMem
// (as in every other *rm definition in this file), not MRMSrcReg.
def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                       "cvtps2pd {$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtps2pd
                                          (load addr:$src)))]>,
                     TB, Requires<[HasSSE2]>;

def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "cvtpd2ps {$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
// Memory-source form: MRMSrcMem, for the same reason as Int_CVTPS2PDrm.
def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                         "cvtpd2ps {$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
                                            (load addr:$src)))]>;
1366
// Match intrinsics which expect XMM operand(s).
// Aliases for intrinsics
//
// Every definition here is two-address: $src1 is tied to $dst and the
// converted value comes from $src2.
let isTwoAddress = 1 in {
def Int_CVTSI2SDrr : SDI<0x2A, MRMSrcReg,
                         (outs VR128:$dst), (ins VR128:$src1, GR32:$src2),
                         "cvtsi2sd {$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (int_x86_sse2_cvtsi2sd VR128:$src1, GR32:$src2))]>;
def Int_CVTSI2SDrm : SDI<0x2A, MRMSrcMem,
                         (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2),
                         "cvtsi2sd {$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (int_x86_sse2_cvtsi2sd VR128:$src1,
                                                  (loadi32 addr:$src2)))]>;

def Int_CVTSD2SSrr : SDI<0x5A, MRMSrcReg,
                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                         "cvtsd2ss {$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))]>;
def Int_CVTSD2SSrm : SDI<0x5A, MRMSrcMem,
                         (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                         "cvtsd2ss {$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (int_x86_sse2_cvtsd2ss VR128:$src1,
                                                  (load addr:$src2)))]>;

def Int_CVTSS2SDrr : I<0x5A, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                       "cvtss2sd {$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))]>,
                     XS, Requires<[HasSSE2]>;
def Int_CVTSS2SDrm : I<0x5A, MRMSrcMem,
                       (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
                       "cvtss2sd {$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtss2sd VR128:$src1,
                                                (load addr:$src2)))]>,
                     XS, Requires<[HasSSE2]>;
}
1403
1404// Arithmetic
1405
/// sse2_fp_unop_rm - SSE2 unops come in both scalar and vector forms.
///
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation. This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a
/// scalar) and leaves the top elements undefined.
///
/// And, we have a special variant form for a full-vector intrinsic form.
///
/// These four forms can each have a reg or a mem operand, so there are a
/// total of eight "instructions".
///
/// Parameters:
///   opc        - opcode byte shared by every form.
///   OpcodeStr  - mnemonic stem; "sd" or "pd" is appended per form.
///   OpNode     - DAG node matched by the plain scalar and vector forms.
///   F64Int     - intrinsic matched by the scalar *_Int forms.
///   V2F64Int   - intrinsic matched by the full-vector *_Int forms.
///   Commutable - copied into isCommutable on the register forms.
///
multiclass sse2_fp_unop_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode,
                           Intrinsic F64Int,
                           Intrinsic V2F64Int,
                           bit Commutable = 0> {
  // Scalar operation, reg.
  def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
                !strconcat(OpcodeStr, "sd {$src, $dst|$dst, $src}"),
                [(set FR64:$dst, (OpNode FR64:$src))]> {
    let isCommutable = Commutable;
  }

  // Scalar operation, mem.
  def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
                !strconcat(OpcodeStr, "sd {$src, $dst|$dst, $src}"),
                [(set FR64:$dst, (OpNode (load addr:$src)))]>;

  // Vector operation, reg.
  def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "pd {$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]> {
    let isCommutable = Commutable;
  }

  // Vector operation, mem.
  def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                !strconcat(OpcodeStr, "pd {$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>;

  // Intrinsic operation, reg.
  def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "sd {$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F64Int VR128:$src))]> {
    let isCommutable = Commutable;
  }

  // Intrinsic operation, mem.
  def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
                    !strconcat(OpcodeStr, "sd {$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;

  // Vector intrinsic operation, reg
  def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "pd {$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (V2F64Int VR128:$src))]> {
    let isCommutable = Commutable;
  }

  // Vector intrinsic operation, mem
  // The packed form reads a full 128-bit vector, so the memory operand is
  // f128mem (not f64mem) and the load is matched with memopv2f64, exactly as
  // in PDm above.
  def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                    !strconcat(OpcodeStr, "pd {$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>;
}
1471
// Square root: instantiates sse2_fp_unop_rm with opcode 0x51, the fsqrt node
// and the sqrt_sd / sqrt_pd intrinsics.
defm SQRT : sse2_fp_unop_rm<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd,
                            int_x86_sse2_sqrt_pd>;
1475
1476// There is no f64 version of the reciprocal approximation instructions.
1477
// Logical
//
// Bitwise AND / OR / XOR / ANDN on packed doubles. The patterns operate on
// v2i64: register operands are v2f64 values passed through bc_v2i64, and
// memory operands are matched with memopv2i64.
let isTwoAddress = 1 in {
  // Only the reg+reg AND/OR/XOR forms are marked commutable.
  let isCommutable = 1 in {
    def ANDPDrr : PDI<0x54, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "andpd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (and (bc_v2i64 (v2f64 VR128:$src1)),
                             (bc_v2i64 (v2f64 VR128:$src2))))]>;
    def ORPDrr  : PDI<0x56, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "orpd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (or (bc_v2i64 (v2f64 VR128:$src1)),
                            (bc_v2i64 (v2f64 VR128:$src2))))]>;
    def XORPDrr : PDI<0x57, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "xorpd {$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (xor (bc_v2i64 (v2f64 VR128:$src1)),
                             (bc_v2i64 (v2f64 VR128:$src2))))]>;
  }

  def ANDPDrm : PDI<0x54, MRMSrcMem,
                    (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                    "andpd {$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (and (bc_v2i64 (v2f64 VR128:$src1)),
                           (memopv2i64 addr:$src2)))]>;
  def ORPDrm  : PDI<0x56, MRMSrcMem,
                    (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                    "orpd {$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (or (bc_v2i64 (v2f64 VR128:$src1)),
                          (memopv2i64 addr:$src2)))]>;
  def XORPDrm : PDI<0x57, MRMSrcMem,
                    (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                    "xorpd {$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (xor (bc_v2i64 (v2f64 VR128:$src1)),
                           (memopv2i64 addr:$src2)))]>;

  // ANDN complements $src1 (vnot) before the AND.
  def ANDNPDrr : PDI<0x55, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                     "andnpd {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
                            (bc_v2i64 (v2f64 VR128:$src2))))]>;
  def ANDNPDrm : PDI<0x55, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                     "andnpd {$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
                            (memopv2i64 addr:$src2)))]>;
}
1532
// Packed-double compare. The SSECC operand $cc is spliced into the mnemonic
// via ${cc} and matched as the immediate argument of the cmp_pd intrinsic.
let isTwoAddress = 1 in {
  def CMPPDrri : PDIi8<0xC2, MRMSrcReg,
                       (outs VR128:$dst),
                       (ins VR128:$src1, VR128:$src, SSECC:$cc),
                       "cmp${cc}pd {$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cmp_pd VR128:$src1, VR128:$src,
                                              imm:$cc))]>;
  def CMPPDrmi : PDIi8<0xC2, MRMSrcMem,
                       (outs VR128:$dst),
                       (ins VR128:$src1, f128mem:$src, SSECC:$cc),
                       "cmp${cc}pd {$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cmp_pd VR128:$src1, (load addr:$src),
                                              imm:$cc))]>;
}
1545
// Shuffle and unpack instructions
let isTwoAddress = 1 in {
  // SHUFPD: the shuffle mask is carried as the i8 immediate $src3.
  def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
                        (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2, i8imm:$src3),
                        "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
                        [(set VR128:$dst,
                          (v2f64 (vector_shuffle
                                  VR128:$src1, VR128:$src2,
                                  SHUFP_shuffle_mask:$src3)))]>;
  def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
                        (outs VR128:$dst),
                        (ins VR128:$src1, f128mem:$src2, i8imm:$src3),
                        "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}",
                        [(set VR128:$dst,
                          (v2f64 (vector_shuffle
                                  VR128:$src1, (load addr:$src2),
                                  SHUFP_shuffle_mask:$src3)))]>;

  // The unpack forms are given extra pattern complexity.
  let AddedComplexity = 10 in {
    def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                         "unpckhpd {$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v2f64 (vector_shuffle
                                   VR128:$src1, VR128:$src2,
                                   UNPCKH_shuffle_mask)))]>;
    def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
                         (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                         "unpckhpd {$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v2f64 (vector_shuffle
                                   VR128:$src1, (load addr:$src2),
                                   UNPCKH_shuffle_mask)))]>;

    def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                         "unpcklpd {$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v2f64 (vector_shuffle
                                   VR128:$src1, VR128:$src2,
                                   UNPCKL_shuffle_mask)))]>;
    def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
                         (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                         "unpcklpd {$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v2f64 (vector_shuffle
                                   VR128:$src1, (load addr:$src2),
                                   UNPCKL_shuffle_mask)))]>;
  } // AddedComplexity
} // isTwoAddress
1595
1596
1597//===----------------------------------------------------------------------===//
1598// SSE integer instructions
1599
// Move Instructions
//
// The load/store selection patterns below are deliberately left commented
// out, so none of these definitions currently has an active pattern.
def MOVDQArr : PDI<0x6F, MRMSrcReg,
                   (outs VR128:$dst), (ins VR128:$src),
                   "movdqa {$src, $dst|$dst, $src}",
                   []>;
def MOVDQArm : PDI<0x6F, MRMSrcMem,
                   (outs VR128:$dst), (ins i128mem:$src),
                   "movdqa {$src, $dst|$dst, $src}",
                   [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;
def MOVDQAmr : PDI<0x7F, MRMDestMem,
                   (outs), (ins i128mem:$dst, VR128:$src),
                   "movdqa {$src, $dst|$dst, $src}",
                   [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
def MOVDQUrm : I<0x6F, MRMSrcMem,
                 (outs VR128:$dst), (ins i128mem:$src),
                 "movdqu {$src, $dst|$dst, $src}",
                 [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
               XS, Requires<[HasSSE2]>;
def MOVDQUmr : I<0x7F, MRMDestMem,
                 (outs), (ins i128mem:$dst, VR128:$src),
                 "movdqu {$src, $dst|$dst, $src}",
                 [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
               XS, Requires<[HasSSE2]>;
1617
// Intrinsic forms of MOVDQU load and store, matched from the loadu_dq /
// storeu_dq intrinsics.
def MOVDQUrm_Int : I<0x6F, MRMSrcMem,
                     (outs VR128:$dst), (ins i128mem:$src),
                     "movdqu {$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>,
                   XS, Requires<[HasSSE2]>;
def MOVDQUmr_Int : I<0x7F, MRMDestMem,
                     (outs), (ins i128mem:$dst, VR128:$src),
                     "movdqu {$src, $dst|$dst, $src}",
                     [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
                   XS, Requires<[HasSSE2]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001627
1628let isTwoAddress = 1 in {
1629
/// PDI_binop_rm_int - SSE2 binary operator matched from an intrinsic.
///
/// Defines a reg+reg form (rr) and a reg+mem form (rm). Commutable is copied
/// into isCommutable on the rr form only. The rm form matches its memory
/// operand as a bitconverted memopv2i64.
multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
                            bit Commutable = 0> {
  def rr : PDI<opc, MRMSrcReg,
               (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
               !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
               [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]> {
    let isCommutable = Commutable;
  }
  def rm : PDI<opc, MRMSrcMem,
               (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
               !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
               [(set VR128:$dst,
                 (IntId VR128:$src1,
                        (bitconvert (memopv2i64 addr:$src2))))]>;
}
1642
/// PDI_binop_rmi_int - Intrinsic binary operator with an extra immediate form.
///
/// rr and rm use opcode `opc` and take the second operand from a register or
/// from memory. ri uses opcode `opc2` with format `ImmForm` and takes an
/// immediate, which the pattern wraps as (scalar_to_vector (i32 imm)).
multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
                             string OpcodeStr, Intrinsic IntId> {
  def rr : PDI<opc, MRMSrcReg,
               (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
               !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
               [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
  def rm : PDI<opc, MRMSrcMem,
               (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
               !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
               [(set VR128:$dst,
                 (IntId VR128:$src1,
                        (bitconvert (memopv2i64 addr:$src2))))]>;
  def ri : PDIi8<opc2, ImmForm,
                 (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst,
                   (IntId VR128:$src1,
                          (scalar_to_vector (i32 imm:$src2))))]>;
}
1657
1658
/// PDI_binop_rm - Simple SSE2 binary operator.
///
/// Matches OpNode with result type OpVT. Commutable is copied into
/// isCommutable on the rr form only; the rm form matches its memory operand
/// as a bitconverted memopv2i64.
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        ValueType OpVT, bit Commutable = 0> {
  def rr : PDI<opc, MRMSrcReg,
               (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
               !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
               [(set VR128:$dst,
                 (OpVT (OpNode VR128:$src1, VR128:$src2)))]> {
    let isCommutable = Commutable;
  }
  def rm : PDI<opc, MRMSrcMem,
               (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
               !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
               [(set VR128:$dst,
                 (OpVT (OpNode VR128:$src1,
                               (bitconvert (memopv2i64 addr:$src2)))))]>;
}
1672
/// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64.
///
/// Same shape as PDI_binop_rm, except that the memory operand is matched
/// directly as memopv2i64 with no bitconvert around it.
///
/// FIXME: we could eliminate this and use PDI_binop_rm instead if tblgen knew
/// to collapse (bitconvert VT to VT) into its operand.
///
multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              bit Commutable = 0> {
  def rr : PDI<opc, MRMSrcReg,
               (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
               !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
               [(set VR128:$dst,
                 (v2i64 (OpNode VR128:$src1, VR128:$src2)))]> {
    let isCommutable = Commutable;
  }
  def rm : PDI<opc, MRMSrcMem,
               (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
               !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
               [(set VR128:$dst,
                 (OpNode VR128:$src1, (memopv2i64 addr:$src2)))]>;
}
1689
1690} // isTwoAddress
1691
// 128-bit Integer Arithmetic
//
// A trailing 1 sets the multiclass's Commutable argument; the subtract
// families omit it.

// Wrapping add.
defm PADDB   : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>;
defm PADDW   : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>;
defm PADDD   : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>;
defm PADDQ   : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>;

// Saturating add (intrinsic-only).
defm PADDSB  : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>;
defm PADDSW  : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>;
defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>;
defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>;

// Wrapping subtract.
defm PSUBB   : PDI_binop_rm<0xF8, "psubb", sub, v16i8>;
defm PSUBW   : PDI_binop_rm<0xF9, "psubw", sub, v8i16>;
defm PSUBD   : PDI_binop_rm<0xFA, "psubd", sub, v4i32>;
defm PSUBQ   : PDI_binop_rm_v2i64<0xFB, "psubq", sub>;

// Saturating subtract (intrinsic-only).
defm PSUBSB  : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b>;
defm PSUBSW  : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w>;
defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b>;
defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w>;

// Multiplies.
defm PMULLW  : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>;

defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>;
defm PMULHW  : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w , 1>;
defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>;

defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>;

// Averages.
defm PAVGB   : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>;
defm PAVGW   : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>;
1724
1725
// Packed integer minimum / maximum (unsigned bytes, signed words).
defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>;
defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>;
defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>;
defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;

// Sum of absolute differences.  The SSE2 encoding is 66 0F F6 /r; 0xE0 is
// already taken by pavgb, so using it here would emit the wrong instruction.
defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
1731
1732
1733defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw", int_x86_sse2_psll_w>;
1734defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld", int_x86_sse2_psll_d>;
1735defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq", int_x86_sse2_psll_q>;
1736
1737defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw", int_x86_sse2_psrl_w>;
1738defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld", int_x86_sse2_psrl_d>;
1739defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq", int_x86_sse2_psrl_q>;
1740
1741defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", int_x86_sse2_psra_w>;
1742defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", int_x86_sse2_psra_d>;
1743// PSRAQ doesn't exist in SSE[1-3].
1744
1745// 128-bit logical shifts.
1746let isTwoAddress = 1 in {
1747 def PSLLDQri : PDIi8<0x73, MRM7r,
Evan Chengb783fa32007-07-19 01:14:50 +00001748 (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001749 "pslldq {$src2, $dst|$dst, $src2}", []>;
1750 def PSRLDQri : PDIi8<0x73, MRM3r,
Evan Chengb783fa32007-07-19 01:14:50 +00001751 (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001752 "psrldq {$src2, $dst|$dst, $src2}", []>;
1753 // PSRADQri doesn't exist in SSE[1-3].
1754}
1755
1756let Predicates = [HasSSE2] in {
1757 def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
1758 (v2i64 (PSLLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
1759 def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
1760 (v2i64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
1761 def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
1762 (v2f64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
1763}
1764
1765// Logical
1766defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
1767defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or , 1>;
1768defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;
1769
let isTwoAddress = 1 in {
  // pandn computes (~$src1) & $src2.  Because the complement applies to the
  // first operand only, these records are not marked commutable.
  def PANDNrr : PDI<0xDF, MRMSrcReg,
                    (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                    "pandn {$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
                                              VR128:$src2)))]>;

  // Fold the memory operand through memopv2i64, like the other integer
  // register/memory forms in this file, instead of a plain (load ...).
  // NOTE(review): memopv2i64 is expected to reject loads that are not
  // sufficiently aligned for a 128-bit memory operand - confirm against the
  // fragment's definition.
  def PANDNrm : PDI<0xDF, MRMSrcMem,
                    (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
                    "pandn {$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
                                              (memopv2i64 addr:$src2))))]>;
}
1783
1784// SSE2 Integer comparison
1785defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b>;
1786defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w>;
1787defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d>;
1788defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>;
1789defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
1790defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
1791
1792// Pack instructions
1793defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128>;
1794defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>;
1795defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;
1796
1797// Shuffle and unpack instructions
1798def PSHUFDri : PDIi8<0x70, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001799 (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001800 "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
1801 [(set VR128:$dst, (v4i32 (vector_shuffle
1802 VR128:$src1, (undef),
1803 PSHUFD_shuffle_mask:$src2)))]>;
1804def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001805 (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001806 "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
1807 [(set VR128:$dst, (v4i32 (vector_shuffle
Dan Gohman4a4f1512007-07-18 20:23:34 +00001808 (bc_v4i32(memopv2i64 addr:$src1)),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001809 (undef),
1810 PSHUFD_shuffle_mask:$src2)))]>;
1811
1812// SSE2 with ImmT == Imm8 and XS prefix.
1813def PSHUFHWri : Ii8<0x70, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001814 (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001815 "pshufhw {$src2, $src1, $dst|$dst, $src1, $src2}",
1816 [(set VR128:$dst, (v8i16 (vector_shuffle
1817 VR128:$src1, (undef),
1818 PSHUFHW_shuffle_mask:$src2)))]>,
1819 XS, Requires<[HasSSE2]>;
1820def PSHUFHWmi : Ii8<0x70, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001821 (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001822 "pshufhw {$src2, $src1, $dst|$dst, $src1, $src2}",
1823 [(set VR128:$dst, (v8i16 (vector_shuffle
Dan Gohman4a4f1512007-07-18 20:23:34 +00001824 (bc_v8i16 (memopv2i64 addr:$src1)),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001825 (undef),
1826 PSHUFHW_shuffle_mask:$src2)))]>,
1827 XS, Requires<[HasSSE2]>;
1828
1829// SSE2 with ImmT == Imm8 and XD prefix.
1830def PSHUFLWri : Ii8<0x70, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001831 (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001832 "pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}",
1833 [(set VR128:$dst, (v8i16 (vector_shuffle
1834 VR128:$src1, (undef),
1835 PSHUFLW_shuffle_mask:$src2)))]>,
1836 XD, Requires<[HasSSE2]>;
1837def PSHUFLWmi : Ii8<0x70, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001838 (outs VR128:$dst), (ins i128mem:$src1, i32i8imm:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001839 "pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}",
1840 [(set VR128:$dst, (v8i16 (vector_shuffle
Dan Gohman4a4f1512007-07-18 20:23:34 +00001841 (bc_v8i16 (memopv2i64 addr:$src1)),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001842 (undef),
1843 PSHUFLW_shuffle_mask:$src2)))]>,
1844 XD, Requires<[HasSSE2]>;
1845
1846
1847let isTwoAddress = 1 in {
1848 def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001849 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001850 "punpcklbw {$src2, $dst|$dst, $src2}",
1851 [(set VR128:$dst,
1852 (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
1853 UNPCKL_shuffle_mask)))]>;
1854 def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001855 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001856 "punpcklbw {$src2, $dst|$dst, $src2}",
1857 [(set VR128:$dst,
1858 (v16i8 (vector_shuffle VR128:$src1,
Dan Gohman4a4f1512007-07-18 20:23:34 +00001859 (bc_v16i8 (memopv2i64 addr:$src2)),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001860 UNPCKL_shuffle_mask)))]>;
1861 def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001862 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001863 "punpcklwd {$src2, $dst|$dst, $src2}",
1864 [(set VR128:$dst,
1865 (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
1866 UNPCKL_shuffle_mask)))]>;
1867 def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001868 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001869 "punpcklwd {$src2, $dst|$dst, $src2}",
1870 [(set VR128:$dst,
1871 (v8i16 (vector_shuffle VR128:$src1,
Dan Gohman4a4f1512007-07-18 20:23:34 +00001872 (bc_v8i16 (memopv2i64 addr:$src2)),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001873 UNPCKL_shuffle_mask)))]>;
1874 def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001875 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001876 "punpckldq {$src2, $dst|$dst, $src2}",
1877 [(set VR128:$dst,
1878 (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
1879 UNPCKL_shuffle_mask)))]>;
1880 def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001881 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001882 "punpckldq {$src2, $dst|$dst, $src2}",
1883 [(set VR128:$dst,
1884 (v4i32 (vector_shuffle VR128:$src1,
Dan Gohman4a4f1512007-07-18 20:23:34 +00001885 (bc_v4i32 (memopv2i64 addr:$src2)),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001886 UNPCKL_shuffle_mask)))]>;
1887 def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001888 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001889 "punpcklqdq {$src2, $dst|$dst, $src2}",
1890 [(set VR128:$dst,
1891 (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
1892 UNPCKL_shuffle_mask)))]>;
1893 def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001894 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001895 "punpcklqdq {$src2, $dst|$dst, $src2}",
1896 [(set VR128:$dst,
1897 (v2i64 (vector_shuffle VR128:$src1,
Dan Gohman4a4f1512007-07-18 20:23:34 +00001898 (memopv2i64 addr:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001899 UNPCKL_shuffle_mask)))]>;
1900
1901 def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001902 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001903 "punpckhbw {$src2, $dst|$dst, $src2}",
1904 [(set VR128:$dst,
1905 (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
1906 UNPCKH_shuffle_mask)))]>;
1907 def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001908 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001909 "punpckhbw {$src2, $dst|$dst, $src2}",
1910 [(set VR128:$dst,
1911 (v16i8 (vector_shuffle VR128:$src1,
Dan Gohman4a4f1512007-07-18 20:23:34 +00001912 (bc_v16i8 (memopv2i64 addr:$src2)),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001913 UNPCKH_shuffle_mask)))]>;
1914 def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001915 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001916 "punpckhwd {$src2, $dst|$dst, $src2}",
1917 [(set VR128:$dst,
1918 (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
1919 UNPCKH_shuffle_mask)))]>;
1920 def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001921 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001922 "punpckhwd {$src2, $dst|$dst, $src2}",
1923 [(set VR128:$dst,
1924 (v8i16 (vector_shuffle VR128:$src1,
Dan Gohman4a4f1512007-07-18 20:23:34 +00001925 (bc_v8i16 (memopv2i64 addr:$src2)),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001926 UNPCKH_shuffle_mask)))]>;
1927 def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001928 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001929 "punpckhdq {$src2, $dst|$dst, $src2}",
1930 [(set VR128:$dst,
1931 (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
1932 UNPCKH_shuffle_mask)))]>;
1933 def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001934 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001935 "punpckhdq {$src2, $dst|$dst, $src2}",
1936 [(set VR128:$dst,
1937 (v4i32 (vector_shuffle VR128:$src1,
Dan Gohman4a4f1512007-07-18 20:23:34 +00001938 (bc_v4i32 (memopv2i64 addr:$src2)),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001939 UNPCKH_shuffle_mask)))]>;
1940 def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001941 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001942 "punpckhqdq {$src2, $dst|$dst, $src2}",
1943 [(set VR128:$dst,
1944 (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
1945 UNPCKH_shuffle_mask)))]>;
1946 def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001947 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001948 "punpckhqdq {$src2, $dst|$dst, $src2}",
1949 [(set VR128:$dst,
1950 (v2i64 (vector_shuffle VR128:$src1,
Dan Gohman4a4f1512007-07-18 20:23:34 +00001951 (memopv2i64 addr:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001952 UNPCKH_shuffle_mask)))]>;
1953}
1954
1955// Extract / Insert
1956def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001957 (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001958 "pextrw {$src2, $src1, $dst|$dst, $src1, $src2}",
1959 [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
1960 (iPTR imm:$src2)))]>;
1961let isTwoAddress = 1 in {
1962 def PINSRWrri : PDIi8<0xC4, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001963 (outs VR128:$dst), (ins VR128:$src1,
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001964 GR32:$src2, i32i8imm:$src3),
1965 "pinsrw {$src3, $src2, $dst|$dst, $src2, $src3}",
1966 [(set VR128:$dst,
1967 (v8i16 (X86pinsrw (v8i16 VR128:$src1),
1968 GR32:$src2, (iPTR imm:$src3))))]>;
1969 def PINSRWrmi : PDIi8<0xC4, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001970 (outs VR128:$dst), (ins VR128:$src1,
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001971 i16mem:$src2, i32i8imm:$src3),
1972 "pinsrw {$src3, $src2, $dst|$dst, $src2, $src3}",
1973 [(set VR128:$dst,
1974 (v8i16 (X86pinsrw (v8i16 VR128:$src1),
1975 (i32 (anyext (loadi16 addr:$src2))),
1976 (iPTR imm:$src3))))]>;
1977}
1978
1979// Mask creation
Evan Chengb783fa32007-07-19 01:14:50 +00001980def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001981 "pmovmskb {$src, $dst|$dst, $src}",
1982 [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
1983
1984// Conditional store
Evan Chengb783fa32007-07-19 01:14:50 +00001985def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001986 "maskmovdqu {$mask, $src|$src, $mask}",
1987 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>,
1988 Imp<[EDI],[]>;
1989
1990// Non-temporal stores
// movntpd stores packed doubles, so its destination is an f128mem operand.
def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movntpd {$src, $dst|$dst, $src}",
                    [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;

// movntdq stores packed integers, so its destination is an i128mem operand.
def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                    "movntdq {$src, $dst|$dst, $src}",
                    [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;

// movnti stores a 32-bit general purpose register; it has no operand-size
// prefix, hence the plain I template with TB.
def MOVNTImr  :   I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                    "movnti {$src, $dst|$dst, $src}",
                    [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
                  TB, Requires<[HasSSE2]>;
2001
2002// Flush cache
Evan Chengb783fa32007-07-19 01:14:50 +00002003def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002004 "clflush $src", [(int_x86_sse2_clflush addr:$src)]>,
2005 TB, Requires<[HasSSE2]>;
2006
2007// Load, store, and memory fence
Evan Chengb783fa32007-07-19 01:14:50 +00002008def LFENCE : I<0xAE, MRM5m, (outs), (ins),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002009 "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
Evan Chengb783fa32007-07-19 01:14:50 +00002010def MFENCE : I<0xAE, MRM6m, (outs), (ins),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002011 "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
2012
2013
// Alias instruction that maps an all-ones vector to pcmpeqd.
// FIXME: remove when we can teach regalloc that pcmpeqd reg, reg is ok.
2016let isReMaterializable = 1 in
Evan Chengb783fa32007-07-19 01:14:50 +00002017 def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002018 "pcmpeqd $dst, $dst",
2019 [(set VR128:$dst, (v2f64 immAllOnesV))]>;
2020
2021// FR64 to 128-bit vector conversion.
Evan Chengb783fa32007-07-19 01:14:50 +00002022def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR64:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002023 "movsd {$src, $dst|$dst, $src}",
2024 [(set VR128:$dst,
2025 (v2f64 (scalar_to_vector FR64:$src)))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00002026def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002027 "movsd {$src, $dst|$dst, $src}",
2028 [(set VR128:$dst,
2029 (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
2030
Evan Chengb783fa32007-07-19 01:14:50 +00002031def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002032 "movd {$src, $dst|$dst, $src}",
2033 [(set VR128:$dst,
2034 (v4i32 (scalar_to_vector GR32:$src)))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00002035def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002036 "movd {$src, $dst|$dst, $src}",
2037 [(set VR128:$dst,
2038 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
2039
Evan Chengb783fa32007-07-19 01:14:50 +00002040def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002041 "movd {$src, $dst|$dst, $src}",
2042 [(set FR32:$dst, (bitconvert GR32:$src))]>;
2043
Evan Chengb783fa32007-07-19 01:14:50 +00002044def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002045 "movd {$src, $dst|$dst, $src}",
2046 [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
2047
2048// SSE2 instructions with XS prefix
Evan Chengb783fa32007-07-19 01:14:50 +00002049def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002050 "movq {$src, $dst|$dst, $src}",
2051 [(set VR128:$dst,
2052 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
2053 Requires<[HasSSE2]>;
Evan Chengb783fa32007-07-19 01:14:50 +00002054def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002055 "movq {$src, $dst|$dst, $src}",
2056 [(store (i64 (vector_extract (v2i64 VR128:$src),
2057 (iPTR 0))), addr:$dst)]>;
2058
// FIXME: may not be able to eliminate this movsd with coalescing because the
// src and dest register classes are different. We really want to write this
// pattern like this:
// def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
//           (f64 FR64:$src)>;
Evan Chengb783fa32007-07-19 01:14:50 +00002064def MOVPD2SDrr : SDI<0x10, MRMSrcReg, (outs FR64:$dst), (ins VR128:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002065 "movsd {$src, $dst|$dst, $src}",
2066 [(set FR64:$dst, (vector_extract (v2f64 VR128:$src),
2067 (iPTR 0)))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00002068def MOVPD2SDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002069 "movsd {$src, $dst|$dst, $src}",
2070 [(store (f64 (vector_extract (v2f64 VR128:$src),
2071 (iPTR 0))), addr:$dst)]>;
Evan Chengb783fa32007-07-19 01:14:50 +00002072def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002073 "movd {$src, $dst|$dst, $src}",
2074 [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
2075 (iPTR 0)))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00002076def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002077 "movd {$src, $dst|$dst, $src}",
2078 [(store (i32 (vector_extract (v4i32 VR128:$src),
2079 (iPTR 0))), addr:$dst)]>;
2080
Evan Chengb783fa32007-07-19 01:14:50 +00002081def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002082 "movd {$src, $dst|$dst, $src}",
2083 [(set GR32:$dst, (bitconvert FR32:$src))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00002084def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002085 "movd {$src, $dst|$dst, $src}",
2086 [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
2087
2088
2089// Move to lower bits of a VR128, leaving upper bits alone.
2090// Three operand (but two address) aliases.
2091let isTwoAddress = 1 in {
2092 def MOVLSD2PDrr : SDI<0x10, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00002093 (outs VR128:$dst), (ins VR128:$src1, FR64:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002094 "movsd {$src2, $dst|$dst, $src2}", []>;
2095
2096 let AddedComplexity = 15 in
2097 def MOVLPDrr : SDI<0x10, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00002098 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002099 "movsd {$src2, $dst|$dst, $src2}",
2100 [(set VR128:$dst,
2101 (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
2102 MOVL_shuffle_mask)))]>;
2103}
2104
2105// Store / copy lower 64-bits of a XMM register.
Evan Chengb783fa32007-07-19 01:14:50 +00002106def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002107 "movq {$src, $dst|$dst, $src}",
2108 [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
2109
// Move to the lower bits of a VR128, zeroing the upper bits.
// Loading from memory automatically zeroes the upper bits.
2112let AddedComplexity = 20 in
Evan Chengb783fa32007-07-19 01:14:50 +00002113 def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002114 "movsd {$src, $dst|$dst, $src}",
2115 [(set VR128:$dst,
2116 (v2f64 (vector_shuffle immAllZerosV,
2117 (v2f64 (scalar_to_vector
2118 (loadf64 addr:$src))),
2119 MOVL_shuffle_mask)))]>;
2120
2121let AddedComplexity = 15 in
2122// movd / movq to XMM register zero-extends
Evan Chengb783fa32007-07-19 01:14:50 +00002123def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002124 "movd {$src, $dst|$dst, $src}",
2125 [(set VR128:$dst,
2126 (v4i32 (vector_shuffle immAllZerosV,
2127 (v4i32 (scalar_to_vector GR32:$src)),
2128 MOVL_shuffle_mask)))]>;
2129let AddedComplexity = 20 in
Evan Chengb783fa32007-07-19 01:14:50 +00002130def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002131 "movd {$src, $dst|$dst, $src}",
2132 [(set VR128:$dst,
2133 (v4i32 (vector_shuffle immAllZerosV,
2134 (v4i32 (scalar_to_vector (loadi32 addr:$src))),
2135 MOVL_shuffle_mask)))]>;
2136
2137// Moving from XMM to XMM but still clear upper 64 bits.
2138let AddedComplexity = 15 in
Evan Chengb783fa32007-07-19 01:14:50 +00002139def MOVZQI2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002140 "movq {$src, $dst|$dst, $src}",
2141 [(set VR128:$dst, (int_x86_sse2_movl_dq VR128:$src))]>,
2142 XS, Requires<[HasSSE2]>;
2143let AddedComplexity = 20 in
Evan Chengb783fa32007-07-19 01:14:50 +00002144def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002145 "movq {$src, $dst|$dst, $src}",
2146 [(set VR128:$dst, (int_x86_sse2_movl_dq
Dan Gohman4a4f1512007-07-18 20:23:34 +00002147 (bitconvert (memopv2i64 addr:$src))))]>,
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002148 XS, Requires<[HasSSE2]>;
2149
2150
2151//===----------------------------------------------------------------------===//
2152// SSE3 Instructions
2153//===----------------------------------------------------------------------===//
2154
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002155// Move Instructions
Evan Chengb783fa32007-07-19 01:14:50 +00002156def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002157 "movshdup {$src, $dst|$dst, $src}",
2158 [(set VR128:$dst, (v4f32 (vector_shuffle
2159 VR128:$src, (undef),
2160 MOVSHDUP_shuffle_mask)))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00002161def MOVSHDUPrm : S3SI<0x16, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002162 "movshdup {$src, $dst|$dst, $src}",
2163 [(set VR128:$dst, (v4f32 (vector_shuffle
Dan Gohman4a4f1512007-07-18 20:23:34 +00002164 (memopv4f32 addr:$src), (undef),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002165 MOVSHDUP_shuffle_mask)))]>;
2166
Evan Chengb783fa32007-07-19 01:14:50 +00002167def MOVSLDUPrr : S3SI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002168 "movsldup {$src, $dst|$dst, $src}",
2169 [(set VR128:$dst, (v4f32 (vector_shuffle
2170 VR128:$src, (undef),
2171 MOVSLDUP_shuffle_mask)))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00002172def MOVSLDUPrm : S3SI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002173 "movsldup {$src, $dst|$dst, $src}",
2174 [(set VR128:$dst, (v4f32 (vector_shuffle
Dan Gohman4a4f1512007-07-18 20:23:34 +00002175 (memopv4f32 addr:$src), (undef),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002176 MOVSLDUP_shuffle_mask)))]>;
2177
Evan Chengb783fa32007-07-19 01:14:50 +00002178def MOVDDUPrr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002179 "movddup {$src, $dst|$dst, $src}",
2180 [(set VR128:$dst, (v2f64 (vector_shuffle
2181 VR128:$src, (undef),
2182 SSE_splat_lo_mask)))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00002183def MOVDDUPrm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002184 "movddup {$src, $dst|$dst, $src}",
2185 [(set VR128:$dst,
2186 (v2f64 (vector_shuffle
2187 (scalar_to_vector (loadf64 addr:$src)),
2188 (undef),
2189 SSE_splat_lo_mask)))]>;
2190
2191// Arithmetic
let isTwoAddress = 1 in {
  // Packed single-precision add/subtract, selected from the SSE3 intrinsic.
  def ADDSUBPSrr : S3DI<0xD0, MRMSrcReg,
                        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                        "addsubps {$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1,
                                           VR128:$src2))]>;
  // The memory operand is folded through memopv4f32 (as MOVSHDUPrm and
  // MOVSLDUPrm do) rather than a plain (load ...).
  // NOTE(review): the memop fragments are expected to refuse under-aligned
  // loads, which a 128-bit memory operand cannot tolerate - confirm.
  def ADDSUBPSrm : S3DI<0xD0, MRMSrcMem,
                        (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                        "addsubps {$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1,
                                           (memopv4f32 addr:$src2)))]>;

  // Packed double-precision add/subtract, selected from the SSE3 intrinsic.
  def ADDSUBPDrr : S3I<0xD0, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                       "addsubpd {$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1,
                                          VR128:$src2))]>;
  // Same folding rule as ADDSUBPSrm, using the v2f64 fragment.
  // NOTE(review): memopv2f64 is assumed to be defined next to memopv4f32.
  def ADDSUBPDrm : S3I<0xD0, MRMSrcMem,
                       (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                       "addsubpd {$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1,
                                          (memopv2f64 addr:$src2)))]>;
}
2214
Evan Chengb783fa32007-07-19 01:14:50 +00002215def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002216 "lddqu {$src, $dst|$dst, $src}",
2217 [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
2218
2219// Horizontal ops
// S3D_Intrr / S3D_Intrm - v4f32 intrinsic binary operations on the S3DI
// template, in register/register and register/memory form.
class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
  : S3DI<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
         !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
         [(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>;
// The memory operand goes through memopv4f32 instead of a plain (load ...),
// matching the other v4f32 register/memory patterns in this file.
// NOTE(review): the memop fragments are expected to refuse under-aligned
// loads - confirm against their definition.
class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
  : S3DI<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
         !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
         [(set VR128:$dst,
           (v4f32 (IntId VR128:$src1, (memopv4f32 addr:$src2))))]>;

// S3_Intrr / S3_Intrm - v2f64 intrinsic binary operations on the S3I
// template, in register/register and register/memory form.
class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
  : S3I<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
        !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
        [(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>;
// Same folding rule as S3D_Intrm, using the v2f64 fragment.
// NOTE(review): memopv2f64 is assumed to be defined next to memopv4f32.
class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
  : S3I<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
        !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
        [(set VR128:$dst,
          (v2f64 (IntId VR128:$src1, (memopv2f64 addr:$src2))))]>;
2236
2237let isTwoAddress = 1 in {
2238 def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>;
2239 def HADDPSrm : S3D_Intrm<0x7C, "haddps", int_x86_sse3_hadd_ps>;
2240 def HADDPDrr : S3_Intrr <0x7C, "haddpd", int_x86_sse3_hadd_pd>;
2241 def HADDPDrm : S3_Intrm <0x7C, "haddpd", int_x86_sse3_hadd_pd>;
2242 def HSUBPSrr : S3D_Intrr<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
2243 def HSUBPSrm : S3D_Intrm<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
2244 def HSUBPDrr : S3_Intrr <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
2245 def HSUBPDrm : S3_Intrm <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
2246}
2247
2248// Thread synchronization
// Both records take no explicit operands; the intrinsics read their
// arguments from fixed registers (EAX/ECX/EDX for monitor, ECX/EAX for mwait).
// NOTE(review): the architectural encodings are 0F 01 C8 (monitor) and
// 0F 01 C9 (mwait).  These records give 0xC8 / 0xC9 as the opcode byte under
// TB; confirm the emitter really produces the 0x01 byte, otherwise the output
// would decode as a different instruction.
def MONITOR : I<0xC8, RawFrm,
                (outs), (ins),
                "monitor",
                [(int_x86_sse3_monitor EAX, ECX, EDX)]>,
              TB, Requires<[HasSSE3]>;
def MWAIT   : I<0xC9, RawFrm,
                (outs), (ins),
                "mwait",
                [(int_x86_sse3_mwait ECX, EAX)]>,
              TB, Requires<[HasSSE3]>;
2253
2254// vector_shuffle v1, <undef> <1, 1, 3, 3>
2255let AddedComplexity = 15 in
2256def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
2257 MOVSHDUP_shuffle_mask)),
2258 (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>;
2259let AddedComplexity = 20 in
Dan Gohman4a4f1512007-07-18 20:23:34 +00002260def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (memopv2i64 addr:$src)), (undef),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002261 MOVSHDUP_shuffle_mask)),
2262 (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>;
2263
2264// vector_shuffle v1, <undef> <0, 0, 2, 2>
2265let AddedComplexity = 15 in
2266 def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
2267 MOVSLDUP_shuffle_mask)),
2268 (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>;
2269let AddedComplexity = 20 in
Dan Gohman4a4f1512007-07-18 20:23:34 +00002270 def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (memopv2i64 addr:$src)), (undef),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002271 MOVSLDUP_shuffle_mask)),
2272 (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
2273
2274//===----------------------------------------------------------------------===//
2275// SSSE3 Instructions
2276//===----------------------------------------------------------------------===//
2277
// SSSE3 Instruction Templates:
2279//
2280// SS38I - SSSE3 instructions with T8 and OpSize prefixes.
2281// SS3AI - SSSE3 instructions with TA and OpSize prefixes.
2282
// SS38I - instruction base that adds the T8 and OpSize prefix classes and is
// only selectable when HasSSSE3 holds.
class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern>,
    T8, OpSize, Requires<[HasSSSE3]>;

// SS3AI - same as SS38I but with the TA prefix class instead of T8.
class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern>,
    TA, OpSize, Requires<[HasSSSE3]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002289
/// SS3I_binop_rm_int - Simple SSSE3 binary operator whose type is v2i64.
let isTwoAddress = 1 in {
  // Expands to an `rr` (register, register) and an `rm` (register, memory)
  // record for the intrinsic IntId. Only the `rr` record takes its
  // isCommutable bit from the Commutable argument.
  multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
                               bit Commutable = 0> {
    def rr : SS38I<opc, MRMSrcReg,
                   (outs VR128:$dst),
                   (ins VR128:$src1, VR128:$src2),
                   !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
                   [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]> {
      let isCommutable = Commutable;
    }

    // The memory operand is loaded as v2i64 and bitconverted to whatever
    // vector type the intrinsic expects.
    def rm : SS38I<opc, MRMSrcMem,
                   (outs VR128:$dst),
                   (ins VR128:$src1, i128mem:$src2),
                   !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
                   [(set VR128:$dst,
                         (IntId VR128:$src1,
                                (bitconvert (memopv2i64 addr:$src2))))]>;
  }
}
2306
// pmulhrsw, 128-bit form; the trailing 1 marks the register form commutable.
defm PMULHRSW128
  : SS3I_binop_rm_int<0x0B, "pmulhrsw", int_x86_ssse3_pmulhrsw_128, 1>;
2309
2310//===----------------------------------------------------------------------===//
2311// Non-Instruction Patterns
2312//===----------------------------------------------------------------------===//
2313
2314// 128-bit vector undef's.
2315def : Pat<(v2f64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
2316def : Pat<(v16i8 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
2317def : Pat<(v8i16 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
2318def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
2319def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
2320
2321// 128-bit vector all zero's.
2322def : Pat<(v16i8 immAllZerosV), (V_SET0)>, Requires<[HasSSE2]>;
2323def : Pat<(v8i16 immAllZerosV), (V_SET0)>, Requires<[HasSSE2]>;
2324def : Pat<(v4i32 immAllZerosV), (V_SET0)>, Requires<[HasSSE2]>;
2325def : Pat<(v2i64 immAllZerosV), (V_SET0)>, Requires<[HasSSE2]>;
2326def : Pat<(v2f64 immAllZerosV), (V_SET0)>, Requires<[HasSSE2]>;
2327
2328// 128-bit vector all one's.
2329def : Pat<(v16i8 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE2]>;
2330def : Pat<(v8i16 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE2]>;
2331def : Pat<(v4i32 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE2]>;
2332def : Pat<(v2i64 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE2]>;
2333def : Pat<(v4f32 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE1]>;
2334
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002335
// Scalar to v8i16 / v16i8. The source may be a GR32, but only the lower 8 or
// 16-bits matter, so both element widths use the same GR32 -> XMM move.
let Predicates = [HasSSE2] in {
  def : Pat<(v8i16 (X86s2vec GR32:$src)), (MOVDI2PDIrr GR32:$src)>;
  def : Pat<(v16i8 (X86s2vec GR32:$src)), (MOVDI2PDIrr GR32:$src)>;
}
2342
// bit_convert
// A bitconvert between any two distinct 128-bit vector types is a no-op on
// the VR128 register: the result is the source register retyped.
let Predicates = [HasSSE2] in {
  // -> v2i64
  def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;

  // -> v4i32
  def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;

  // -> v8i16
  def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;

  // -> v16i8
  def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;

  // -> v4f32
  def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;

  // -> v2f64
  def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
}
2376
// Move scalar to XMM zero-extended.
let AddedComplexity = 15 in {
// Integer case: movd to an XMM register zero-extends, so shuffling the
// scalar into the low element of an all-zeros vector is a single move.
def : Pat<(v8i16 (vector_shuffle immAllZerosV,
                                 (v8i16 (X86s2vec GR32:$src)),
                                 MOVL_shuffle_mask)),
          (MOVZDI2PDIrr GR32:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v16i8 (vector_shuffle immAllZerosV,
                                 (v16i8 (X86s2vec GR32:$src)),
                                 MOVL_shuffle_mask)),
          (MOVZDI2PDIrr GR32:$src)>,
      Requires<[HasSSE2]>;

// FP case: zeroing a VR128 then do a MOVS{S|D} to the lower bits.
def : Pat<(v2f64 (vector_shuffle immAllZerosV,
                                 (v2f64 (scalar_to_vector FR64:$src)),
                                 MOVL_shuffle_mask)),
          (MOVLSD2PDrr (V_SET0), FR64:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(v4f32 (vector_shuffle immAllZerosV,
                                 (v4f32 (scalar_to_vector FR32:$src)),
                                 MOVL_shuffle_mask)),
          (MOVLSS2PSrr (V_SET0), FR32:$src)>,
      Requires<[HasSSE2]>;
}
2394
// Splat v2f64 / v2i64: unpack the source register with itself.
let AddedComplexity = 10 in {
// v2f64
def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef),
                          SSE_splat_lo_mask:$sm),
          (UNPCKLPDrr VR128:$src, VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef),
                          UNPCKH_shuffle_mask:$sm),
          (UNPCKHPDrr VR128:$src, VR128:$src)>,
      Requires<[HasSSE2]>;

// v2i64
def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef),
                          SSE_splat_lo_mask:$sm),
          (PUNPCKLQDQrr VR128:$src, VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef),
                          UNPCKH_shuffle_mask:$sm),
          (PUNPCKHQDQrr VR128:$src, VR128:$src)>,
      Requires<[HasSSE2]>;
}
2406
// Splat v4f32: SHUFPS of the source with itself, reusing the shuffle mask
// operand as the instruction's immediate.
def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef),
                          SSE_splat_mask:$sm),
          (SHUFPSrri VR128:$src, VR128:$src, SSE_splat_mask:$sm)>,
      Requires<[HasSSE1]>;
2411
// Special unary SHUFPSrri case: the single source feeds both operands.
// FIXME: when we want non two-address code, then we should use PSHUFD?
def : Pat<(vector_shuffle (v4f32 VR128:$src1), (undef),
                          SHUFP_unary_shuffle_mask:$sm),
          (SHUFPSrri VR128:$src1, VR128:$src1,
                     SHUFP_unary_shuffle_mask:$sm)>,
      Requires<[HasSSE1]>;

// Unary v4f32 shuffle with PSHUF* in order to fold a load.
def : Pat<(vector_shuffle (memopv4f32 addr:$src1), (undef),
                          SHUFP_unary_shuffle_mask:$sm),
          (PSHUFDmi addr:$src1, SHUFP_unary_shuffle_mask:$sm)>,
      Requires<[HasSSE2]>;

// Special binary v4i32 shuffle cases with SHUFPS (register and
// load-folding forms).
def : Pat<(vector_shuffle (v4i32 VR128:$src1), (v4i32 VR128:$src2),
                          PSHUFD_binary_shuffle_mask:$sm),
          (SHUFPSrri VR128:$src1, VR128:$src2,
                     PSHUFD_binary_shuffle_mask:$sm)>,
      Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v4i32 VR128:$src1),
                          (bc_v4i32 (memopv2i64 addr:$src2)),
                          PSHUFD_binary_shuffle_mask:$sm),
          (SHUFPSrmi VR128:$src1, addr:$src2,
                     PSHUFD_binary_shuffle_mask:$sm)>,
      Requires<[HasSSE2]>;
2432
// vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
// Selected as an unpack-low of the source register with itself.
let AddedComplexity = 10 in {
// UNPCKLPS is an SSE1 instruction, so the v4f32 form only needs HasSSE1.
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
                  UNPCKL_v_undef_shuffle_mask)),
          (UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
// The 128-bit integer unpacks (PUNPCKL*) are SSE2 instructions.
def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef),
                  UNPCKL_v_undef_shuffle_mask)),
          (PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef),
                  UNPCKL_v_undef_shuffle_mask)),
          (PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
                  UNPCKL_v_undef_shuffle_mask)),
          (PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
}
2448
// vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
// Selected as an unpack-high of the source register with itself.
let AddedComplexity = 10 in {
// UNPCKHPS is an SSE1 instruction, so the v4f32 form only needs HasSSE1.
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
                  UNPCKH_v_undef_shuffle_mask)),
          (UNPCKHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
// The 128-bit integer unpacks (PUNPCKH*) are SSE2 instructions.
def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef),
                  UNPCKH_v_undef_shuffle_mask)),
          (PUNPCKHBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef),
                  UNPCKH_v_undef_shuffle_mask)),
          (PUNPCKHWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
                  UNPCKH_v_undef_shuffle_mask)),
          (PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
}
2464
// Register-register shuffles selected as MOVLHPS / MOVHLPS. None of these
// patterns carries a predicate.
let AddedComplexity = 15 in {
// vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
                                 MOVHP_shuffle_mask)),
          (MOVLHPSrr VR128:$src1, VR128:$src2)>;

// vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
                                 MOVHLPS_shuffle_mask)),
          (MOVHLPSrr VR128:$src1, VR128:$src2)>;

// vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS
// (the single source is used for both instruction operands)
def : Pat<(v4f32 (vector_shuffle VR128:$src1, (undef),
                                 MOVHLPS_v_undef_shuffle_mask)),
          (MOVHLPSrr VR128:$src1, VR128:$src1)>;
def : Pat<(v4i32 (vector_shuffle VR128:$src1, (undef),
                                 MOVHLPS_v_undef_shuffle_mask)),
          (MOVHLPSrr VR128:$src1, VR128:$src1)>;
}
2484
// Load-folding shuffles selected as MOVLPS/MOVLPD (replace the low half of
// $src1 from memory) and MOVHPS/MOVHPD (replace the high half).
let AddedComplexity = 20 in {
// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
// vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS
def : Pat<(v4f32 (vector_shuffle VR128:$src1, (memopv4f32 addr:$src2),
                  MOVLP_shuffle_mask)),
          (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
def : Pat<(v2f64 (vector_shuffle VR128:$src1, (memopv2f64 addr:$src2),
                  MOVLP_shuffle_mask)),
          (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v4f32 (vector_shuffle VR128:$src1, (memopv4f32 addr:$src2),
                  MOVHP_shuffle_mask)),
          (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
def : Pat<(v2f64 (vector_shuffle VR128:$src1, (memopv2f64 addr:$src2),
                  MOVHP_shuffle_mask)),
          (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;

// Integer-typed equivalents of the four patterns above.
def : Pat<(v4i32 (vector_shuffle VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)),
                  MOVLP_shuffle_mask)),
          (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (vector_shuffle VR128:$src1, (memopv2i64 addr:$src2),
                  MOVLP_shuffle_mask)),
          (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)),
                  MOVHP_shuffle_mask)),
          (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
// v2i64 high-half load. This slot previously repeated the v2i64
// MOVLP -> MOVLPDrm pattern verbatim, leaving v2i64 MOVHP unmatched.
def : Pat<(v2i64 (vector_shuffle VR128:$src1, (memopv2i64 addr:$src2),
                  MOVHP_shuffle_mask)),
          (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
}
2514
// Register-register integer/FP shuffles selected as MOVLPSrr / MOVLPDrr.
let AddedComplexity = 15 in {
// Setting the lowest element in the vector.
def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
                                 MOVL_shuffle_mask)),
          (MOVLPSrr VR128:$src1, VR128:$src2)>,
      Requires<[HasSSE2]>;
def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
                                 MOVL_shuffle_mask)),
          (MOVLPDrr VR128:$src1, VR128:$src2)>,
      Requires<[HasSSE2]>;

// vector_shuffle v1, v2 <4, 5, 2, 3> using MOVLPDrr (movsd)
def : Pat<(v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
                                 MOVLP_shuffle_mask)),
          (MOVLPDrr VR128:$src1, VR128:$src2)>,
      Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
                                 MOVLP_shuffle_mask)),
          (MOVLPDrr VR128:$src1, VR128:$src2)>,
      Requires<[HasSSE2]>;
}
2532
// Set lowest element and zero upper elements: an f64 load shuffled into the
// low lane of an all-zeros vector, viewed as v2i64, is a single MOVZQI2PQIrm.
let AddedComplexity = 20 in
def : Pat<(bc_v2i64
            (vector_shuffle immAllZerosV,
                            (v2f64 (scalar_to_vector (loadf64 addr:$src))),
                            MOVL_shuffle_mask)),
          (MOVZQI2PQIrm addr:$src)>,
      Requires<[HasSSE2]>;
2539
// FIXME: Temporary workaround since 2-wide shuffle is broken.
// Each SSE2 intrinsic below is matched directly to its instruction.
// NOTE(review): the memory forms fold a plain `load` rather than one of the
// memop* fragments used by the shuffle patterns elsewhere in this file;
// confirm whether that difference is intended.
let Predicates = [HasSSE2] in {
  // movsd / movhpd / movlpd
  def : Pat<(int_x86_sse2_movs_d VR128:$src1, VR128:$src2),
            (v2f64 (MOVLPDrr VR128:$src1, VR128:$src2))>;
  def : Pat<(int_x86_sse2_loadh_pd VR128:$src1, addr:$src2),
            (v2f64 (MOVHPDrm VR128:$src1, addr:$src2))>;
  def : Pat<(int_x86_sse2_loadl_pd VR128:$src1, addr:$src2),
            (v2f64 (MOVLPDrm VR128:$src1, addr:$src2))>;

  // shufpd
  def : Pat<(int_x86_sse2_shuf_pd VR128:$src1, VR128:$src2, imm:$src3),
            (v2f64 (SHUFPDrri VR128:$src1, VR128:$src2, imm:$src3))>;
  def : Pat<(int_x86_sse2_shuf_pd VR128:$src1, (load addr:$src2), imm:$src3),
            (v2f64 (SHUFPDrmi VR128:$src1, addr:$src2, imm:$src3))>;

  // unpckhpd / unpcklpd
  def : Pat<(int_x86_sse2_unpckh_pd VR128:$src1, VR128:$src2),
            (v2f64 (UNPCKHPDrr VR128:$src1, VR128:$src2))>;
  def : Pat<(int_x86_sse2_unpckh_pd VR128:$src1, (load addr:$src2)),
            (v2f64 (UNPCKHPDrm VR128:$src1, addr:$src2))>;
  def : Pat<(int_x86_sse2_unpckl_pd VR128:$src1, VR128:$src2),
            (v2f64 (UNPCKLPDrr VR128:$src1, VR128:$src2))>;
  def : Pat<(int_x86_sse2_unpckl_pd VR128:$src1, (load addr:$src2)),
            (v2f64 (UNPCKLPDrm VR128:$src1, addr:$src2))>;

  // punpckhqdq / punpcklqdq
  def : Pat<(int_x86_sse2_punpckh_qdq VR128:$src1, VR128:$src2),
            (v2i64 (PUNPCKHQDQrr VR128:$src1, VR128:$src2))>;
  def : Pat<(int_x86_sse2_punpckh_qdq VR128:$src1, (load addr:$src2)),
            (v2i64 (PUNPCKHQDQrm VR128:$src1, addr:$src2))>;
  def : Pat<(int_x86_sse2_punpckl_qdq VR128:$src1, VR128:$src2),
            (v2i64 (PUNPCKLQDQrr VR128:$src1, VR128:$src2))>;
  def : Pat<(int_x86_sse2_punpckl_qdq VR128:$src1, (load addr:$src2)),
            (PUNPCKLQDQrm VR128:$src1, addr:$src2)>;
}
2569
// Some special case pandn patterns: (and (xor x, all-ones), y) where the
// all-ones vector is a v4i32, v8i16 or v16i8 constant bitcast to v2i64.
let Predicates = [HasSSE2] in {
  // Register second operand.
  def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
                        VR128:$src2)),
            (PANDNrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
                        VR128:$src2)),
            (PANDNrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
                        VR128:$src2)),
            (PANDNrr VR128:$src1, VR128:$src2)>;

  // Loaded second operand.
  def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
                        (load addr:$src2))),
            (PANDNrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
                        (load addr:$src2))),
            (PANDNrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
                        (load addr:$src2))),
            (PANDNrm VR128:$src1, addr:$src2)>;
}
2590
// Use movaps / movups for SSE integer load / store (one byte shorter).
// Aligned loads select MOVAPSrm; other loads select MOVUPSrm.

// v4i32
def : Pat<(alignedloadv4i32 addr:$src), (MOVAPSrm addr:$src)>,
      Requires<[HasSSE1]>;
def : Pat<(loadv4i32 addr:$src), (MOVUPSrm addr:$src)>,
      Requires<[HasSSE1]>;

// v2i64
def : Pat<(alignedloadv2i64 addr:$src), (MOVAPSrm addr:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(loadv2i64 addr:$src), (MOVUPSrm addr:$src)>,
      Requires<[HasSSE2]>;
2600
// Integer vector stores: aligned stores select MOVAPSmr, other stores select
// MOVUPSmr, for each of the four 128-bit integer vector types.
let Predicates = [HasSSE2] in {
  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;

  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
}
Evan Cheng86ab7d32007-07-31 08:04:03 +00002617
// Extracting element 0 of a scalar FP load that was widened to a vector and
// bitcast to an integer vector is just an integer load of the same address.

// (vextract (v4i32 bc (v4f32 s2v (f32 load $addr))), 0) -> (i32 load $addr)
def : Pat<(vector_extract
            (bc_v4i32 (v4f32 (scalar_to_vector (loadf32 addr:$src)))),
            (iPTR 0)),
          (MOV32rm addr:$src)>;

// (vextract (v2i64 bc (v2f64 s2v (f64 load $addr))), 0) -> (i64 load $addr)
def : Pat<(vector_extract
            (bc_v2i64 (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
            (iPTR 0)),
          (MOV64rm addr:$src)>;