blob: 54dd872e2bc3dac55781d0f508be35bf7d2eb245 [file] [log] [blame]
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001//====- X86InstrSSE.td - Describe the X86 Instruction Set -------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Evan Cheng and is distributed under the University
6// of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file describes the X86 SSE instruction set, defining the instructions,
11// and properties of the instructions which are needed for code generation,
12// machine code emission, and analysis.
13//
14//===----------------------------------------------------------------------===//
15
16
17//===----------------------------------------------------------------------===//
18// SSE specific DAG Nodes.
19//===----------------------------------------------------------------------===//
20
// Type profile for the FP shift node: one result and two operands.  The
// result has the same type as the first operand and is floating point; the
// second operand is an integer.
def SDTX86FPShiftOp : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
                                           SDTCisFP<0>,
                                           SDTCisInt<2>]>;

// Binary FP nodes.
def X86fmin      : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
def X86fmax      : SDNode<"X86ISD::FMAX", SDTFPBinOp>;

// Bitwise logic on FP values; all three are commutative and associative.
def X86fand      : SDNode<"X86ISD::FAND", SDTFPBinOp,
                          [SDNPCommutative, SDNPAssociative]>;
def X86for       : SDNode<"X86ISD::FOR", SDTFPBinOp,
                          [SDNPCommutative, SDNPAssociative]>;
def X86fxor      : SDNode<"X86ISD::FXOR", SDTFPBinOp,
                          [SDNPCommutative, SDNPAssociative]>;

// Unary FP nodes and the FP shift.
def X86frsqrt    : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
def X86frcp      : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
def X86fsrl      : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;

// Compare nodes that are chained and produce an output flag.
def X86comi      : SDNode<"X86ISD::COMI", SDTX86CmpTest,
                          [SDNPHasChain, SDNPOutFlag]>;
def X86ucomi     : SDNode<"X86ISD::UCOMI", SDTX86CmpTest,
                          [SDNPHasChain, SDNPOutFlag]>;

// "New" compare nodes; these carry no output flag.
def X86comi_new  : SDNode<"X86ISD::COMI_NEW", SDTX86CmpTest,
                          [SDNPHasChain]>;
def X86ucomi_new : SDNode<"X86ISD::UCOMI_NEW", SDTX86CmpTest>;

// Vector helper nodes with unconstrained type profiles.
def X86s2vec     : SDNode<"X86ISD::S2VEC", SDTypeProfile<1, 1, []>, []>;
def X86pextrw    : SDNode<"X86ISD::PEXTRW", SDTypeProfile<1, 2, []>, []>;
def X86pinsrw    : SDNode<"X86ISD::PINSRW", SDTypeProfile<1, 3, []>, []>;
45
46//===----------------------------------------------------------------------===//
47// SSE 'Special' Instructions
48//===----------------------------------------------------------------------===//
49
// Pseudo instructions that define a register with an undef value, one per
// SSE register class.  They take no inputs and only print a comment.
def IMPLICIT_DEF_VR128 : I<0, Pseudo,
                           (outs VR128:$dst), (ins),
                           "#IMPLICIT_DEF $dst",
                           [(set VR128:$dst, (v4f32 (undef)))]>,
                         Requires<[HasSSE1]>;

def IMPLICIT_DEF_FR32  : I<0, Pseudo,
                           (outs FR32:$dst), (ins),
                           "#IMPLICIT_DEF $dst",
                           [(set FR32:$dst, (undef))]>,
                         Requires<[HasSSE1]>;

def IMPLICIT_DEF_FR64  : I<0, Pseudo,
                           (outs FR64:$dst), (ins),
                           "#IMPLICIT_DEF $dst",
                           [(set FR64:$dst, (undef))]>,
                         Requires<[HasSSE2]>;
60
61//===----------------------------------------------------------------------===//
62// SSE Complex Patterns
63//===----------------------------------------------------------------------===//
64
// Complex patterns for 'extloads' of a scalar into the low element of a
// vector with the remaining elements zeroed.  They feed the memory forms of
// the SSE 'ss' and 'sd' instructions.  Both use the same C++ selector and
// yield four address operands.
def sse_load_f32 : ComplexPattern<v4f32, 4, "SelectScalarSSELoad",
                                  [], [SDNPHasChain]>;
def sse_load_f64 : ComplexPattern<v2f64, 4, "SelectScalarSSELoad",
                                  [], [SDNPHasChain]>;

// Memory operand matched by sse_load_f32; printed as an f32 memory reference.
def ssmem : Operand<v4f32> {
  let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm);
  let PrintMethod = "printf32mem";
}

// Memory operand matched by sse_load_f64; printed as an f64 memory reference.
def sdmem : Operand<v2f64> {
  let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm);
  let PrintMethod = "printf64mem";
}
81
82//===----------------------------------------------------------------------===//
83// SSE pattern fragments
84//===----------------------------------------------------------------------===//
85
// Plain 'load' fragments typed for each 128-bit vector type.
def loadv4f32 : PatFrag<(ops node:$ptr),
                        (v4f32 (load node:$ptr))>;
def loadv2f64 : PatFrag<(ops node:$ptr),
                        (v2f64 (load node:$ptr))>;
def loadv4i32 : PatFrag<(ops node:$ptr),
                        (v4i32 (load node:$ptr))>;
def loadv2i64 : PatFrag<(ops node:$ptr),
                        (v2i64 (load node:$ptr))>;
90
// Like 'store', but always requires vector alignment: matches only
// non-truncating, unindexed stores whose alignment is at least 16 bytes.
def alignedstore : PatFrag<(ops node:$val, node:$ptr),
                           (st node:$val, node:$ptr), [{
  StoreSDNode *StNode = dyn_cast<StoreSDNode>(N);
  if (!StNode)
    return false;
  if (StNode->isTruncatingStore())
    return false;
  if (StNode->getAddressingMode() != ISD::UNINDEXED)
    return false;
  return StNode->getAlignment() >= 16;
}]>;
100
// Like 'load', but always requires vector alignment: matches only
// non-extending, unindexed loads whose alignment is at least 16 bytes.
def alignedload : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
  if (!LdNode)
    return false;
  if (LdNode->getExtensionType() != ISD::NON_EXTLOAD)
    return false;
  if (LdNode->getAddressingMode() != ISD::UNINDEXED)
    return false;
  return LdNode->getAlignment() >= 16;
}]>;

// Typed wrappers around alignedload: two scalar FP types, then the four
// 128-bit vector types.
def alignedloadfsf32 : PatFrag<(ops node:$ptr),
                               (f32 (alignedload node:$ptr))>;
def alignedloadfsf64 : PatFrag<(ops node:$ptr),
                               (f64 (alignedload node:$ptr))>;
def alignedloadv4f32 : PatFrag<(ops node:$ptr),
                               (v4f32 (alignedload node:$ptr))>;
def alignedloadv2f64 : PatFrag<(ops node:$ptr),
                               (v2f64 (alignedload node:$ptr))>;
def alignedloadv4i32 : PatFrag<(ops node:$ptr),
                               (v4i32 (alignedload node:$ptr))>;
def alignedloadv2i64 : PatFrag<(ops node:$ptr),
                               (v2i64 (alignedload node:$ptr))>;
116
// Like 'load', but with the alignment check appropriate for memory operands
// folded into most SSE instructions.  Such operands must be naturally aligned
// on some targets but not on others; for now the check is unconditional and
// demands 16-byte alignment on a non-extending, unindexed load.
// FIXME: Actually implement support for targets that don't require the
//        alignment.  This probably wants a subtarget predicate.
def memop : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
  if (!LdNode)
    return false;
  if (LdNode->getExtensionType() != ISD::NON_EXTLOAD)
    return false;
  if (LdNode->getAddressingMode() != ISD::UNINDEXED)
    return false;
  return LdNode->getAlignment() >= 16;
}]>;

// Typed wrappers around memop: two scalar FP types, then the four 128-bit
// vector types.
def memopfsf32 : PatFrag<(ops node:$ptr),
                         (f32 (memop node:$ptr))>;
def memopfsf64 : PatFrag<(ops node:$ptr),
                         (f64 (memop node:$ptr))>;
def memopv4f32 : PatFrag<(ops node:$ptr),
                         (v4f32 (memop node:$ptr))>;
def memopv2f64 : PatFrag<(ops node:$ptr),
                         (v2f64 (memop node:$ptr))>;
def memopv4i32 : PatFrag<(ops node:$ptr),
                         (v4i32 (memop node:$ptr))>;
def memopv2i64 : PatFrag<(ops node:$ptr),
                         (v2i64 (memop node:$ptr))>;
136
// SSSE3 uses MMX registers for some instructions.  Their 64-bit memory
// operands are not aligned on a 16-byte boundary, so memop64 only demands
// 8-byte alignment on a non-extending, unindexed load.
def memop64 : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
    return LD->getExtensionType() == ISD::NON_EXTLOAD &&
           LD->getAddressingMode() == ISD::UNINDEXED &&
           LD->getAlignment() >= 8;
  return false;
}]>;

// 64-bit (MMX-sized) vector types: 8-byte alignment is sufficient.
def memopv8i8  : PatFrag<(ops node:$ptr), (v8i8  (memop64 node:$ptr))>;
def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>;
def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>;

// 128-bit (XMM-sized) vector types: these must go through 'memop' so that a
// memory operand is only folded when it satisfies the 16-byte alignment
// check.  Using memop64 here would allow folding an 8-byte-aligned 16-byte
// load into an instruction that requires an aligned operand.
def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>;
152
// Bitconvert fragments, one per 128-bit vector result type.
def bc_v4f32 : PatFrag<(ops node:$in),
                       (v4f32 (bitconvert node:$in))>;
def bc_v2f64 : PatFrag<(ops node:$in),
                       (v2f64 (bitconvert node:$in))>;
def bc_v16i8 : PatFrag<(ops node:$in),
                       (v16i8 (bitconvert node:$in))>;
def bc_v8i16 : PatFrag<(ops node:$in),
                       (v8i16 (bitconvert node:$in))>;
def bc_v4i32 : PatFrag<(ops node:$in),
                       (v4i32 (bitconvert node:$in))>;
def bc_v2i64 : PatFrag<(ops node:$in),
                       (v2i64 (bitconvert node:$in))>;
159
// Matches an f32 immediate that is exactly +0.0.
def fp32imm0 : PatLeaf<(f32 fpimm), [{ return N->isExactlyValue(+0.0); }]>;

// Transformation function: imm >> 3, returned as an i32 immediate.
def PSxLDQ_imm : SDNodeXForm<imm, [{
  return getI32Imm(N->getValue() >> 3);
}]>;
168
// The three xforms below turn a vector_shuffle mask (a build_vector) into the
// 8-bit immediate expected by the corresponding shuffle instruction.

// SHUFFLE_get_shuf_imm - immediate for PSHUF*, SHUFP* etc.
def SHUFFLE_get_shuf_imm    : SDNodeXForm<build_vector, [{
  return getI8Imm(X86::getShuffleSHUFImmediate(N));
}]>;

// SHUFFLE_get_pshufhw_imm - immediate for PSHUFHW.
def SHUFFLE_get_pshufhw_imm : SDNodeXForm<build_vector, [{
  return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
}]>;

// SHUFFLE_get_pshuflw_imm - immediate for PSHUFLW.
def SHUFFLE_get_pshuflw_imm : SDNodeXForm<build_vector, [{
  return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
}]>;
186
// Shuffle-mask leaves.  Each one matches a build_vector mask accepted by the
// named X86:: predicate.

// Splat masks.  SSE_splat_mask also converts the mask to a shuffle immediate.
def SSE_splat_mask : PatLeaf<(build_vector),
                             [{ return X86::isSplatMask(N); }],
                             SHUFFLE_get_shuf_imm>;
def SSE_splat_lo_mask : PatLeaf<(build_vector),
                                [{ return X86::isSplatLoMask(N); }]>;

// MOVHLPS / MOVHP / MOVLP / MOVL masks.
def MOVHLPS_shuffle_mask : PatLeaf<(build_vector),
                                   [{ return X86::isMOVHLPSMask(N); }]>;
def MOVHLPS_v_undef_shuffle_mask
    : PatLeaf<(build_vector),
              [{ return X86::isMOVHLPS_v_undef_Mask(N); }]>;
def MOVHP_shuffle_mask : PatLeaf<(build_vector),
                                 [{ return X86::isMOVHPMask(N); }]>;
def MOVLP_shuffle_mask : PatLeaf<(build_vector),
                                 [{ return X86::isMOVLPMask(N); }]>;
def MOVL_shuffle_mask : PatLeaf<(build_vector),
                                [{ return X86::isMOVLMask(N); }]>;

// MOVSHDUP / MOVSLDUP masks.
def MOVSHDUP_shuffle_mask : PatLeaf<(build_vector),
                                    [{ return X86::isMOVSHDUPMask(N); }]>;
def MOVSLDUP_shuffle_mask : PatLeaf<(build_vector),
                                    [{ return X86::isMOVSLDUPMask(N); }]>;

// UNPCKL / UNPCKH masks, with and without an undef second input.
def UNPCKL_shuffle_mask : PatLeaf<(build_vector),
                                  [{ return X86::isUNPCKLMask(N); }]>;
def UNPCKH_shuffle_mask : PatLeaf<(build_vector),
                                  [{ return X86::isUNPCKHMask(N); }]>;
def UNPCKL_v_undef_shuffle_mask
    : PatLeaf<(build_vector),
              [{ return X86::isUNPCKL_v_undef_Mask(N); }]>;
def UNPCKH_v_undef_shuffle_mask
    : PatLeaf<(build_vector),
              [{ return X86::isUNPCKH_v_undef_Mask(N); }]>;
238
// Shuffle-mask leaves that also carry an xform turning the matched mask into
// the instruction's 8-bit immediate.

def PSHUFD_shuffle_mask : PatLeaf<(build_vector),
                                  [{ return X86::isPSHUFDMask(N); }],
                                  SHUFFLE_get_shuf_imm>;

def PSHUFHW_shuffle_mask : PatLeaf<(build_vector),
                                   [{ return X86::isPSHUFHWMask(N); }],
                                   SHUFFLE_get_pshufhw_imm>;

def PSHUFLW_shuffle_mask : PatLeaf<(build_vector),
                                   [{ return X86::isPSHUFLWMask(N); }],
                                   SHUFFLE_get_pshuflw_imm>;

// Note: the unary SHUFP mask is checked with the PSHUFD predicate.
def SHUFP_unary_shuffle_mask : PatLeaf<(build_vector),
                                       [{ return X86::isPSHUFDMask(N); }],
                                       SHUFFLE_get_shuf_imm>;

def SHUFP_shuffle_mask : PatLeaf<(build_vector),
                                 [{ return X86::isSHUFPMask(N); }],
                                 SHUFFLE_get_shuf_imm>;

// Note: the binary PSHUFD mask is checked with the SHUFP predicate.
def PSHUFD_binary_shuffle_mask : PatLeaf<(build_vector),
                                         [{ return X86::isSHUFPMask(N); }],
                                         SHUFFLE_get_shuf_imm>;
262
263//===----------------------------------------------------------------------===//
264// SSE scalar FP Instructions
265//===----------------------------------------------------------------------===//
266
// CMOV* - Used to implement the SSE SELECT DAG operation.  These pseudos read
// EFLAGS and are expanded by the scheduler into a branch sequence.
let Uses = [EFLAGS], usesCustomDAGSchedInserter = 1 in {
  // Forms selected from X86cmov.
  def CMOV_FR32  : I<0, Pseudo, (outs FR32:$dst),
                     (ins FR32:$t, FR32:$f, i8imm:$cond),
                     "#CMOV_FR32 PSEUDO!",
                     [(set FR32:$dst,
                       (X86cmov FR32:$t, FR32:$f, imm:$cond))]>;
  def CMOV_FR64  : I<0, Pseudo, (outs FR64:$dst),
                     (ins FR64:$t, FR64:$f, i8imm:$cond),
                     "#CMOV_FR64 PSEUDO!",
                     [(set FR64:$dst,
                       (X86cmov FR64:$t, FR64:$f, imm:$cond))]>;
  def CMOV_V4F32 : I<0, Pseudo, (outs VR128:$dst),
                     (ins VR128:$t, VR128:$f, i8imm:$cond),
                     "#CMOV_V4F32 PSEUDO!",
                     [(set VR128:$dst,
                       (v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond)))]>;
  def CMOV_V2F64 : I<0, Pseudo, (outs VR128:$dst),
                     (ins VR128:$t, VR128:$f, i8imm:$cond),
                     "#CMOV_V2F64 PSEUDO!",
                     [(set VR128:$dst,
                       (v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond)))]>;
  def CMOV_V2I64 : I<0, Pseudo, (outs VR128:$dst),
                     (ins VR128:$t, VR128:$f, i8imm:$cond),
                     "#CMOV_V2I64 PSEUDO!",
                     [(set VR128:$dst,
                       (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond)))]>;

  // Forms selected from X86cmov_new, which takes EFLAGS as an explicit
  // operand in the pattern.
  def NEW_CMOV_FR32  : I<0, Pseudo, (outs FR32:$dst),
                         (ins FR32:$t, FR32:$f, i8imm:$cond),
                         "#CMOV_FR32 PSEUDO!",
                         [(set FR32:$dst,
                           (X86cmov_new FR32:$t, FR32:$f, imm:$cond,
                                        EFLAGS))]>;
  def NEW_CMOV_FR64  : I<0, Pseudo, (outs FR64:$dst),
                         (ins FR64:$t, FR64:$f, i8imm:$cond),
                         "#CMOV_FR64 PSEUDO!",
                         [(set FR64:$dst,
                           (X86cmov_new FR64:$t, FR64:$f, imm:$cond,
                                        EFLAGS))]>;
  def NEW_CMOV_V4F32 : I<0, Pseudo, (outs VR128:$dst),
                         (ins VR128:$t, VR128:$f, i8imm:$cond),
                         "#CMOV_V4F32 PSEUDO!",
                         [(set VR128:$dst,
                           (v4f32 (X86cmov_new VR128:$t, VR128:$f, imm:$cond,
                                               EFLAGS)))]>;
  def NEW_CMOV_V2F64 : I<0, Pseudo, (outs VR128:$dst),
                         (ins VR128:$t, VR128:$f, i8imm:$cond),
                         "#CMOV_V2F64 PSEUDO!",
                         [(set VR128:$dst,
                           (v2f64 (X86cmov_new VR128:$t, VR128:$f, imm:$cond,
                                               EFLAGS)))]>;
  def NEW_CMOV_V2I64 : I<0, Pseudo, (outs VR128:$dst),
                         (ins VR128:$t, VR128:$f, i8imm:$cond),
                         "#CMOV_V2I64 PSEUDO!",
                         [(set VR128:$dst,
                           (v2i64 (X86cmov_new VR128:$t, VR128:$f, imm:$cond,
                                               EFLAGS)))]>;
}
324
325//===----------------------------------------------------------------------===//
326// SSE1 Instructions
327//===----------------------------------------------------------------------===//
328
// Move Instructions: scalar single-precision register copy, load and store.
def MOVSSrr : SSI<0x10, MRMSrcReg,
                  (outs FR32:$dst), (ins FR32:$src),
                  "movss\t{$src, $dst|$dst, $src}",
                  []>;

// The load form is flagged as a load and as rematerializable.
let isLoad = 1, isReMaterializable = 1 in
def MOVSSrm : SSI<0x10, MRMSrcMem,
                  (outs FR32:$dst), (ins f32mem:$src),
                  "movss\t{$src, $dst|$dst, $src}",
                  [(set FR32:$dst, (loadf32 addr:$src))]>;

def MOVSSmr : SSI<0x11, MRMDestMem,
                  (outs), (ins f32mem:$dst, FR32:$src),
                  "movss\t{$src, $dst|$dst, $src}",
                  [(store FR32:$src, addr:$dst)]>;
339
// Conversion instructions between f32 and i32.

// f32 -> i32 (fp_to_sint), register and memory source.
def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg,
                      (outs GR32:$dst), (ins FR32:$src),
                      "cvttss2si\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (fp_to_sint FR32:$src))]>;
def CVTTSS2SIrm : SSI<0x2C, MRMSrcMem,
                      (outs GR32:$dst), (ins f32mem:$src),
                      "cvttss2si\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;

// i32 -> f32 (sint_to_fp), register and memory source.
def CVTSI2SSrr  : SSI<0x2A, MRMSrcReg,
                      (outs FR32:$dst), (ins GR32:$src),
                      "cvtsi2ss\t{$src, $dst|$dst, $src}",
                      [(set FR32:$dst, (sint_to_fp GR32:$src))]>;
def CVTSI2SSrm  : SSI<0x2A, MRMSrcMem,
                      (outs FR32:$dst), (ins i32mem:$src),
                      "cvtsi2ss\t{$src, $dst|$dst, $src}",
                      [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
353
// Match intrinsics which expect XMM operand(s).
def Int_CVTSS2SIrr  : SSI<0x2D, MRMSrcReg,
                          (outs GR32:$dst), (ins VR128:$src),
                          "cvtss2si\t{$src, $dst|$dst, $src}",
                          [(set GR32:$dst,
                            (int_x86_sse_cvtss2si VR128:$src))]>;
def Int_CVTSS2SIrm  : SSI<0x2D, MRMSrcMem,
                          (outs GR32:$dst), (ins f32mem:$src),
                          "cvtss2si\t{$src, $dst|$dst, $src}",
                          [(set GR32:$dst,
                            (int_x86_sse_cvtss2si (load addr:$src)))]>;

// Aliases for intrinsics: same encoding as CVTTSS2SI, but selected from the
// intrinsic and operating on a VR128 source.
def Int_CVTTSS2SIrr : SSI<0x2C, MRMSrcReg,
                          (outs GR32:$dst), (ins VR128:$src),
                          "cvttss2si\t{$src, $dst|$dst, $src}",
                          [(set GR32:$dst,
                            (int_x86_sse_cvttss2si VR128:$src))]>;
def Int_CVTTSS2SIrm : SSI<0x2C, MRMSrcMem,
                          (outs GR32:$dst), (ins f32mem:$src),
                          "cvttss2si\t{$src, $dst|$dst, $src}",
                          [(set GR32:$dst,
                            (int_x86_sse_cvttss2si (load addr:$src)))]>;
372
// Intrinsic forms of cvtsi2ss.  Two-address: $src1 supplies the incoming
// vector and only $src2 appears in the assembly string.
let isTwoAddress = 1 in {
  def Int_CVTSI2SSrr : SSI<0x2A, MRMSrcReg, (outs VR128:$dst),
                           (ins VR128:$src1, GR32:$src2),
                           "cvtsi2ss\t{$src2, $dst|$dst, $src2}",
                           [(set VR128:$dst,
                             (int_x86_sse_cvtsi2ss VR128:$src1,
                                                   GR32:$src2))]>;
  def Int_CVTSI2SSrm : SSI<0x2A, MRMSrcMem, (outs VR128:$dst),
                           (ins VR128:$src1, i32mem:$src2),
                           "cvtsi2ss\t{$src2, $dst|$dst, $src2}",
                           [(set VR128:$dst,
                             (int_x86_sse_cvtsi2ss VR128:$src1,
                                                   (loadi32 addr:$src2)))]>;
}
385
// Comparison instructions.  The condition code operand is spliced into the
// mnemonic; neither form has a selection pattern.
let isTwoAddress = 1 in {
  def CMPSSrr : SSI<0xC2, MRMSrcReg, (outs FR32:$dst),
                    (ins FR32:$src1, FR32:$src, SSECC:$cc),
                    "cmp${cc}ss\t{$src, $dst|$dst, $src}",
                    []>;
  def CMPSSrm : SSI<0xC2, MRMSrcMem, (outs FR32:$dst),
                    (ins FR32:$src1, f32mem:$src, SSECC:$cc),
                    "cmp${cc}ss\t{$src, $dst|$dst, $src}",
                    []>;
}
395
// Scalar unordered compares.  All forms define EFLAGS.
let Defs = [EFLAGS] in {
  // Forms selected from X86cmp.
  def UCOMISSrr : PSI<0x2E, MRMSrcReg, (outs),
                      (ins FR32:$src1, FR32:$src2),
                      "ucomiss\t{$src2, $src1|$src1, $src2}",
                      [(X86cmp FR32:$src1, FR32:$src2)]>;
  def UCOMISSrm : PSI<0x2E, MRMSrcMem, (outs),
                      (ins FR32:$src1, f32mem:$src2),
                      "ucomiss\t{$src2, $src1|$src1, $src2}",
                      [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>;

  // Forms selected from X86cmp_new; the pattern lists EFLAGS as an implicit
  // result.
  def NEW_UCOMISSrr : PSI<0x2E, MRMSrcReg, (outs),
                          (ins FR32:$src1, FR32:$src2),
                          "ucomiss\t{$src2, $src1|$src1, $src2}",
                          [(X86cmp_new FR32:$src1, FR32:$src2),
                           (implicit EFLAGS)]>;
  def NEW_UCOMISSrm : PSI<0x2E, MRMSrcMem, (outs),
                          (ins FR32:$src1, f32mem:$src2),
                          "ucomiss\t{$src2, $src1|$src1, $src2}",
                          [(X86cmp_new FR32:$src1, (loadf32 addr:$src2)),
                           (implicit EFLAGS)]>;
} // Defs = [EFLAGS]
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000412
// Aliases to match intrinsics which expect XMM operand(s).
let isTwoAddress = 1 in {
  def Int_CMPSSrr : SSI<0xC2, MRMSrcReg, (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src, SSECC:$cc),
                        "cmp${cc}ss\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse_cmp_ss VR128:$src1, VR128:$src,
                                              imm:$cc))]>;
  def Int_CMPSSrm : SSI<0xC2, MRMSrcMem, (outs VR128:$dst),
                        (ins VR128:$src1, f32mem:$src, SSECC:$cc),
                        "cmp${cc}ss\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse_cmp_ss VR128:$src1, (load addr:$src),
                                              imm:$cc))]>;
}
426
// Intrinsic (VR128) forms of ucomiss/comiss.  All forms define EFLAGS.
let Defs = [EFLAGS] in {
  // Forms selected from X86ucomi / X86comi.
  def Int_UCOMISSrr : PSI<0x2E, MRMSrcReg, (outs),
                          (ins VR128:$src1, VR128:$src2),
                          "ucomiss\t{$src2, $src1|$src1, $src2}",
                          [(X86ucomi (v4f32 VR128:$src1), VR128:$src2)]>;
  def Int_UCOMISSrm : PSI<0x2E, MRMSrcMem, (outs),
                          (ins VR128:$src1, f128mem:$src2),
                          "ucomiss\t{$src2, $src1|$src1, $src2}",
                          [(X86ucomi (v4f32 VR128:$src1),
                                     (load addr:$src2))]>;

  def Int_COMISSrr  : PSI<0x2F, MRMSrcReg, (outs),
                          (ins VR128:$src1, VR128:$src2),
                          "comiss\t{$src2, $src1|$src1, $src2}",
                          [(X86comi (v4f32 VR128:$src1), VR128:$src2)]>;
  def Int_COMISSrm  : PSI<0x2F, MRMSrcMem, (outs),
                          (ins VR128:$src1, f128mem:$src2),
                          "comiss\t{$src2, $src1|$src1, $src2}",
                          [(X86comi (v4f32 VR128:$src1),
                                    (load addr:$src2))]>;

  // Forms selected from X86ucomi_new / X86comi_new; the pattern lists EFLAGS
  // as an implicit result.
  def NEW_Int_UCOMISSrr : PSI<0x2E, MRMSrcReg, (outs),
                              (ins VR128:$src1, VR128:$src2),
                              "ucomiss\t{$src2, $src1|$src1, $src2}",
                              [(X86ucomi_new (v4f32 VR128:$src1),
                                             VR128:$src2),
                               (implicit EFLAGS)]>;
  def NEW_Int_UCOMISSrm : PSI<0x2E, MRMSrcMem, (outs),
                              (ins VR128:$src1, f128mem:$src2),
                              "ucomiss\t{$src2, $src1|$src1, $src2}",
                              [(X86ucomi_new (v4f32 VR128:$src1),
                                             (load addr:$src2)),
                               (implicit EFLAGS)]>;

  def NEW_Int_COMISSrr  : PSI<0x2F, MRMSrcReg, (outs),
                              (ins VR128:$src1, VR128:$src2),
                              "comiss\t{$src2, $src1|$src1, $src2}",
                              [(X86comi_new (v4f32 VR128:$src1),
                                            VR128:$src2),
                               (implicit EFLAGS)]>;
  def NEW_Int_COMISSrm  : PSI<0x2F, MRMSrcMem, (outs),
                              (ins VR128:$src1, f128mem:$src2),
                              "comiss\t{$src2, $src1|$src1, $src2}",
                              [(X86comi_new (v4f32 VR128:$src1),
                                            (load addr:$src2)),
                               (implicit EFLAGS)]>;
} // Defs = [EFLAGS]
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000464
// Aliases of packed SSE1 instructions for scalar use.  These all have names
// that start with 'Fs'.

// Alias instruction that maps fld0 to pxor for sse: materializes +0.0 in an
// FR32 register.  Flagged as rematerializable.
let isReMaterializable = 1 in
def FsFLD0SS : I<0xEF, MRMInitReg,
                 (outs FR32:$dst), (ins),
                 "pxor\t$dst, $dst",
                 [(set FR32:$dst, fp32imm0)]>,
               Requires<[HasSSE1]>, TB, OpSize;

// Alias instruction to do FR32 reg-to-reg copy using movaps.  Upper bits are
// disregarded.
def FsMOVAPSrr : PSI<0x28, MRMSrcReg,
                     (outs FR32:$dst), (ins FR32:$src),
                     "movaps\t{$src, $dst|$dst, $src}",
                     []>;

// Alias instruction to load FR32 from f128mem using movaps.  Upper bits are
// disregarded.  Only selected for loads that pass the alignedload check.
let isLoad = 1 in
def FsMOVAPSrm : PSI<0x28, MRMSrcMem,
                     (outs FR32:$dst), (ins f128mem:$src),
                     "movaps\t{$src, $dst|$dst, $src}",
                     [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000485
// Alias bitwise logical operations using SSE logical ops on packed FP values.
// Every form is two-address.
let isTwoAddress = 1 in {
  // Register-register AND/OR/XOR are commutable.
  let isCommutable = 1 in {
    def FsANDPSrr : PSI<0x54, MRMSrcReg, (outs FR32:$dst),
                        (ins FR32:$src1, FR32:$src2),
                        "andps\t{$src2, $dst|$dst, $src2}",
                        [(set FR32:$dst,
                          (X86fand FR32:$src1, FR32:$src2))]>;
    def FsORPSrr  : PSI<0x56, MRMSrcReg, (outs FR32:$dst),
                        (ins FR32:$src1, FR32:$src2),
                        "orps\t{$src2, $dst|$dst, $src2}",
                        [(set FR32:$dst,
                          (X86for FR32:$src1, FR32:$src2))]>;
    def FsXORPSrr : PSI<0x57, MRMSrcReg, (outs FR32:$dst),
                        (ins FR32:$src1, FR32:$src2),
                        "xorps\t{$src2, $dst|$dst, $src2}",
                        [(set FR32:$dst,
                          (X86fxor FR32:$src1, FR32:$src2))]>;
  }

  // Register-memory forms.  The memory operand is a full f128mem and is only
  // folded when the f32 load passes the memop check.
  def FsANDPSrm : PSI<0x54, MRMSrcMem, (outs FR32:$dst),
                      (ins FR32:$src1, f128mem:$src2),
                      "andps\t{$src2, $dst|$dst, $src2}",
                      [(set FR32:$dst,
                        (X86fand FR32:$src1, (memopfsf32 addr:$src2)))]>;
  def FsORPSrm  : PSI<0x56, MRMSrcMem, (outs FR32:$dst),
                      (ins FR32:$src1, f128mem:$src2),
                      "orps\t{$src2, $dst|$dst, $src2}",
                      [(set FR32:$dst,
                        (X86for FR32:$src1, (memopfsf32 addr:$src2)))]>;
  def FsXORPSrm : PSI<0x57, MRMSrcMem, (outs FR32:$dst),
                      (ins FR32:$src1, f128mem:$src2),
                      "xorps\t{$src2, $dst|$dst, $src2}",
                      [(set FR32:$dst,
                        (X86fxor FR32:$src1, (memopfsf32 addr:$src2)))]>;

  // ANDN forms have no selection pattern.
  def FsANDNPSrr : PSI<0x55, MRMSrcReg, (outs FR32:$dst),
                       (ins FR32:$src1, FR32:$src2),
                       "andnps\t{$src2, $dst|$dst, $src2}",
                       []>;
  def FsANDNPSrm : PSI<0x55, MRMSrcMem, (outs FR32:$dst),
                       (ins FR32:$src1, f128mem:$src2),
                       "andnps\t{$src2, $dst|$dst, $src2}",
                       []>;
}
520
/// basic_sse1_fp_binop_rm - SSE1 binops come in both scalar and vector forms.
///
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation.  This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a scalar)
/// and leaves the top elements unmodified: they are carried over from $src1,
/// which is tied to $dst.  Because of that the intrinsic forms are never
/// marked commutable, even when the underlying operation is; swapping the
/// operands would change which vector supplies the upper elements.
///
/// These three forms can each be reg+reg or reg+mem, so there are a total of
/// six "instructions".
///
/// Parameters:
///   opc        - opcode byte shared by all six forms.
///   OpcodeStr  - mnemonic stem; "ss" or "ps" is appended.
///   OpNode     - DAG node matched by the scalar and vector forms.
///   F32Int     - intrinsic matched by the *_Int forms.
///   Commutable - whether the plain scalar/vector reg+reg forms may be
///                commuted (defaults to 0).
///
let isTwoAddress = 1 in {
multiclass basic_sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, Intrinsic F32Int,
                                  bit Commutable = 0> {
  // Scalar operation, reg+reg.
  def SSrr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
                 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                 [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> {
    let isCommutable = Commutable;
  }

  // Scalar operation, reg+mem.
  def SSrm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
                 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                 [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;

  // Vector operation, reg+reg.
  def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
               !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
               [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
    let isCommutable = Commutable;
  }

  // Vector operation, reg+mem.
  def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                 !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst, (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;

  // Intrinsic operation, reg+reg.  Deliberately NOT commutable: the upper
  // elements of the result come from $src1.
  def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                     !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]>;

  // Intrinsic operation, reg+mem.
  def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
                     !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst, (F32Int VR128:$src1,
                                               sse_load_f32:$src2))]>;
}
}
573
// Arithmetic instructions.  ADD and MUL pass Commutable = 1; SUB and DIV rely
// on the default of 0.
defm ADD : basic_sse1_fp_binop_rm<0x58, "add",
                                  fadd, int_x86_sse_add_ss, 1>;
defm MUL : basic_sse1_fp_binop_rm<0x59, "mul",
                                  fmul, int_x86_sse_mul_ss, 1>;
defm SUB : basic_sse1_fp_binop_rm<0x5C, "sub",
                                  fsub, int_x86_sse_sub_ss>;
defm DIV : basic_sse1_fp_binop_rm<0x5E, "div",
                                  fdiv, int_x86_sse_div_ss>;
579
/// sse1_fp_binop_rm - Other SSE1 binops
///
/// This multiclass is like basic_sse1_fp_binop_rm, with the addition of
/// instructions for a full-vector intrinsic form.  Operations that map
/// onto C operators don't use this form since they just use the plain
/// vector form instead of having a separate vector intrinsic form.
///
/// Parameters:
///   opc         - opcode byte shared by all eight forms.
///   OpcodeStr   - mnemonic stem; "ss" or "ps" is appended per form.
///   OpNode      - DAG node matched by the plain scalar and vector forms.
///   F32Int      - intrinsic matched by the scalar *_Int forms.
///   V4F32Int    - intrinsic matched by the full-vector *_Int forms.
///   Commutable  - value assigned to isCommutable on the reg+reg forms.
///
/// This provides a total of eight "instructions".
///
let isTwoAddress = 1 in {
multiclass sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
                            SDNode OpNode,
                            Intrinsic F32Int,
                            Intrinsic V4F32Int,
                            bit Commutable = 0> {

  // Scalar operation, reg+reg.
  def SSrr : SSI<opc, MRMSrcReg,
                 (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
                 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                 [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> {
    let isCommutable = Commutable;
  }

  // Scalar operation, reg+mem.
  def SSrm : SSI<opc, MRMSrcMem,
                 (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
                 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                 [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;

  // Vector operation, reg+reg.
  def PSrr : PSI<opc, MRMSrcReg,
                 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                 !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst,
                   (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
    let isCommutable = Commutable;
  }

  // Vector operation, reg+mem.
  def PSrm : PSI<opc, MRMSrcMem,
                 (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                 !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst,
                   (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;

  // Intrinsic operation, reg+reg.
  def SSrr_Int : SSI<opc, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                     !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (F32Int VR128:$src1, VR128:$src2))]> {
    let isCommutable = Commutable;
  }

  // Intrinsic operation, reg+mem.
  def SSrm_Int : SSI<opc, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
                     !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (F32Int VR128:$src1, sse_load_f32:$src2))]>;

  // Vector intrinsic operation, reg+reg.
  def PSrr_Int : PSI<opc, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                     !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (V4F32Int VR128:$src1, VR128:$src2))]> {
    let isCommutable = Commutable;
  }

  // Vector intrinsic operation, reg+mem.
  // The folded operand goes through memopv4f32, the same load fragment the
  // PSrm form uses for its f128mem operand, instead of an unrestricted
  // load.  NOTE(review): memopv4f32 presumably only matches loads that are
  // sufficiently aligned for a 128-bit SSE memory operand -- confirm
  // against its definition.
  def PSrm_Int : PSI<opc, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                     !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (V4F32Int VR128:$src1,
                                 (memopv4f32 addr:$src2)))]>;
}
}
646
// Max / min: instantiated from sse1_fp_binop_rm so that both the scalar and
// the full-vector intrinsic forms are generated.  Commutable is left at its
// default of 0.
defm MAX : sse1_fp_binop_rm<0x5F, "max",
                            X86fmax,
                            int_x86_sse_max_ss,
                            int_x86_sse_max_ps>;
defm MIN : sse1_fp_binop_rm<0x5D, "min",
                            X86fmin,
                            int_x86_sse_min_ss,
                            int_x86_sse_min_ps>;
651
652//===----------------------------------------------------------------------===//
653// SSE packed FP Instructions
654
// Move Instructions

// movaps reg-to-reg copy; carries no selection pattern.
def MOVAPSrr : PSI<0x28, MRMSrcReg,
                   (outs VR128:$dst), (ins VR128:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   []>;

// movaps load: selected for alignedloadv4f32; flagged as a load that may be
// rematerialized.
def MOVAPSrm : PSI<0x28, MRMSrcMem,
                   (outs VR128:$dst), (ins f128mem:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (alignedloadv4f32 addr:$src))]> {
  let isLoad = 1;
  let isReMaterializable = 1;
}

// movaps store: selected for alignedstore of a v4f32 value.
def MOVAPSmr : PSI<0x29, MRMDestMem,
                   (outs), (ins f128mem:$dst, VR128:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000666
// movups reg-to-reg copy; carries no selection pattern.
def MOVUPSrr : PSI<0x10, MRMSrcReg,
                   (outs VR128:$dst), (ins VR128:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   []>;

// movups load: selected for loadv4f32.
def MOVUPSrm : PSI<0x10, MRMSrcMem,
                   (outs VR128:$dst), (ins f128mem:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (loadv4f32 addr:$src))]> {
  let isLoad = 1;
}

// movups store: selected for a plain store of a v4f32 value.
def MOVUPSmr : PSI<0x11, MRMDestMem,
                   (outs), (ins f128mem:$dst, VR128:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)]>;
676
// Intrinsic forms of MOVUPS load and store.  Same opcodes and assembly
// strings as MOVUPSrm / MOVUPSmr, but selected for the loadu / storeu
// intrinsics rather than for generic load and store nodes.
def MOVUPSrm_Int : PSI<0x10, MRMSrcMem,
                       (outs VR128:$dst), (ins f128mem:$src),
                       "movups\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse_loadu_ps addr:$src))]> {
  let isLoad = 1;
}

def MOVUPSmr_Int : PSI<0x11, MRMDestMem,
                       (outs), (ins f128mem:$dst, VR128:$src),
                       "movups\t{$src, $dst|$dst, $src}",
                       [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000685
// movlps / movhps loads.  Both are two-address: $src1 supplies the vector
// being shuffled and a 64-bit memory operand supplies the other input.  They
// differ only in opcode and in the shuffle mask that the pattern requires.
let isTwoAddress = 1, AddedComplexity = 20 in {
  def MOVLPSrm : PSI<0x12, MRMSrcMem,
                     (outs VR128:$dst),
                     (ins VR128:$src1, f64mem:$src2),
                     "movlps\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (v4f32 (vector_shuffle
                                VR128:$src1,
                                (bc_v4f32
                                  (v2f64 (scalar_to_vector
                                           (loadf64 addr:$src2)))),
                                MOVLP_shuffle_mask)))]>;

  def MOVHPSrm : PSI<0x16, MRMSrcMem,
                     (outs VR128:$dst),
                     (ins VR128:$src1, f64mem:$src2),
                     "movhps\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (v4f32 (vector_shuffle
                                VR128:$src1,
                                (bc_v4f32
                                  (v2f64 (scalar_to_vector
                                           (loadf64 addr:$src2)))),
                                MOVHP_shuffle_mask)))]>;
} // isTwoAddress, AddedComplexity
704
// movlps store: matched as a store of element 0 of the source register
// reinterpreted as v2f64.
def MOVLPSmr : PSI<0x13, MRMDestMem,
                   (outs), (ins f64mem:$dst, VR128:$src),
                   "movlps\t{$src, $dst|$dst, $src}",
                   [(store
                      (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
                                           (iPTR 0))),
                      addr:$dst)]>;
709
// movhps store.
// v2f64 extract element 1 is always custom lowered to an unpack-high
// shuffle followed by an extract of element 0, so this pattern matches that
// lowered shape; the non-store version therefore isn't too horrible either.
def MOVHPSmr : PSI<0x17, MRMDestMem,
                   (outs), (ins f64mem:$dst, VR128:$src),
                   "movhps\t{$src, $dst|$dst, $src}",
                   [(store
                      (f64 (vector_extract
                             (v2f64 (vector_shuffle
                                      (bc_v2f64 (v4f32 VR128:$src)),
                                      (undef),
                                      UNPCKH_shuffle_mask)),
                             (iPTR 0))),
                      addr:$dst)]>;
719
// movlhps / movhlps: two-address register shuffles of two v4f32 inputs,
// distinguished by opcode and by the shuffle mask that the pattern requires.
let isTwoAddress = 1, AddedComplexity = 15 in {
def MOVLHPSrr : PSI<0x16, MRMSrcReg,
                    (outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src2),
                    "movlhps\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (v4f32 (vector_shuffle
                               VR128:$src1, VR128:$src2,
                               MOVHP_shuffle_mask)))]>;

def MOVHLPSrr : PSI<0x12, MRMSrcReg,
                    (outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src2),
                    "movhlps\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (v4f32 (vector_shuffle
                               VR128:$src1, VR128:$src2,
                               MOVHLPS_shuffle_mask)))]>;
} // isTwoAddress, AddedComplexity
735
736
737
738// Arithmetic
739
/// sse1_fp_unop_rm - SSE1 unops come in both scalar and vector forms.
///
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation.  This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a
/// scalar) and leaves the top elements undefined.
///
/// And, we have a special variant form for a full-vector intrinsic form.
///
/// These four forms can each have a reg or a mem operand, so there are a
/// total of eight "instructions".
///
/// Parameters:
///   opc         - opcode byte shared by all eight forms.
///   OpcodeStr   - mnemonic stem; "ss" or "ps" is appended per form.
///   OpNode      - DAG node matched by the plain scalar and vector forms.
///   F32Int      - intrinsic matched by the scalar *_Int forms.
///   V4F32Int    - intrinsic matched by the full-vector *_Int forms.
///   Commutable  - value assigned to isCommutable on the register forms.
///
multiclass sse1_fp_unop_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode,
                           Intrinsic F32Int,
                           Intrinsic V4F32Int,
                           bit Commutable = 0> {
  // Scalar operation, reg.
  def SSr : SSI<opc, MRMSrcReg,
                (outs FR32:$dst), (ins FR32:$src),
                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                [(set FR32:$dst, (OpNode FR32:$src))]> {
    let isCommutable = Commutable;
  }

  // Scalar operation, mem.
  def SSm : SSI<opc, MRMSrcMem,
                (outs FR32:$dst), (ins f32mem:$src),
                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                [(set FR32:$dst, (OpNode (load addr:$src)))]>;

  // Vector operation, reg.
  def PSr : PSI<opc, MRMSrcReg,
                (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]> {
    let isCommutable = Commutable;
  }

  // Vector operation, mem.
  def PSm : PSI<opc, MRMSrcMem,
                (outs VR128:$dst), (ins f128mem:$src),
                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>;

  // Intrinsic operation, reg.
  def SSr_Int : SSI<opc, MRMSrcReg,
                    (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F32Int VR128:$src))]> {
    let isCommutable = Commutable;
  }

  // Intrinsic operation, mem.
  def SSm_Int : SSI<opc, MRMSrcMem,
                    (outs VR128:$dst), (ins ssmem:$src),
                    !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;

  // Vector intrinsic operation, reg
  def PSr_Int : PSI<opc, MRMSrcReg,
                    (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (V4F32Int VR128:$src))]> {
    let isCommutable = Commutable;
  }

  // Vector intrinsic operation, mem
  // The folded operand goes through memopv4f32, the same load fragment the
  // PSm form uses for its f128mem operand, instead of an unrestricted load.
  // NOTE(review): memopv4f32 presumably only matches loads that are
  // sufficiently aligned for a 128-bit SSE memory operand -- confirm
  // against its definition.
  def PSm_Int : PSI<opc, MRMSrcMem,
                    (outs VR128:$dst), (ins f128mem:$src),
                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                      (V4F32Int (memopv4f32 addr:$src)))]>;
}
805
// Square root.
defm SQRT  : sse1_fp_unop_rm<0x51, "sqrt",
                             fsqrt,
                             int_x86_sse_sqrt_ss,
                             int_x86_sse_sqrt_ps>;

// Reciprocal approximations.  Note that these typically require refinement
// in order to obtain suitable precision.
defm RSQRT : sse1_fp_unop_rm<0x52, "rsqrt",
                             X86frsqrt,
                             int_x86_sse_rsqrt_ss,
                             int_x86_sse_rsqrt_ps>;
defm RCP   : sse1_fp_unop_rm<0x53, "rcp",
                             X86frcp,
                             int_x86_sse_rcp_ss,
                             int_x86_sse_rcp_ps>;
816
// Logical
//
// Packed-single and/or/xor/andn.  All are two-address.  The patterns are
// written on v2i64: the reg+mem forms bitcast the v4f32 register operand
// with bc_v2i64 and load the memory operand with memopv2i64.  Only the
// reg+reg and/or/xor forms are marked commutable.
let isTwoAddress = 1 in {
  let isCommutable = 1 in {
    def ANDPSrr : PSI<0x54, MRMSrcReg,
                      (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      "andps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v2i64 (and VR128:$src1, VR128:$src2)))]>;
    def ORPSrr  : PSI<0x56, MRMSrcReg,
                      (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      "orps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v2i64 (or VR128:$src1, VR128:$src2)))]>;
    def XORPSrr : PSI<0x57, MRMSrcReg,
                      (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      "xorps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v2i64 (xor VR128:$src1, VR128:$src2)))]>;
  } // isCommutable

  def ANDPSrm : PSI<0x54, MRMSrcMem,
                    (outs VR128:$dst),
                    (ins VR128:$src1, f128mem:$src2),
                    "andps\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (and (bc_v2i64 (v4f32 VR128:$src1)),
                           (memopv2i64 addr:$src2)))]>;
  def ORPSrm  : PSI<0x56, MRMSrcMem,
                    (outs VR128:$dst),
                    (ins VR128:$src1, f128mem:$src2),
                    "orps\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (or (bc_v2i64 (v4f32 VR128:$src1)),
                          (memopv2i64 addr:$src2)))]>;
  def XORPSrm : PSI<0x57, MRMSrcMem,
                    (outs VR128:$dst),
                    (ins VR128:$src1, f128mem:$src2),
                    "xorps\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (xor (bc_v2i64 (v4f32 VR128:$src1)),
                           (memopv2i64 addr:$src2)))]>;

  // andnps: $src1 is xor'ed with an all-ones vector before being and'ed
  // with $src2.
  def ANDNPSrr : PSI<0x55, MRMSrcReg,
                     (outs VR128:$dst),
                     (ins VR128:$src1, VR128:$src2),
                     "andnps\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (v2i64 (and (xor VR128:$src1,
                                        (bc_v2i64 (v4i32 immAllOnesV))),
                                   VR128:$src2)))]>;
  def ANDNPSrm : PSI<0x55, MRMSrcMem,
                     (outs VR128:$dst),
                     (ins VR128:$src1, f128mem:$src2),
                     "andnps\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)),
                                        (bc_v2i64 (v4i32 immAllOnesV))),
                                   (memopv2i64 addr:$src2))))]>;
} // isTwoAddress
867
// Packed-single compare.  The SSECC operand $cc is spliced into the
// mnemonic via ${cc} and is matched as the immediate argument of the
// int_x86_sse_cmp_ps intrinsic.
let isTwoAddress = 1 in {
  def CMPPSrri : PSIi8<0xC2, MRMSrcReg,
                       (outs VR128:$dst),
                       (ins VR128:$src1, VR128:$src, SSECC:$cc),
                       "cmp${cc}ps\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse_cmp_ps VR128:$src1,
                                             VR128:$src, imm:$cc))]>;

  // The folded 128-bit operand goes through memopv4f32, the load fragment
  // the other packed-single reg+mem patterns in this file use for f128mem
  // operands, instead of an unrestricted load.  NOTE(review): memopv4f32
  // presumably only matches loads that are sufficiently aligned for a
  // 128-bit SSE memory operand -- confirm against its definition.
  def CMPPSrmi : PSIi8<0xC2, MRMSrcMem,
                       (outs VR128:$dst),
                       (ins VR128:$src1, f128mem:$src, SSECC:$cc),
                       "cmp${cc}ps\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse_cmp_ps VR128:$src1,
                                             (memopv4f32 addr:$src),
                                             imm:$cc))]>;
}
880
// Shuffle and unpack instructions
//
// All are two-address vector_shuffle patterns on v4f32.  The reg+mem forms
// load their second input with memopv4f32.
let isTwoAddress = 1 in {
  // shufps: the shuffle mask is carried as the immediate operand $src3.
  // The reg+reg form is convertible to three-address (convert to pshufd).
  def SHUFPSrri : PSIi8<0xC6, MRMSrcReg,
                        (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
                        "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                        [(set VR128:$dst,
                          (v4f32 (vector_shuffle
                                   VR128:$src1, VR128:$src2,
                                   SHUFP_shuffle_mask:$src3)))]> {
    let isConvertibleToThreeAddress = 1;
  }
  def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem,
                        (outs VR128:$dst),
                        (ins VR128:$src1, f128mem:$src2, i32i8imm:$src3),
                        "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                        [(set VR128:$dst,
                          (v4f32 (vector_shuffle
                                   VR128:$src1, (memopv4f32 addr:$src2),
                                   SHUFP_shuffle_mask:$src3)))]>;

  // unpckhps / unpcklps: fixed shuffle masks, no immediate operand.
  let AddedComplexity = 10 in {
    def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
                         (outs VR128:$dst),
                         (ins VR128:$src1, VR128:$src2),
                         "unpckhps\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v4f32 (vector_shuffle
                                    VR128:$src1, VR128:$src2,
                                    UNPCKH_shuffle_mask)))]>;
    def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
                         (outs VR128:$dst),
                         (ins VR128:$src1, f128mem:$src2),
                         "unpckhps\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v4f32 (vector_shuffle
                                    VR128:$src1, (memopv4f32 addr:$src2),
                                    UNPCKH_shuffle_mask)))]>;

    def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
                         (outs VR128:$dst),
                         (ins VR128:$src1, VR128:$src2),
                         "unpcklps\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v4f32 (vector_shuffle
                                    VR128:$src1, VR128:$src2,
                                    UNPCKL_shuffle_mask)))]>;
    def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
                         (outs VR128:$dst),
                         (ins VR128:$src1, f128mem:$src2),
                         "unpcklps\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v4f32 (vector_shuffle
                                    VR128:$src1, (memopv4f32 addr:$src2),
                                    UNPCKL_shuffle_mask)))]>;
  } // AddedComplexity
} // isTwoAddress
933
// Mask creation
//
// movmskps / movmskpd produce a GR32 result from a VR128 source and are
// selected for the corresponding movmsk intrinsics.
def MOVMSKPSrr : PSI<0x50, MRMSrcReg,
                     (outs GR32:$dst), (ins VR128:$src),
                     "movmskps\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (int_x86_sse_movmsk_ps VR128:$src))]>;

// movmskpd is the packed-double (SSE2 intrinsic) counterpart and shares
// opcode 0x50 with MOVMSKPSrr, so it must not use the same PSI class or the
// two records would describe the same encoding.  It uses PDI, the class the
// other packed-double SSE2 instructions in this file (e.g. UCOMISDrr) use.
// NOTE(review): PDI presumably supplies the operand-size prefix and the
// SSE2 predicate -- confirm against its definition.
def MOVMSKPDrr : PDI<0x50, MRMSrcReg,
                     (outs GR32:$dst), (ins VR128:$src),
                     "movmskpd\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (int_x86_sse2_movmsk_pd VR128:$src))]>;
941
// Prefetching loads.
// TODO: no intrinsics for these?
//
// All four share opcode 0x18 and differ only in the MRMnm format (i.e. the
// ModRM reg-field value) and mnemonic; none has a selection pattern.
def PREFETCHT0  : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
                      "prefetcht0\t$src", []>;
def PREFETCHT1  : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
                      "prefetcht1\t$src", []>;
def PREFETCHT2  : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
                      "prefetcht2\t$src", []>;
def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
                      "prefetchnta\t$src", []>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000948
// Non-temporal stores
//
// movntps: selected for the int_x86_sse_movnt_ps intrinsic.
def MOVNTPSmr : PSI<0x2B, MRMDestMem,
                    (outs), (ins i128mem:$dst, VR128:$src),
                    "movntps\t{$src, $dst|$dst, $src}",
                    [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
953
// Load, store, and memory fence
//
// sfence takes no operands and is selected for int_x86_sse_sfence.
// NOTE(review): the format is the memory form MRM7m although (ins) is
// empty -- confirm the code emitter produces the intended ModRM byte.
def SFENCE : PSI<0xAE, MRM7m,
                 (outs), (ins),
                 "sfence",
                 [(int_x86_sse_sfence)]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000956
// MXCSR register
//
// ldmxcsr / stmxcsr take a 32-bit memory operand and are selected for the
// matching intrinsics.  They share opcode 0xAE and differ in MRM2m / MRM3m.
def LDMXCSR : PSI<0xAE, MRM2m,
                  (outs), (ins i32mem:$src),
                  "ldmxcsr\t$src",
                  [(int_x86_sse_ldmxcsr addr:$src)]>;
def STMXCSR : PSI<0xAE, MRM3m,
                  (outs), (ins i32mem:$dst),
                  "stmxcsr\t$dst",
                  [(int_x86_sse_stmxcsr addr:$dst)]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000962
// Alias instructions that map zero vector to pxor / xorp* for sse.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
//
// V_SET0 has no inputs: it is printed as xorps of $dst with itself and is
// selected for an all-zeros v4f32.  It is marked rematerializable.
def V_SET0 : PSI<0x57, MRMInitReg,
                 (outs VR128:$dst), (ins),
                 "xorps\t$dst, $dst",
                 [(set VR128:$dst, (v4f32 immAllZerosV))]> {
  let isReMaterializable = 1;
}
969
// FR32 to 128-bit vector conversion.
//
// Both forms are selected for scalar_to_vector producing a v4f32, from an
// FR32 register or from an f32 load respectively.
def MOVSS2PSrr : SSI<0x10, MRMSrcReg,
                     (outs VR128:$dst), (ins FR32:$src),
                     "movss\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v4f32 (scalar_to_vector FR32:$src)))]>;
def MOVSS2PSrm : SSI<0x10, MRMSrcMem,
                     (outs VR128:$dst), (ins f32mem:$src),
                     "movss\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v4f32 (scalar_to_vector
                                (loadf32 addr:$src))))]>;
979
// FIXME: we may not be able to eliminate this movss with coalescing, because
// the src and dest register classes are different.  We really want to write
// this pattern like this:
// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
//           (f32 FR32:$src)>;
//
// Extract element 0 of a v4f32 into an FR32 register.
def MOVPS2SSrr : SSI<0x10, MRMSrcReg,
                     (outs FR32:$dst), (ins VR128:$src),
                     "movss\t{$src, $dst|$dst, $src}",
                     [(set FR32:$dst,
                       (vector_extract (v4f32 VR128:$src), (iPTR 0)))]>;

// Store element 0 of a v4f32 to an f32 memory location.
def MOVPS2SSmr : SSI<0x11, MRMDestMem,
                     (outs), (ins f32mem:$dst, VR128:$src),
                     "movss\t{$src, $dst|$dst, $src}",
                     [(store
                        (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
                        addr:$dst)]>;
993
994
// Move to lower bits of a VR128, leaving upper bits alone.
// Three operand (but two address) aliases.
let isTwoAddress = 1 in {
  // FR32 source; carries no selection pattern.
  def MOVLSS2PSrr : SSI<0x10, MRMSrcReg,
                        (outs VR128:$dst),
                        (ins VR128:$src1, FR32:$src2),
                        "movss\t{$src2, $dst|$dst, $src2}",
                        []>;

  // VR128 source; selected for a v4f32 vector_shuffle with
  // MOVL_shuffle_mask.
  def MOVLPSrr : SSI<0x10, MRMSrcReg,
                     (outs VR128:$dst),
                     (ins VR128:$src1, VR128:$src2),
                     "movss\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (v4f32 (vector_shuffle
                                VR128:$src1, VR128:$src2,
                                MOVL_shuffle_mask)))]> {
    let AddedComplexity = 15;
  }
}
1010
// Move to the lower bits of a VR128 and zero the upper bits.
// Loading from memory automatically zeroes the upper bits.
def MOVZSS2PSrm : SSI<0x10, MRMSrcMem,
                      (outs VR128:$dst), (ins f32mem:$src),
                      "movss\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4f32 (vector_shuffle
                                 immAllZerosV,
                                 (v4f32 (scalar_to_vector
                                          (loadf32 addr:$src))),
                                 MOVL_shuffle_mask)))]> {
  let AddedComplexity = 20;
}
1019
1020
1021//===----------------------------------------------------------------------===//
1022// SSE2 Instructions
1023//===----------------------------------------------------------------------===//
1024
// Move Instructions

// movsd reg-to-reg copy between FR64 registers; no selection pattern.
def MOVSDrr : SDI<0x10, MRMSrcReg,
                  (outs FR64:$dst), (ins FR64:$src),
                  "movsd\t{$src, $dst|$dst, $src}",
                  []>;

// movsd load: selected for loadf64; flagged as a load that may be
// rematerialized.
def MOVSDrm : SDI<0x10, MRMSrcMem,
                  (outs FR64:$dst), (ins f64mem:$src),
                  "movsd\t{$src, $dst|$dst, $src}",
                  [(set FR64:$dst, (loadf64 addr:$src))]> {
  let isLoad = 1;
  let isReMaterializable = 1;
}

// movsd store: selected for a store of an FR64 value.
def MOVSDmr : SDI<0x11, MRMDestMem,
                  (outs), (ins f64mem:$dst, FR64:$src),
                  "movsd\t{$src, $dst|$dst, $src}",
                  [(store FR64:$src, addr:$dst)]>;
1035
// Conversion instructions

// cvttsd2si: FR64 (register or f64 load) to GR32, selected for fp_to_sint.
def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg,
                      (outs GR32:$dst), (ins FR64:$src),
                      "cvttsd2si\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (fp_to_sint FR64:$src))]>;
def CVTTSD2SIrm : SDI<0x2C, MRMSrcMem,
                      (outs GR32:$dst), (ins f64mem:$src),
                      "cvttsd2si\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst,
                        (fp_to_sint (loadf64 addr:$src)))]>;

// cvtsd2ss: FR64 (register or f64 load) to FR32, selected for fround.
def CVTSD2SSrr  : SDI<0x5A, MRMSrcReg,
                      (outs FR32:$dst), (ins FR64:$src),
                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
                      [(set FR32:$dst, (fround FR64:$src))]>;
def CVTSD2SSrm  : SDI<0x5A, MRMSrcMem,
                      (outs FR32:$dst), (ins f64mem:$src),
                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
                      [(set FR32:$dst, (fround (loadf64 addr:$src)))]>;

// cvtsi2sd: GR32 (register or i32 load) to FR64, selected for sint_to_fp.
def CVTSI2SDrr  : SDI<0x2A, MRMSrcReg,
                      (outs FR64:$dst), (ins GR32:$src),
                      "cvtsi2sd\t{$src, $dst|$dst, $src}",
                      [(set FR64:$dst, (sint_to_fp GR32:$src))]>;
def CVTSI2SDrm  : SDI<0x2A, MRMSrcMem,
                      (outs FR64:$dst), (ins i32mem:$src),
                      "cvtsi2sd\t{$src, $dst|$dst, $src}",
                      [(set FR64:$dst,
                        (sint_to_fp (loadi32 addr:$src)))]>;
1055
// SSE2 instructions with XS prefix
//
// cvtss2sd is built from the generic I class with the XS prefix and an
// explicit HasSSE2 requirement.  The register form is selected for fextend,
// the memory form for an extending f32 load.
def CVTSS2SDrr : I<0x5A, MRMSrcReg,
                   (outs FR64:$dst), (ins FR32:$src),
                   "cvtss2sd\t{$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (fextend FR32:$src))]>,
                 XS, Requires<[HasSSE2]>;
def CVTSS2SDrm : I<0x5A, MRMSrcMem,
                   (outs FR64:$dst), (ins f32mem:$src),
                   "cvtss2sd\t{$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (extloadf32 addr:$src))]>,
                 XS, Requires<[HasSSE2]>;
1065
// Match intrinsics which expect XMM operand(s).
//
// cvtsd2si selected for int_x86_sse2_cvtsd2si, which takes a whole VR128
// (or a load of one) rather than an FR64.
def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg,
                         (outs GR32:$dst), (ins VR128:$src),
                         "cvtsd2si\t{$src, $dst|$dst, $src}",
                         [(set GR32:$dst,
                           (int_x86_sse2_cvtsd2si VR128:$src))]>;
def Int_CVTSD2SIrm : SDI<0x2D, MRMSrcMem,
                         (outs GR32:$dst), (ins f128mem:$src),
                         "cvtsd2si\t{$src, $dst|$dst, $src}",
                         [(set GR32:$dst,
                           (int_x86_sse2_cvtsd2si (load addr:$src)))]>;
1074
// Aliases for intrinsics
//
// Same opcode and assembly string as CVTTSD2SIrr / CVTTSD2SIrm, but
// selected for int_x86_sse2_cvttsd2si on a whole VR128 (or a load of one).
def Int_CVTTSD2SIrr : SDI<0x2C, MRMSrcReg,
                          (outs GR32:$dst), (ins VR128:$src),
                          "cvttsd2si\t{$src, $dst|$dst, $src}",
                          [(set GR32:$dst,
                            (int_x86_sse2_cvttsd2si VR128:$src))]>;
def Int_CVTTSD2SIrm : SDI<0x2C, MRMSrcMem,
                          (outs GR32:$dst), (ins f128mem:$src),
                          "cvttsd2si\t{$src, $dst|$dst, $src}",
                          [(set GR32:$dst,
                            (int_x86_sse2_cvttsd2si (load addr:$src)))]>;
1084
// Comparison instructions
//
// Scalar-double compare on FR64; two-address, with the SSECC operand $cc
// spliced into the mnemonic.  Neither form has a selection pattern.
// NOTE(review): CMPPSrri/CMPPSrmi use the PSIi8 class for the same kind of
// $cc operand whereas these use plain SDI -- confirm that the condition-code
// immediate is still encoded.
let isTwoAddress = 1 in {
  def CMPSDrr : SDI<0xC2, MRMSrcReg,
                    (outs FR64:$dst),
                    (ins FR64:$src1, FR64:$src, SSECC:$cc),
                    "cmp${cc}sd\t{$src, $dst|$dst, $src}",
                    []>;
  def CMPSDrm : SDI<0xC2, MRMSrcMem,
                    (outs FR64:$dst),
                    (ins FR64:$src1, f64mem:$src, SSECC:$cc),
                    "cmp${cc}sd\t{$src, $dst|$dst, $src}",
                    []>;
}
1094
// ucomisd on FR64 operands.  Every def in this scope lists EFLAGS in Defs.
// The plain forms are selected for X86cmp; the NEW_ forms are selected for
// X86cmp_new and additionally name EFLAGS as an implicit result in the
// pattern.
let Defs = [EFLAGS] in {
def UCOMISDrr : PDI<0x2E, MRMSrcReg,
                    (outs), (ins FR64:$src1, FR64:$src2),
                    "ucomisd\t{$src2, $src1|$src1, $src2}",
                    [(X86cmp FR64:$src1, FR64:$src2)]>;
def UCOMISDrm : PDI<0x2E, MRMSrcMem,
                    (outs), (ins FR64:$src1, f64mem:$src2),
                    "ucomisd\t{$src2, $src1|$src1, $src2}",
                    [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>;

def NEW_UCOMISDrr : PDI<0x2E, MRMSrcReg,
                        (outs), (ins FR64:$src1, FR64:$src2),
                        "ucomisd\t{$src2, $src1|$src1, $src2}",
                        [(X86cmp_new FR64:$src1, FR64:$src2),
                         (implicit EFLAGS)]>;
def NEW_UCOMISDrm : PDI<0x2E, MRMSrcMem,
                        (outs), (ins FR64:$src1, f64mem:$src2),
                        "ucomisd\t{$src2, $src1|$src1, $src2}",
                        [(X86cmp_new FR64:$src1, (loadf64 addr:$src2)),
                         (implicit EFLAGS)]>;
} // Defs = [EFLAGS]
1111
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001112// Aliases to match intrinsics which expect XMM operand(s).
1113let isTwoAddress = 1 in {
1114 def Int_CMPSDrr : SDI<0xC2, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001115 (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
Dan Gohman91888f02007-07-31 20:11:57 +00001116 "cmp${cc}sd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001117 [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
1118 VR128:$src, imm:$cc))]>;
1119 def Int_CMPSDrm : SDI<0xC2, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001120 (outs VR128:$dst), (ins VR128:$src1, f64mem:$src, SSECC:$cc),
Dan Gohman91888f02007-07-31 20:11:57 +00001121 "cmp${cc}sd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001122 [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
1123 (load addr:$src), imm:$cc))]>;
1124}
1125
Evan Cheng950aac02007-09-25 01:57:46 +00001126let Defs = [EFLAGS] in {
Evan Chengb783fa32007-07-19 01:14:50 +00001127def Int_UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001128 "ucomisd\t{$src2, $src1|$src1, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001129 [(X86ucomi (v2f64 VR128:$src1), (v2f64 VR128:$src2))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001130def Int_UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001131 "ucomisd\t{$src2, $src1|$src1, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001132 [(X86ucomi (v2f64 VR128:$src1), (load addr:$src2))]>;
1133
Evan Chengb783fa32007-07-19 01:14:50 +00001134def Int_COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001135 "comisd\t{$src2, $src1|$src1, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001136 [(X86comi (v2f64 VR128:$src1), (v2f64 VR128:$src2))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001137def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001138 "comisd\t{$src2, $src1|$src1, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001139 [(X86comi (v2f64 VR128:$src1), (load addr:$src2))]>;
1140
Evan Cheng950aac02007-09-25 01:57:46 +00001141def NEW_Int_UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs),
1142 (ins VR128:$src1, VR128:$src2),
1143 "ucomisd\t{$src2, $src1|$src1, $src2}",
1144 [(X86ucomi_new (v2f64 VR128:$src1), (v2f64 VR128:$src2)),
1145 (implicit EFLAGS)]>;
1146def NEW_Int_UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs),
1147 (ins VR128:$src1, f128mem:$src2),
1148 "ucomisd\t{$src2, $src1|$src1, $src2}",
1149 [(X86ucomi_new (v2f64 VR128:$src1), (load addr:$src2)),
1150 (implicit EFLAGS)]>;
1151
1152def NEW_Int_COMISDrr: PDI<0x2F, MRMSrcReg, (outs),
1153 (ins VR128:$src1, VR128:$src2),
1154 "comisd\t{$src2, $src1|$src1, $src2}",
1155 [(X86comi_new (v2f64 VR128:$src1), (v2f64 VR128:$src2)),
1156 (implicit EFLAGS)]>;
1157def NEW_Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs),
1158 (ins VR128:$src1, f128mem:$src2),
1159 "comisd\t{$src2, $src1|$src1, $src2}",
1160 [(X86comi_new (v2f64 VR128:$src1), (load addr:$src2)),
1161 (implicit EFLAGS)]>;
} // Defs = [EFLAGS]
1163
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001164// Aliases of packed SSE2 instructions for scalar use. These all have names that
1165// start with 'Fs'.
1166
1167// Alias instructions that map fld0 to pxor for sse.
Dan Gohman8aef09b2007-09-07 21:32:51 +00001168let isReMaterializable = 1 in
Evan Chengb783fa32007-07-19 01:14:50 +00001169def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins),
Dan Gohman91888f02007-07-31 20:11:57 +00001170 "pxor\t$dst, $dst", [(set FR64:$dst, fpimm0)]>,
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001171 Requires<[HasSSE2]>, TB, OpSize;
1172
1173// Alias instruction to do FR64 reg-to-reg copy using movapd. Upper bits are
1174// disregarded.
Evan Chengb783fa32007-07-19 01:14:50 +00001175def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001176 "movapd\t{$src, $dst|$dst, $src}", []>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001177
1178// Alias instruction to load FR64 from f128mem using movapd. Upper bits are
1179// disregarded.
Evan Cheng4e84e452007-08-30 05:49:43 +00001180let isLoad = 1 in
Evan Chengb783fa32007-07-19 01:14:50 +00001181def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001182 "movapd\t{$src, $dst|$dst, $src}",
Dan Gohman11821702007-07-27 17:16:43 +00001183 [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001184
1185// Alias bitwise logical operations using SSE logical ops on packed FP values.
1186let isTwoAddress = 1 in {
1187let isCommutable = 1 in {
Evan Chengb783fa32007-07-19 01:14:50 +00001188 def FsANDPDrr : PDI<0x54, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001189 "andpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001190 [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001191 def FsORPDrr : PDI<0x56, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001192 "orpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001193 [(set FR64:$dst, (X86for FR64:$src1, FR64:$src2))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001194 def FsXORPDrr : PDI<0x57, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001195 "xorpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001196 [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>;
1197}
1198
Evan Chengb783fa32007-07-19 01:14:50 +00001199def FsANDPDrm : PDI<0x54, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001200 "andpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001201 [(set FR64:$dst, (X86fand FR64:$src1,
Dan Gohman11821702007-07-27 17:16:43 +00001202 (memopfsf64 addr:$src2)))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001203def FsORPDrm : PDI<0x56, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001204 "orpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001205 [(set FR64:$dst, (X86for FR64:$src1,
Dan Gohman11821702007-07-27 17:16:43 +00001206 (memopfsf64 addr:$src2)))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001207def FsXORPDrm : PDI<0x57, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001208 "xorpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001209 [(set FR64:$dst, (X86fxor FR64:$src1,
Dan Gohman11821702007-07-27 17:16:43 +00001210 (memopfsf64 addr:$src2)))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001211
1212def FsANDNPDrr : PDI<0x55, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001213 (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001214 "andnpd\t{$src2, $dst|$dst, $src2}", []>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001215def FsANDNPDrm : PDI<0x55, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001216 (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001217 "andnpd\t{$src2, $dst|$dst, $src2}", []>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001218}
1219
/// basic_sse2_fp_binop_rm - SSE2 binops come in both scalar and vector forms.
///
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation.  This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a scalar)
/// and leaves the top elements unmodified (therefore these cannot be
/// commuted).
///
/// These three forms can each be reg+reg or reg+mem, so there are a total of
/// six "instructions".
///
/// Template arguments:
///   opc         - opcode byte shared by all six forms.
///   OpcodeStr   - mnemonic stem; "sd" / "pd" is appended per form.
///   OpNode      - DAG node matched by the plain scalar and vector forms.
///   F64Int      - intrinsic matched by the scalar "_Int" forms.
///   Commutable  - whether the plain scalar and vector reg+reg forms may have
///                 their source operands swapped.
///
let isTwoAddress = 1 in {
multiclass basic_sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, Intrinsic F64Int,
                                  bit Commutable = 0> {
  // Scalar operation, reg+reg.
  def SDrr : SDI<opc, MRMSrcReg, (outs FR64:$dst),
                 (ins FR64:$src1, FR64:$src2),
                 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
                 [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> {
    let isCommutable = Commutable;
  }

  // Scalar operation, reg+mem.
  def SDrm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
                 (ins FR64:$src1, f64mem:$src2),
                 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
                 [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;

  // Vector operation, reg+reg.
  def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
                 (ins VR128:$src1, VR128:$src2),
                 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst,
                   (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
    let isCommutable = Commutable;
  }

  // Vector operation, reg+mem.
  def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
                 (ins VR128:$src1, f128mem:$src2),
                 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst,
                   (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;

  // Intrinsic operation, reg+reg.
  // Deliberately NOT commutable, even when the underlying arithmetic is:
  // the result's upper element comes from $src1 (which is tied to $dst), so
  // swapping $src1 and $src2 would change the upper half of the result.
  def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst),
                     (ins VR128:$src1, VR128:$src2),
                     !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]>;

  // Intrinsic operation, reg+mem.
  def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
                     (ins VR128:$src1, sdmem:$src2),
                     !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst, (F64Int VR128:$src1,
                                               sse_load_f64:$src2))]>;
}
}
1272
1273// Arithmetic instructions
1274defm ADD : basic_sse2_fp_binop_rm<0x58, "add", fadd, int_x86_sse2_add_sd, 1>;
1275defm MUL : basic_sse2_fp_binop_rm<0x59, "mul", fmul, int_x86_sse2_mul_sd, 1>;
1276defm SUB : basic_sse2_fp_binop_rm<0x5C, "sub", fsub, int_x86_sse2_sub_sd>;
1277defm DIV : basic_sse2_fp_binop_rm<0x5E, "div", fdiv, int_x86_sse2_div_sd>;
1278
/// sse2_fp_binop_rm - Other SSE2 binops
///
/// This multiclass is like basic_sse2_fp_binop_rm, with the addition of
/// instructions for a full-vector intrinsic form.  Operations that map
/// onto C operators don't use this form since they just use the plain
/// vector form instead of having a separate vector intrinsic form.
///
/// This provides a total of eight "instructions".
///
/// Template arguments:
///   opc         - opcode byte shared by all eight forms.
///   OpcodeStr   - mnemonic stem; "sd" / "pd" is appended per form.
///   OpNode      - DAG node matched by the plain scalar and vector forms.
///   F64Int      - intrinsic matched by the scalar "_Int" forms.
///   V2F64Int    - intrinsic matched by the full-vector "_Int" forms.
///   Commutable  - whether the reg+reg forms that operate on whole values
///                 (plain scalar, plain vector, full-vector intrinsic) may
///                 have their source operands swapped.
///
let isTwoAddress = 1 in {
multiclass sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
                            SDNode OpNode,
                            Intrinsic F64Int,
                            Intrinsic V2F64Int,
                            bit Commutable = 0> {

  // Scalar operation, reg+reg.
  def SDrr : SDI<opc, MRMSrcReg, (outs FR64:$dst),
                 (ins FR64:$src1, FR64:$src2),
                 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
                 [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> {
    let isCommutable = Commutable;
  }

  // Scalar operation, reg+mem.
  def SDrm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
                 (ins FR64:$src1, f64mem:$src2),
                 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
                 [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;

  // Vector operation, reg+reg.
  def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
                 (ins VR128:$src1, VR128:$src2),
                 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst,
                   (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
    let isCommutable = Commutable;
  }

  // Vector operation, reg+mem.
  def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
                 (ins VR128:$src1, f128mem:$src2),
                 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst,
                   (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;

  // Intrinsic operation, reg+reg.
  // Deliberately NOT commutable regardless of the Commutable argument: the
  // result's upper element comes from $src1 (which is tied to $dst), so
  // swapping $src1 and $src2 would change the upper half of the result.
  def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst),
                     (ins VR128:$src1, VR128:$src2),
                     !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]>;

  // Intrinsic operation, reg+mem.
  def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
                     (ins VR128:$src1, sdmem:$src2),
                     !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst, (F64Int VR128:$src1,
                                               sse_load_f64:$src2))]>;

  // Vector intrinsic operation, reg+reg.
  def PDrr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst),
                     (ins VR128:$src1, VR128:$src2),
                     !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (V2F64Int VR128:$src1, VR128:$src2))]> {
    let isCommutable = Commutable;
  }

  // Vector intrinsic operation, reg+mem.
  // Uses memopv2f64 (like PDrm above) rather than a plain load so that only
  // loads acceptable as a 128-bit packed memory operand are folded.
  def PDrm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst),
                     (ins VR128:$src1, f128mem:$src2),
                     !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (V2F64Int VR128:$src1,
                                 (memopv2f64 addr:$src2)))]>;
}
}
1345
1346defm MAX : sse2_fp_binop_rm<0x5F, "max", X86fmax,
1347 int_x86_sse2_max_sd, int_x86_sse2_max_pd>;
1348defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin,
1349 int_x86_sse2_min_sd, int_x86_sse2_min_pd>;
1350
1351//===----------------------------------------------------------------------===//
1352// SSE packed FP Instructions
1353
1354// Move Instructions
Evan Chengb783fa32007-07-19 01:14:50 +00001355def MOVAPDrr : PDI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001356 "movapd\t{$src, $dst|$dst, $src}", []>;
Evan Cheng4e84e452007-08-30 05:49:43 +00001357let isLoad = 1, isReMaterializable = 1 in
Evan Chengb783fa32007-07-19 01:14:50 +00001358def MOVAPDrm : PDI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001359 "movapd\t{$src, $dst|$dst, $src}",
Dan Gohman4a4f1512007-07-18 20:23:34 +00001360 [(set VR128:$dst, (alignedloadv2f64 addr:$src))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001361
Evan Chengb783fa32007-07-19 01:14:50 +00001362def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001363 "movapd\t{$src, $dst|$dst, $src}",
Dan Gohman4a4f1512007-07-18 20:23:34 +00001364 [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001365
Evan Chengb783fa32007-07-19 01:14:50 +00001366def MOVUPDrr : PDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001367 "movupd\t{$src, $dst|$dst, $src}", []>;
Evan Cheng4e84e452007-08-30 05:49:43 +00001368let isLoad = 1 in
Evan Chengb783fa32007-07-19 01:14:50 +00001369def MOVUPDrm : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001370 "movupd\t{$src, $dst|$dst, $src}",
Dan Gohman4a4f1512007-07-18 20:23:34 +00001371 [(set VR128:$dst, (loadv2f64 addr:$src))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001372def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001373 "movupd\t{$src, $dst|$dst, $src}",
Dan Gohman4a4f1512007-07-18 20:23:34 +00001374 [(store (v2f64 VR128:$src), addr:$dst)]>;
1375
1376// Intrinsic forms of MOVUPD load and store
Evan Chengb783fa32007-07-19 01:14:50 +00001377def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001378 "movupd\t{$src, $dst|$dst, $src}",
Dan Gohman4a4f1512007-07-18 20:23:34 +00001379 [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001380def MOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001381 "movupd\t{$src, $dst|$dst, $src}",
Dan Gohman4a4f1512007-07-18 20:23:34 +00001382 [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001383
1384let isTwoAddress = 1 in {
1385 let AddedComplexity = 20 in {
1386 def MOVLPDrm : PDI<0x12, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001387 (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001388 "movlpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001389 [(set VR128:$dst,
1390 (v2f64 (vector_shuffle VR128:$src1,
1391 (scalar_to_vector (loadf64 addr:$src2)),
1392 MOVLP_shuffle_mask)))]>;
1393 def MOVHPDrm : PDI<0x16, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001394 (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001395 "movhpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001396 [(set VR128:$dst,
1397 (v2f64 (vector_shuffle VR128:$src1,
1398 (scalar_to_vector (loadf64 addr:$src2)),
1399 MOVHP_shuffle_mask)))]>;
1400 } // AddedComplexity
1401} // isTwoAddress
1402
Evan Chengb783fa32007-07-19 01:14:50 +00001403def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001404 "movlpd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001405 [(store (f64 (vector_extract (v2f64 VR128:$src),
1406 (iPTR 0))), addr:$dst)]>;
1407
1408// v2f64 extract element 1 is always custom lowered to unpack high to low
1409// and extract element 0 so the non-store version isn't too horrible.
Evan Chengb783fa32007-07-19 01:14:50 +00001410def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001411 "movhpd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001412 [(store (f64 (vector_extract
1413 (v2f64 (vector_shuffle VR128:$src, (undef),
1414 UNPCKH_shuffle_mask)), (iPTR 0))),
1415 addr:$dst)]>;
1416
1417// SSE2 instructions without OpSize prefix
Evan Chengb783fa32007-07-19 01:14:50 +00001418def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001419 "cvtdq2ps\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001420 [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
1421 TB, Requires<[HasSSE2]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001422def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001423 "cvtdq2ps\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001424 [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
Dan Gohman4a4f1512007-07-18 20:23:34 +00001425 (bitconvert (memopv2i64 addr:$src))))]>,
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001426 TB, Requires<[HasSSE2]>;
1427
1428// SSE2 instructions with XS prefix
Evan Chengb783fa32007-07-19 01:14:50 +00001429def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001430 "cvtdq2pd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001431 [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
1432 XS, Requires<[HasSSE2]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001433def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001434 "cvtdq2pd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001435 [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
Dan Gohman4a4f1512007-07-18 20:23:34 +00001436 (bitconvert (memopv2i64 addr:$src))))]>,
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001437 XS, Requires<[HasSSE2]>;
1438
Evan Chengb783fa32007-07-19 01:14:50 +00001439def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001440 "cvtps2dq\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001441 [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001442def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001443 "cvtps2dq\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001444 [(set VR128:$dst, (int_x86_sse2_cvtps2dq
1445 (load addr:$src)))]>;
1446// SSE2 packed instructions with XS prefix
Evan Chengb783fa32007-07-19 01:14:50 +00001447def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001448 "cvttps2dq\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001449 [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))]>,
1450 XS, Requires<[HasSSE2]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001451def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001452 "cvttps2dq\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001453 [(set VR128:$dst, (int_x86_sse2_cvttps2dq
1454 (load addr:$src)))]>,
1455 XS, Requires<[HasSSE2]>;
1456
1457// SSE2 packed instructions with XD prefix
Evan Chengb783fa32007-07-19 01:14:50 +00001458def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001459 "cvtpd2dq\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001460 [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
1461 XD, Requires<[HasSSE2]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001462def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001463 "cvtpd2dq\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001464 [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
1465 (load addr:$src)))]>,
1466 XD, Requires<[HasSSE2]>;
1467
Evan Chengb783fa32007-07-19 01:14:50 +00001468def Int_CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001469 "cvttpd2dq\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001470 [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001471def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001472 "cvttpd2dq\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001473 [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
1474 (load addr:$src)))]>;
1475
1476// SSE2 instructions without OpSize prefix
Evan Chengb783fa32007-07-19 01:14:50 +00001477def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001478 "cvtps2pd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001479 [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
1480 TB, Requires<[HasSSE2]>;
// Memory-operand form of the cvtps2pd intrinsic.  The source is a memory
// reference (f64mem), so the instruction must be declared with the MRMSrcMem
// form rather than MRMSrcReg.
def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                       "cvtps2pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtps2pd
                                          (load addr:$src)))]>,
                     TB, Requires<[HasSSE2]>;
1486
Evan Chengb783fa32007-07-19 01:14:50 +00001487def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001488 "cvtpd2ps\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001489 [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
// Memory-operand form of the cvtpd2ps intrinsic.  The source is a memory
// reference (f128mem), so the instruction must be declared with the MRMSrcMem
// form rather than MRMSrcReg.
def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst),
                         (ins f128mem:$src),
                         "cvtpd2ps\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
                                            (load addr:$src)))]>;
1494
1495// Match intrinsics which expect XMM operand(s).
1496// Aliases for intrinsics
1497let isTwoAddress = 1 in {
1498def Int_CVTSI2SDrr: SDI<0x2A, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001499 (outs VR128:$dst), (ins VR128:$src1, GR32:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001500 "cvtsi2sd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001501 [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1,
1502 GR32:$src2))]>;
1503def Int_CVTSI2SDrm: SDI<0x2A, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001504 (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001505 "cvtsi2sd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001506 [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1,
1507 (loadi32 addr:$src2)))]>;
1508def Int_CVTSD2SSrr: SDI<0x5A, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001509 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001510 "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001511 [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
1512 VR128:$src2))]>;
1513def Int_CVTSD2SSrm: SDI<0x5A, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001514 (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001515 "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001516 [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
1517 (load addr:$src2)))]>;
1518def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001519 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001520 "cvtss2sd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001521 [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
1522 VR128:$src2))]>, XS,
1523 Requires<[HasSSE2]>;
1524def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001525 (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001526 "cvtss2sd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001527 [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
1528 (load addr:$src2)))]>, XS,
1529 Requires<[HasSSE2]>;
1530}
1531
1532// Arithmetic
1533
/// sse2_fp_unop_rm - SSE2 unops come in both scalar and vector forms.
///
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation.  This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a
/// scalar) and leaves the top elements undefined.
///
/// And, we have a special variant form for a full-vector intrinsic form.
///
/// These four forms can each have a reg or a mem operand, so there are a
/// total of eight "instructions".
///
/// Template arguments:
///   opc         - opcode byte shared by all eight forms.
///   OpcodeStr   - mnemonic stem; "sd" / "pd" is appended per form.
///   OpNode      - DAG node matched by the plain scalar and vector forms.
///   F64Int      - intrinsic matched by the scalar "_Int" forms.
///   V2F64Int    - intrinsic matched by the full-vector "_Int" forms.
///   Commutable  - forwarded to isCommutable on the register forms.
///
multiclass sse2_fp_unop_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode,
                           Intrinsic F64Int,
                           Intrinsic V2F64Int,
                           bit Commutable = 0> {
  // Scalar operation, reg.
  def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
                !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                [(set FR64:$dst, (OpNode FR64:$src))]> {
    let isCommutable = Commutable;
  }

  // Scalar operation, mem.
  def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
                !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                [(set FR64:$dst, (OpNode (load addr:$src)))]>;

  // Vector operation, reg.
  def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]> {
    let isCommutable = Commutable;
  }

  // Vector operation, mem.
  def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>;

  // Intrinsic operation, reg.
  def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F64Int VR128:$src))]> {
    let isCommutable = Commutable;
  }

  // Intrinsic operation, mem.
  def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
                    !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;

  // Vector intrinsic operation, reg
  def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (V2F64Int VR128:$src))]> {
    let isCommutable = Commutable;
  }

  // Vector intrinsic operation, mem
  // Uses memopv2f64 (like PDm above) rather than a plain load so that only
  // loads acceptable as a 128-bit packed memory operand are folded.
  def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                    !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>;
}
1599
// Square root.
// Instantiates the SSE2 unary-op multiclass at opcode 0x51: the fsqrt node
// drives the plain vector forms, the two intrinsics drive the *_Int forms.
defm SQRT : sse2_fp_unop_rm<0x51,
                            "sqrt",
                            fsqrt,
                            int_x86_sse2_sqrt_sd,
                            int_x86_sse2_sqrt_pd>;
1603
1604// There is no f64 version of the reciprocal approximation instructions.
1605
// Logical
// Bitwise AND / OR / XOR / AND-NOT on v2f64 registers.  The patterns operate
// on the v2i64 bitcast of each operand; every form is two-address ($dst is
// tied to $src1), so only $src2 appears in the assembly string.
let isTwoAddress = 1 in {
  // Register-register forms of the symmetric operations.
  let isCommutable = 1 in {
    def ANDPDrr : PDI<0x54, MRMSrcReg,
                      (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      "andpd\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                            (and (bc_v2i64 (v2f64 VR128:$src1)),
                                 (bc_v2i64 (v2f64 VR128:$src2))))]>;

    def ORPDrr  : PDI<0x56, MRMSrcReg,
                      (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      "orpd\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                            (or (bc_v2i64 (v2f64 VR128:$src1)),
                                (bc_v2i64 (v2f64 VR128:$src2))))]>;

    def XORPDrr : PDI<0x57, MRMSrcReg,
                      (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      "xorpd\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                            (xor (bc_v2i64 (v2f64 VR128:$src1)),
                                 (bc_v2i64 (v2f64 VR128:$src2))))]>;
  }

  // Register-memory forms: the second operand is a folded v2i64 memop.
  def ANDPDrm : PDI<0x54, MRMSrcMem,
                    (outs VR128:$dst),
                    (ins VR128:$src1, f128mem:$src2),
                    "andpd\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                          (and (bc_v2i64 (v2f64 VR128:$src1)),
                               (memopv2i64 addr:$src2)))]>;

  def ORPDrm  : PDI<0x56, MRMSrcMem,
                    (outs VR128:$dst),
                    (ins VR128:$src1, f128mem:$src2),
                    "orpd\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                          (or (bc_v2i64 (v2f64 VR128:$src1)),
                              (memopv2i64 addr:$src2)))]>;

  def XORPDrm : PDI<0x57, MRMSrcMem,
                    (outs VR128:$dst),
                    (ins VR128:$src1, f128mem:$src2),
                    "xorpd\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                          (xor (bc_v2i64 (v2f64 VR128:$src1)),
                               (memopv2i64 addr:$src2)))]>;

  // AND-NOT: the pattern complements $src1 (vnot) before the AND, so these
  // forms are not placed in the commutable group.
  def ANDNPDrr : PDI<0x55, MRMSrcReg,
                     (outs VR128:$dst),
                     (ins VR128:$src1, VR128:$src2),
                     "andnpd\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                           (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
                                (bc_v2i64 (v2f64 VR128:$src2))))]>;

  def ANDNPDrm : PDI<0x55, MRMSrcMem,
                     (outs VR128:$dst),
                     (ins VR128:$src1, f128mem:$src2),
                     "andnpd\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                           (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
                                (memopv2i64 addr:$src2)))]>;
}
1660
// Packed double-precision compare, selected from int_x86_sse2_cmp_pd.  The
// condition code is an SSECC operand spliced into the mnemonic via ${cc}.
// Two-address: $dst is tied to $src1.
let isTwoAddress = 1 in {
  def CMPPDrri : PDIi8<0xC2, MRMSrcReg,
                    (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
                    "cmp${cc}pd\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
                                       VR128:$src, imm:$cc))]>;
  // The folded operand is matched with memopv2f64 rather than a plain `load`,
  // in line with the other folded-load SSE2 patterns in this file.  A plain
  // `load` would also match loads not known to be aligned.
  // NOTE(review): confirm memopv2f64 enforces the alignment that the 128-bit
  // memory operand of cmppd needs.
  def CMPPDrmi : PDIi8<0xC2, MRMSrcMem,
                    (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
                    "cmp${cc}pd\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
                                       (memopv2f64 addr:$src), imm:$cc))]>;
}
1673
// Shuffle and unpack instructions
// All forms are two-address and are selected from vector_shuffle nodes on
// v2f64 whose mask satisfies the named shuffle-mask predicate.
let isTwoAddress = 1 in {
  // SHUFPD: the matched mask is carried as the 8-bit immediate $src3.
  def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
                        (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2, i8imm:$src3),
                        "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                        [(set VR128:$dst,
                              (v2f64 (vector_shuffle
                                      VR128:$src1,
                                      VR128:$src2,
                                      SHUFP_shuffle_mask:$src3)))]>;

  def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
                        (outs VR128:$dst),
                        (ins VR128:$src1, f128mem:$src2, i8imm:$src3),
                        "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                        [(set VR128:$dst,
                              (v2f64 (vector_shuffle
                                      VR128:$src1,
                                      (memopv2f64 addr:$src2),
                                      SHUFP_shuffle_mask:$src3)))]>;

  // The unpack patterns carry extra pattern complexity (AddedComplexity = 10).
  let AddedComplexity = 10 in {
    def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
                         (outs VR128:$dst),
                         (ins VR128:$src1, VR128:$src2),
                         "unpckhpd\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                               (v2f64 (vector_shuffle
                                       VR128:$src1,
                                       VR128:$src2,
                                       UNPCKH_shuffle_mask)))]>;

    def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
                         (outs VR128:$dst),
                         (ins VR128:$src1, f128mem:$src2),
                         "unpckhpd\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                               (v2f64 (vector_shuffle
                                       VR128:$src1,
                                       (memopv2f64 addr:$src2),
                                       UNPCKH_shuffle_mask)))]>;

    def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
                         (outs VR128:$dst),
                         (ins VR128:$src1, VR128:$src2),
                         "unpcklpd\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                               (v2f64 (vector_shuffle
                                       VR128:$src1,
                                       VR128:$src2,
                                       UNPCKL_shuffle_mask)))]>;

    def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
                         (outs VR128:$dst),
                         (ins VR128:$src1, f128mem:$src2),
                         "unpcklpd\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                               (v2f64 (vector_shuffle
                                       VR128:$src1,
                                       (memopv2f64 addr:$src2),
                                       UNPCKL_shuffle_mask)))]>;
  } // AddedComplexity
} // isTwoAddress
1723
1724
1725//===----------------------------------------------------------------------===//
1726// SSE integer instructions
1727
// Move Instructions
// 128-bit integer moves.  None of these carries an active selection pattern:
// the load/store patterns are kept only as comments inside the pattern lists.
// The two load forms are tagged isLoad.
def MOVDQArr : PDI<0x6F, MRMSrcReg,
                   (outs VR128:$dst),
                   (ins VR128:$src),
                   "movdqa\t{$src, $dst|$dst, $src}",
                   []>;

let isLoad = 1 in
def MOVDQArm : PDI<0x6F, MRMSrcMem,
                   (outs VR128:$dst),
                   (ins i128mem:$src),
                   "movdqa\t{$src, $dst|$dst, $src}",
                   [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;

def MOVDQAmr : PDI<0x7F, MRMDestMem,
                   (outs),
                   (ins i128mem:$dst, VR128:$src),
                   "movdqa\t{$src, $dst|$dst, $src}",
                   [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;

// The MOVDQU forms are built on the plain I class with an XS prefix and an
// explicit HasSSE2 requirement.
let isLoad = 1 in
def MOVDQUrm : I<0x6F, MRMSrcMem,
                 (outs VR128:$dst),
                 (ins i128mem:$src),
                 "movdqu\t{$src, $dst|$dst, $src}",
                 [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
               XS, Requires<[HasSSE2]>;

def MOVDQUmr : I<0x7F, MRMDestMem,
                 (outs),
                 (ins i128mem:$dst, VR128:$src),
                 "movdqu\t{$src, $dst|$dst, $src}",
                 [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
               XS, Requires<[HasSSE2]>;
1747
// Intrinsic forms of MOVDQU load and store
// Same encodings as MOVDQUrm / MOVDQUmr, but selected from the
// int_x86_sse2_loadu_dq and int_x86_sse2_storeu_dq intrinsics.
let isLoad = 1 in
def MOVDQUrm_Int : I<0x6F, MRMSrcMem,
                     (outs VR128:$dst),
                     (ins i128mem:$src),
                     "movdqu\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                           (int_x86_sse2_loadu_dq addr:$src))]>,
                   XS, Requires<[HasSSE2]>;

def MOVDQUmr_Int : I<0x7F, MRMDestMem,
                     (outs),
                     (ins i128mem:$dst, VR128:$src),
                     "movdqu\t{$src, $dst|$dst, $src}",
                     [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
                   XS, Requires<[HasSSE2]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001758
// Multiclasses for two-address SSE2 integer binary operations.  Each one emits
// an `rr` (register source) and an `rm` (folded 128-bit memory source) record;
// PDI_binop_rmi_int additionally emits an `ri` immediate form.
let isTwoAddress = 1 in {

/// PDI_binop_rm_int - SSE2 binary operator selected from an intrinsic.
/// Commutable is applied to the rr form only.
multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
                            bit Commutable = 0> {
  def rr : PDI<opc, MRMSrcReg,
               (outs VR128:$dst),
               (ins VR128:$src1, VR128:$src2),
               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
               [(set VR128:$dst,
                     (IntId VR128:$src1, VR128:$src2))]> {
    let isCommutable = Commutable;
  }

  def rm : PDI<opc, MRMSrcMem,
               (outs VR128:$dst),
               (ins VR128:$src1, i128mem:$src2),
               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
               [(set VR128:$dst,
                     (IntId VR128:$src1,
                            (bitconvert (memopv2i64 addr:$src2))))]>;
}

/// PDI_binop_rmi_int - Intrinsic binary operator that also has an immediate
/// form.  `opc` encodes the rr/rm forms; `opc2` with format `ImmForm` encodes
/// the ri form, whose immediate is wrapped in scalar_to_vector for the
/// intrinsic's second operand.
multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
                             string OpcodeStr, Intrinsic IntId> {
  def rr : PDI<opc, MRMSrcReg,
               (outs VR128:$dst),
               (ins VR128:$src1, VR128:$src2),
               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
               [(set VR128:$dst,
                     (IntId VR128:$src1, VR128:$src2))]>;

  def rm : PDI<opc, MRMSrcMem,
               (outs VR128:$dst),
               (ins VR128:$src1, i128mem:$src2),
               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
               [(set VR128:$dst,
                     (IntId VR128:$src1,
                            (bitconvert (memopv2i64 addr:$src2))))]>;

  def ri : PDIi8<opc2, ImmForm,
                 (outs VR128:$dst),
                 (ins VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst,
                       (IntId VR128:$src1,
                              (scalar_to_vector (i32 imm:$src2))))]>;
}


/// PDI_binop_rm - Simple SSE2 binary operator.
/// The result is typed as OpVT; the memory operand is loaded as v2i64 and
/// bitconverted.
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        ValueType OpVT, bit Commutable = 0> {
  def rr : PDI<opc, MRMSrcReg,
               (outs VR128:$dst),
               (ins VR128:$src1, VR128:$src2),
               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
               [(set VR128:$dst,
                     (OpVT (OpNode VR128:$src1, VR128:$src2)))]> {
    let isCommutable = Commutable;
  }

  def rm : PDI<opc, MRMSrcMem,
               (outs VR128:$dst),
               (ins VR128:$src1, i128mem:$src2),
               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
               [(set VR128:$dst,
                     (OpVT (OpNode VR128:$src1,
                                   (bitconvert (memopv2i64 addr:$src2)))))]>;
}

/// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64.
///
/// FIXME: we could eliminate this and use PDI_binop_rm instead if tblgen knew
/// to collapse (bitconvert VT to VT) into its operand.
///
multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              bit Commutable = 0> {
  def rr : PDI<opc, MRMSrcReg,
               (outs VR128:$dst),
               (ins VR128:$src1, VR128:$src2),
               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
               [(set VR128:$dst,
                     (v2i64 (OpNode VR128:$src1, VR128:$src2)))]> {
    let isCommutable = Commutable;
  }

  def rm : PDI<opc, MRMSrcMem,
               (outs VR128:$dst),
               (ins VR128:$src1, i128mem:$src2),
               !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
               [(set VR128:$dst,
                     (OpNode VR128:$src1, (memopv2i64 addr:$src2)))]>;
}

} // isTwoAddress
1822
// 128-bit Integer Arithmetic
// A trailing `1` sets the multiclass's Commutable bit; instantiations without
// it take the default of 0.

// Wrapping add, selected from the generic `add` node.
defm PADDB   : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>;
defm PADDW   : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>;
defm PADDD   : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>;
defm PADDQ   : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>;

// Saturating add, selected from intrinsics.
defm PADDSB  : PDI_binop_rm_int<0xEC, "paddsb",  int_x86_sse2_padds_b,  1>;
defm PADDSW  : PDI_binop_rm_int<0xED, "paddsw",  int_x86_sse2_padds_w,  1>;
defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>;
defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>;

// Wrapping subtract, selected from the generic `sub` node.
defm PSUBB   : PDI_binop_rm<0xF8, "psubb", sub, v16i8>;
defm PSUBW   : PDI_binop_rm<0xF9, "psubw", sub, v8i16>;
defm PSUBD   : PDI_binop_rm<0xFA, "psubd", sub, v4i32>;
defm PSUBQ   : PDI_binop_rm_v2i64<0xFB, "psubq", sub>;

// Saturating subtract, selected from intrinsics.
defm PSUBSB  : PDI_binop_rm_int<0xE8, "psubsb",  int_x86_sse2_psubs_b>;
defm PSUBSW  : PDI_binop_rm_int<0xE9, "psubsw",  int_x86_sse2_psubs_w>;
defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b>;
defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w>;

// Multiplies.
defm PMULLW  : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>;

defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>;
defm PMULHW  : PDI_binop_rm_int<0xE5, "pmulhw",  int_x86_sse2_pmulh_w,  1>;
defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>;

defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>;

// Averages.
defm PAVGB   : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>;
defm PAVGW   : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>;


// Minimum / maximum.
defm PMINUB  : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>;
defm PMINSW  : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>;
defm PMAXUB  : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>;
defm PMAXSW  : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;
// Sum of absolute byte differences, selected from int_x86_sse2_psad_bw.
// PSADBW is encoded as 66 0F F6 /r.  The opcode previously given here, 0xE0,
// is the one PAVGB is defined with, so PSADBW would have been emitted as pavgb.
defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
1862
1863
// Per-element shifts, selected from intrinsics.  In each instantiation the
// first opcode encodes the rr/rm forms; the second opcode together with the
// MRMnr format encodes the immediate (ri) form.

// Shift left logical.
defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
                               int_x86_sse2_psll_w>;
defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
                               int_x86_sse2_psll_d>;
defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
                               int_x86_sse2_psll_q>;

// Shift right logical.
defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
                               int_x86_sse2_psrl_w>;
defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
                               int_x86_sse2_psrl_d>;
defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
                               int_x86_sse2_psrl_q>;

// Shift right arithmetic.
defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
                               int_x86_sse2_psra_w>;
defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
                               int_x86_sse2_psra_d>;
// PSRAQ doesn't exist in SSE[1-3].
1875
// 128-bit logical shifts.
// Whole-register shifts by an immediate.  The definitions carry no pattern of
// their own (empty pattern list).
let isTwoAddress = 1 in {
  def PSLLDQri : PDIi8<0x73, MRM7r,
                       (outs VR128:$dst),
                       (ins VR128:$src1, i32i8imm:$src2),
                       "pslldq\t{$src2, $dst|$dst, $src2}",
                       []>;

  def PSRLDQri : PDIi8<0x73, MRM3r,
                       (outs VR128:$dst),
                       (ins VR128:$src1, i32i8imm:$src2),
                       "psrldq\t{$src2, $dst|$dst, $src2}",
                       []>;
  // PSRADQri doesn't exist in SSE[1-3].
}
1886
// Selection patterns for the whole-register shifts, active only with SSE2.
// Each one rewrites its source node to PSLLDQri / PSRLDQri and passes the
// immediate through the PSxLDQ_imm transform.
// NOTE(review): PSxLDQ_imm is defined elsewhere; presumably it rescales the
// shift amount for the instruction's immediate -- confirm.
let Predicates = [HasSSE2] in {
  // int_x86_sse2_psll_dq -> pslldq
  def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
            (v2i64 (PSLLDQri VR128:$src1,
                             (PSxLDQ_imm imm:$src2)))>;

  // int_x86_sse2_psrl_dq -> psrldq
  def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
            (v2i64 (PSRLDQri VR128:$src1,
                             (PSxLDQ_imm imm:$src2)))>;

  // X86fsrl on v2f64 -> psrldq
  def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
            (v2f64 (PSRLDQri VR128:$src1,
                             (PSxLDQ_imm imm:$src2)))>;
}
1895
// Logical
// Integer bitwise AND / OR / XOR on v2i64, all marked commutable.
defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
defm POR  : PDI_binop_rm_v2i64<0xEB, "por",  or,  1>;
defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;
1900
// Integer AND-NOT: the pattern is (and (vnot $src1), $src2) on v2i64.
// Two-address, so $dst is tied to $src1.
let isTwoAddress = 1 in {
  def PANDNrr : PDI<0xDF, MRMSrcReg,
                    (outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src2),
                    "pandn\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                          (v2i64 (and (vnot VR128:$src1),
                                      VR128:$src2)))]>;

  def PANDNrm : PDI<0xDF, MRMSrcMem,
                    (outs VR128:$dst),
                    (ins VR128:$src1, i128mem:$src2),
                    "pandn\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                          (v2i64 (and (vnot VR128:$src1),
                                      (memopv2i64 addr:$src2))))]>;
}
1914
// SSE2 Integer comparison
// Selected from intrinsics; none of these sets the Commutable bit.

// Equality.
defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb",
                                int_x86_sse2_pcmpeq_b>;
defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw",
                                int_x86_sse2_pcmpeq_w>;
defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd",
                                int_x86_sse2_pcmpeq_d>;

// Greater-than.
defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb",
                                int_x86_sse2_pcmpgt_b>;
defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw",
                                int_x86_sse2_pcmpgt_w>;
defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd",
                                int_x86_sse2_pcmpgt_d>;
1922
// Pack instructions
// Selected from the 128-bit pack intrinsics; not marked commutable.
defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb",
                                 int_x86_sse2_packsswb_128>;
defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw",
                                 int_x86_sse2_packssdw_128>;
defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb",
                                 int_x86_sse2_packuswb_128>;
1927
// Shuffle and unpack instructions
// PSHUFD: v4i32 shuffle of a single source (the second shuffle input is
// undef); the matched mask becomes the 8-bit immediate $src2.  Not
// two-address: the source is a separate operand and appears in the assembly.
def PSHUFDri : PDIi8<0x70, MRMSrcReg,
                     (outs VR128:$dst),
                     (ins VR128:$src1, i8imm:$src2),
                     "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR128:$dst,
                           (v4i32 (vector_shuffle
                                   VR128:$src1,
                                   (undef),
                                   PSHUFD_shuffle_mask:$src2)))]>;

def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
                     (outs VR128:$dst),
                     (ins i128mem:$src1, i8imm:$src2),
                     "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR128:$dst,
                           (v4i32 (vector_shuffle
                                   (bc_v4i32 (memopv2i64 addr:$src1)),
                                   (undef),
                                   PSHUFD_shuffle_mask:$src2)))]>;
1942
// SSE2 with ImmT == Imm8 and XS prefix.
// PSHUFHW: v8i16 single-source shuffle matched by PSHUFHW_shuffle_mask.
def PSHUFHWri : Ii8<0x70, MRMSrcReg,
                    (outs VR128:$dst),
                    (ins VR128:$src1, i8imm:$src2),
                    "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst,
                          (v8i16 (vector_shuffle
                                  VR128:$src1,
                                  (undef),
                                  PSHUFHW_shuffle_mask:$src2)))]>,
                XS, Requires<[HasSSE2]>;

def PSHUFHWmi : Ii8<0x70, MRMSrcMem,
                    (outs VR128:$dst),
                    (ins i128mem:$src1, i8imm:$src2),
                    "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst,
                          (v8i16 (vector_shuffle
                                  (bc_v8i16 (memopv2i64 addr:$src1)),
                                  (undef),
                                  PSHUFHW_shuffle_mask:$src2)))]>,
                XS, Requires<[HasSSE2]>;
1959
// SSE2 with ImmT == Imm8 and XD prefix.
// PSHUFLW: v8i16 single-source shuffle matched by PSHUFLW_shuffle_mask.
// Unlike PSHUFD / PSHUFHW, the immediate operand here is declared i32i8imm.
def PSHUFLWri : Ii8<0x70, MRMSrcReg,
                    (outs VR128:$dst),
                    (ins VR128:$src1, i32i8imm:$src2),
                    "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst,
                          (v8i16 (vector_shuffle
                                  VR128:$src1,
                                  (undef),
                                  PSHUFLW_shuffle_mask:$src2)))]>,
                XD, Requires<[HasSSE2]>;

def PSHUFLWmi : Ii8<0x70, MRMSrcMem,
                    (outs VR128:$dst),
                    (ins i128mem:$src1, i32i8imm:$src2),
                    "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst,
                          (v8i16 (vector_shuffle
                                  (bc_v8i16 (memopv2i64 addr:$src1)),
                                  (undef),
                                  PSHUFLW_shuffle_mask:$src2)))]>,
                XD, Requires<[HasSSE2]>;
1976
1977
// Integer unpack (interleave) instructions.  Every form is two-address and is
// selected from a vector_shuffle whose mask satisfies UNPCKL_shuffle_mask
// (low variants) or UNPCKH_shuffle_mask (high variants).  Memory forms load
// the second operand as v2i64 and bitcast it to the element type, except the
// QDQ forms, which already operate on v2i64.
let isTwoAddress = 1 in {
  //===--- Unpack low -----------------------------------------------------===//
  def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
                        (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2),
                        "punpcklbw\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst,
                              (v16i8 (vector_shuffle
                                      VR128:$src1,
                                      VR128:$src2,
                                      UNPCKL_shuffle_mask)))]>;
  def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
                        (outs VR128:$dst),
                        (ins VR128:$src1, i128mem:$src2),
                        "punpcklbw\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst,
                              (v16i8 (vector_shuffle
                                      VR128:$src1,
                                      (bc_v16i8 (memopv2i64 addr:$src2)),
                                      UNPCKL_shuffle_mask)))]>;

  def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
                        (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2),
                        "punpcklwd\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst,
                              (v8i16 (vector_shuffle
                                      VR128:$src1,
                                      VR128:$src2,
                                      UNPCKL_shuffle_mask)))]>;
  def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
                        (outs VR128:$dst),
                        (ins VR128:$src1, i128mem:$src2),
                        "punpcklwd\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst,
                              (v8i16 (vector_shuffle
                                      VR128:$src1,
                                      (bc_v8i16 (memopv2i64 addr:$src2)),
                                      UNPCKL_shuffle_mask)))]>;

  def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
                        (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2),
                        "punpckldq\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst,
                              (v4i32 (vector_shuffle
                                      VR128:$src1,
                                      VR128:$src2,
                                      UNPCKL_shuffle_mask)))]>;
  def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
                        (outs VR128:$dst),
                        (ins VR128:$src1, i128mem:$src2),
                        "punpckldq\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst,
                              (v4i32 (vector_shuffle
                                      VR128:$src1,
                                      (bc_v4i32 (memopv2i64 addr:$src2)),
                                      UNPCKL_shuffle_mask)))]>;

  def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
                         (outs VR128:$dst),
                         (ins VR128:$src1, VR128:$src2),
                         "punpcklqdq\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                               (v2i64 (vector_shuffle
                                       VR128:$src1,
                                       VR128:$src2,
                                       UNPCKL_shuffle_mask)))]>;
  def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
                         (outs VR128:$dst),
                         (ins VR128:$src1, i128mem:$src2),
                         "punpcklqdq\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                               (v2i64 (vector_shuffle
                                       VR128:$src1,
                                       (memopv2i64 addr:$src2),
                                       UNPCKL_shuffle_mask)))]>;

  //===--- Unpack high ----------------------------------------------------===//
  def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
                        (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2),
                        "punpckhbw\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst,
                              (v16i8 (vector_shuffle
                                      VR128:$src1,
                                      VR128:$src2,
                                      UNPCKH_shuffle_mask)))]>;
  def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
                        (outs VR128:$dst),
                        (ins VR128:$src1, i128mem:$src2),
                        "punpckhbw\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst,
                              (v16i8 (vector_shuffle
                                      VR128:$src1,
                                      (bc_v16i8 (memopv2i64 addr:$src2)),
                                      UNPCKH_shuffle_mask)))]>;

  def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
                        (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2),
                        "punpckhwd\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst,
                              (v8i16 (vector_shuffle
                                      VR128:$src1,
                                      VR128:$src2,
                                      UNPCKH_shuffle_mask)))]>;
  def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
                        (outs VR128:$dst),
                        (ins VR128:$src1, i128mem:$src2),
                        "punpckhwd\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst,
                              (v8i16 (vector_shuffle
                                      VR128:$src1,
                                      (bc_v8i16 (memopv2i64 addr:$src2)),
                                      UNPCKH_shuffle_mask)))]>;

  def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
                        (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2),
                        "punpckhdq\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst,
                              (v4i32 (vector_shuffle
                                      VR128:$src1,
                                      VR128:$src2,
                                      UNPCKH_shuffle_mask)))]>;
  def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
                        (outs VR128:$dst),
                        (ins VR128:$src1, i128mem:$src2),
                        "punpckhdq\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst,
                              (v4i32 (vector_shuffle
                                      VR128:$src1,
                                      (bc_v4i32 (memopv2i64 addr:$src2)),
                                      UNPCKH_shuffle_mask)))]>;

  def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
                         (outs VR128:$dst),
                         (ins VR128:$src1, VR128:$src2),
                         "punpckhqdq\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                               (v2i64 (vector_shuffle
                                       VR128:$src1,
                                       VR128:$src2,
                                       UNPCKH_shuffle_mask)))]>;
  def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
                         (outs VR128:$dst),
                         (ins VR128:$src1, i128mem:$src2),
                         "punpckhqdq\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                               (v2i64 (vector_shuffle
                                       VR128:$src1,
                                       (memopv2i64 addr:$src2),
                                       UNPCKH_shuffle_mask)))]>;
}
2085
// Extract / Insert
// PEXTRW: selected from X86pextrw on a v8i16 source with an immediate index;
// the result is written to a GR32 register.
def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
                     (outs GR32:$dst),
                     (ins VR128:$src1, i32i8imm:$src2),
                     "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set GR32:$dst,
                           (X86pextrw (v8i16 VR128:$src1),
                                      (iPTR imm:$src2)))]>;
// PINSRW: selected from X86pinsrw, which takes the v8i16 vector $src1, a
// 32-bit value $src2 and an immediate index $src3.  Two-address: $dst is tied
// to $src1.
let isTwoAddress = 1 in {
  // Value taken from a GR32 register.
  def PINSRWrri : PDIi8<0xC4, MRMSrcReg,
                        (outs VR128:$dst),
                        (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
                        "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                        [(set VR128:$dst,
                              (v8i16 (X86pinsrw (v8i16 VR128:$src1),
                                                GR32:$src2,
                                                (iPTR imm:$src3))))]>;

  // Value taken from a 16-bit load, any-extended to i32 in the pattern.
  def PINSRWrmi : PDIi8<0xC4, MRMSrcMem,
                        (outs VR128:$dst),
                        (ins VR128:$src1, i16mem:$src2, i32i8imm:$src3),
                        "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                        [(set VR128:$dst,
                              (v8i16 (X86pinsrw (v8i16 VR128:$src1),
                                                (i32 (anyext (loadi16 addr:$src2))),
                                                (iPTR imm:$src3))))]>;
}
2109
// Mask creation
// Selected from int_x86_sse2_pmovmskb_128; the result is a GR32 register.
def PMOVMSKBrr : PDI<0xD7, MRMSrcReg,
                     (outs GR32:$dst),
                     (ins VR128:$src),
                     "pmovmskb\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst,
                           (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
2114
// Conditional store
// Selected from int_x86_sse2_maskmov_dqu.  The intrinsic's third operand is
// matched to the physical register EDI, which is declared as an implicit use
// rather than listed in the instruction's explicit operands.
let Uses = [EDI] in
def MASKMOVDQU : PDI<0xF7, MRMSrcReg,
                     (outs),
                     (ins VR128:$src, VR128:$mask),
                     "maskmovdqu\t{$mask, $src|$src, $mask}",
                     [(int_x86_sse2_maskmov_dqu VR128:$src,
                                                VR128:$mask,
                                                EDI)]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002120
2121// Non-temporal stores
// The memory operand class follows the kind of data being stored, matching the
// convention used elsewhere in this file: f128mem for packed floating point
// (movntpd), i128mem for packed integer (movntdq), i32mem for the GPR store.
def MOVNTPDmr : PDI<0x2B, MRMDestMem,
                    (outs), (ins f128mem:$dst, VR128:$src),
                    "movntpd\t{$src, $dst|$dst, $src}",
                    [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
def MOVNTDQmr : PDI<0xE7, MRMDestMem,
                    (outs), (ins i128mem:$dst, VR128:$src),
                    "movntdq\t{$src, $dst|$dst, $src}",
                    [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
// MOVNTI has no mandatory operand-size/repeat prefix, hence plain I + TB.
def MOVNTImr  : I<0xC3, MRMDestMem,
                  (outs), (ins i32mem:$dst, GR32:$src),
                  "movnti\t{$src, $dst|$dst, $src}",
                  [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
                TB, Requires<[HasSSE2]>;
2132
2133// Flush cache
// CLFLUSH: selected via int_x86_sse2_clflush on a memory address (0F AE /7).
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
                "clflush\t$src",
                [(int_x86_sse2_clflush addr:$src)]>,
              TB, Requires<[HasSSE2]>;
2137
2138// Load, store, and memory fence
// Fences take no operands; both share opcode 0xAE and differ only in the
// ModRM reg field selected by the MRM5m / MRM6m format.
def LFENCE : I<0xAE, MRM5m, (outs), (ins),
               "lfence",
               [(int_x86_sse2_lfence)]>,
             TB, Requires<[HasSSE2]>;
def MFENCE : I<0xAE, MRM6m, (outs), (ins),
               "mfence",
               [(int_x86_sse2_mfence)]>,
             TB, Requires<[HasSSE2]>;
2143
2144
// Alias instruction that maps an all-ones vector to "pcmpeqd $dst, $dst".
// It has no inputs and is marked rematerializable.
// FIXME: remove when we can teach regalloc that an instruction using the same
// register as both source and destination (e.g. xor reg, reg) is ok.
let isReMaterializable = 1 in
  def V_SETALLONES : PDI<0x76, MRMInitReg,
                         (outs VR128:$dst), (ins),
                         "pcmpeqd\t$dst, $dst",
                         [(set VR128:$dst, (v2f64 immAllOnesV))]>;
2151
2152// FR64 to 128-bit vector conversion.
// scalar_to_vector from an f64 held in FR64 (rr) or loaded from memory (rm),
// producing a v2f64 in VR128 via movsd.
def MOVSD2PDrr : SDI<0x10, MRMSrcReg,
                     (outs VR128:$dst), (ins FR64:$src),
                     "movsd\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v2f64 (scalar_to_vector FR64:$src)))]>;
def MOVSD2PDrm : SDI<0x10, MRMSrcMem,
                     (outs VR128:$dst), (ins f64mem:$src),
                     "movsd\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
2161
// scalar_to_vector from a 32-bit integer (GPR or memory) to v4i32 via movd.
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg,
                      (outs VR128:$dst), (ins GR32:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4i32 (scalar_to_vector GR32:$src)))]>;
def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem,
                      (outs VR128:$dst), (ins i32mem:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
2170
// Bitconvert of a 32-bit integer (GPR or loaded from memory) into FR32,
// implemented with movd.
def MOVDI2SSrr : PDI<0x6E, MRMSrcReg,
                     (outs FR32:$dst), (ins GR32:$src),
                     "movd\t{$src, $dst|$dst, $src}",
                     [(set FR32:$dst, (bitconvert GR32:$src))]>;

def MOVDI2SSrm : PDI<0x6E, MRMSrcMem,
                     (outs FR32:$dst), (ins i32mem:$src),
                     "movd\t{$src, $dst|$dst, $src}",
                     [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
2178
2179// SSE2 instructions with XS prefix
// 64-bit load into the low element of a v2i64 (movq with the XS prefix).
def MOVQI2PQIrm : I<0x7E, MRMSrcMem,
                    (outs VR128:$dst), (ins i64mem:$src),
                    "movq\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst,
                      (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                  XS, Requires<[HasSSE2]>;
// Store of element 0 of a v2i64 to memory.
def MOVPQI2QImr : PDI<0xD6, MRMDestMem,
                      (outs), (ins i64mem:$dst, VR128:$src),
                      "movq\t{$src, $dst|$dst, $src}",
                      [(store (i64 (vector_extract (v2i64 VR128:$src),
                                                   (iPTR 0))),
                              addr:$dst)]>;
2189
// FIXME: may not be able to eliminate this movsd with coalescing because the
// src and dest register classes are different. We really want to write this
// pattern like this:
// def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
//           (f64 FR64:$src)>;
// Extraction of element 0 from a 128-bit vector, to a register or to memory.

// v2f64 element 0 -> FR64 / f64 memory (movsd).
def MOVPD2SDrr : SDI<0x10, MRMSrcReg,
                     (outs FR64:$dst), (ins VR128:$src),
                     "movsd\t{$src, $dst|$dst, $src}",
                     [(set FR64:$dst,
                       (vector_extract (v2f64 VR128:$src), (iPTR 0)))]>;
def MOVPD2SDmr : SDI<0x11, MRMDestMem,
                     (outs), (ins f64mem:$dst, VR128:$src),
                     "movsd\t{$src, $dst|$dst, $src}",
                     [(store (f64 (vector_extract (v2f64 VR128:$src),
                                                  (iPTR 0))),
                             addr:$dst)]>;

// v4i32 element 0 -> GR32 / i32 memory (movd).
def MOVPDI2DIrr : PDI<0x7E, MRMDestReg,
                      (outs GR32:$dst), (ins VR128:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst,
                        (vector_extract (v4i32 VR128:$src), (iPTR 0)))]>;
def MOVPDI2DImr : PDI<0x7E, MRMDestMem,
                      (outs), (ins i32mem:$dst, VR128:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(store (i32 (vector_extract (v4i32 VR128:$src),
                                                   (iPTR 0))),
                              addr:$dst)]>;
2211
// Bitconvert of an FR32 value to a 32-bit integer, delivered to a GPR (rr) or
// stored to memory (mr), implemented with movd.
def MOVSS2DIrr : PDI<0x7E, MRMDestReg,
                     (outs GR32:$dst), (ins FR32:$src),
                     "movd\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (bitconvert FR32:$src))]>;
def MOVSS2DImr : PDI<0x7E, MRMDestMem,
                     (outs), (ins i32mem:$dst, FR32:$src),
                     "movd\t{$src, $dst|$dst, $src}",
                     [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
2218
2219
2220// Move to lower bits of a VR128, leaving upper bits alone.
2221// Three operand (but two address) aliases.
// $dst is tied to $src1 in both definitions; only $src2 is printed.
let isTwoAddress = 1 in {
  // No selection pattern: this form is only created explicitly.
  def MOVLSD2PDrr : SDI<0x10, MRMSrcReg,
                        (outs VR128:$dst),
                        (ins VR128:$src1, FR64:$src2),
                        "movsd\t{$src2, $dst|$dst, $src2}",
                        []>;

  // Matches a v2f64 vector_shuffle of $src1/$src2 with MOVL_shuffle_mask.
  let AddedComplexity = 15 in
    def MOVLPDrr : SDI<0x10, MRMSrcReg,
                       (outs VR128:$dst),
                       (ins VR128:$src1, VR128:$src2),
                       "movsd\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                         (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
                                                MOVL_shuffle_mask)))]>;
}
2235
2236// Store / copy lower 64-bits of a XMM register.
// Selected via the int_x86_sse2_storel_dq intrinsic; emitted as a movq store.
def MOVLQ128mr : PDI<0xD6, MRMDestMem,
                     (outs), (ins i64mem:$dst, VR128:$src),
                     "movq\t{$src, $dst|$dst, $src}",
                     [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
2240
// Move to the lower bits of a VR128 and zero the upper bits.
// Loading from memory automatically zeroes the upper bits.
// Matches a shuffle of an all-zeros vector with a scalar f64 load placed in
// the low element (MOVL_shuffle_mask) and emits a single movsd load.
let AddedComplexity = 20 in
  def MOVZSD2PDrm : SDI<0x10, MRMSrcMem,
                        (outs VR128:$dst), (ins f64mem:$src),
                        "movsd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (v2f64 (vector_shuffle
                                   immAllZerosV,
                                   (v2f64 (scalar_to_vector
                                            (loadf64 addr:$src))),
                                   MOVL_shuffle_mask)))]>;
2251
// movd / movq to XMM register zero-extends.
// Both patterns match a v4i32 shuffle of an all-zeros vector with a scalar
// placed in the low element (MOVL_shuffle_mask). The memory form is given a
// higher AddedComplexity than the register form.
let AddedComplexity = 15 in
def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg,
                       (outs VR128:$dst), (ins GR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (v4i32 (vector_shuffle
                                  immAllZerosV,
                                  (v4i32 (scalar_to_vector GR32:$src)),
                                  MOVL_shuffle_mask)))]>;
let AddedComplexity = 20 in
def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem,
                       (outs VR128:$dst), (ins i32mem:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (v4i32 (vector_shuffle
                                  immAllZerosV,
                                  (v4i32 (scalar_to_vector
                                           (loadi32 addr:$src))),
                                  MOVL_shuffle_mask)))]>;
2267
2268// Moving from XMM to XMM but still clear upper 64 bits.
// Both forms are selected through the int_x86_sse2_movl_dq intrinsic and are
// emitted as movq with the XS prefix.
let AddedComplexity = 15 in
def MOVZQI2PQIrr : I<0x7E, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src),
                     "movq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (int_x86_sse2_movl_dq VR128:$src))]>,
                   XS, Requires<[HasSSE2]>;
// Memory form: the v2i64 memop is bitconverted to the intrinsic's operand type.
let AddedComplexity = 20 in
def MOVZQI2PQIrm : I<0x7E, MRMSrcMem,
                     (outs VR128:$dst), (ins i64mem:$src),
                     "movq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (int_x86_sse2_movl_dq
                         (bitconvert (memopv2i64 addr:$src))))]>,
                   XS, Requires<[HasSSE2]>;
2280
2281
2282//===----------------------------------------------------------------------===//
2283// SSE3 Instructions
2284//===----------------------------------------------------------------------===//
2285
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002286// Move Instructions
// MOVSHDUP: v4f32 shuffle of a single source (second input undef) matching
// MOVSHDUP_shuffle_mask; register and folded-memory forms.
def MOVSHDUPrr : S3SI<0x16, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src),
                      "movshdup\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4f32 (vector_shuffle VR128:$src, (undef),
                                               MOVSHDUP_shuffle_mask)))]>;
def MOVSHDUPrm : S3SI<0x16, MRMSrcMem,
                      (outs VR128:$dst), (ins f128mem:$src),
                      "movshdup\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4f32 (vector_shuffle (memopv4f32 addr:$src), (undef),
                                               MOVSHDUP_shuffle_mask)))]>;
2297
// MOVSLDUP: v4f32 shuffle of a single source (second input undef) matching
// MOVSLDUP_shuffle_mask; register and folded-memory forms.
def MOVSLDUPrr : S3SI<0x12, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src),
                      "movsldup\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4f32 (vector_shuffle VR128:$src, (undef),
                                               MOVSLDUP_shuffle_mask)))]>;
def MOVSLDUPrm : S3SI<0x12, MRMSrcMem,
                      (outs VR128:$dst), (ins f128mem:$src),
                      "movsldup\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4f32 (vector_shuffle (memopv4f32 addr:$src), (undef),
                                               MOVSLDUP_shuffle_mask)))]>;
2308
// MOVDDUP: v2f64 shuffle of a single source (second input undef) matching
// SSE_splat_lo_mask. The memory form only loads an f64 (f64mem / loadf64).
def MOVDDUPrr : S3DI<0x12, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src),
                     "movddup\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v2f64 (vector_shuffle VR128:$src, (undef),
                                              SSE_splat_lo_mask)))]>;
def MOVDDUPrm : S3DI<0x12, MRMSrcMem,
                     (outs VR128:$dst), (ins f64mem:$src),
                     "movddup\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v2f64 (vector_shuffle
                                (scalar_to_vector (loadf64 addr:$src)),
                                (undef),
                                SSE_splat_lo_mask)))]>;
2321
2322// Arithmetic
// ADDSUBPS / ADDSUBPD, selected through the SSE3 addsub intrinsics.
// $dst is tied to $src1.
//
// The memory forms fold the load with the memop fragments rather than a plain
// `load`: memop only matches loads that are safe to fold into an SSE
// instruction's 128-bit memory operand, whereas `load` would also fold
// under-aligned loads.
let isTwoAddress = 1 in {
  def ADDSUBPSrr : S3DI<0xD0, MRMSrcReg,
                        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                        "addsubps\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst,
                          (int_x86_sse3_addsub_ps VR128:$src1,
                                                  VR128:$src2))]>;
  def ADDSUBPSrm : S3DI<0xD0, MRMSrcMem,
                        (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                        "addsubps\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst,
                          (int_x86_sse3_addsub_ps VR128:$src1,
                                                  (memopv4f32 addr:$src2)))]>;
  def ADDSUBPDrr : S3I<0xD0, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                       "addsubpd\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                         (int_x86_sse3_addsub_pd VR128:$src1,
                                                 VR128:$src2))]>;
  def ADDSUBPDrm : S3I<0xD0, MRMSrcMem,
                       (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                       "addsubpd\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                         (int_x86_sse3_addsub_pd VR128:$src1,
                                                 (memopv2f64 addr:$src2)))]>;
}
2345
// LDDQU: selected via int_x86_sse3_ldu_dq, which takes the address directly.
def LDDQUrm : S3DI<0xF0, MRMSrcMem,
                   (outs VR128:$dst), (ins i128mem:$src),
                   "lddqu\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
2349
2350// Horizontal ops
// S3D_Intrr - Binary v4f32 intrinsic, register-register form on S3DI.
//   o         - opcode byte
//   OpcodeStr - mnemonic; the two-operand asm suffix is appended here
//   IntId     - intrinsic applied to ($src1, $src2)
class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
  : S3DI<o, MRMSrcReg,
         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         [(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>;
// S3D_Intrm - Binary v4f32 intrinsic, register-memory form on S3DI.
//   o         - opcode byte
//   OpcodeStr - mnemonic; the two-operand asm suffix is appended here
//   IntId     - intrinsic applied to ($src1, loaded $src2)
// The second operand is folded with memopv4f32 rather than a plain `load`, so
// that only loads suitable for a 128-bit SSE memory operand are folded.
class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
  : S3DI<o, MRMSrcMem,
         (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         [(set VR128:$dst,
           (v4f32 (IntId VR128:$src1, (memopv4f32 addr:$src2))))]>;
// S3_Intrr - Binary v2f64 intrinsic, register-register form on S3I.
//   o         - opcode byte
//   OpcodeStr - mnemonic; the two-operand asm suffix is appended here
//   IntId     - intrinsic applied to ($src1, $src2)
class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
  : S3I<o, MRMSrcReg,
        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
        [(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>;
// S3_Intrm - Binary v2f64 intrinsic, register-memory form on S3I.
//   o         - opcode byte
//   OpcodeStr - mnemonic; the two-operand asm suffix is appended here
//   IntId     - intrinsic applied to ($src1, loaded $src2)
// The second operand is folded with memopv2f64 rather than a plain `load`, so
// that only loads suitable for a 128-bit SSE memory operand are folded.
class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
  : S3I<o, MRMSrcMem,
        (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
        [(set VR128:$dst,
          (v2f64 (IntId VR128:$src1, (memopv2f64 addr:$src2))))]>;
2367
// Horizontal add / subtract. $dst is tied to $src1. The PS forms use the
// S3D_* classes and the PD forms the S3_* classes.
let isTwoAddress = 1 in {
  // Horizontal add, opcode 0x7C.
  def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>;
  def HADDPSrm : S3D_Intrm<0x7C, "haddps", int_x86_sse3_hadd_ps>;
  def HADDPDrr : S3_Intrr <0x7C, "haddpd", int_x86_sse3_hadd_pd>;
  def HADDPDrm : S3_Intrm <0x7C, "haddpd", int_x86_sse3_hadd_pd>;

  // Horizontal subtract, opcode 0x7D.
  def HSUBPSrr : S3D_Intrr<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
  def HSUBPSrm : S3D_Intrm<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
  def HSUBPDrr : S3_Intrr <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
  def HSUBPDrm : S3_Intrm <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
}
2378
2379// Thread synchronization
// MONITOR / MWAIT have no explicit operands; the intrinsic patterns name the
// fixed registers they read (EAX, ECX, EDX and ECX, EAX respectively).
// NOTE(review): Intel documents these as 0F 01 C8 / 0F 01 C9. RawFrm + TB with
// opcode 0xC8 / 0xC9 looks like it would emit 0F C8 / 0F C9 instead - confirm
// the encoding before relying on machine code emission for these.
def MONITOR : I<0xC8, RawFrm, (outs), (ins),
                "monitor",
                [(int_x86_sse3_monitor EAX, ECX, EDX)]>,
              TB, Requires<[HasSSE3]>;
def MWAIT   : I<0xC9, RawFrm, (outs), (ins),
                "mwait",
                [(int_x86_sse3_mwait ECX, EAX)]>,
              TB, Requires<[HasSSE3]>;
2384
2385// vector_shuffle v1, <undef> <1, 1, 3, 3>
// Reuse MOVSHDUP for the v4i32 flavour of the same shuffle. The memory form
// matches a v2i64 memop viewed as v4i32 and gets the higher AddedComplexity.
let AddedComplexity = 15 in
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
                                 MOVSHDUP_shuffle_mask)),
          (MOVSHDUPrr VR128:$src)>,
      Requires<[HasSSE3]>;
let AddedComplexity = 20 in
def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (memopv2i64 addr:$src)), (undef),
                                 MOVSHDUP_shuffle_mask)),
          (MOVSHDUPrm addr:$src)>,
      Requires<[HasSSE3]>;
2394
2395// vector_shuffle v1, <undef> <0, 0, 2, 2>
// Reuse MOVSLDUP for the v4i32 flavour of the same shuffle. The memory form
// matches a v2i64 memop viewed as v4i32 and gets the higher AddedComplexity.
let AddedComplexity = 15 in
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
                                 MOVSLDUP_shuffle_mask)),
          (MOVSLDUPrr VR128:$src)>,
      Requires<[HasSSE3]>;
let AddedComplexity = 20 in
def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (memopv2i64 addr:$src)), (undef),
                                 MOVSLDUP_shuffle_mask)),
          (MOVSLDUPrm addr:$src)>,
      Requires<[HasSSE3]>;
2404
2405//===----------------------------------------------------------------------===//
2406// SSSE3 Instructions
2407//===----------------------------------------------------------------------===//
2408
Bill Wendling3b15d722007-08-11 09:52:53 +00002409// SSSE3 Instruction Templates:
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002410//
Bill Wendling98680292007-08-10 06:22:27 +00002411// SS38I - SSSE3 instructions with T8 prefix.
2412// SS3AI - SSSE3 instructions with TA prefix.
Bill Wendling3b15d722007-08-11 09:52:53 +00002413//
2414// Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit version
2415// uses the MMX registers. We put those instructions here because they better
2416// fit into the SSSE3 instruction category rather than the MMX category.
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002417
// SS38I - SSSE3 instruction with the T8 prefix; gated on HasSSSE3.
class SS38I<bits<8> o, Format F, dag outs, dag ins,
            string asm, list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern>,
    T8, Requires<[HasSSSE3]>;
// SS3AI - SSSE3 instruction with the TA prefix; gated on HasSSSE3.
// Instructions in this group carry a trailing 8-bit immediate, so the class
// derives from Ii8 (8-bit immediate) instead of I (no immediate); otherwise
// the immediate operand has no encoding size.
class SS3AI<bits<8> o, Format F, dag outs, dag ins,
            string asm, list<dag> pattern>
  : Ii8<o, F, outs, ins, asm, pattern>,
    TA, Requires<[HasSSSE3]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002424
/// SS3I_unop_rm_int_8 - Simple SSSE3 unary operator whose type is v*i8.
///
/// Expands to four defs: rr64 / rm64 operate on VR64 (MMX) and rr128 / rm128
/// on VR128 with the OpSize prefix. The memory forms bitconvert a v8i8 /
/// v16i8 memop to the intrinsic's operand type.
///
/// These are plain unary operations ($dst is written, $src only read), so they
/// are NOT two-address: tying $dst to the first input is unnecessary for the
/// register forms and meaningless for the memory forms, whose first input
/// operand is part of the address.
///
/// Commutable is kept for interface compatibility and forwarded to the
/// register forms as before; it defaults to 0.
multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr,
                              Intrinsic IntId64, Intrinsic IntId128,
                              bit Commutable = 0> {
  def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR64:$dst, (IntId64 VR64:$src))]> {
    let isCommutable = Commutable;
  }
  def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR64:$dst,
                     (IntId64 (bitconvert (memopv8i8 addr:$src))))]>;

  def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (IntId128 VR128:$src))]>,
                    OpSize {
    let isCommutable = Commutable;
  }
  def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins i128mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                      (IntId128
                       (bitconvert (memopv16i8 addr:$src))))]>, OpSize;
}
2455
/// SS3I_unop_rm_int_16 - Simple SSSE3 unary operator whose type is v*i16.
///
/// Expands to four defs: rr64 / rm64 operate on VR64 (MMX) and rr128 / rm128
/// on VR128 with the OpSize prefix. The memory forms bitconvert a v4i16 /
/// v8i16 memop to the intrinsic's operand type.
///
/// These are plain unary operations ($dst is written, $src only read), so they
/// are NOT two-address: tying $dst to the first input is unnecessary for the
/// register forms and meaningless for the memory forms, whose first input
/// operand is part of the address.
///
/// Commutable is kept for interface compatibility and forwarded to the
/// register forms as before; it defaults to 0.
multiclass SS3I_unop_rm_int_16<bits<8> opc, string OpcodeStr,
                               Intrinsic IntId64, Intrinsic IntId128,
                               bit Commutable = 0> {
  def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
                   (ins VR64:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR64:$dst, (IntId64 VR64:$src))]> {
    let isCommutable = Commutable;
  }
  def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
                   (ins i64mem:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR64:$dst,
                     (IntId64
                      (bitconvert (memopv4i16 addr:$src))))]>;

  def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (IntId128 VR128:$src))]>,
                    OpSize {
    let isCommutable = Commutable;
  }
  def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins i128mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                      (IntId128
                       (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
}
2489
/// SS3I_unop_rm_int_32 - Simple SSSE3 unary operator whose type is v*i32.
///
/// Expands to four defs: rr64 / rm64 operate on VR64 (MMX) and rr128 / rm128
/// on VR128 with the OpSize prefix. The memory forms bitconvert a v2i32 /
/// v4i32 memop to the intrinsic's operand type.
///
/// These are plain unary operations ($dst is written, $src only read), so they
/// are NOT two-address: tying $dst to the first input is unnecessary for the
/// register forms and meaningless for the memory forms, whose first input
/// operand is part of the address.
///
/// Commutable is kept for interface compatibility and forwarded to the
/// register forms as before; it defaults to 0.
multiclass SS3I_unop_rm_int_32<bits<8> opc, string OpcodeStr,
                               Intrinsic IntId64, Intrinsic IntId128,
                               bit Commutable = 0> {
  def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
                   (ins VR64:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR64:$dst, (IntId64 VR64:$src))]> {
    let isCommutable = Commutable;
  }
  def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
                   (ins i64mem:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR64:$dst,
                     (IntId64
                      (bitconvert (memopv2i32 addr:$src))))]>;

  def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (IntId128 VR128:$src))]>,
                    OpSize {
    let isCommutable = Commutable;
  }
  def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins i128mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                      (IntId128
                       (bitconvert (memopv4i32 addr:$src))))]>, OpSize;
}
2523
// Packed absolute value, one instantiation per element width. Each defm
// yields the rr64 / rm64 / rr128 / rm128 variants of its multiclass.
defm PABSB : SS3I_unop_rm_int_8 <0x1C, "pabsb",
                                 int_x86_ssse3_pabs_b,
                                 int_x86_ssse3_pabs_b_128>;
defm PABSW : SS3I_unop_rm_int_16<0x1D, "pabsw",
                                 int_x86_ssse3_pabs_w,
                                 int_x86_ssse3_pabs_w_128>;
defm PABSD : SS3I_unop_rm_int_32<0x1E, "pabsd",
                                 int_x86_ssse3_pabs_d,
                                 int_x86_ssse3_pabs_d_128>;
2533
/// SS3I_binop_rm_int_8 - Simple SSSE3 binary operator whose type is v*i8.
///
/// Expands to rr64 / rm64 (VR64) and rr128 / rm128 (VR128, OpSize prefix).
/// $dst is tied to $src1. Commutable is applied to the register forms only.
/// The memory forms bitconvert a v8i8 / v16i8 memop for the second operand.
let isTwoAddress = 1 in {
  multiclass SS3I_binop_rm_int_8<bits<8> opc, string OpcodeStr,
                                 Intrinsic IntId64, Intrinsic IntId128,
                                 bit Commutable = 0> {
    // 64-bit (MMX register) forms.
    def rr64 : SS38I<opc, MRMSrcReg,
                     (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> {
      let isCommutable = Commutable;
    }
    def rm64 : SS38I<opc, MRMSrcMem,
                     (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     [(set VR64:$dst,
                       (IntId64 VR64:$src1,
                                (bitconvert (memopv8i8 addr:$src2))))]>;

    // 128-bit (XMM register) forms.
    def rr128 : SS38I<opc, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst,
                        (IntId128 VR128:$src1, VR128:$src2))]>,
                OpSize {
      let isCommutable = Commutable;
    }
    def rm128 : SS38I<opc, MRMSrcMem,
                      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst,
                        (IntId128 VR128:$src1,
                                  (bitconvert (memopv16i8 addr:$src2))))]>,
                OpSize;
  }
}
2567
/// SS3I_binop_rm_int_16 - Simple SSSE3 binary operator whose type is v*i16.
///
/// Expands to rr64 / rm64 (VR64) and rr128 / rm128 (VR128, OpSize prefix).
/// $dst is tied to $src1. Commutable is applied to the register forms only.
/// The memory forms bitconvert a v4i16 / v8i16 memop for the second operand.
let isTwoAddress = 1 in {
  multiclass SS3I_binop_rm_int_16<bits<8> opc, string OpcodeStr,
                                  Intrinsic IntId64, Intrinsic IntId128,
                                  bit Commutable = 0> {
    // 64-bit (MMX register) forms.
    def rr64 : SS38I<opc, MRMSrcReg,
                     (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> {
      let isCommutable = Commutable;
    }
    def rm64 : SS38I<opc, MRMSrcMem,
                     (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     [(set VR64:$dst,
                       (IntId64 VR64:$src1,
                                (bitconvert (memopv4i16 addr:$src2))))]>;

    // 128-bit (XMM register) forms.
    def rr128 : SS38I<opc, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst,
                        (IntId128 VR128:$src1, VR128:$src2))]>,
                OpSize {
      let isCommutable = Commutable;
    }
    def rm128 : SS38I<opc, MRMSrcMem,
                      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst,
                        (IntId128 VR128:$src1,
                                  (bitconvert (memopv8i16 addr:$src2))))]>,
                OpSize;
  }
}
2601
/// SS3I_binop_rm_int_32 - Simple SSSE3 binary operator whose type is v*i32.
///
/// Expands to rr64 / rm64 (VR64) and rr128 / rm128 (VR128, OpSize prefix).
/// $dst is tied to $src1. Commutable is applied to the register forms only.
/// The memory forms bitconvert a v2i32 / v4i32 memop for the second operand.
let isTwoAddress = 1 in {
  multiclass SS3I_binop_rm_int_32<bits<8> opc, string OpcodeStr,
                                  Intrinsic IntId64, Intrinsic IntId128,
                                  bit Commutable = 0> {
    // 64-bit (MMX register) forms.
    def rr64 : SS38I<opc, MRMSrcReg,
                     (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> {
      let isCommutable = Commutable;
    }
    def rm64 : SS38I<opc, MRMSrcMem,
                     (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     [(set VR64:$dst,
                       (IntId64 VR64:$src1,
                                (bitconvert (memopv2i32 addr:$src2))))]>;

    // 128-bit (XMM register) forms.
    def rr128 : SS38I<opc, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst,
                        (IntId128 VR128:$src1, VR128:$src2))]>,
                OpSize {
      let isCommutable = Commutable;
    }
    def rm128 : SS38I<opc, MRMSrcMem,
                      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst,
                        (IntId128 VR128:$src1,
                                  (bitconvert (memopv4i32 addr:$src2))))]>,
                OpSize;
  }
}
2635
// Packed horizontal add. Each half of the result is formed from adjacent
// element pairs of ONE source operand (low half from the first operand, high
// half from the second), so swapping the operands reorders the result. These
// are therefore not commutable and use the multiclass default (Commutable = 0).
defm PHADDW  : SS3I_binop_rm_int_16<0x01, "phaddw",
                                    int_x86_ssse3_phadd_w,
                                    int_x86_ssse3_phadd_w_128>;
defm PHADDD  : SS3I_binop_rm_int_32<0x02, "phaddd",
                                    int_x86_ssse3_phadd_d,
                                    int_x86_ssse3_phadd_d_128>;
defm PHADDSW : SS3I_binop_rm_int_16<0x03, "phaddsw",
                                    int_x86_ssse3_phadd_sw,
                                    int_x86_ssse3_phadd_sw_128>;
// Packed horizontal subtract; left at the multiclass default (not commutable).
defm PHSUBW  : SS3I_binop_rm_int_16<0x05, "phsubw",
                                    int_x86_ssse3_phsub_w,
                                    int_x86_ssse3_phsub_w_128>;
defm PHSUBD  : SS3I_binop_rm_int_32<0x06, "phsubd",
                                    int_x86_ssse3_phsub_d,
                                    int_x86_ssse3_phsub_d_128>;
defm PHSUBSW : SS3I_binop_rm_int_16<0x07, "phsubsw",
                                    int_x86_ssse3_phsub_sw,
                                    int_x86_ssse3_phsub_sw_128>;
// PMADDUBSW multiplies UNSIGNED bytes of the first operand by SIGNED bytes of
// the second, so the operands play different roles and must not be swapped:
// not commutable (multiclass default, Commutable = 0).
defm PMADDUBSW : SS3I_binop_rm_int_8 <0x04, "pmaddubsw",
                                      int_x86_ssse3_pmadd_ub_sw,
                                      int_x86_ssse3_pmadd_ub_sw_128>;
// PMULHRSW is instantiated as commutable (trailing 1).
defm PMULHRSW : SS3I_binop_rm_int_16<0x0B, "pmulhrsw",
                                     int_x86_ssse3_pmul_hr_sw,
                                     int_x86_ssse3_pmul_hr_sw_128, 1>;
// PSHUFB and the PSIGN family keep the non-commutable default.
defm PSHUFB   : SS3I_binop_rm_int_8 <0x00, "pshufb",
                                     int_x86_ssse3_pshuf_b,
                                     int_x86_ssse3_pshuf_b_128>;
defm PSIGNB   : SS3I_binop_rm_int_8 <0x08, "psignb",
                                     int_x86_ssse3_psign_b,
                                     int_x86_ssse3_psign_b_128>;
defm PSIGNW   : SS3I_binop_rm_int_16<0x09, "psignw",
                                     int_x86_ssse3_psign_w,
                                     int_x86_ssse3_psign_w_128>;
// PSIGND is 0F 38 0A. (0x09 is PSIGNW; reusing it here would make psignd
// encode as psignw.)
defm PSIGND : SS3I_binop_rm_int_32<0x0A, "psignd",
                                   int_x86_ssse3_psign_d,
                                   int_x86_ssse3_psign_d_128>;
2672
// PALIGNR, selected through the palign_r intrinsics. $dst is tied to $src1;
// $src3 is the immediate operand.
//
//  * The rm forms take their second source from memory and therefore use the
//    MRMSrcMem format (MRMSrcReg would encode the address as a register).
//  * The assembly string prints all three explicit operands, immediate
//    included.
//  * The immediate operand classes (i16imm / i32imm) are left unchanged so the
//    patterns keep type-checking against the intrinsic signatures.
let isTwoAddress = 1 in {
  // 64-bit (MMX register) forms.
  def PALIGNR64rr  : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
                           (ins VR64:$src1, VR64:$src2, i16imm:$src3),
                           "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                           [(set VR64:$dst,
                             (int_x86_ssse3_palign_r
                              VR64:$src1, VR64:$src2,
                              imm:$src3))]>;
  def PALIGNR64rm  : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
                           (ins VR64:$src1, i64mem:$src2, i16imm:$src3),
                           "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                           [(set VR64:$dst,
                             (int_x86_ssse3_palign_r
                              VR64:$src1,
                              (bitconvert (memopv2i32 addr:$src2)),
                              imm:$src3))]>;

  // 128-bit (XMM register) forms, with the OpSize prefix.
  def PALIGNR128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
                           (ins VR128:$src1, VR128:$src2, i32imm:$src3),
                           "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                           [(set VR128:$dst,
                             (int_x86_ssse3_palign_r_128
                              VR128:$src1, VR128:$src2,
                              imm:$src3))]>, OpSize;
  def PALIGNR128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
                           (ins VR128:$src1, i128mem:$src2, i32imm:$src3),
                           "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                           [(set VR128:$dst,
                             (int_x86_ssse3_palign_r_128
                              VR128:$src1,
                              (bitconvert (memopv4i32 addr:$src2)),
                              imm:$src3))]>, OpSize;
}
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002706
2707//===----------------------------------------------------------------------===//
2708// Non-Instruction Patterns
2709//===----------------------------------------------------------------------===//
2710
2711// 128-bit vector undef's.
// Every 128-bit vector type maps undef to IMPLICIT_DEF_VR128.
def : Pat<(v4f32 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v2f64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v16i8 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v8i16 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
2718
// 128-bit vector all zeros: every listed vector type is selected to V_SET0.
let Predicates = [HasSSE2] in {
def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
}
2725
// 128-bit vector all ones: every listed vector type is selected to
// V_SETALLONES. The integer types are guarded by SSE2, v4f32 by SSE1.
let Predicates = [HasSSE2] in {
def : Pat<(v16i8 immAllOnesV), (V_SETALLONES)>;
def : Pat<(v8i16 immAllOnesV), (V_SETALLONES)>;
def : Pat<(v4i32 immAllOnesV), (V_SETALLONES)>;
def : Pat<(v2i64 immAllOnesV), (V_SETALLONES)>;
}
let Predicates = [HasSSE1] in
def : Pat<(v4f32 immAllOnesV), (V_SETALLONES)>;
2732
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002733
// Scalar to v8i16 / v16i8. The scalar arrives in a GR32 register even though
// only its low 16 or 8 bits are meaningful; both cases are selected to
// MOVDI2PDIrr.
let Predicates = [HasSSE2] in {
def : Pat<(v8i16 (X86s2vec GR32:$src)), (MOVDI2PDIrr GR32:$src)>;
def : Pat<(v16i8 (X86s2vec GR32:$src)), (MOVDI2PDIrr GR32:$src)>;
}
2740
// bit_convert: a bitconvert between any two distinct 128-bit vector types
// selects to the source VR128 register itself, re-typed; no instruction is
// emitted. Patterns are grouped by result type.
let Predicates = [HasSSE2] in {
  // Result type v2i64.
  def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;

  // Result type v4i32.
  def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;

  // Result type v8i16.
  def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;

  // Result type v16i8.
  def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;

  // Result type v4f32.
  def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;

  // Result type v2f64.
  def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
}
2774
// Move scalar to XMM zero-extended.
// A shuffle that places a scalar in the low element of an all-zeros vector.
let AddedComplexity = 15, Predicates = [HasSSE2] in {
// Integer scalar in a GR32: movd to an XMM register zero-extends.
def : Pat<(v8i16 (vector_shuffle immAllZerosV,
                  (v8i16 (X86s2vec GR32:$src)), MOVL_shuffle_mask)),
          (MOVZDI2PDIrr GR32:$src)>;
def : Pat<(v16i8 (vector_shuffle immAllZerosV,
                  (v16i8 (X86s2vec GR32:$src)), MOVL_shuffle_mask)),
          (MOVZDI2PDIrr GR32:$src)>;

// FP scalar: zero a VR128 with V_SET0, then MOVS{S|D} into the low bits.
def : Pat<(v2f64 (vector_shuffle immAllZerosV,
                  (v2f64 (scalar_to_vector FR64:$src)), MOVL_shuffle_mask)),
          (MOVLSD2PDrr (V_SET0), FR64:$src)>;
def : Pat<(v4f32 (vector_shuffle immAllZerosV,
                  (v4f32 (scalar_to_vector FR32:$src)), MOVL_shuffle_mask)),
          (MOVLSS2PSrr (V_SET0), FR32:$src)>;
}
2792
// Splat v2f64 / v2i64: a two-element splat is selected to an unpack of the
// source register with itself (low unpack for SSE_splat_lo_mask, high unpack
// for UNPCKH_shuffle_mask).
let AddedComplexity = 10, Predicates = [HasSSE2] in {
def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef),
                          SSE_splat_lo_mask:$sm),
          (UNPCKLPDrr VR128:$src, VR128:$src)>;
def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef),
                          UNPCKH_shuffle_mask:$sm),
          (UNPCKHPDrr VR128:$src, VR128:$src)>;
def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef),
                          SSE_splat_lo_mask:$sm),
          (PUNPCKLQDQrr VR128:$src, VR128:$src)>;
def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef),
                          UNPCKH_shuffle_mask:$sm),
          (PUNPCKHQDQrr VR128:$src, VR128:$src)>;
}
2804
// Splat v4f32: selected to SHUFPSrri with the source as both inputs and the
// splat mask as the shuffle immediate.
let Predicates = [HasSSE1] in
def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SSE_splat_mask:$sm),
          (SHUFPSrri VR128:$src, VR128:$src, SSE_splat_mask:$sm)>;
2809
// Special unary SHUFPSrri case: a one-input v4f32 shuffle uses the same
// register for both SHUFPS sources.
// FIXME: when we want non two-address code, then we should use PSHUFD?
let Predicates = [HasSSE1] in
def : Pat<(vector_shuffle (v4f32 VR128:$src1), (undef),
                          SHUFP_unary_shuffle_mask:$sm),
          (SHUFPSrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>;

// Special unary SHUFPDrri case: the v2f64 counterpart of the pattern above.
let Predicates = [HasSSE2] in
def : Pat<(vector_shuffle (v2f64 VR128:$src1), (undef),
                          SHUFP_unary_shuffle_mask:$sm),
          (SHUFPDrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>;
// Unary v4f32 shuffle whose input is a memop load: use PSHUFDmi so that the
// load is folded into the shuffle.
let Predicates = [HasSSE2] in
def : Pat<(vector_shuffle (memopv4f32 addr:$src1), (undef),
                          SHUFP_unary_shuffle_mask:$sm),
          (PSHUFDmi addr:$src1, SHUFP_unary_shuffle_mask:$sm)>;
// Special binary v4i32 shuffle cases, selected to SHUFPS (register and
// folded-load forms).
let Predicates = [HasSSE2] in {
def : Pat<(vector_shuffle (v4i32 VR128:$src1), (v4i32 VR128:$src2),
                          PSHUFD_binary_shuffle_mask:$sm),
          (SHUFPSrri VR128:$src1, VR128:$src2,
                     PSHUFD_binary_shuffle_mask:$sm)>;
def : Pat<(vector_shuffle (v4i32 VR128:$src1),
                          (bc_v4i32 (memopv2i64 addr:$src2)),
                          PSHUFD_binary_shuffle_mask:$sm),
          (SHUFPSrmi VR128:$src1, addr:$src2,
                     PSHUFD_binary_shuffle_mask:$sm)>;
}
2835
// vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
// Selected to a low unpack of the source with itself. Each pattern is guarded
// by the feature that its selected instruction needs: UNPCKLPS is an SSE1
// instruction, whereas the integer PUNPCKL* forms on XMM registers are SSE2.
let AddedComplexity = 10 in {
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
                  UNPCKL_v_undef_shuffle_mask)),
          (UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef),
                  UNPCKL_v_undef_shuffle_mask)),
          (PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef),
                  UNPCKL_v_undef_shuffle_mask)),
          (PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
                  UNPCKL_v_undef_shuffle_mask)),
          (PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
}
2851
// vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
// Selected to a high unpack of the source with itself. Each pattern is guarded
// by the feature that its selected instruction needs: UNPCKHPS is an SSE1
// instruction, whereas the integer PUNPCKH* forms on XMM registers are SSE2.
let AddedComplexity = 10 in {
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
                  UNPCKH_v_undef_shuffle_mask)),
          (UNPCKHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef),
                  UNPCKH_v_undef_shuffle_mask)),
          (PUNPCKHBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef),
                  UNPCKH_v_undef_shuffle_mask)),
          (PUNPCKHWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
                  UNPCKH_v_undef_shuffle_mask)),
          (PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
}
2867
// Register-register shuffles that move 64-bit halves with MOVLHPS / MOVHLPS.
// These patterns carry no subtarget predicate of their own.
let AddedComplexity = 15 in {
  // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
  def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
                                   MOVHP_shuffle_mask)),
            (MOVLHPSrr VR128:$src1, VR128:$src2)>;

  // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
  def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
                                   MOVHLPS_shuffle_mask)),
            (MOVHLPSrr VR128:$src1, VR128:$src2)>;

  // vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS; the single input
  // feeds both MOVHLPS operands.
  def : Pat<(v4f32 (vector_shuffle VR128:$src1, (undef),
                                   MOVHLPS_v_undef_shuffle_mask)),
            (MOVHLPSrr VR128:$src1, VR128:$src1)>;
  def : Pat<(v4i32 (vector_shuffle VR128:$src1, (undef),
                                   MOVHLPS_v_undef_shuffle_mask)),
            (MOVHLPSrr VR128:$src1, VR128:$src1)>;
}
2887
// Shuffles of a register with a folded memop load, selected to the
// MOVLPS/MOVLPD (low half) and MOVHPS/MOVHPD (high half) memory forms.
let AddedComplexity = 20 in {
// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
// vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS
def : Pat<(v4f32 (vector_shuffle VR128:$src1, (memopv4f32 addr:$src2),
                  MOVLP_shuffle_mask)),
          (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
def : Pat<(v2f64 (vector_shuffle VR128:$src1, (memopv2f64 addr:$src2),
                  MOVLP_shuffle_mask)),
          (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v4f32 (vector_shuffle VR128:$src1, (memopv4f32 addr:$src2),
                  MOVHP_shuffle_mask)),
          (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
def : Pat<(v2f64 (vector_shuffle VR128:$src1, (memopv2f64 addr:$src2),
                  MOVHP_shuffle_mask)),
          (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;

// Integer-typed equivalents of the four patterns above.
def : Pat<(v4i32 (vector_shuffle VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)),
                  MOVLP_shuffle_mask)),
          (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (vector_shuffle VR128:$src1, (memopv2i64 addr:$src2),
                  MOVLP_shuffle_mask)),
          (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)),
                  MOVHP_shuffle_mask)),
          (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
// v2i64 high-half case. This mirrors the v2f64 MOVHP/MOVHPDrm pattern; it
// previously repeated the v2i64 MOVLP/MOVLPDrm pattern verbatim, leaving the
// high-half load fold for v2i64 unhandled.
def : Pat<(v2i64 (vector_shuffle VR128:$src1, (memopv2i64 addr:$src2),
                  MOVHP_shuffle_mask)),
          (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
}
2917
// Register-register shuffles that replace the low part of $src1 with the low
// part of $src2.
let AddedComplexity = 15, Predicates = [HasSSE2] in {
  // Setting the lowest element in the vector.
  def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
                                   MOVL_shuffle_mask)),
            (MOVLPSrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
                                   MOVL_shuffle_mask)),
            (MOVLPDrr VR128:$src1, VR128:$src2)>;

  // vector_shuffle v1, v2 <4, 5, 2, 3> using MOVLPDrr (movsd)
  def : Pat<(v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
                                   MOVLP_shuffle_mask)),
            (MOVLPDrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
                                   MOVLP_shuffle_mask)),
            (MOVLPDrr VR128:$src1, VR128:$src2)>;
}
2935
// Set lowest element and zero upper elements: an f64 load placed in the low
// element of an all-zeros vector, viewed as v2i64, is selected to
// MOVZQI2PQIrm.
let AddedComplexity = 20, Predicates = [HasSSE2] in
def : Pat<(bc_v2i64 (vector_shuffle immAllZerosV,
                     (v2f64 (scalar_to_vector (loadf64 addr:$src))),
                     MOVL_shuffle_mask)),
          (MOVZQI2PQIrm addr:$src)>;
2942
// FIXME: Temporary workaround since 2-wide shuffle is broken.
// The 2-element shuffle intrinsics are mapped directly onto instructions.
// The memory forms match memopv2f64 / memopv2i64 rather than a plain load, in
// line with the other load-folding patterns in this file, so that the folded
// load goes through the memop fragments instead of an unrestricted load.
// NOTE(review): the memop fragments are presumed to enforce the alignment
// these memory operands need - confirm against their definition.
def : Pat<(int_x86_sse2_movs_d VR128:$src1, VR128:$src2),
          (v2f64 (MOVLPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_loadh_pd VR128:$src1, addr:$src2),
          (v2f64 (MOVHPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_loadl_pd VR128:$src1, addr:$src2),
          (v2f64 (MOVLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_shuf_pd VR128:$src1, VR128:$src2, imm:$src3),
          (v2f64 (SHUFPDrri VR128:$src1, VR128:$src2, imm:$src3))>,
      Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_shuf_pd VR128:$src1, (memopv2f64 addr:$src2),
                                imm:$src3),
          (v2f64 (SHUFPDrmi VR128:$src1, addr:$src2, imm:$src3))>,
      Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_unpckh_pd VR128:$src1, VR128:$src2),
          (v2f64 (UNPCKHPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_unpckh_pd VR128:$src1, (memopv2f64 addr:$src2)),
          (v2f64 (UNPCKHPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_unpckl_pd VR128:$src1, VR128:$src2),
          (v2f64 (UNPCKLPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_unpckl_pd VR128:$src1, (memopv2f64 addr:$src2)),
          (v2f64 (UNPCKLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_punpckh_qdq VR128:$src1, VR128:$src2),
          (v2i64 (PUNPCKHQDQrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_punpckh_qdq VR128:$src1, (memopv2i64 addr:$src2)),
          (v2i64 (PUNPCKHQDQrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_punpckl_qdq VR128:$src1, VR128:$src2),
          (v2i64 (PUNPCKLQDQrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
// Result is annotated as v2i64 like the other punpck*_qdq patterns.
def : Pat<(int_x86_sse2_punpckl_qdq VR128:$src1, (memopv2i64 addr:$src2)),
          (v2i64 (PUNPCKLQDQrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
2972
// Some special case pandn patterns: (and (xor x, all-ones), y) is selected to
// PANDN. One pattern is needed per element type in which the all-ones vector
// may be expressed before being bitcast to v2i64.
let Predicates = [HasSSE2] in {
  // Register second operand.
  def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
                        VR128:$src2)),
            (PANDNrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
                        VR128:$src2)),
            (PANDNrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
                        VR128:$src2)),
            (PANDNrr VR128:$src1, VR128:$src2)>;

  // Second operand folded from a memop load.
  def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
                        (memopv2i64 addr:$src2))),
            (PANDNrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
                        (memopv2i64 addr:$src2))),
            (PANDNrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
                        (memopv2i64 addr:$src2))),
            (PANDNrm VR128:$src1, addr:$src2)>;
}
2993
// Use movaps / movups for SSE integer loads (one byte shorter): aligned loads
// select MOVAPSrm, other loads select MOVUPSrm.
let Predicates = [HasSSE1] in {
  def : Pat<(alignedloadv4i32 addr:$src), (MOVAPSrm addr:$src)>;
  def : Pat<(loadv4i32 addr:$src),        (MOVUPSrm addr:$src)>;
}
let Predicates = [HasSSE2] in {
  def : Pat<(alignedloadv2i64 addr:$src), (MOVAPSrm addr:$src)>;
  def : Pat<(loadv2i64 addr:$src),        (MOVUPSrm addr:$src)>;
}
3003
// Integer vector stores are selected to the FP move encodings: aligned stores
// use MOVAPSmr, other stores use MOVUPSmr.
let Predicates = [HasSSE2] in {
  // Aligned stores.
  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;

  // Remaining stores.
  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
}
Evan Cheng86ab7d32007-07-31 08:04:03 +00003020
// (vextract (v4i32 bc (v4f32 s2v (f32 load $addr))), 0) -> (i32 load $addr)
// Extracting element 0 of a bitcast scalar_to_vector of an FP load becomes an
// integer load from the same address.
let Predicates = [HasSSE2] in
def : Pat<(vector_extract
            (bc_v4i32 (v4f32 (scalar_to_vector (loadf32 addr:$src)))),
            (iPTR 0)),
          (MOV32rm addr:$src)>;

// 64-bit counterpart: an f64 load viewed as v2i64; additionally restricted to
// 64-bit mode.
let Predicates = [HasSSE2, In64BitMode] in
def : Pat<(vector_extract
            (bc_v2i64 (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
            (iPTR 0)),
          (MOV64rm addr:$src)>;