blob: adb6399e2c75f29096945a5e4b45b3d7f6f79c3f [file] [log] [blame]
Arnold Schwaighofer373e8652007-10-12 21:30:57 +00001//====- X86InstrSSE.td - Describe the X86 Instruction Set --*- tablegen -*-===//
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002//
3// The LLVM Compiler Infrastructure
4//
Chris Lattner081ce942007-12-29 20:36:04 +00005// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
Dan Gohmanf17a25c2007-07-18 16:29:46 +00007//
8//===----------------------------------------------------------------------===//
9//
10// This file describes the X86 SSE instruction set, defining the instructions,
11// and properties of the instructions which are needed for code generation,
12// machine code emission, and analysis.
13//
14//===----------------------------------------------------------------------===//
15
16
17//===----------------------------------------------------------------------===//
18// SSE specific DAG Nodes.
19//===----------------------------------------------------------------------===//
20
// Type profile for the FP shift node (X86fsrl): one result and two operands.
// The result (index 0) has the same type as operand 1 and is floating point;
// operand 2 is an integer.
21def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
22 SDTCisFP<0>, SDTCisInt<2> ]>;
23
// Floating-point DAG nodes. Each record binds a TableGen name to the X86ISD
// opcode given in the string and to a type profile.
// FMIN/FMAX carry no node properties (they are not marked commutative here).
Dan Gohmanf17a25c2007-07-18 16:29:46 +000024def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
25def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
// Bitwise logical operations on FP values; all three are marked commutative
// and associative.
26def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
27 [SDNPCommutative, SDNPAssociative]>;
28def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp,
29 [SDNPCommutative, SDNPAssociative]>;
30def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
31 [SDNPCommutative, SDNPAssociative]>;
// Unary FP nodes (FRSQRT, FRCP) and the FP shift node using the profile
// defined above.
32def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
33def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
34def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
// Comparison nodes used by the Int_COMISS* / Int_UCOMISS* patterns below.
Evan Chengf37bf452007-10-01 18:12:48 +000035def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
Evan Cheng621216e2007-09-29 00:00:36 +000036def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
// Element extract nodes: one i32 result, two operands, the second of which is
// pointer-typed. Operand 1 is left unconstrained by these profiles.
Nate Begemand77e59e2008-02-11 04:19:36 +000037def X86pextrb : SDNode<"X86ISD::PEXTRB",
38 SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
39def X86pextrw : SDNode<"X86ISD::PEXTRW",
40 SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
// Element insert nodes: the result and operand 1 share the vector type named
// in the profile, operand 2 is the scalar being inserted (i32 for the integer
// forms, f32 for INSERTPS), and operand 3 is pointer-typed.
41def X86pinsrb : SDNode<"X86ISD::PINSRB",
42 SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
43 SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
44def X86pinsrw : SDNode<"X86ISD::PINSRW",
45 SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
46 SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
47def X86insrtps : SDNode<"X86ISD::INSERTPS",
48 SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
49 SDTCisVT<2, f32>, SDTCisPtrTy<3>]>>;
// VZEXT_MOVL: one operand, result has the same type as the operand.
Evan Chenge9b9c672008-05-09 21:53:03 +000050def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
51 SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
// VZEXT_LOAD: uses the generic load profile; the node has a chain and may
// load from memory.
52def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
53 [SDNPHasChain, SDNPMayLoad]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +000054
55//===----------------------------------------------------------------------===//
Dan Gohmanf17a25c2007-07-18 16:29:46 +000056// SSE Complex Patterns
57//===----------------------------------------------------------------------===//
58
59// These are 'extloads' from a scalar to the low element of a vector, zeroing
60// the top elements. These are used for the SSE 'ss' and 'sd' instruction
61// forms.
// Both patterns are matched by the same selector routine,
// "SelectScalarSSELoad", and each produces 4 operands. The matched node has a
// chain and may load.
// NOTE(review): the 4 operands presumably correspond to the four-part memory
// operand declared by ssmem/sdmem below -- confirm against the selector.
62def sse_load_f32 : ComplexPattern<v4f32, 4, "SelectScalarSSELoad", [],
Chris Lattnerc90ee9c2008-01-10 07:59:24 +000063 [SDNPHasChain, SDNPMayLoad]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +000064def sse_load_f64 : ComplexPattern<v2f64, 4, "SelectScalarSSELoad", [],
Chris Lattnerc90ee9c2008-01-10 07:59:24 +000065 [SDNPHasChain, SDNPMayLoad]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +000066
// Memory operands for the intrinsic 'ss' / 'sd' forms. They are typed as full
// vectors (v4f32 / v2f64) but are printed with the 32-bit / 64-bit memory
// printers. Each expands to four machine operands.
// NOTE(review): the four sub-operands are presumably base, scale, index and
// displacement -- confirm against the other X86 memory operand definitions.
67def ssmem : Operand<v4f32> {
68 let PrintMethod = "printf32mem";
69 let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm);
70}
71def sdmem : Operand<v2f64> {
72 let PrintMethod = "printf64mem";
73 let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm);
74}
75
76//===----------------------------------------------------------------------===//
77// SSE pattern fragments
78//===----------------------------------------------------------------------===//
79
// Plain 'load' fragments with the result pinned to a specific 128-bit vector
// type. No alignment predicate is attached to these.
Dan Gohmanf17a25c2007-07-18 16:29:46 +000080def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
81def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
82def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
83def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
84
Dan Gohman11821702007-07-27 17:16:43 +000085// Like 'store', but always requires vector alignment.
// The predicate accepts a node only if it is a StoreSDNode that is not a
// truncating store, uses the UNINDEXED addressing mode, and reports an
// alignment of at least 16 bytes. Anything else is rejected.
Dan Gohman4a4f1512007-07-18 20:23:34 +000086def alignedstore : PatFrag<(ops node:$val, node:$ptr),
87 (st node:$val, node:$ptr), [{
88 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
89 return !ST->isTruncatingStore() &&
90 ST->getAddressingMode() == ISD::UNINDEXED &&
Dan Gohman11821702007-07-27 17:16:43 +000091 ST->getAlignment() >= 16;
Dan Gohman4a4f1512007-07-18 20:23:34 +000092 return false;
93}]>;
94
Dan Gohman11821702007-07-27 17:16:43 +000095// Like 'load', but always requires vector alignment.
// The predicate accepts a node only if it is a LoadSDNode that is not an
// extending load, uses the UNINDEXED addressing mode, and reports an
// alignment of at least 16 bytes. Anything else is rejected.
Dan Gohman4a4f1512007-07-18 20:23:34 +000096def alignedload : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
97 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
98 return LD->getExtensionType() == ISD::NON_EXTLOAD &&
99 LD->getAddressingMode() == ISD::UNINDEXED &&
Dan Gohman11821702007-07-27 17:16:43 +0000100 LD->getAlignment() >= 16;
Dan Gohman4a4f1512007-07-18 20:23:34 +0000101 return false;
102}]>;
103
// Typed wrappers around 'alignedload'. The 'fs' variants produce a scalar
// (f32 / f64) result from a load that still has to satisfy the 16-byte
// alignment check; the remaining variants produce 128-bit vector results.
Dan Gohman11821702007-07-27 17:16:43 +0000104def alignedloadfsf32 : PatFrag<(ops node:$ptr), (f32 (alignedload node:$ptr))>;
105def alignedloadfsf64 : PatFrag<(ops node:$ptr), (f64 (alignedload node:$ptr))>;
Dan Gohman4a4f1512007-07-18 20:23:34 +0000106def alignedloadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (alignedload node:$ptr))>;
107def alignedloadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (alignedload node:$ptr))>;
108def alignedloadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (alignedload node:$ptr))>;
109def alignedloadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (alignedload node:$ptr))>;
110
111// Like 'load', but uses special alignment checks suitable for use in
112// memory operands in most SSE instructions, which are required to
113// be naturally aligned on some targets but not on others.
114// FIXME: Actually implement support for targets that don't require the
115// alignment. This probably wants a subtarget predicate.
// Until that FIXME is addressed the predicate is the same as the one on
// 'alignedload': a non-extending, UNINDEXED load with alignment >= 16.
116def memop : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
117 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
118 return LD->getExtensionType() == ISD::NON_EXTLOAD &&
119 LD->getAddressingMode() == ISD::UNINDEXED &&
Dan Gohman11821702007-07-27 17:16:43 +0000120 LD->getAlignment() >= 16;
Dan Gohman4a4f1512007-07-18 20:23:34 +0000121 return false;
122}]>;
123
// Typed wrappers around 'memop' (16-byte alignment check). The 'fs' variants
// yield scalar f32 / f64 results; the others yield 128-bit vector results.
Dan Gohman11821702007-07-27 17:16:43 +0000124def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
125def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;
Dan Gohman4a4f1512007-07-18 20:23:34 +0000126def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
127def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
128def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
129def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
Nate Begeman9a58b8a2008-02-09 23:46:37 +0000130def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
Dan Gohman4a4f1512007-07-18 20:23:34 +0000131
Bill Wendling3b15d722007-08-11 09:52:53 +0000132// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
133// 16-byte boundary.
Nate Begeman9a58b8a2008-02-09 23:46:37 +0000134// FIXME: 8 byte alignment for mmx reads is not required
// Same shape as 'memop', but the predicate only demands an alignment of at
// least 8 bytes: a non-extending, UNINDEXED LoadSDNode with alignment >= 8.
Bill Wendling3b15d722007-08-11 09:52:53 +0000135def memop64 : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
136 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
137 return LD->getExtensionType() == ISD::NON_EXTLOAD &&
138 LD->getAddressingMode() == ISD::UNINDEXED &&
139 LD->getAlignment() >= 8;
140 return false;
141}]>;
142
// Typed wrappers for 64-bit (MMX-sized) vector loads. These go through
// 'memop64', which only requires 8-byte alignment.
143def memopv8i8 : PatFrag<(ops node:$ptr), (v8i8 (memop64 node:$ptr))>;
Bill Wendling3b15d722007-08-11 09:52:53 +0000144def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>;
// v8i16 is a 128-bit vector, so it must use the 16-byte 'memop' check like the
// other 128-bit wrappers (memopv16i8, memopv4i32, ...). Using 'memop64' here
// would allow an 8-byte-aligned load to be folded into an SSE instruction's
// 128-bit memory operand, which requires 16-byte alignment.
145def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>;
146def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>;
147
// Bitconvert fragments: each reinterprets its operand as the 128-bit vector
// type named in the fragment via 'bitconvert'.
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000148def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
149def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
150def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
151def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
152def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
153def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
154
// Matches an f32 floating-point immediate for which isExactlyValue(+0.0)
// holds. Used by FsFLD0SS below to recognise a zero constant.
155def fp32imm0 : PatLeaf<(f32 fpimm), [{
156 return N->isExactlyValue(+0.0);
157}]>;
158
// Immediate transform: rewrites the matched immediate as an i32 immediate
// holding the original value shifted right by 3 (i.e. divided by 8).
// NOTE(review): presumably converts a shift amount in bits to bytes for the
// PSLLDQ/PSRLDQ instructions -- confirm at the use sites.
159def PSxLDQ_imm : SDNodeXForm<imm, [{
160 // Transformation function: imm >> 3
161 return getI32Imm(N->getValue() >> 3);
162}]>;
163
// The three transforms below all take a build_vector node (the shuffle mask)
// and return an 8-bit immediate computed by a helper in the X86 namespace.
164// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
165// SHUFP* etc. imm.
166def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
167 return getI8Imm(X86::getShuffleSHUFImmediate(N));
168}]>;
169
170// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
171// PSHUFHW imm.
172def SHUFFLE_get_pshufhw_imm : SDNodeXForm<build_vector, [{
173 return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
174}]>;
175
176// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
177// PSHUFLW imm.
178def SHUFFLE_get_pshuflw_imm : SDNodeXForm<build_vector, [{
179 return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
180}]>;
181
// Matches a build_vector accepted by X86::isSplatMask. The attached
// SHUFFLE_get_shuf_imm transform converts the mask to an 8-bit immediate.
182def SSE_splat_mask : PatLeaf<(build_vector), [{
183 return X86::isSplatMask(N);
184}], SHUFFLE_get_shuf_imm>;
185
// Matches a build_vector accepted by X86::isSplatLoMask. No immediate
// transform is attached.
186def SSE_splat_lo_mask : PatLeaf<(build_vector), [{
187 return X86::isSplatLoMask(N);
188}]>;
189
// Shuffle-mask leaves without an immediate transform. Each one matches a
// build_vector node and delegates the decision to the like-named predicate in
// the X86 namespace; the mask semantics live in those predicates, not here.
190def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{
191 return X86::isMOVHLPSMask(N);
192}]>;
193
194def MOVHLPS_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
195 return X86::isMOVHLPS_v_undef_Mask(N);
196}]>;
197
198def MOVHP_shuffle_mask : PatLeaf<(build_vector), [{
199 return X86::isMOVHPMask(N);
200}]>;
201
202def MOVLP_shuffle_mask : PatLeaf<(build_vector), [{
203 return X86::isMOVLPMask(N);
204}]>;
205
206def MOVL_shuffle_mask : PatLeaf<(build_vector), [{
207 return X86::isMOVLMask(N);
208}]>;
209
210def MOVSHDUP_shuffle_mask : PatLeaf<(build_vector), [{
211 return X86::isMOVSHDUPMask(N);
212}]>;
213
214def MOVSLDUP_shuffle_mask : PatLeaf<(build_vector), [{
215 return X86::isMOVSLDUPMask(N);
216}]>;
217
218def UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{
219 return X86::isUNPCKLMask(N);
220}]>;
221
222def UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{
223 return X86::isUNPCKHMask(N);
224}]>;
225
226def UNPCKL_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
227 return X86::isUNPCKL_v_undef_Mask(N);
228}]>;
229
230def UNPCKH_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
231 return X86::isUNPCKH_v_undef_Mask(N);
232}]>;
233
// Shuffle-mask leaves that also carry an immediate transform: when the
// predicate accepts the build_vector, the named SHUFFLE_get_*_imm transform
// turns the mask into the instruction's 8-bit immediate.
234def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
235 return X86::isPSHUFDMask(N);
236}], SHUFFLE_get_shuf_imm>;
237
238def PSHUFHW_shuffle_mask : PatLeaf<(build_vector), [{
239 return X86::isPSHUFHWMask(N);
240}], SHUFFLE_get_pshufhw_imm>;
241
242def PSHUFLW_shuffle_mask : PatLeaf<(build_vector), [{
243 return X86::isPSHUFLWMask(N);
244}], SHUFFLE_get_pshuflw_imm>;
245
// Note: the "unary SHUFP" leaf reuses the PSHUFD predicate, and the
// "binary PSHUFD" leaf further down reuses the SHUFP predicate.
// NOTE(review): this cross-use looks intentional (same mask shape, different
// instruction) -- confirm at the use sites.
246def SHUFP_unary_shuffle_mask : PatLeaf<(build_vector), [{
247 return X86::isPSHUFDMask(N);
248}], SHUFFLE_get_shuf_imm>;
249
250def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
251 return X86::isSHUFPMask(N);
252}], SHUFFLE_get_shuf_imm>;
253
254def PSHUFD_binary_shuffle_mask : PatLeaf<(build_vector), [{
255 return X86::isSHUFPMask(N);
256}], SHUFFLE_get_shuf_imm>;
257
258//===----------------------------------------------------------------------===//
259// SSE scalar FP Instructions
260//===----------------------------------------------------------------------===//
261
262// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded by the
263// scheduler into a branch sequence.
Evan Cheng950aac02007-09-25 01:57:46 +0000264// They read EFLAGS and are flagged for the custom DAG scheduler inserter.
// Each pseudo has opcode 0 and the Pseudo format, takes a "true" value ($t), a
// "false" value ($f) and an 8-bit condition immediate, and matches X86cmov on
// those operands plus EFLAGS. The asm string is only a placeholder. There is
// one pseudo per register class / value type: FR32, FR64, and VR128 typed as
// v4f32, v2f64 and v2i64.
265let Uses = [EFLAGS], usesCustomDAGSchedInserter = 1 in {
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000266 def CMOV_FR32 : I<0, Pseudo,
Evan Chengb783fa32007-07-19 01:14:50 +0000267 (outs FR32:$dst), (ins FR32:$t, FR32:$f, i8imm:$cond),
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000268 "#CMOV_FR32 PSEUDO!",
Evan Cheng621216e2007-09-29 00:00:36 +0000269 [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond,
270 EFLAGS))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000271 def CMOV_FR64 : I<0, Pseudo,
Evan Chengb783fa32007-07-19 01:14:50 +0000272 (outs FR64:$dst), (ins FR64:$t, FR64:$f, i8imm:$cond),
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000273 "#CMOV_FR64 PSEUDO!",
Evan Cheng621216e2007-09-29 00:00:36 +0000274 [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond,
275 EFLAGS))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000276 def CMOV_V4F32 : I<0, Pseudo,
Evan Chengb783fa32007-07-19 01:14:50 +0000277 (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000278 "#CMOV_V4F32 PSEUDO!",
279 [(set VR128:$dst,
Evan Cheng621216e2007-09-29 00:00:36 +0000280 (v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond,
281 EFLAGS)))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000282 def CMOV_V2F64 : I<0, Pseudo,
Evan Chengb783fa32007-07-19 01:14:50 +0000283 (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000284 "#CMOV_V2F64 PSEUDO!",
285 [(set VR128:$dst,
Evan Cheng621216e2007-09-29 00:00:36 +0000286 (v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
287 EFLAGS)))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000288 def CMOV_V2I64 : I<0, Pseudo,
Evan Chengb783fa32007-07-19 01:14:50 +0000289 (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000290 "#CMOV_V2I64 PSEUDO!",
291 [(set VR128:$dst,
Evan Cheng621216e2007-09-29 00:00:36 +0000292 (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
Evan Cheng950aac02007-09-25 01:57:46 +0000293 EFLAGS)))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000294}
295
296//===----------------------------------------------------------------------===//
297// SSE1 Instructions
298//===----------------------------------------------------------------------===//
299
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000300// Move Instructions
// Scalar single-precision moves on the FR32 register class.
// MOVSSrr (reg-to-reg) has no selection pattern and is marked as never having
// side effects.
Chris Lattnerd1a9eb62008-01-11 06:59:07 +0000301let neverHasSideEffects = 1 in
Evan Chengb783fa32007-07-19 01:14:50 +0000302def MOVSSrr : SSI<0x10, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
Dan Gohman91888f02007-07-31 20:11:57 +0000303 "movss\t{$src, $dst|$dst, $src}", []>;
// MOVSSrm selects an f32 load; it is flagged as a simple load that can be
// rematerialized.
// NOTE(review): mayHaveSideEffects = 1 on a plain load is surprising;
// presumably it requests a target-specific check before rematerializing --
// confirm against the flag's definition.
Chris Lattner1a1932c2008-01-06 23:38:27 +0000304let isSimpleLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
Evan Chengb783fa32007-07-19 01:14:50 +0000305def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +0000306 "movss\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000307 [(set FR32:$dst, (loadf32 addr:$src))]>;
// MOVSSmr selects a store of an FR32 value to memory.
Evan Chengb783fa32007-07-19 01:14:50 +0000308def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
Dan Gohman91888f02007-07-31 20:11:57 +0000309 "movss\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000310 [(store FR32:$src, addr:$dst)]>;
311
312// Conversion instructions
// cvttss2si selects fp_to_sint from an FR32 register or an f32 load into a
// GR32 result; cvtsi2ss selects sint_to_fp from a GR32 register or an i32
// load into an FR32 result.
Evan Chengb783fa32007-07-19 01:14:50 +0000313def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
Dan Gohman91888f02007-07-31 20:11:57 +0000314 "cvttss2si\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000315 [(set GR32:$dst, (fp_to_sint FR32:$src))]>;
Evan Chengb783fa32007-07-19 01:14:50 +0000316def CVTTSS2SIrm : SSI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +0000317 "cvttss2si\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000318 [(set GR32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
Evan Chengb783fa32007-07-19 01:14:50 +0000319def CVTSI2SSrr : SSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
Dan Gohman91888f02007-07-31 20:11:57 +0000320 "cvtsi2ss\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000321 [(set FR32:$dst, (sint_to_fp GR32:$src))]>;
Evan Chengb783fa32007-07-19 01:14:50 +0000322def CVTSI2SSrm : SSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +0000323 "cvtsi2ss\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000324 [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
325
326// Match intrinsics which expect XMM operand(s).
// cvtss2si selected from the int_x86_sse_cvtss2si intrinsic. The register
// form takes a whole VR128; the memory form declares an f32mem operand and
// matches an untyped 'load' as the intrinsic's argument.
Evan Chengb783fa32007-07-19 01:14:50 +0000327def Int_CVTSS2SIrr : SSI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +0000328 "cvtss2si\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000329 [(set GR32:$dst, (int_x86_sse_cvtss2si VR128:$src))]>;
Evan Chengb783fa32007-07-19 01:14:50 +0000330def Int_CVTSS2SIrm : SSI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +0000331 "cvtss2si\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000332 [(set GR32:$dst, (int_x86_sse_cvtss2si
333 (load addr:$src)))]>;
334
Dale Johannesen1fbb4a52007-10-30 22:15:38 +0000335// Match intrinsics which expect MM and XMM operand(s).
// cvtps2pi / cvttps2pi take a VR128 register or an f64mem operand and produce
// a VR64 result, selected from the matching int_x86_sse_* intrinsic.
336def Int_CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
337 "cvtps2pi\t{$src, $dst|$dst, $src}",
338 [(set VR64:$dst, (int_x86_sse_cvtps2pi VR128:$src))]>;
339def Int_CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
340 "cvtps2pi\t{$src, $dst|$dst, $src}",
341 [(set VR64:$dst, (int_x86_sse_cvtps2pi
342 (load addr:$src)))]>;
343def Int_CVTTPS2PIrr: PSI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
344 "cvttps2pi\t{$src, $dst|$dst, $src}",
345 [(set VR64:$dst, (int_x86_sse_cvttps2pi VR128:$src))]>;
346def Int_CVTTPS2PIrm: PSI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
347 "cvttps2pi\t{$src, $dst|$dst, $src}",
348 [(set VR64:$dst, (int_x86_sse_cvttps2pi
349 (load addr:$src)))]>;
// cvtpi2ps is two-address: $src1 is tied to $dst, so the asm string prints
// only $src2 and $dst. The second source is a VR64 register or an i64mem
// operand.
Evan Cheng3ea4d672008-03-05 08:19:16 +0000350let Constraints = "$src1 = $dst" in {
Dale Johannesen1fbb4a52007-10-30 22:15:38 +0000351 def Int_CVTPI2PSrr : PSI<0x2A, MRMSrcReg,
352 (outs VR128:$dst), (ins VR128:$src1, VR64:$src2),
353 "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
354 [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1,
355 VR64:$src2))]>;
356 def Int_CVTPI2PSrm : PSI<0x2A, MRMSrcMem,
357 (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2),
358 "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
359 [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1,
360 (load addr:$src2)))]>;
361}
362
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000363// Aliases for intrinsics
// Same opcode (0x2C) and mnemonic as CVTTSS2SIrr/rm above, but selected from
// the int_x86_sse_cvttss2si intrinsic; the register form takes a VR128.
Evan Chengb783fa32007-07-19 01:14:50 +0000364def Int_CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +0000365 "cvttss2si\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000366 [(set GR32:$dst,
367 (int_x86_sse_cvttss2si VR128:$src))]>;
Evan Chengb783fa32007-07-19 01:14:50 +0000368def Int_CVTTSS2SIrm : SSI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +0000369 "cvttss2si\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000370 [(set GR32:$dst,
371 (int_x86_sse_cvttss2si(load addr:$src)))]>;
372
// Intrinsic forms of cvtsi2ss. Two-address: $src1 (the incoming vector) is
// tied to $dst, and the integer source is a GR32 register or an i32 load.
Evan Cheng3ea4d672008-03-05 08:19:16 +0000373let Constraints = "$src1 = $dst" in {
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000374 def Int_CVTSI2SSrr : SSI<0x2A, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +0000375 (outs VR128:$dst), (ins VR128:$src1, GR32:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +0000376 "cvtsi2ss\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000377 [(set VR128:$dst, (int_x86_sse_cvtsi2ss VR128:$src1,
378 GR32:$src2))]>;
379 def Int_CVTSI2SSrm : SSI<0x2A, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +0000380 (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +0000381 "cvtsi2ss\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000382 [(set VR128:$dst, (int_x86_sse_cvtsi2ss VR128:$src1,
383 (loadi32 addr:$src2)))]>;
384}
385
386// Comparison instructions
// Scalar cmpss on FR32. Two-address ($src1 tied to $dst); the SSECC operand
// is spliced into the mnemonic through ${cc}. Neither form has a selection
// pattern, so both are marked neverHasSideEffects explicitly, and the memory
// form is additionally marked mayLoad.
Evan Cheng3ea4d672008-03-05 08:19:16 +0000387let Constraints = "$src1 = $dst" in {
Chris Lattnerd1a9eb62008-01-11 06:59:07 +0000388let neverHasSideEffects = 1 in
Chris Lattnera9f545f2007-12-16 20:12:41 +0000389 def CMPSSrr : SSIi8<0xC2, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +0000390 (outs FR32:$dst), (ins FR32:$src1, FR32:$src, SSECC:$cc),
Dan Gohman91888f02007-07-31 20:11:57 +0000391 "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
Chris Lattnerd1a9eb62008-01-11 06:59:07 +0000392let neverHasSideEffects = 1, mayLoad = 1 in
Chris Lattnera9f545f2007-12-16 20:12:41 +0000393 def CMPSSrm : SSIi8<0xC2, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +0000394 (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, SSECC:$cc),
Dan Gohman91888f02007-07-31 20:11:57 +0000395 "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000396}
397
// Scalar ucomiss on FR32. The instructions have no register outputs; they
// define EFLAGS, and the pattern selects X86cmp with EFLAGS as an implicit
// result. The second operand is an FR32 register or an f32 load.
Evan Cheng55687072007-09-14 21:48:26 +0000398let Defs = [EFLAGS] in {
Evan Chengb783fa32007-07-19 01:14:50 +0000399def UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins FR32:$src1, FR32:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +0000400 "ucomiss\t{$src2, $src1|$src1, $src2}",
Evan Cheng621216e2007-09-29 00:00:36 +0000401 [(X86cmp FR32:$src1, FR32:$src2), (implicit EFLAGS)]>;
Evan Chengb783fa32007-07-19 01:14:50 +0000402def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +0000403 "ucomiss\t{$src2, $src1|$src1, $src2}",
Evan Cheng621216e2007-09-29 00:00:36 +0000404 [(X86cmp FR32:$src1, (loadf32 addr:$src2)),
Evan Cheng950aac02007-09-25 01:57:46 +0000405 (implicit EFLAGS)]>;
Evan Cheng55687072007-09-14 21:48:26 +0000406} // Defs = [EFLAGS]
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000407
408// Aliases to match intrinsics which expect XMM operand(s).
// Same opcode (0xC2) and mnemonic as CMPSSrr/rm, but operating on VR128 and
// selected from int_x86_sse_cmp_ss with the condition code as an immediate.
// Two-address: $src1 is tied to $dst.
Evan Cheng3ea4d672008-03-05 08:19:16 +0000409let Constraints = "$src1 = $dst" in {
Chris Lattnera9f545f2007-12-16 20:12:41 +0000410 def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +0000411 (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
Dan Gohman91888f02007-07-31 20:11:57 +0000412 "cmp${cc}ss\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000413 [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
414 VR128:$src, imm:$cc))]>;
Chris Lattnera9f545f2007-12-16 20:12:41 +0000415 def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +0000416 (outs VR128:$dst), (ins VR128:$src1, f32mem:$src, SSECC:$cc),
Dan Gohman91888f02007-07-31 20:11:57 +0000417 "cmp${cc}ss\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000418 [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
419 (load addr:$src), imm:$cc))]>;
420}
421
// VR128 forms of ucomiss / comiss. They select the X86ucomi / X86comi nodes on
// v4f32 operands, have no register outputs, and define EFLAGS (listed as an
// implicit result in each pattern). The memory forms declare an f128mem
// operand and match an untyped 'load'.
Evan Cheng55687072007-09-14 21:48:26 +0000422let Defs = [EFLAGS] in {
Evan Cheng621216e2007-09-29 00:00:36 +0000423def Int_UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs),
Evan Cheng950aac02007-09-25 01:57:46 +0000424 (ins VR128:$src1, VR128:$src2),
425 "ucomiss\t{$src2, $src1|$src1, $src2}",
Evan Cheng621216e2007-09-29 00:00:36 +0000426 [(X86ucomi (v4f32 VR128:$src1), VR128:$src2),
Evan Cheng950aac02007-09-25 01:57:46 +0000427 (implicit EFLAGS)]>;
Evan Cheng621216e2007-09-29 00:00:36 +0000428def Int_UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs),
Evan Cheng950aac02007-09-25 01:57:46 +0000429 (ins VR128:$src1, f128mem:$src2),
430 "ucomiss\t{$src2, $src1|$src1, $src2}",
Evan Cheng621216e2007-09-29 00:00:36 +0000431 [(X86ucomi (v4f32 VR128:$src1), (load addr:$src2)),
Evan Cheng950aac02007-09-25 01:57:46 +0000432 (implicit EFLAGS)]>;
433
Evan Cheng621216e2007-09-29 00:00:36 +0000434def Int_COMISSrr: PSI<0x2F, MRMSrcReg, (outs),
Evan Cheng950aac02007-09-25 01:57:46 +0000435 (ins VR128:$src1, VR128:$src2),
436 "comiss\t{$src2, $src1|$src1, $src2}",
Evan Cheng621216e2007-09-29 00:00:36 +0000437 [(X86comi (v4f32 VR128:$src1), VR128:$src2),
Evan Cheng950aac02007-09-25 01:57:46 +0000438 (implicit EFLAGS)]>;
Evan Cheng621216e2007-09-29 00:00:36 +0000439def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (outs),
Evan Cheng950aac02007-09-25 01:57:46 +0000440 (ins VR128:$src1, f128mem:$src2),
441 "comiss\t{$src2, $src1|$src1, $src2}",
Evan Cheng621216e2007-09-29 00:00:36 +0000442 [(X86comi (v4f32 VR128:$src1), (load addr:$src2)),
Evan Cheng950aac02007-09-25 01:57:46 +0000443 (implicit EFLAGS)]>;
Evan Cheng55687072007-09-14 21:48:26 +0000444} // Defs = [EFLAGS]
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000445
446// Aliases of packed SSE1 instructions for scalar use. These all have names that
447// start with 'Fs'.
448
449// Alias instruction that maps fld0 to xorps for SSE1.
// Materializes +0.0 in an FR32 register by xor-ing the register with itself;
// matched from the fp32imm0 leaf and rematerializable. This must be xorps
// (0F 57, no operand-size prefix): the XMM form of pxor (66 0F EF) is an SSE2
// instruction, so it cannot be emitted under a HasSSE1-only predicate.
Chris Lattner17dab4a2008-01-10 05:45:39 +0000450let isReMaterializable = 1 in
Evan Chengb783fa32007-07-19 01:14:50 +0000451def FsFLD0SS : I<0x57, MRMInitReg, (outs FR32:$dst), (ins),
Dan Gohman91888f02007-07-31 20:11:57 +0000452 "xorps\t$dst, $dst", [(set FR32:$dst, fp32imm0)]>,
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000453 Requires<[HasSSE1]>, TB;
454
455// Alias instruction to do FR32 reg-to-reg copy using movaps. Upper bits are
456// disregarded.
// No selection pattern is attached, so the absence of side effects is stated
// explicitly.
Chris Lattnerc90ee9c2008-01-10 07:59:24 +0000457let neverHasSideEffects = 1 in
Evan Chengb783fa32007-07-19 01:14:50 +0000458def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
Dan Gohman91888f02007-07-31 20:11:57 +0000459 "movaps\t{$src, $dst|$dst, $src}", []>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000460
461// Alias instruction to load FR32 from f128mem using movaps. Upper bits are
462// disregarded.
// Only matches loads that pass the 16-byte alignment check, via
// alignedloadfsf32; flagged as a simple load.
Chris Lattner1a1932c2008-01-06 23:38:27 +0000463let isSimpleLoad = 1 in
Evan Chengb783fa32007-07-19 01:14:50 +0000464def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +0000465 "movaps\t{$src, $dst|$dst, $src}",
Dan Gohman11821702007-07-27 17:16:43 +0000466 [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000467
468// Alias bitwise logical operations using SSE logical ops on packed FP values.
// All forms are two-address ($src1 tied to $dst) and operate on FR32.
Evan Cheng3ea4d672008-03-05 08:19:16 +0000469let Constraints = "$src1 = $dst" in {
// Register-register and/or/xor select X86fand / X86for / X86fxor and are
// marked commutable.
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000470let isCommutable = 1 in {
Evan Chengb783fa32007-07-19 01:14:50 +0000471 def FsANDPSrr : PSI<0x54, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +0000472 "andps\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000473 [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>;
Evan Chengb783fa32007-07-19 01:14:50 +0000474 def FsORPSrr : PSI<0x56, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +0000475 "orps\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000476 [(set FR32:$dst, (X86for FR32:$src1, FR32:$src2))]>;
Evan Chengb783fa32007-07-19 01:14:50 +0000477 def FsXORPSrr : PSI<0x57, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +0000478 "xorps\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000479 [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
480}
481
// Register-memory forms declare an f128mem operand and only fold loads that
// pass the memop alignment check, via memopfsf32.
Evan Chengb783fa32007-07-19 01:14:50 +0000482def FsANDPSrm : PSI<0x54, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +0000483 "andps\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000484 [(set FR32:$dst, (X86fand FR32:$src1,
Dan Gohman11821702007-07-27 17:16:43 +0000485 (memopfsf32 addr:$src2)))]>;
Evan Chengb783fa32007-07-19 01:14:50 +0000486def FsORPSrm : PSI<0x56, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +0000487 "orps\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000488 [(set FR32:$dst, (X86for FR32:$src1,
Dan Gohman11821702007-07-27 17:16:43 +0000489 (memopfsf32 addr:$src2)))]>;
Evan Chengb783fa32007-07-19 01:14:50 +0000490def FsXORPSrm : PSI<0x57, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +0000491 "xorps\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000492 [(set FR32:$dst, (X86fxor FR32:$src1,
Dan Gohman11821702007-07-27 17:16:43 +0000493 (memopfsf32 addr:$src2)))]>;
// andnps has no selection pattern in either form, so its properties are given
// explicitly: no side effects, and the memory form may load.
Chris Lattnerc90ee9c2008-01-10 07:59:24 +0000494let neverHasSideEffects = 1 in {
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000495def FsANDNPSrr : PSI<0x55, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +0000496 (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +0000497 "andnps\t{$src2, $dst|$dst, $src2}", []>;
Chris Lattnerc90ee9c2008-01-10 07:59:24 +0000498
499let mayLoad = 1 in
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000500def FsANDNPSrm : PSI<0x55, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +0000501 (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +0000502 "andnps\t{$src2, $dst|$dst, $src2}", []>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000503}
Chris Lattnerc90ee9c2008-01-10 07:59:24 +0000504}
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000505
506/// basic_sse1_fp_binop_rm - SSE1 binops come in both scalar and vector forms.
507///
508/// In addition, we also have a special variant of the scalar form here to
509/// represent the associated intrinsic operation. This form is unlike the
510/// plain scalar form, in that it takes an entire vector (instead of a scalar)
511/// and leaves the top elements undefined.
512///
513/// These three forms can each be reg+reg or reg+mem, so there are a total of
514/// six "instructions".
515///
/// Parameters:
///   opc        - 8-bit opcode shared by all six definitions.
///   OpcodeStr  - mnemonic stem; "ss" or "ps" is appended with !strconcat.
///   OpNode     - DAG node matched by the plain scalar and vector forms.
///   F32Int     - intrinsic matched by the *_Int forms.
///   Commutable - copied into isCommutable on the three reg+reg forms only
///                (defaults to 0).
/// Every definition is two-address: $src1 is tied to $dst.
Evan Cheng3ea4d672008-03-05 08:19:16 +0000516let Constraints = "$src1 = $dst" in {
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000517multiclass basic_sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
518 SDNode OpNode, Intrinsic F32Int,
519 bit Commutable = 0> {
520 // Scalar operation, reg+reg.
Evan Chengb783fa32007-07-19 01:14:50 +0000521 def SSrr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +0000522 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000523 [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> {
524 let isCommutable = Commutable;
525 }
526
527 // Scalar operation, reg+mem.
Evan Chengb783fa32007-07-19 01:14:50 +0000528 def SSrm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +0000529 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000530 [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
531
532 // Vector operation, reg+reg.
Evan Chengb783fa32007-07-19 01:14:50 +0000533 def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +0000534 !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000535 [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
536 let isCommutable = Commutable;
537 }
538
539 // Vector operation, reg+mem.
// Only folds loads that pass the memop (16-byte) alignment check.
Evan Chengb783fa32007-07-19 01:14:50 +0000540 def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +0000541 !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
Dan Gohman4a4f1512007-07-18 20:23:34 +0000542 [(set VR128:$dst, (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000543
544 // Intrinsic operation, reg+reg.
Evan Chengb783fa32007-07-19 01:14:50 +0000545 def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +0000546 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000547 [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]> {
548 let isCommutable = Commutable;
549 }
550
551 // Intrinsic operation, reg+mem.
// The memory operand is matched through the sse_load_f32 complex pattern and
// declared as an ssmem operand.
Evan Chengb783fa32007-07-19 01:14:50 +0000552 def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +0000553 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000554 [(set VR128:$dst, (F32Int VR128:$src1,
555 sse_load_f32:$src2))]>;
556}
557}
558
// Arithmetic instructions.  ADD and MUL pass Commutable = 1; SUB and DIV rely
// on the multiclass default of 0.
defm ADD : basic_sse1_fp_binop_rm<0x58, "add", fadd, int_x86_sse_add_ss, 1>;
defm MUL : basic_sse1_fp_binop_rm<0x59, "mul", fmul, int_x86_sse_mul_ss, 1>;
defm SUB : basic_sse1_fp_binop_rm<0x5C, "sub", fsub, int_x86_sse_sub_ss>;
defm DIV : basic_sse1_fp_binop_rm<0x5E, "div", fdiv, int_x86_sse_div_ss>;
564
/// sse1_fp_binop_rm - Other SSE1 binops
///
/// This multiclass is like basic_sse1_fp_binop_rm, with the addition of
/// instructions for a full-vector intrinsic form.  Operations that map
/// onto C operators don't use this form since they just use the plain
/// vector form instead of having a separate vector intrinsic form.
///
/// This provides a total of eight "instructions": scalar (SS), vector (PS),
/// scalar intrinsic (SS*_Int) and vector intrinsic (PS*_Int), each with a
/// register and a memory second operand.  Commutable is forwarded to
/// isCommutable on the register-register forms only.
///
let Constraints = "$src1 = $dst" in {
multiclass sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
                            SDNode OpNode,
                            Intrinsic F32Int,
                            Intrinsic V4F32Int,
                            bit Commutable = 0> {

  // Scalar operation, reg+reg.
  def SSrr : SSI<opc, MRMSrcReg,
                 (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
                 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                 [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> {
    let isCommutable = Commutable;
  }

  // Scalar operation, reg+mem.
  def SSrm : SSI<opc, MRMSrcMem,
                 (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
                 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                 [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;

  // Vector operation, reg+reg.
  def PSrr : PSI<opc, MRMSrcReg,
                 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                 !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst,
                       (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
    let isCommutable = Commutable;
  }

  // Vector operation, reg+mem.
  def PSrm : PSI<opc, MRMSrcMem,
                 (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                 !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst,
                       (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;

  // Intrinsic operation, reg+reg.
  def SSrr_Int : SSI<opc, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                     !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                           (F32Int VR128:$src1, VR128:$src2))]> {
    let isCommutable = Commutable;
  }

  // Intrinsic operation, reg+mem.
  def SSrm_Int : SSI<opc, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
                     !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                           (F32Int VR128:$src1, sse_load_f32:$src2))]>;

  // Vector intrinsic operation, reg+reg.
  def PSrr_Int : PSI<opc, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                     !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                           (V4F32Int VR128:$src1, VR128:$src2))]> {
    let isCommutable = Commutable;
  }

  // Vector intrinsic operation, reg+mem.
  // Use memopv4f32, like PSrm above, rather than an untyped `load`: the
  // packed instruction's memory operand must satisfy the same constraints
  // as the plain vector form, so only loads accepted by memopv4f32 may be
  // folded here.
  def PSrm_Int : PSI<opc, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                     !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                           (V4F32Int VR128:$src1,
                                     (memopv4f32 addr:$src2)))]>;
}
}
631
// Maximum / minimum.  Neither passes Commutable, so both keep the default 0.
defm MAX : sse1_fp_binop_rm<0x5F, "max", X86fmax,
                            int_x86_sse_max_ss, int_x86_sse_max_ps>;
defm MIN : sse1_fp_binop_rm<0x5D, "min", X86fmin,
                            int_x86_sse_min_ss, int_x86_sse_min_ps>;
636
637//===----------------------------------------------------------------------===//
638// SSE packed FP Instructions
639
// Move Instructions

// movaps: register copy (no selection pattern), aligned load, aligned store.
let neverHasSideEffects = 1 in
def MOVAPSrr : PSI<0x28, MRMSrcReg,
                   (outs VR128:$dst), (ins VR128:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   []>;
let isSimpleLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
def MOVAPSrm : PSI<0x28, MRMSrcMem,
                   (outs VR128:$dst), (ins f128mem:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (alignedloadv4f32 addr:$src))]>;

def MOVAPSmr : PSI<0x29, MRMDestMem,
                   (outs), (ins f128mem:$dst, VR128:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;

// movups: register copy (no selection pattern), plain load, plain store.
let neverHasSideEffects = 1 in
def MOVUPSrr : PSI<0x10, MRMSrcReg,
                   (outs VR128:$dst), (ins VR128:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   []>;
let isSimpleLoad = 1 in
def MOVUPSrm : PSI<0x10, MRMSrcMem,
                   (outs VR128:$dst), (ins f128mem:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (loadv4f32 addr:$src))]>;
def MOVUPSmr : PSI<0x11, MRMDestMem,
                   (outs), (ins f128mem:$dst, VR128:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)]>;
663
// movups selected from the loadu/storeu intrinsics rather than from plain
// load/store nodes.
let isSimpleLoad = 1 in
def MOVUPSrm_Int : PSI<0x10, MRMSrcMem,
                       (outs VR128:$dst), (ins f128mem:$src),
                       "movups\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>;
def MOVUPSmr_Int : PSI<0x11, MRMDestMem,
                       (outs), (ins f128mem:$dst, VR128:$src),
                       "movups\t{$src, $dst|$dst, $src}",
                       [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000672
// Two-address 64-bit loads merged into an existing vector.  Both match a
// shuffle of $src1 with an f64 load bitcast to v4f32; only the shuffle mask
// differs between the two.
let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
  def MOVLPSrm : PSI<0x12, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                     "movlps\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (v4f32 (vector_shuffle
                               VR128:$src1,
                               (bc_v4f32 (v2f64 (scalar_to_vector
                                                 (loadf64 addr:$src2)))),
                               MOVLP_shuffle_mask)))]>;
  def MOVHPSrm : PSI<0x16, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                     "movhps\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (v4f32 (vector_shuffle
                               VR128:$src1,
                               (bc_v4f32 (v2f64 (scalar_to_vector
                                                 (loadf64 addr:$src2)))),
                               MOVHP_shuffle_mask)))]>;
} // Constraints = "$src1 = $dst", AddedComplexity
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000691
// Store element 0 of the source viewed as v2f64.
def MOVLPSmr : PSI<0x13, MRMDestMem,
                   (outs), (ins f64mem:$dst, VR128:$src),
                   "movlps\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (bc_v2f64 (v4f32 VR128:$src)),
                                 (iPTR 0))),
                           addr:$dst)]>;

// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
def MOVHPSmr : PSI<0x17, MRMDestMem,
                   (outs), (ins f64mem:$dst, VR128:$src),
                   "movhps\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (v2f64 (vector_shuffle
                                         (bc_v2f64 (v4f32 VR128:$src)),
                                         (undef),
                                         UNPCKH_shuffle_mask)),
                                 (iPTR 0))),
                           addr:$dst)]>;
706
// Two-address register-register shuffles selected by shuffle mask.
let Constraints = "$src1 = $dst", AddedComplexity = 15 in {
  def MOVLHPSrr : PSI<0x16, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "movlhps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
                                MOVHP_shuffle_mask)))]>;

  def MOVHLPSrr : PSI<0x12, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "movhlps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
                                MOVHLPS_shuffle_mask)))]>;
} // Constraints = "$src1 = $dst", AddedComplexity
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000722
723
724
725// Arithmetic
726
/// sse1_fp_unop_rm - SSE1 unops come in both scalar and vector forms.
///
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation.  This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a
/// scalar) and leaves the top elements undefined.
///
/// And, we have a special variant form for a full-vector intrinsic form.
///
/// These four forms can each have a reg or a mem operand, so there are a
/// total of eight "instructions".  Commutable is forwarded to isCommutable
/// on the register forms only.
///
multiclass sse1_fp_unop_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode,
                           Intrinsic F32Int,
                           Intrinsic V4F32Int,
                           bit Commutable = 0> {
  // Scalar operation, reg.
  def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                [(set FR32:$dst, (OpNode FR32:$src))]> {
    let isCommutable = Commutable;
  }

  // Scalar operation, mem.
  def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                [(set FR32:$dst, (OpNode (load addr:$src)))]>;

  // Vector operation, reg.
  def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]> {
    let isCommutable = Commutable;
  }

  // Vector operation, mem.
  def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>;

  // Intrinsic operation, reg.
  def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F32Int VR128:$src))]> {
    let isCommutable = Commutable;
  }

  // Intrinsic operation, mem.
  def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
                    !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;

  // Vector intrinsic operation, reg.
  def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (V4F32Int VR128:$src))]> {
    let isCommutable = Commutable;
  }

  // Vector intrinsic operation, mem.
  // Use memopv4f32, like PSm above, rather than an untyped `load`, so the
  // folded memory operand is subject to the same constraints as the plain
  // vector form.
  def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                          (V4F32Int (memopv4f32 addr:$src)))]>;
}
792
// Square root.
defm SQRT  : sse1_fp_unop_rm<0x51, "sqrt", fsqrt,
                             int_x86_sse_sqrt_ss, int_x86_sse_sqrt_ps>;

// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
defm RSQRT : sse1_fp_unop_rm<0x52, "rsqrt", X86frsqrt,
                             int_x86_sse_rsqrt_ss, int_x86_sse_rsqrt_ps>;
defm RCP   : sse1_fp_unop_rm<0x53, "rcp", X86frcp,
                             int_x86_sse_rcp_ss, int_x86_sse_rcp_ps>;
803
// Logical
// Two-address bitwise operations on VR128.  The patterns are written on
// v2i64; the memory forms bitcast $src1 from v4f32 and fold a memopv2i64.
let Constraints = "$src1 = $dst" in {
  // Register-register and/or/xor are marked commutable.
  let isCommutable = 1 in {
    def ANDPSrr : PSI<0x54, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "andps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                            (v2i64 (and VR128:$src1, VR128:$src2)))]>;
    def ORPSrr  : PSI<0x56, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "orps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                            (v2i64 (or VR128:$src1, VR128:$src2)))]>;
    def XORPSrr : PSI<0x57, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "xorps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                            (v2i64 (xor VR128:$src1, VR128:$src2)))]>;
  }

  def ANDPSrm : PSI<0x54, MRMSrcMem,
                    (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                    "andps\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                          (and (bc_v2i64 (v4f32 VR128:$src1)),
                               (memopv2i64 addr:$src2)))]>;
  def ORPSrm  : PSI<0x56, MRMSrcMem,
                    (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                    "orps\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                          (or (bc_v2i64 (v4f32 VR128:$src1)),
                              (memopv2i64 addr:$src2)))]>;
  def XORPSrm : PSI<0x57, MRMSrcMem,
                    (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                    "xorps\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                          (xor (bc_v2i64 (v4f32 VR128:$src1)),
                               (memopv2i64 addr:$src2)))]>;

  // and-not: (~$src1) & $src2, with the complement spelled as an xor with
  // the all-ones vector.
  def ANDNPSrr : PSI<0x55, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                     "andnps\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                           (v2i64 (and (xor VR128:$src1,
                                            (bc_v2i64 (v4i32 immAllOnesV))),
                                       VR128:$src2)))]>;
  def ANDNPSrm : PSI<0x55, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1,f128mem:$src2),
                     "andnps\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                           (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)),
                                            (bc_v2i64 (v4i32 immAllOnesV))),
                                       (memopv2i64 addr:$src2))))]>;
}
854
// Packed single-precision compare, selected from the cmp_ps intrinsic.  The
// condition code operand $cc is spliced into the mnemonic via ${cc}.
let Constraints = "$src1 = $dst" in {
  def CMPPSrri : PSIi8<0xC2, MRMSrcReg,
                       (outs VR128:$dst),
                       (ins VR128:$src1, VR128:$src, SSECC:$cc),
                       "cmp${cc}ps\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                             (int_x86_sse_cmp_ps VR128:$src1, VR128:$src,
                                                 imm:$cc))]>;
  // Fold only loads accepted by memopv4f32 (as the other packed reg+mem
  // forms in this file do) instead of an arbitrary untyped `load`.
  def CMPPSrmi : PSIi8<0xC2, MRMSrcMem,
                       (outs VR128:$dst),
                       (ins VR128:$src1, f128mem:$src, SSECC:$cc),
                       "cmp${cc}ps\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                             (int_x86_sse_cmp_ps VR128:$src1,
                                                 (memopv4f32 addr:$src),
                                                 imm:$cc))]>;
}
867
// Shuffle and unpack instructions
// All are two-address and match a v4f32 vector_shuffle; the memory forms
// fold a memopv4f32 as the second shuffle input.
let Constraints = "$src1 = $dst" in {
  let isConvertibleToThreeAddress = 1 in // Convert to pshufd
    def SHUFPSrri : PSIi8<0xC6, MRMSrcReg,
                          (outs VR128:$dst),
                          (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
                          "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                          [(set VR128:$dst,
                            (v4f32 (vector_shuffle
                                    VR128:$src1, VR128:$src2,
                                    SHUFP_shuffle_mask:$src3)))]>;
  def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem,
                        (outs VR128:$dst),
                        (ins VR128:$src1, f128mem:$src2, i32i8imm:$src3),
                        "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                        [(set VR128:$dst,
                          (v4f32 (vector_shuffle
                                  VR128:$src1, (memopv4f32 addr:$src2),
                                  SHUFP_shuffle_mask:$src3)))]>;

  let AddedComplexity = 10 in {
    def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                         "unpckhps\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v4f32 (vector_shuffle
                                   VR128:$src1, VR128:$src2,
                                   UNPCKH_shuffle_mask)))]>;
    def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
                         (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                         "unpckhps\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v4f32 (vector_shuffle
                                   VR128:$src1, (memopv4f32 addr:$src2),
                                   UNPCKH_shuffle_mask)))]>;

    def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                         "unpcklps\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v4f32 (vector_shuffle
                                   VR128:$src1, VR128:$src2,
                                   UNPCKL_shuffle_mask)))]>;
    def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
                         (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                         "unpcklps\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v4f32 (vector_shuffle
                                   VR128:$src1, (memopv4f32 addr:$src2),
                                   UNPCKL_shuffle_mask)))]>;
  } // AddedComplexity
} // Constraints = "$src1 = $dst"
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000920
// Mask creation
def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
                     "movmskps\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (int_x86_sse_movmsk_ps VR128:$src))]>;
// movmskpd is the packed-double (SSE2) counterpart and is selected from an
// sse2 intrinsic, so it uses the PDI instruction class rather than PSI;
// declared with PSI it would be indistinguishable from movmskps in its
// opcode/class description.
def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
                     "movmskpd\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (int_x86_sse2_movmsk_pd VR128:$src))]>;
928
// Prefetch intrinsic.
// Each variant matches the generic prefetch node with a different constant
// third operand (3, 2, 1, 0); the second operand is any immediate.
def PREFETCHT0  : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
                      "prefetcht0\t$src",
                      [(prefetch addr:$src, imm, (i32 3))]>;
def PREFETCHT1  : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
                      "prefetcht1\t$src",
                      [(prefetch addr:$src, imm, (i32 2))]>;
def PREFETCHT2  : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
                      "prefetcht2\t$src",
                      [(prefetch addr:$src, imm, (i32 1))]>;
def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
                      "prefetchnta\t$src",
                      [(prefetch addr:$src, imm, (i32 0))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000938
// Non-temporal stores
// Selected only from the movnt_ps intrinsic.
def MOVNTPSmr : PSI<0x2B, MRMDestMem,
                    (outs), (ins i128mem:$dst, VR128:$src),
                    "movntps\t{$src, $dst|$dst, $src}",
                    [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
943
// Load, store, and memory fence
// NOTE(review): this record has no operands yet uses the memory form MRM7m;
// confirm the code emitter produces the intended encoding for it.
def SFENCE : PSI<0xAE, MRM7m, (outs), (ins),
                 "sfence",
                 [(int_x86_sse_sfence)]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000946
// MXCSR register
// Both take a 32-bit memory operand and are selected from the matching
// ldmxcsr/stmxcsr intrinsic.
def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
                  "ldmxcsr\t$src",
                  [(int_x86_sse_ldmxcsr addr:$src)]>;
def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
                  "stmxcsr\t$dst",
                  [(int_x86_sse_stmxcsr addr:$dst)]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000952
// Alias instructions that map zero vector to pxor / xorp* for sse.
let isReMaterializable = 1 in
def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins),
                 "xorps\t$dst, $dst",
                 [(set VR128:$dst, (v4i32 immAllZerosV))]>;

// The def above matches the v4i32 zero vector directly; the remaining
// 128-bit vector types are routed to the same instruction here.
let Predicates = [HasSSE1] in {
  def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
  def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
  def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
  def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
  def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
}
966
// FR32 to 128-bit vector conversion.
// Matches scalar_to_vector of an FR32 register or of an f32 load.
def MOVSS2PSrr : SSI<0x10, MRMSrcReg,
                     (outs VR128:$dst), (ins FR32:$src),
                     "movss\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                           (v4f32 (scalar_to_vector FR32:$src)))]>;
def MOVSS2PSrm : SSI<0x10, MRMSrcMem,
                     (outs VR128:$dst), (ins f32mem:$src),
                     "movss\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                           (v4f32 (scalar_to_vector
                                   (loadf32 addr:$src))))]>;
976
// FIXME: we may not be able to eliminate this movss by coalescing, because
// the src and dest register classes are different. We really want to write
// this pattern like this:
// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
//           (f32 FR32:$src)>;
def MOVPS2SSrr : SSI<0x10, MRMSrcReg,
                     (outs FR32:$dst), (ins VR128:$src),
                     "movss\t{$src, $dst|$dst, $src}",
                     [(set FR32:$dst,
                           (vector_extract (v4f32 VR128:$src), (iPTR 0)))]>;
// Store form: writes element 0 of the vector to memory as an f32.
def MOVPS2SSmr : SSI<0x11, MRMDestMem,
                     (outs), (ins f32mem:$dst, VR128:$src),
                     "movss\t{$src, $dst|$dst, $src}",
                     [(store (f32 (vector_extract (v4f32 VR128:$src),
                                                  (iPTR 0))),
                             addr:$dst)]>;
990
991
// Move to lower bits of a VR128, leaving upper bits alone.
// Three operand (but two address) aliases.
let Constraints = "$src1 = $dst" in {
  // FR32 source; carries no selection pattern.
  let neverHasSideEffects = 1 in
    def MOVLSS2PSrr : SSI<0x10, MRMSrcReg,
                          (outs VR128:$dst), (ins VR128:$src1, FR32:$src2),
                          "movss\t{$src2, $dst|$dst, $src2}",
                          []>;

  // VR128 source; selected for shuffles matching MOVL_shuffle_mask.
  let AddedComplexity = 15 in
    def MOVLPSrr : SSI<0x10, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                       "movss\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                         (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
                                 MOVL_shuffle_mask)))]>;
}
1008
// Move to lower bits of a VR128 and zeroing upper bits.
// Loading from memory automatically zeroing upper bits.
let AddedComplexity = 20 in
def MOVZSS2PSrm : SSI<0x10, MRMSrcMem,
                      (outs VR128:$dst), (ins f32mem:$src),
                      "movss\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4f32 (X86vzmovl
                                (v4f32 (scalar_to_vector
                                        (loadf32 addr:$src))))))]>;

// An X86vzmovl applied directly to a v4f32 memory operand selects the same
// instruction.
def : Pat<(v4f32 (X86vzmovl (memopv4f32 addr:$src))),
          (MOVZSS2PSrm addr:$src)>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001019
1020//===----------------------------------------------------------------------===//
1021// SSE2 Instructions
1022//===----------------------------------------------------------------------===//
1023
// Move Instructions
// Scalar double moves on FR64: register copy (no selection pattern), load,
// and store.
let neverHasSideEffects = 1 in
def MOVSDrr : SDI<0x10, MRMSrcReg,
                  (outs FR64:$dst), (ins FR64:$src),
                  "movsd\t{$src, $dst|$dst, $src}",
                  []>;
let isSimpleLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
def MOVSDrm : SDI<0x10, MRMSrcMem,
                  (outs FR64:$dst), (ins f64mem:$src),
                  "movsd\t{$src, $dst|$dst, $src}",
                  [(set FR64:$dst, (loadf64 addr:$src))]>;
def MOVSDmr : SDI<0x11, MRMDestMem,
                  (outs), (ins f64mem:$dst, FR64:$src),
                  "movsd\t{$src, $dst|$dst, $src}",
                  [(store FR64:$src, addr:$dst)]>;
1035
// Conversion instructions

// f64 -> i32 (fp_to_sint).
def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg,
                      (outs GR32:$dst), (ins FR64:$src),
                      "cvttsd2si\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (fp_to_sint FR64:$src))]>;
def CVTTSD2SIrm : SDI<0x2C, MRMSrcMem,
                      (outs GR32:$dst), (ins f64mem:$src),
                      "cvttsd2si\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;

// f64 -> f32 (fround).
def CVTSD2SSrr  : SDI<0x5A, MRMSrcReg,
                      (outs FR32:$dst), (ins FR64:$src),
                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
                      [(set FR32:$dst, (fround FR64:$src))]>;
def CVTSD2SSrm  : SDI<0x5A, MRMSrcMem,
                      (outs FR32:$dst), (ins f64mem:$src),
                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
                      [(set FR32:$dst, (fround (loadf64 addr:$src)))]>;

// i32 -> f64 (sint_to_fp).
def CVTSI2SDrr  : SDI<0x2A, MRMSrcReg,
                      (outs FR64:$dst), (ins GR32:$src),
                      "cvtsi2sd\t{$src, $dst|$dst, $src}",
                      [(set FR64:$dst, (sint_to_fp GR32:$src))]>;
def CVTSI2SDrm  : SDI<0x2A, MRMSrcMem,
                      (outs FR64:$dst), (ins i32mem:$src),
                      "cvtsi2sd\t{$src, $dst|$dst, $src}",
                      [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
1055
1056// SSE2 instructions with XS prefix
// cvtss2sd: selected for fextend of an FR32 (rr) or an extloadf32 from memory
// (rm) into an FR64. These are declared with the plain `I` class, so the XS
// prefix and the HasSSE2 predicate are attached explicitly on each def.
Evan Chengb783fa32007-07-19 01:14:50 +00001057def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001058 "cvtss2sd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001059 [(set FR64:$dst, (fextend FR32:$src))]>, XS,
1060 Requires<[HasSSE2]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001061def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001062 "cvtss2sd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001063 [(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
1064 Requires<[HasSSE2]>;
1065
1066// Match intrinsics which expect XMM operand(s).
// Int_CVTSD2SI: matches the int_x86_sse2_cvtsd2si intrinsic, producing a GR32
// from a VR128 register (rr) or from an f128mem operand via `load` (rm).
Evan Chengb783fa32007-07-19 01:14:50 +00001067def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001068 "cvtsd2si\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001069 [(set GR32:$dst, (int_x86_sse2_cvtsd2si VR128:$src))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001070def Int_CVTSD2SIrm : SDI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001071 "cvtsd2si\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001072 [(set GR32:$dst, (int_x86_sse2_cvtsd2si
1073 (load addr:$src)))]>;
1074
Dale Johannesen1fbb4a52007-10-30 22:15:38 +00001075// Match intrinsics which expect MM and XMM operand(s).
// Conversions between VR64 and VR128 operands, each matched through an
// int_x86_sse_* intrinsic:
//   Int_CVTPD2PI  / Int_CVTTPD2PI : VR128 (or f128mem) source -> VR64 result
//   Int_CVTPI2PD                  : VR64  (or i64mem)  source -> VR128 result
// The rm forms wrap the memory operand in an untyped `load`.
1076def Int_CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
1077 "cvtpd2pi\t{$src, $dst|$dst, $src}",
1078 [(set VR64:$dst, (int_x86_sse_cvtpd2pi VR128:$src))]>;
1079def Int_CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
1080 "cvtpd2pi\t{$src, $dst|$dst, $src}",
1081 [(set VR64:$dst, (int_x86_sse_cvtpd2pi
1082 (load addr:$src)))]>;
1083def Int_CVTTPD2PIrr: PDI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
1084 "cvttpd2pi\t{$src, $dst|$dst, $src}",
1085 [(set VR64:$dst, (int_x86_sse_cvttpd2pi VR128:$src))]>;
1086def Int_CVTTPD2PIrm: PDI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
1087 "cvttpd2pi\t{$src, $dst|$dst, $src}",
1088 [(set VR64:$dst, (int_x86_sse_cvttpd2pi
1089 (load addr:$src)))]>;
1090def Int_CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
1091 "cvtpi2pd\t{$src, $dst|$dst, $src}",
1092 [(set VR128:$dst, (int_x86_sse_cvtpi2pd VR64:$src))]>;
1093def Int_CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
1094 "cvtpi2pd\t{$src, $dst|$dst, $src}",
1095 [(set VR128:$dst, (int_x86_sse_cvtpi2pd
1096 (load addr:$src)))]>;
1097
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001098// Aliases for intrinsics
// Int_CVTTSD2SI: matches the int_x86_sse2_cvttsd2si intrinsic, producing a GR32
// from a VR128 register (rr) or from an f128mem operand via `load` (rm).
// Uses the same opcode (0x2C) and mnemonic as the FR64-based CVTTSD2SI defs.
Evan Chengb783fa32007-07-19 01:14:50 +00001099def Int_CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001100 "cvttsd2si\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001101 [(set GR32:$dst,
1102 (int_x86_sse2_cvttsd2si VR128:$src))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001103def Int_CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001104 "cvttsd2si\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001105 [(set GR32:$dst, (int_x86_sse2_cvttsd2si
1106 (load addr:$src)))]>;
1107
1108// Comparison instructions
// CMPSDrr / CMPSDrm: scalar-double compare on FR64 with an SSECC condition
// operand that is spliced into the mnemonic through ${cc}. Both forms tie
// $src1 to $dst and are marked neverHasSideEffects; neither carries a
// selection pattern ([]). The memory form is additionally marked mayLoad,
// since with an empty pattern the load cannot be inferred.
Evan Cheng3ea4d672008-03-05 08:19:16 +00001109let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
Evan Cheng653c7ac2007-12-20 19:57:09 +00001110 def CMPSDrr : SDIi8<0xC2, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001111 (outs FR64:$dst), (ins FR64:$src1, FR64:$src, SSECC:$cc),
Dan Gohman91888f02007-07-31 20:11:57 +00001112 "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
Chris Lattnerd1a9eb62008-01-11 06:59:07 +00001113let mayLoad = 1 in
Evan Cheng653c7ac2007-12-20 19:57:09 +00001114 def CMPSDrm : SDIi8<0xC2, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001115 (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, SSECC:$cc),
Dan Gohman91888f02007-07-31 20:11:57 +00001116 "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001117}
1118
// UCOMISDrr / UCOMISDrm: selected for X86cmp on f64 operands. They have no
// register outputs; the result is the implicit EFLAGS definition declared by
// the enclosing `Defs` and repeated as (implicit EFLAGS) in each pattern.
Evan Cheng950aac02007-09-25 01:57:46 +00001119let Defs = [EFLAGS] in {
Evan Chengb783fa32007-07-19 01:14:50 +00001120def UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins FR64:$src1, FR64:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001121 "ucomisd\t{$src2, $src1|$src1, $src2}",
Evan Cheng621216e2007-09-29 00:00:36 +00001122 [(X86cmp FR64:$src1, FR64:$src2), (implicit EFLAGS)]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001123def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001124 "ucomisd\t{$src2, $src1|$src1, $src2}",
Evan Cheng621216e2007-09-29 00:00:36 +00001125 [(X86cmp FR64:$src1, (loadf64 addr:$src2)),
Evan Cheng950aac02007-09-25 01:57:46 +00001126 (implicit EFLAGS)]>;
1127}
1128
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001129// Aliases to match intrinsics which expect XMM operand(s).
// Int_CMPSDrr / Int_CMPSDrm: VR128 forms of cmp${cc}sd that match the
// int_x86_sse2_cmp_sd intrinsic, with the condition passed as imm:$cc.
// $src1 is tied to $dst. The rm form takes an f64mem operand wrapped in an
// untyped `load`.
Evan Cheng3ea4d672008-03-05 08:19:16 +00001130let Constraints = "$src1 = $dst" in {
Evan Cheng653c7ac2007-12-20 19:57:09 +00001131 def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001132 (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
Dan Gohman91888f02007-07-31 20:11:57 +00001133 "cmp${cc}sd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001134 [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
1135 VR128:$src, imm:$cc))]>;
Evan Cheng653c7ac2007-12-20 19:57:09 +00001136 def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001137 (outs VR128:$dst), (ins VR128:$src1, f64mem:$src, SSECC:$cc),
Dan Gohman91888f02007-07-31 20:11:57 +00001138 "cmp${cc}sd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001139 [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
1140 (load addr:$src), imm:$cc))]>;
1141}
1142
// VR128 (v2f64) forms of ucomisd / comisd. They match the X86ucomi and X86comi
// nodes respectively, have no register outputs, and define EFLAGS implicitly.
// The rm forms take an f128mem operand wrapped in an untyped `load`.
Evan Cheng950aac02007-09-25 01:57:46 +00001143let Defs = [EFLAGS] in {
Evan Chengb783fa32007-07-19 01:14:50 +00001144def Int_UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001145 "ucomisd\t{$src2, $src1|$src1, $src2}",
Evan Cheng621216e2007-09-29 00:00:36 +00001146 [(X86ucomi (v2f64 VR128:$src1), (v2f64 VR128:$src2)),
1147 (implicit EFLAGS)]>;
1148def Int_UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001149 "ucomisd\t{$src2, $src1|$src1, $src2}",
Evan Cheng621216e2007-09-29 00:00:36 +00001150 [(X86ucomi (v2f64 VR128:$src1), (load addr:$src2)),
1151 (implicit EFLAGS)]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001152
Evan Chengb783fa32007-07-19 01:14:50 +00001153def Int_COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001154 "comisd\t{$src2, $src1|$src1, $src2}",
Evan Cheng621216e2007-09-29 00:00:36 +00001155 [(X86comi (v2f64 VR128:$src1), (v2f64 VR128:$src2)),
1156 (implicit EFLAGS)]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001157def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001158 "comisd\t{$src2, $src1|$src1, $src2}",
Evan Cheng621216e2007-09-29 00:00:36 +00001159 [(X86comi (v2f64 VR128:$src1), (load addr:$src2)),
Evan Cheng950aac02007-09-25 01:57:46 +00001160 (implicit EFLAGS)]>;
1161} // Defs = [EFLAGS]
1162
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001163// Aliases of packed SSE2 instructions for scalar use. These all have names that
1164// start with 'Fs'.
1165
1166// Alias instructions that map fld0 to pxor for sse.
// FsFLD0SD: materializes the f64 constant matched by fpimm0 into an FR64 by
// emitting `pxor $dst, $dst`. It takes no inputs and is marked
// rematerializable. Declared with the plain `I` class, so the TB/OpSize
// prefixes and the HasSSE2 predicate are attached explicitly.
Chris Lattner17dab4a2008-01-10 05:45:39 +00001167let isReMaterializable = 1 in
Evan Chengb783fa32007-07-19 01:14:50 +00001168def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins),
Dan Gohman91888f02007-07-31 20:11:57 +00001169 "pxor\t$dst, $dst", [(set FR64:$dst, fpimm0)]>,
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001170 Requires<[HasSSE2]>, TB, OpSize;
1171
1172// Alias instruction to do FR64 reg-to-reg copy using movapd. Upper bits are
1173// disregarded.
// FsMOVAPDrr: FR64-to-FR64 copy emitted as movapd. It has no selection
// pattern ([]), so neverHasSideEffects is stated explicitly.
Chris Lattnerc90ee9c2008-01-10 07:59:24 +00001174let neverHasSideEffects = 1 in
Evan Chengb783fa32007-07-19 01:14:50 +00001175def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001176 "movapd\t{$src, $dst|$dst, $src}", []>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001177
1178// Alias instruction to load FR64 from f128mem using movapd. Upper bits are
1179// disregarded.
// FsMOVAPDrm: loads an FR64 from an f128mem operand with movapd. Selected for
// the alignedloadfsf64 fragment and marked isSimpleLoad.
Chris Lattner1a1932c2008-01-06 23:38:27 +00001180let isSimpleLoad = 1 in
Evan Chengb783fa32007-07-19 01:14:50 +00001181def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001182 "movapd\t{$src, $dst|$dst, $src}",
Dan Gohman11821702007-07-27 17:16:43 +00001183 [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001184
1185// Alias bitwise logical operations using SSE logical ops on packed FP values.
// FR64 aliases of the packed logical instructions. Every def in this block
// ties $src1 to $dst.
//   - FsANDPDrr / FsORPDrr / FsXORPDrr match X86fand / X86for / X86fxor on two
//     FR64 registers and are marked commutable.
//   - The rm forms match the same nodes with the second operand read from an
//     f128mem operand through the memopfsf64 fragment.
//   - FsANDNPDrr / FsANDNPDrm have no selection pattern ([]); they are marked
//     neverHasSideEffects, and the memory form is marked mayLoad.
Evan Cheng3ea4d672008-03-05 08:19:16 +00001186let Constraints = "$src1 = $dst" in {
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001187let isCommutable = 1 in {
Evan Cheng0e3e01d2008-05-02 07:53:32 +00001188 def FsANDPDrr : PDI<0x54, MRMSrcReg, (outs FR64:$dst),
1189 (ins FR64:$src1, FR64:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001190 "andpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001191 [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
Evan Cheng0e3e01d2008-05-02 07:53:32 +00001192 def FsORPDrr : PDI<0x56, MRMSrcReg, (outs FR64:$dst),
1193 (ins FR64:$src1, FR64:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001194 "orpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001195 [(set FR64:$dst, (X86for FR64:$src1, FR64:$src2))]>;
Evan Cheng0e3e01d2008-05-02 07:53:32 +00001196 def FsXORPDrr : PDI<0x57, MRMSrcReg, (outs FR64:$dst),
1197 (ins FR64:$src1, FR64:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001198 "xorpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001199 [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>;
1200}
1201
Evan Cheng0e3e01d2008-05-02 07:53:32 +00001202def FsANDPDrm : PDI<0x54, MRMSrcMem, (outs FR64:$dst),
1203 (ins FR64:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001204 "andpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001205 [(set FR64:$dst, (X86fand FR64:$src1,
Dan Gohman11821702007-07-27 17:16:43 +00001206 (memopfsf64 addr:$src2)))]>;
Evan Cheng0e3e01d2008-05-02 07:53:32 +00001207def FsORPDrm : PDI<0x56, MRMSrcMem, (outs FR64:$dst),
1208 (ins FR64:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001209 "orpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001210 [(set FR64:$dst, (X86for FR64:$src1,
Dan Gohman11821702007-07-27 17:16:43 +00001211 (memopfsf64 addr:$src2)))]>;
Evan Cheng0e3e01d2008-05-02 07:53:32 +00001212def FsXORPDrm : PDI<0x57, MRMSrcMem, (outs FR64:$dst),
1213 (ins FR64:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001214 "xorpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001215 [(set FR64:$dst, (X86fxor FR64:$src1,
Dan Gohman11821702007-07-27 17:16:43 +00001216 (memopfsf64 addr:$src2)))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001217
Chris Lattnerc90ee9c2008-01-10 07:59:24 +00001218let neverHasSideEffects = 1 in {
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001219def FsANDNPDrr : PDI<0x55, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001220 (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001221 "andnpd\t{$src2, $dst|$dst, $src2}", []>;
Chris Lattnerc90ee9c2008-01-10 07:59:24 +00001222let mayLoad = 1 in
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001223def FsANDNPDrm : PDI<0x55, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001224 (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001225 "andnpd\t{$src2, $dst|$dst, $src2}", []>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001226}
Chris Lattnerc90ee9c2008-01-10 07:59:24 +00001227}
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001228
1229/// basic_sse2_fp_binop_rm - SSE2 binops come in both scalar and vector forms.
1230///
1231/// In addition, we also have a special variant of the scalar form here to
1232/// represent the associated intrinsic operation. This form is unlike the
1233/// plain scalar form, in that it takes an entire vector (instead of a scalar)
1234/// and leaves the top elements unmodified (therefore it cannot be commuted).
1235///
1236/// These three forms can each be reg+reg or reg+mem, so there are a total of
1237/// six "instructions".
1238///
/// Parameters:
///   opc        - opcode byte shared by all six forms.
///   OpcodeStr  - mnemonic stem; "sd" or "pd" is appended per form.
///   OpNode     - SDNode matched by the plain scalar and vector forms.
///   F64Int     - intrinsic matched by the *_Int scalar forms.
///   Commutable - sets isCommutable on the plain scalar and vector reg+reg
///                forms only.
Evan Cheng3ea4d672008-03-05 08:19:16 +00001239let Constraints = "$src1 = $dst" in {
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001240multiclass basic_sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
1241 SDNode OpNode, Intrinsic F64Int,
1242 bit Commutable = 0> {
1243 // Scalar operation, reg+reg.
Evan Chengb783fa32007-07-19 01:14:50 +00001244 def SDrr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001245 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001246 [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> {
1247 let isCommutable = Commutable;
1248 }
1249
1250 // Scalar operation, reg+mem.
Evan Chengb783fa32007-07-19 01:14:50 +00001251 def SDrm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001252 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001253 [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
1254
1255 // Vector operation, reg+reg.
Evan Chengb783fa32007-07-19 01:14:50 +00001256 def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001257 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001258 [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
1259 let isCommutable = Commutable;
1260 }
1261
1262 // Vector operation, reg+mem.
Evan Chengb783fa32007-07-19 01:14:50 +00001263 def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001264 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
Dan Gohman4a4f1512007-07-18 20:23:34 +00001265 [(set VR128:$dst, (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001266
1267 // Intrinsic operation, reg+reg.
 // Deliberately NOT commutable, even when Commutable is set: the scalar
 // intrinsic passes the upper element of $src1 through to the result, so
 // swapping the operands would change the upper half of $dst.
Evan Chengb783fa32007-07-19 01:14:50 +00001268 def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001269 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001270 [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]>;
1273
1274 // Intrinsic operation, reg+mem.
Evan Chengb783fa32007-07-19 01:14:50 +00001275 def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001276 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001277 [(set VR128:$dst, (F64Int VR128:$src1,
1278 sse_load_f64:$src2))]>;
1279}
1280}
1281
1282// Arithmetic instructions
// Instantiate basic_sse2_fp_binop_rm for the four arithmetic operations.
// ADD and MUL pass Commutable = 1; SUB and DIV rely on the default of 0.
1283defm ADD : basic_sse2_fp_binop_rm<0x58, "add", fadd, int_x86_sse2_add_sd, 1>;
1284defm MUL : basic_sse2_fp_binop_rm<0x59, "mul", fmul, int_x86_sse2_mul_sd, 1>;
1285defm SUB : basic_sse2_fp_binop_rm<0x5C, "sub", fsub, int_x86_sse2_sub_sd>;
1286defm DIV : basic_sse2_fp_binop_rm<0x5E, "div", fdiv, int_x86_sse2_div_sd>;
1287
1288/// sse2_fp_binop_rm - Other SSE2 binops
1289///
1290/// This multiclass is like basic_sse2_fp_binop_rm, with the addition of
1291/// instructions for a full-vector intrinsic form. Operations that map
1292/// onto C operators don't use this form since they just use the plain
1293/// vector form instead of having a separate vector intrinsic form.
1294///
1295/// This provides a total of eight "instructions".
1296///
// Parameters of sse2_fp_binop_rm:
//   opc        - opcode byte shared by all eight forms.
//   OpcodeStr  - mnemonic stem; "sd" or "pd" is appended per form.
//   OpNode     - SDNode matched by the plain scalar and vector forms.
//   F64Int     - intrinsic matched by the scalar *_Int forms.
//   V2F64Int   - intrinsic matched by the full-vector *_Int forms.
//   Commutable - sets isCommutable on the reg+reg forms, except SDrr_Int.
// All defs tie $src1 to $dst.
Evan Cheng3ea4d672008-03-05 08:19:16 +00001297let Constraints = "$src1 = $dst" in {
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001298multiclass sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
1299 SDNode OpNode,
1300 Intrinsic F64Int,
1301 Intrinsic V2F64Int,
1302 bit Commutable = 0> {
1303
1304 // Scalar operation, reg+reg.
Evan Chengb783fa32007-07-19 01:14:50 +00001305 def SDrr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001306 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001307 [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> {
1308 let isCommutable = Commutable;
1309 }
1310
1311 // Scalar operation, reg+mem.
Evan Chengb783fa32007-07-19 01:14:50 +00001312 def SDrm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001313 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001314 [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
1315
1316 // Vector operation, reg+reg.
Evan Chengb783fa32007-07-19 01:14:50 +00001317 def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001318 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001319 [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
1320 let isCommutable = Commutable;
1321 }
1322
1323 // Vector operation, reg+mem.
Evan Chengb783fa32007-07-19 01:14:50 +00001324 def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001325 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
Dan Gohman4a4f1512007-07-18 20:23:34 +00001326 [(set VR128:$dst, (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001327
1328 // Intrinsic operation, reg+reg.
 // Deliberately NOT commutable, even when Commutable is set: the scalar
 // intrinsic passes the upper element of $src1 through to the result, so
 // swapping the operands would change the upper half of $dst.
Evan Chengb783fa32007-07-19 01:14:50 +00001329 def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001330 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001331 [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]>;
1334
1335 // Intrinsic operation, reg+mem.
Evan Chengb783fa32007-07-19 01:14:50 +00001336 def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001337 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001338 [(set VR128:$dst, (F64Int VR128:$src1,
1339 sse_load_f64:$src2))]>;
1340
1341 // Vector intrinsic operation, reg+reg.
Evan Chengb783fa32007-07-19 01:14:50 +00001342 def PDrr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001343 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001344 [(set VR128:$dst, (V2F64Int VR128:$src1, VR128:$src2))]> {
1345 let isCommutable = Commutable;
1346 }
1347
1348 // Vector intrinsic operation, reg+mem.
Dan Gohmanc747be52007-08-02 21:06:40 +00001349 def PDrm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001350 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001351 [(set VR128:$dst, (V2F64Int VR128:$src1, (load addr:$src2)))]>;
1352}
1353}
1354
// Instantiate sse2_fp_binop_rm for max and min. Neither passes Commutable, so
// the default of 0 applies to every form.
1355defm MAX : sse2_fp_binop_rm<0x5F, "max", X86fmax,
1356 int_x86_sse2_max_sd, int_x86_sse2_max_pd>;
1357defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin,
1358 int_x86_sse2_min_sd, int_x86_sse2_min_pd>;
1359
1360//===----------------------------------------------------------------------===//
1361// SSE packed FP Instructions
1362
1363// Move Instructions
// movapd on VR128:
//   MOVAPDrr - register copy; no pattern, marked neverHasSideEffects.
//   MOVAPDrm - selected for alignedloadv2f64; marked isSimpleLoad,
//              isReMaterializable and mayHaveSideEffects.
//   MOVAPDmr - selected for alignedstore of a v2f64 value.
Chris Lattnerc90ee9c2008-01-10 07:59:24 +00001364let neverHasSideEffects = 1 in
Evan Chengb783fa32007-07-19 01:14:50 +00001365def MOVAPDrr : PDI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001366 "movapd\t{$src, $dst|$dst, $src}", []>;
Chris Lattner1a1932c2008-01-06 23:38:27 +00001367let isSimpleLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
Evan Chengb783fa32007-07-19 01:14:50 +00001368def MOVAPDrm : PDI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001369 "movapd\t{$src, $dst|$dst, $src}",
Dan Gohman4a4f1512007-07-18 20:23:34 +00001370 [(set VR128:$dst, (alignedloadv2f64 addr:$src))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001371
Evan Chengb783fa32007-07-19 01:14:50 +00001372def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001373 "movapd\t{$src, $dst|$dst, $src}",
Dan Gohman4a4f1512007-07-18 20:23:34 +00001374 [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001375
// movupd on VR128:
//   MOVUPDrr - register copy; no pattern, marked neverHasSideEffects.
//   MOVUPDrm - selected for loadv2f64; marked isSimpleLoad.
//   MOVUPDmr - selected for a plain `store` of a v2f64 value.
Chris Lattnerd1a9eb62008-01-11 06:59:07 +00001376let neverHasSideEffects = 1 in
Evan Chengb783fa32007-07-19 01:14:50 +00001377def MOVUPDrr : PDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001378 "movupd\t{$src, $dst|$dst, $src}", []>;
Chris Lattner1a1932c2008-01-06 23:38:27 +00001379let isSimpleLoad = 1 in
Evan Chengb783fa32007-07-19 01:14:50 +00001380def MOVUPDrm : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001381 "movupd\t{$src, $dst|$dst, $src}",
Dan Gohman4a4f1512007-07-18 20:23:34 +00001382 [(set VR128:$dst, (loadv2f64 addr:$src))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001383def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001384 "movupd\t{$src, $dst|$dst, $src}",
Dan Gohman4a4f1512007-07-18 20:23:34 +00001385 [(store (v2f64 VR128:$src), addr:$dst)]>;
1386
1387// Intrinsic forms of MOVUPD load and store
// Same encodings as MOVUPDrm / MOVUPDmr, but selected for the
// int_x86_sse2_loadu_pd and int_x86_sse2_storeu_pd intrinsics, which take the
// address directly rather than a load/store node.
Evan Chengb783fa32007-07-19 01:14:50 +00001388def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001389 "movupd\t{$src, $dst|$dst, $src}",
Dan Gohman4a4f1512007-07-18 20:23:34 +00001390 [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001391def MOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001392 "movupd\t{$src, $dst|$dst, $src}",
Dan Gohman4a4f1512007-07-18 20:23:34 +00001393 [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001394
// MOVLPDrm / MOVHPDrm: load an f64 from memory and merge it into $src1, which
// is tied to $dst. Each is selected for a v2f64 vector_shuffle of $src1 with
// scalar_to_vector(loadf64), gated by MOVLP_shuffle_mask or MOVHP_shuffle_mask
// respectively. AddedComplexity = 20 raises the priority of these patterns
// during instruction selection.
Evan Cheng3ea4d672008-03-05 08:19:16 +00001395let Constraints = "$src1 = $dst" in {
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001396 let AddedComplexity = 20 in {
1397 def MOVLPDrm : PDI<0x12, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001398 (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001399 "movlpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001400 [(set VR128:$dst,
1401 (v2f64 (vector_shuffle VR128:$src1,
1402 (scalar_to_vector (loadf64 addr:$src2)),
1403 MOVLP_shuffle_mask)))]>;
1404 def MOVHPDrm : PDI<0x16, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001405 (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001406 "movhpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001407 [(set VR128:$dst,
1408 (v2f64 (vector_shuffle VR128:$src1,
1409 (scalar_to_vector (loadf64 addr:$src2)),
1410 MOVHP_shuffle_mask)))]>;
1411 } // AddedComplexity
Evan Cheng3ea4d672008-03-05 08:19:16 +00001412} // Constraints = "$src1 = $dst"
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001413
// MOVLPDmr: selected for storing element 0 of a v2f64 register (extracted via
// vector_extract with index iPTR 0) to an f64mem address.
Evan Chengb783fa32007-07-19 01:14:50 +00001414def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001415 "movlpd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001416 [(store (f64 (vector_extract (v2f64 VR128:$src),
1417 (iPTR 0))), addr:$dst)]>;
1418
1419// v2f64 extract element 1 is always custom lowered to unpack high to low
1420// and extract element 0 so the non-store version isn't too horrible.
// MOVHPDmr: selected for storing element 0 of an UNPCKH-style shuffle of $src
// with undef, i.e. the form that extract-element-1 takes after the custom
// lowering described in the preceding comment.
Evan Chengb783fa32007-07-19 01:14:50 +00001421def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001422 "movhpd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001423 [(store (f64 (vector_extract
1424 (v2f64 (vector_shuffle VR128:$src, (undef),
1425 UNPCKH_shuffle_mask)), (iPTR 0))),
1426 addr:$dst)]>;
1427
1428// SSE2 instructions without OpSize prefix
// Int_CVTDQ2PS: matches the int_x86_sse2_cvtdq2ps intrinsic. Declared with the
// plain `I` class plus TB, so the HasSSE2 predicate is attached explicitly.
// The rm form reads an i128mem operand through memopv2i64 and bitconverts it
// to the intrinsic's operand type.
Evan Chengb783fa32007-07-19 01:14:50 +00001429def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001430 "cvtdq2ps\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001431 [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
1432 TB, Requires<[HasSSE2]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001433def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
Evan Cheng14c97c32008-03-14 07:46:48 +00001434 "cvtdq2ps\t{$src, $dst|$dst, $src}",
1435 [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
1436 (bitconvert (memopv2i64 addr:$src))))]>,
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001437 TB, Requires<[HasSSE2]>;
1438
1439// SSE2 instructions with XS prefix
// Int_CVTDQ2PD: matches the int_x86_sse2_cvtdq2pd intrinsic. Declared with the
// plain `I` class plus XS, so the HasSSE2 predicate is attached explicitly.
// NOTE(review): the rm form declares an i64mem operand but its pattern loads
// through memopv2i64 (a 128-bit vector load fragment) - confirm that the
// operand width and the pattern's load width are meant to differ.
Evan Chengb783fa32007-07-19 01:14:50 +00001440def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001441 "cvtdq2pd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001442 [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
1443 XS, Requires<[HasSSE2]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001444def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
Evan Cheng14c97c32008-03-14 07:46:48 +00001445 "cvtdq2pd\t{$src, $dst|$dst, $src}",
1446 [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
1447 (bitconvert (memopv2i64 addr:$src))))]>,
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001448 XS, Requires<[HasSSE2]>;
1449
// Int_CVTPS2DQ: matches the int_x86_sse2_cvtps2dq intrinsic on a VR128
// register (rr) or an f128mem operand wrapped in an untyped `load` (rm).
Evan Chengb783fa32007-07-19 01:14:50 +00001450def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Evan Cheng14c97c32008-03-14 07:46:48 +00001451 "cvtps2dq\t{$src, $dst|$dst, $src}",
1452 [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001453def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001454 "cvtps2dq\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001455 [(set VR128:$dst, (int_x86_sse2_cvtps2dq
1456 (load addr:$src)))]>;
1457// SSE2 packed instructions with XS prefix
// Int_CVTTPS2DQ: matches the int_x86_sse2_cvttps2dq intrinsic. Shares opcode
// 0x5B with the cvtdq2ps / cvtps2dq defs and is distinguished by the XS
// prefix; declared with the plain `I` class, so HasSSE2 is attached explicitly.
Evan Chengb783fa32007-07-19 01:14:50 +00001458def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001459 "cvttps2dq\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001460 [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))]>,
1461 XS, Requires<[HasSSE2]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001462def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001463 "cvttps2dq\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001464 [(set VR128:$dst, (int_x86_sse2_cvttps2dq
1465 (load addr:$src)))]>,
1466 XS, Requires<[HasSSE2]>;
1467
1468// SSE2 packed instructions with XD prefix
// Int_CVTPD2DQ: matches the int_x86_sse2_cvtpd2dq intrinsic. Uses opcode 0xE6
// with the XD prefix; declared with the plain `I` class, so HasSSE2 is
// attached explicitly.
Evan Chengb783fa32007-07-19 01:14:50 +00001469def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001470 "cvtpd2dq\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001471 [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
1472 XD, Requires<[HasSSE2]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001473def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001474 "cvtpd2dq\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001475 [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
1476 (load addr:$src)))]>,
1477 XD, Requires<[HasSSE2]>;
1478
// Int_CVTTPD2DQ: matches the int_x86_sse2_cvttpd2dq intrinsic on a VR128
// register (rr) or an f128mem operand wrapped in an untyped `load` (rm).
// Same opcode (0xE6) as Int_CVTPD2DQ, but declared through the PDI class.
Evan Chengb783fa32007-07-19 01:14:50 +00001479def Int_CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001480 "cvttpd2dq\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001481 [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
Evan Cheng14c97c32008-03-14 07:46:48 +00001482def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001483 "cvttpd2dq\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001484 [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
1485 (load addr:$src)))]>;
1486
1487// SSE2 instructions without OpSize prefix
// Int_CVTPS2PDrr: register form matching the int_x86_sse2_cvtps2pd intrinsic.
// Declared with the plain `I` class plus TB, so HasSSE2 is attached explicitly.
Evan Chengb783fa32007-07-19 01:14:50 +00001488def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001489 "cvtps2pd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001490 [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
1491 TB, Requires<[HasSSE2]>;
// Int_CVTPS2PDrm: memory-source form matching the int_x86_sse2_cvtps2pd
// intrinsic. The source is an f64mem operand, so the instruction format must
// be MRMSrcMem; MRMSrcReg would encode the ModRM byte as a register operand.
Evan Chengb783fa32007-07-19 01:14:50 +00001492def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001493 "cvtps2pd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001494 [(set VR128:$dst, (int_x86_sse2_cvtps2pd
1495 (load addr:$src)))]>,
1496 TB, Requires<[HasSSE2]>;
1497
// Int_CVTPD2PSrr: register form matching the int_x86_sse2_cvtpd2ps intrinsic.
Evan Chengb783fa32007-07-19 01:14:50 +00001498def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001499 "cvtpd2ps\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001500 [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
// Int_CVTPD2PSrm: memory-source form matching the int_x86_sse2_cvtpd2ps
// intrinsic. The source is an f128mem operand, so the instruction format must
// be MRMSrcMem; MRMSrcReg would encode the ModRM byte as a register operand.
Evan Chengb783fa32007-07-19 01:14:50 +00001501def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001502 "cvtpd2ps\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001503 [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
1504 (load addr:$src)))]>;
1505
1506// Match intrinsics which expect XMM operand(s).
1507// Aliases for intrinsics
// Two-operand VR128 forms of the scalar conversions, each tying $src1 to $dst
// and matching the corresponding int_x86_sse2_* intrinsic:
//   Int_CVTSI2SD - second operand is a GR32 register or loadi32 from i32mem.
//   Int_CVTSD2SS - second operand is a VR128 register or `load` from f64mem.
//   Int_CVTSS2SD - second operand is a VR128 register or `load` from f32mem;
//                  declared with plain `I`, so XS and HasSSE2 are explicit.
Evan Cheng3ea4d672008-03-05 08:19:16 +00001508let Constraints = "$src1 = $dst" in {
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001509def Int_CVTSI2SDrr: SDI<0x2A, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001510 (outs VR128:$dst), (ins VR128:$src1, GR32:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001511 "cvtsi2sd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001512 [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1,
1513 GR32:$src2))]>;
1514def Int_CVTSI2SDrm: SDI<0x2A, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001515 (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001516 "cvtsi2sd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001517 [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1,
1518 (loadi32 addr:$src2)))]>;
1519def Int_CVTSD2SSrr: SDI<0x5A, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001520 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001521 "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001522 [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
1523 VR128:$src2))]>;
1524def Int_CVTSD2SSrm: SDI<0x5A, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001525 (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001526 "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001527 [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
1528 (load addr:$src2)))]>;
1529def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001530 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001531 "cvtss2sd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001532 [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
1533 VR128:$src2))]>, XS,
1534 Requires<[HasSSE2]>;
1535def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001536 (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001537 "cvtss2sd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001538 [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
1539 (load addr:$src2)))]>, XS,
1540 Requires<[HasSSE2]>;
1541}
1542
1543// Arithmetic
1544
1545/// sse2_fp_unop_rm - SSE2 unops come in both scalar and vector forms.
1546///
1547/// In addition, we also have a special variant of the scalar form here to
1548/// represent the associated intrinsic operation. This form is unlike the
1549/// plain scalar form, in that it takes an entire vector (instead of a
1550/// scalar) and leaves the top elements undefined.
1551///
1552/// And, we have a special variant form for a full-vector intrinsic form.
1553///
1554/// These four forms can each have a reg or a mem operand, so there are a
1555/// total of eight "instructions".
1556///
// Parameters:
//   opc        - opcode byte shared by all eight forms.
//   OpcodeStr  - mnemonic stem; "sd" or "pd" is appended per form.
//   OpNode     - SDNode matched by the plain scalar and vector forms.
//   F64Int     - intrinsic matched by the scalar *_Int forms.
//   V2F64Int   - intrinsic matched by the packed *_Int forms.
//   Commutable - forwarded to isCommutable on the register forms.
multiclass sse2_fp_unop_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode,
                           Intrinsic F64Int,
                           Intrinsic V2F64Int,
                           bit Commutable = 0> {
  // Scalar operation, reg.
  def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
                !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                [(set FR64:$dst, (OpNode FR64:$src))]> {
    let isCommutable = Commutable;
  }

  // Scalar operation, mem.
  def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
                !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                [(set FR64:$dst, (OpNode (load addr:$src)))]>;

  // Vector operation, reg.
  def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]> {
    let isCommutable = Commutable;
  }

  // Vector operation, mem.
  def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>;

  // Intrinsic operation, reg.
  def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F64Int VR128:$src))]> {
    let isCommutable = Commutable;
  }

  // Intrinsic operation, mem.
  def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
                    !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;

  // Vector intrinsic operation, reg
  def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (V2F64Int VR128:$src))]> {
    let isCommutable = Commutable;
  }

  // Vector intrinsic operation, mem
  // Uses memopv2f64 (like PDm above) rather than a plain load: the packed
  // instruction's 128-bit memory operand must be 16-byte aligned, so an
  // arbitrary, possibly unaligned load must not be folded into it.
  def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                    !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>;
}
1610
1611// Square root.
// Instantiates all eight sqrt forms (sqrtsd / sqrtpd, reg / mem,
// plain / intrinsic) from sse2_fp_unop_rm.
defm SQRT : sse2_fp_unop_rm<0x51, "sqrt",
                            fsqrt,
                            int_x86_sse2_sqrt_sd,
                            int_x86_sse2_sqrt_pd>;
1614
1615// There is no f64 version of the reciprocal approximation instructions.
1616
1617// Logical
// Packed-double bitwise operations.  All are two-address ($src1 tied to
// $dst); the patterns match integer and/or/xor on v2f64 values bitcast to
// v2i64.
let Constraints = "$src1 = $dst" in {
  // Register-register and/or/xor may have their operands swapped.
  let isCommutable = 1 in {
    def ANDPDrr :
      PDI<0x54, MRMSrcReg,
          (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
          "andpd\t{$src2, $dst|$dst, $src2}",
          [(set VR128:$dst,
                (and (bc_v2i64 (v2f64 VR128:$src1)),
                     (bc_v2i64 (v2f64 VR128:$src2))))]>;
    def ORPDrr :
      PDI<0x56, MRMSrcReg,
          (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
          "orpd\t{$src2, $dst|$dst, $src2}",
          [(set VR128:$dst,
                (or (bc_v2i64 (v2f64 VR128:$src1)),
                    (bc_v2i64 (v2f64 VR128:$src2))))]>;
    def XORPDrr :
      PDI<0x57, MRMSrcReg,
          (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
          "xorpd\t{$src2, $dst|$dst, $src2}",
          [(set VR128:$dst,
                (xor (bc_v2i64 (v2f64 VR128:$src1)),
                     (bc_v2i64 (v2f64 VR128:$src2))))]>;
  }

  // Register-memory forms; the memory operand is matched with memopv2i64.
  def ANDPDrm :
    PDI<0x54, MRMSrcMem,
        (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
        "andpd\t{$src2, $dst|$dst, $src2}",
        [(set VR128:$dst,
              (and (bc_v2i64 (v2f64 VR128:$src1)),
                   (memopv2i64 addr:$src2)))]>;
  def ORPDrm :
    PDI<0x56, MRMSrcMem,
        (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
        "orpd\t{$src2, $dst|$dst, $src2}",
        [(set VR128:$dst,
              (or (bc_v2i64 (v2f64 VR128:$src1)),
                  (memopv2i64 addr:$src2)))]>;
  def XORPDrm :
    PDI<0x57, MRMSrcMem,
        (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
        "xorpd\t{$src2, $dst|$dst, $src2}",
        [(set VR128:$dst,
              (xor (bc_v2i64 (v2f64 VR128:$src1)),
                   (memopv2i64 addr:$src2)))]>;

  // and-not: the tied operand ($src1) is the one that gets inverted, so these
  // are deliberately outside the isCommutable group.
  def ANDNPDrr :
    PDI<0x55, MRMSrcReg,
        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
        "andnpd\t{$src2, $dst|$dst, $src2}",
        [(set VR128:$dst,
              (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
                   (bc_v2i64 (v2f64 VR128:$src2))))]>;
  def ANDNPDrm :
    PDI<0x55, MRMSrcMem,
        (outs VR128:$dst), (ins VR128:$src1,f128mem:$src2),
        "andnpd\t{$src2, $dst|$dst, $src2}",
        [(set VR128:$dst,
              (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
                   (memopv2i64 addr:$src2)))]>;
}
1671
// Packed-double compare.  Two-address ($src1 tied to $dst); the SSECC operand
// $cc supplies the condition and is spliced into the mnemonic via ${cc}.
let Constraints = "$src1 = $dst" in {
  def CMPPDrri :
    PDIi8<0xC2, MRMSrcReg,
          (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
          "cmp${cc}pd\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst,
                (int_x86_sse2_cmp_pd VR128:$src1, VR128:$src, imm:$cc))]>;

  // The 128-bit memory operand of cmppd must be 16-byte aligned, so match it
  // with memopv2f64 (as the other packed-double memory forms in this file do)
  // instead of a plain load that could fold an unaligned access.
  def CMPPDrmi :
    PDIi8<0xC2, MRMSrcMem,
          (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
          "cmp${cc}pd\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst,
                (int_x86_sse2_cmp_pd VR128:$src1,
                                     (memopv2f64 addr:$src), imm:$cc))]>;
}
1684
1685// Shuffle and unpack instructions
// Packed-double shuffles and unpacks, all two-address ($src1 tied to $dst).
let Constraints = "$src1 = $dst" in {
  // shufpd: $src3 is the immediate selector, matched as a SHUFP shuffle mask.
  def SHUFPDrri :
    PDIi8<0xC6, MRMSrcReg,
          (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3),
          "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
          [(set VR128:$dst,
                (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
                                       SHUFP_shuffle_mask:$src3)))]>;
  def SHUFPDrmi :
    PDIi8<0xC6, MRMSrcMem,
          (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2, i8imm:$src3),
          "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
          [(set VR128:$dst,
                (v2f64 (vector_shuffle VR128:$src1, (memopv2f64 addr:$src2),
                                       SHUFP_shuffle_mask:$src3)))]>;

  // The unpack patterns carry AddedComplexity = 10.
  let AddedComplexity = 10 in {
    def UNPCKHPDrr :
      PDI<0x15, MRMSrcReg,
          (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
          "unpckhpd\t{$src2, $dst|$dst, $src2}",
          [(set VR128:$dst,
                (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
                                       UNPCKH_shuffle_mask)))]>;
    def UNPCKHPDrm :
      PDI<0x15, MRMSrcMem,
          (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
          "unpckhpd\t{$src2, $dst|$dst, $src2}",
          [(set VR128:$dst,
                (v2f64 (vector_shuffle VR128:$src1, (memopv2f64 addr:$src2),
                                       UNPCKH_shuffle_mask)))]>;

    def UNPCKLPDrr :
      PDI<0x14, MRMSrcReg,
          (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
          "unpcklpd\t{$src2, $dst|$dst, $src2}",
          [(set VR128:$dst,
                (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
                                       UNPCKL_shuffle_mask)))]>;
    def UNPCKLPDrm :
      PDI<0x14, MRMSrcMem,
          (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
          "unpcklpd\t{$src2, $dst|$dst, $src2}",
          [(set VR128:$dst,
                (v2f64 (vector_shuffle VR128:$src1, (memopv2f64 addr:$src2),
                                       UNPCKL_shuffle_mask)))]>;
  } // AddedComplexity
} // Constraints = "$src1 = $dst"
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001734
1735
1736//===----------------------------------------------------------------------===//
1737// SSE integer instructions
1738
1739// Move Instructions
// movdqa / movdqu register and memory moves.  None of these has an active
// selection pattern (the patterns are commented out), so the load / store /
// side-effect properties are stated explicitly.

// movdqa xmm, xmm
let neverHasSideEffects = 1 in {
  def MOVDQArr :
    PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
        "movdqa\t{$src, $dst|$dst, $src}", []>;
}

// movdqa xmm, m128
let isSimpleLoad = 1, mayLoad = 1 in {
  def MOVDQArm :
    PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
        "movdqa\t{$src, $dst|$dst, $src}",
        [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;
}

// movdqa m128, xmm
let mayStore = 1 in {
  def MOVDQAmr :
    PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
        "movdqa\t{$src, $dst|$dst, $src}",
        [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
}

// movdqu xmm, m128 (XS-prefixed, built on the generic I class)
let isSimpleLoad = 1, mayLoad = 1 in {
  def MOVDQUrm :
    I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
      "movdqu\t{$src, $dst|$dst, $src}",
      [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
    XS, Requires<[HasSSE2]>;
}

// movdqu m128, xmm
let mayStore = 1 in {
  def MOVDQUmr :
    I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
      "movdqu\t{$src, $dst|$dst, $src}",
      [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
    XS, Requires<[HasSSE2]>;
}
1761
Dan Gohman4a4f1512007-07-18 20:23:34 +00001762// Intrinsic forms of MOVDQU load and store
// movdqu load, selected for int_x86_sse2_loadu_dq.
let isSimpleLoad = 1 in {
  def MOVDQUrm_Int :
    I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
      "movdqu\t{$src, $dst|$dst, $src}",
      [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>,
    XS, Requires<[HasSSE2]>;
}

// movdqu store, selected for int_x86_sse2_storeu_dq.
def MOVDQUmr_Int :
  I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
    "movdqu\t{$src, $dst|$dst, $src}",
    [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
  XS, Requires<[HasSSE2]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001772
// Helper multiclasses for two-address SSE2 integer binary operations.  Every
// record they produce has $src1 tied to $dst.
let Constraints = "$src1 = $dst" in {

/// PDI_binop_rm_int - Intrinsic-matched binary operator with a reg-reg (rr)
/// and a reg-mem (rm) form.  Commutable is applied to the rr form only.
multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
                            bit Commutable = 0> {
  def rr :
    PDI<opc, MRMSrcReg,
        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
        [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]> {
    let isCommutable = Commutable;
  }
  def rm :
    PDI<opc, MRMSrcMem,
        (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
        [(set VR128:$dst,
              (IntId VR128:$src1,
                     (bitconvert (memopv2i64 addr:$src2))))]>;
}

/// PDI_binop_rmi_int - Like PDI_binop_rm_int, plus a reg-imm (ri) form that
/// uses its own opcode (opc2), its own format (ImmForm) and its own intrinsic
/// (IntId2).
multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
                             string OpcodeStr,
                             Intrinsic IntId, Intrinsic IntId2> {
  def rr :
    PDI<opc, MRMSrcReg,
        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
        [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
  def rm :
    PDI<opc, MRMSrcMem,
        (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
        [(set VR128:$dst,
              (IntId VR128:$src1,
                     (bitconvert (memopv2i64 addr:$src2))))]>;
  def ri :
    PDIi8<opc2, ImmForm,
          (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
}

/// PDI_binop_rm - Simple SSE2 binary operator matched on an SDNode, with the
/// result type given by OpVT.
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        ValueType OpVT, bit Commutable = 0> {
  def rr :
    PDI<opc, MRMSrcReg,
        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
        [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]> {
    let isCommutable = Commutable;
  }
  def rm :
    PDI<opc, MRMSrcMem,
        (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
        [(set VR128:$dst,
              (OpVT (OpNode VR128:$src1,
                            (bitconvert (memopv2i64 addr:$src2)))))]>;
}

/// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64.
///
/// FIXME: we could eliminate this and use PDI_binop_rm instead if tblgen knew
/// to collapse (bitconvert VT to VT) into its operand.
///
multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              bit Commutable = 0> {
  def rr :
    PDI<opc, MRMSrcReg,
        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
        [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]> {
    let isCommutable = Commutable;
  }
  def rm :
    PDI<opc, MRMSrcMem,
        (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
        [(set VR128:$dst, (OpNode VR128:$src1,(memopv2i64 addr:$src2)))]>;
}

} // Constraints = "$src1 = $dst"
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001835
1836// 128-bit Integer Arithmetic
1837
// Adds: node-matched per element type.  Trailing 1 = commutable.
defm PADDB   : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>;
defm PADDW   : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>;
defm PADDD   : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>;
defm PADDQ   : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>;

// Saturating adds: intrinsic-matched, commutable.
defm PADDSB  : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>;
defm PADDSW  : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>;
defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>;
defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>;

// Subtracts: not commutable, so the Commutable argument keeps its default.
defm PSUBB   : PDI_binop_rm<0xF8, "psubb", sub, v16i8>;
defm PSUBW   : PDI_binop_rm<0xF9, "psubw", sub, v8i16>;
defm PSUBD   : PDI_binop_rm<0xFA, "psubd", sub, v4i32>;
defm PSUBQ   : PDI_binop_rm_v2i64<0xFB, "psubq", sub>;

// Saturating subtracts: intrinsic-matched.
defm PSUBSB  : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b>;
defm PSUBSW  : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w>;
defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b>;
defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w>;

// Multiplies.
defm PMULLW  : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>;

defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>;
defm PMULHW  : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w , 1>;
defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>;

defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>;

// Averages.
defm PAVGB   : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>;
defm PAVGW   : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>;


// Min / max.
defm PMINUB  : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>;
defm PMINSW  : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>;
defm PMAXUB  : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>;
defm PMAXSW  : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;
// psadbw is encoded as 66 0F F6 /r.  It was previously given 0xE0, which is
// pavgb's opcode, so it would have been emitted as pavgb.
defm PSADBW  : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
1875
1876
// Left shifts.  Template arguments are: rr/rm opcode, ri opcode, ri format,
// mnemonic, rr/rm intrinsic, ri intrinsic.
defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
                               int_x86_sse2_psll_w,
                               int_x86_sse2_pslli_w>;
defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
                               int_x86_sse2_psll_d,
                               int_x86_sse2_pslli_d>;
defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
                               int_x86_sse2_psll_q,
                               int_x86_sse2_pslli_q>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001883
// Logical right shifts.  The immediate forms use the /2 group (MRM2r) with
// opcodes 0x71 (word), 0x72 (dword) and 0x73 (qword).
defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
                               int_x86_sse2_psrl_w, int_x86_sse2_psrli_w>;
defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
                               int_x86_sse2_psrl_d, int_x86_sse2_psrli_d>;
// The immediate opcode was previously 0x72, which made "psrlq $imm" encode
// identically to "psrld $imm".  The qword form is 66 0F 73 /2.
defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
                               int_x86_sse2_psrl_q, int_x86_sse2_psrli_q>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001890
// Arithmetic right shifts.  The immediate forms use the /4 group (MRM4r) with
// opcodes 0x71 (word) and 0x72 (dword).
defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
                               int_x86_sse2_psra_w, int_x86_sse2_psrai_w>;
// The immediate opcode was previously 0x71, which made "psrad $imm" encode
// identically to "psraw $imm".  The dword form is 66 0F 72 /4.
defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
                               int_x86_sse2_psra_d, int_x86_sse2_psrai_d>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001895
1896// 128-bit logical shifts.
// Whole-register shifts by an immediate.  Two-address, no selection pattern of
// their own (so neverHasSideEffects is stated explicitly); they are reached
// through separate Pat definitions.
let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
  def PSLLDQri :
    PDIi8<0x73, MRM7r,
          (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
          "pslldq\t{$src2, $dst|$dst, $src2}", []>;
  def PSRLDQri :
    PDIi8<0x73, MRM3r,
          (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
          "psrldq\t{$src2, $dst|$dst, $src2}", []>;
  // PSRADQri doesn't exist in SSE[1-3].
}
1906
// Selection patterns that map onto PSLLDQri / PSRLDQri.  In each case the
// matched immediate is passed through the PSxLDQ_imm transform (defined
// elsewhere) before being handed to the instruction.
let Predicates = [HasSSE2] in {
  // 128-bit shift-left intrinsic.
  def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
            (v2i64 (PSLLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;

  // 128-bit shift-right intrinsic.
  def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
            (v2i64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;

  // X86fsrl on v2f64 also lowers to psrldq.
  def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
            (v2f64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
}
1915
1916// Logical
// Integer bitwise and / or / xor on v2i64; all three are commutable.
defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
defm POR  : PDI_binop_rm_v2i64<0xEB, "por" , or , 1>;
defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;
1920
// pandn: (~$src1) & $src2, with $src1 tied to $dst.  Not commutable, because
// only the tied operand is inverted.
let Constraints = "$src1 = $dst" in {
  def PANDNrr :
    PDI<0xDF, MRMSrcReg,
        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
        "pandn\t{$src2, $dst|$dst, $src2}",
        [(set VR128:$dst,
              (v2i64 (and (vnot VR128:$src1), VR128:$src2)))]>;

  def PANDNrm :
    PDI<0xDF, MRMSrcMem,
        (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
        "pandn\t{$src2, $dst|$dst, $src2}",
        [(set VR128:$dst,
              (v2i64 (and (vnot VR128:$src1),
                          (memopv2i64 addr:$src2))))]>;
}
1934
1935// SSE2 Integer comparison
// Equality compares (byte / word / dword), intrinsic-matched.
defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b>;
defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w>;
defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d>;
// Greater-than compares (byte / word / dword), intrinsic-matched.
defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>;
defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
1942
1943// Pack instructions
// Pack instructions, intrinsic-matched; operand order matters, so the
// Commutable argument keeps its default.
defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb",
                                 int_x86_sse2_packsswb_128>;
defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw",
                                 int_x86_sse2_packssdw_128>;
defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb",
                                 int_x86_sse2_packuswb_128>;
1947
1948// Shuffle and unpack instructions
// pshufd: single-source v4i32 shuffle (second shuffle input is undef) with the
// selector taken from immediate $src2.  Not two-address.
def PSHUFDri :
  PDIi8<0x70, MRMSrcReg,
        (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
        "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
        [(set VR128:$dst,
              (v4i32 (vector_shuffle VR128:$src1, (undef),
                                     PSHUFD_shuffle_mask:$src2)))]>;
def PSHUFDmi :
  PDIi8<0x70, MRMSrcMem,
        (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
        "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
        [(set VR128:$dst,
              (v4i32 (vector_shuffle (bc_v4i32(memopv2i64 addr:$src1)),
                                     (undef),
                                     PSHUFD_shuffle_mask:$src2)))]>;
1962
1963// SSE2 with ImmT == Imm8 and XS prefix.
// pshufhw: single-source v8i16 shuffle.  Built on Ii8 with an explicit XS
// prefix and HasSSE2 requirement.
def PSHUFHWri :
  Ii8<0x70, MRMSrcReg,
      (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
      "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set VR128:$dst,
            (v8i16 (vector_shuffle VR128:$src1, (undef),
                                   PSHUFHW_shuffle_mask:$src2)))]>,
  XS, Requires<[HasSSE2]>;
def PSHUFHWmi :
  Ii8<0x70, MRMSrcMem,
      (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
      "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set VR128:$dst,
            (v8i16 (vector_shuffle (bc_v8i16 (memopv2i64 addr:$src1)),
                                   (undef),
                                   PSHUFHW_shuffle_mask:$src2)))]>,
  XS, Requires<[HasSSE2]>;
1979
1980// SSE2 with ImmT == Imm8 and XD prefix.
// pshuflw: single-source v8i16 shuffle.  Built on Ii8 with an explicit XD
// prefix and HasSSE2 requirement.
def PSHUFLWri :
  Ii8<0x70, MRMSrcReg,
      (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
      "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set VR128:$dst,
            (v8i16 (vector_shuffle VR128:$src1, (undef),
                                   PSHUFLW_shuffle_mask:$src2)))]>,
  XD, Requires<[HasSSE2]>;
def PSHUFLWmi :
  Ii8<0x70, MRMSrcMem,
      (outs VR128:$dst), (ins i128mem:$src1, i32i8imm:$src2),
      "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set VR128:$dst,
            (v8i16 (vector_shuffle (bc_v8i16 (memopv2i64 addr:$src1)),
                                   (undef),
                                   PSHUFLW_shuffle_mask:$src2)))]>,
  XD, Requires<[HasSSE2]>;
1996
1997
Evan Cheng3ea4d672008-03-05 08:19:16 +00001998let Constraints = "$src1 = $dst" in {
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001999 def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00002000 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00002001 "punpcklbw\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002002 [(set VR128:$dst,
2003 (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
2004 UNPCKL_shuffle_mask)))]>;
2005 def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00002006 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00002007 "punpcklbw\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002008 [(set VR128:$dst,
2009 (v16i8 (vector_shuffle VR128:$src1,
Dan Gohman4a4f1512007-07-18 20:23:34 +00002010 (bc_v16i8 (memopv2i64 addr:$src2)),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002011 UNPCKL_shuffle_mask)))]>;
2012 def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00002013 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00002014 "punpcklwd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002015 [(set VR128:$dst,
2016 (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
2017 UNPCKL_shuffle_mask)))]>;
2018 def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00002019 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00002020 "punpcklwd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002021 [(set VR128:$dst,
2022 (v8i16 (vector_shuffle VR128:$src1,
Dan Gohman4a4f1512007-07-18 20:23:34 +00002023 (bc_v8i16 (memopv2i64 addr:$src2)),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002024 UNPCKL_shuffle_mask)))]>;
2025 def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00002026 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00002027 "punpckldq\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002028 [(set VR128:$dst,
2029 (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
2030 UNPCKL_shuffle_mask)))]>;
2031 def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00002032 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00002033 "punpckldq\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002034 [(set VR128:$dst,
2035 (v4i32 (vector_shuffle VR128:$src1,
Dan Gohman4a4f1512007-07-18 20:23:34 +00002036 (bc_v4i32 (memopv2i64 addr:$src2)),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002037 UNPCKL_shuffle_mask)))]>;
2038 def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00002039 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00002040 "punpcklqdq\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002041 [(set VR128:$dst,
2042 (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
2043 UNPCKL_shuffle_mask)))]>;
2044 def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00002045 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00002046 "punpcklqdq\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002047 [(set VR128:$dst,
2048 (v2i64 (vector_shuffle VR128:$src1,
Dan Gohman4a4f1512007-07-18 20:23:34 +00002049 (memopv2i64 addr:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002050 UNPCKL_shuffle_mask)))]>;
2051
2052 def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00002053 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00002054 "punpckhbw\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002055 [(set VR128:$dst,
2056 (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
2057 UNPCKH_shuffle_mask)))]>;
2058 def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00002059 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00002060 "punpckhbw\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002061 [(set VR128:$dst,
2062 (v16i8 (vector_shuffle VR128:$src1,
Dan Gohman4a4f1512007-07-18 20:23:34 +00002063 (bc_v16i8 (memopv2i64 addr:$src2)),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002064 UNPCKH_shuffle_mask)))]>;
2065 def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00002066 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00002067 "punpckhwd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002068 [(set VR128:$dst,
2069 (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
2070 UNPCKH_shuffle_mask)))]>;
2071 def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00002072 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00002073 "punpckhwd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002074 [(set VR128:$dst,
2075 (v8i16 (vector_shuffle VR128:$src1,
Dan Gohman4a4f1512007-07-18 20:23:34 +00002076 (bc_v8i16 (memopv2i64 addr:$src2)),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002077 UNPCKH_shuffle_mask)))]>;
2078 def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00002079 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00002080 "punpckhdq\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002081 [(set VR128:$dst,
2082 (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
2083 UNPCKH_shuffle_mask)))]>;
2084 def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00002085 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00002086 "punpckhdq\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002087 [(set VR128:$dst,
2088 (v4i32 (vector_shuffle VR128:$src1,
Dan Gohman4a4f1512007-07-18 20:23:34 +00002089 (bc_v4i32 (memopv2i64 addr:$src2)),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002090 UNPCKH_shuffle_mask)))]>;
2091 def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00002092 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00002093 "punpckhqdq\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002094 [(set VR128:$dst,
2095 (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
2096 UNPCKH_shuffle_mask)))]>;
2097 def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00002098 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00002099 "punpckhqdq\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002100 [(set VR128:$dst,
2101 (v2i64 (vector_shuffle VR128:$src1,
Dan Gohman4a4f1512007-07-18 20:23:34 +00002102 (memopv2i64 addr:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002103 UNPCKH_shuffle_mask)))]>;
2104}
2105
// Extract / Insert
// pextrw: X86pextrw of a v8i16 register with an immediate selector; the
// result is produced in a 32-bit general purpose register.
def PEXTRWri :
  PDIi8<0xC5, MRMSrcReg,
        (outs GR32:$dst),
        (ins VR128:$src1, i32i8imm:$src2),
        "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
        [(set GR32:$dst,
              (X86pextrw (v8i16 VR128:$src1), imm:$src2))]>;
// pinsrw: both forms tie $src1 to $dst (two-address) and take the element
// selector as the immediate $src3.
let Constraints = "$src1 = $dst" in {
  // Inserted value comes from a 32-bit general purpose register.
  def PINSRWrri :
    PDIi8<0xC4, MRMSrcReg,
          (outs VR128:$dst),
          (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
          "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
          [(set VR128:$dst,
                (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))]>;

  // Inserted value is folded from an extloadi16 of the i16mem operand.
  def PINSRWrmi :
    PDIi8<0xC4, MRMSrcMem,
          (outs VR128:$dst),
          (ins VR128:$src1, i16mem:$src2, i32i8imm:$src3),
          "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
          [(set VR128:$dst,
                (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
                           imm:$src3))]>;
}
2127
// Mask creation
// pmovmskb: selected from int_x86_sse2_pmovmskb_128, result in a GR32.
def PMOVMSKBrr :
  PDI<0xD7, MRMSrcReg,
      (outs GR32:$dst),
      (ins VR128:$src),
      "pmovmskb\t{$src, $dst|$dst, $src}",
      [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
2132
// Conditional store
// maskmovdqu: selected from int_x86_sse2_maskmov_dqu. EDI is named in the
// pattern and is declared as an implicit use of the instruction.
let Uses = [EDI] in
def MASKMOVDQU :
  PDI<0xF7, MRMSrcReg,
      (outs),
      (ins VR128:$src, VR128:$mask),
      "maskmovdqu\t{$mask, $src|$src, $mask}",
      [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002138
// Non-temporal stores
// Each definition has no outputs and is selected directly from the matching
// int_x86_sse2_movnt_* intrinsic applied to (address, value).
//
// The memory operand class follows the kind of data being stored, in line
// with the other definitions in this file: the packed double-precision store
// uses f128mem and the packed integer store uses i128mem. (They were
// previously the other way round.)
def MOVNTPDmr :
  PDI<0x2B, MRMDestMem,
      (outs),
      (ins f128mem:$dst, VR128:$src),
      "movntpd\t{$src, $dst|$dst, $src}",
      [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;

def MOVNTDQmr :
  PDI<0xE7, MRMDestMem,
      (outs),
      (ins i128mem:$dst, VR128:$src),
      "movntdq\t{$src, $dst|$dst, $src}",
      [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;

// movnti stores a 32-bit general purpose register; it is built on the plain
// I class, so the TB prefix and the SSE2 predicate are attached explicitly.
def MOVNTImr :
  I<0xC3, MRMDestMem,
    (outs),
    (ins i32mem:$dst, GR32:$src),
    "movnti\t{$src, $dst|$dst, $src}",
    [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
  TB, Requires<[HasSSE2]>;
2150
// Flush cache
// clflush: selected from int_x86_sse2_clflush on the address operand.
def CLFLUSH :
  I<0xAE, MRM7m,
    (outs),
    (ins i8mem:$src),
    "clflush\t$src",
    [(int_x86_sse2_clflush addr:$src)]>,
  TB, Requires<[HasSSE2]>;
2155
// Load, store, and memory fence
// lfence / mfence take no operands and are selected from the corresponding
// int_x86_sse2_* intrinsics.
// NOTE(review): both use a memory ModRM form (MRM5m / MRM6m) although the
// (ins) list is empty -- confirm the bytes the code emitter produces.
def LFENCE :
  I<0xAE, MRM5m,
    (outs),
    (ins),
    "lfence",
    [(int_x86_sse2_lfence)]>,
  TB, Requires<[HasSSE2]>;

def MFENCE :
  I<0xAE, MRM6m,
    (outs),
    (ins),
    "mfence",
    [(int_x86_sse2_mfence)]>,
  TB, Requires<[HasSSE2]>;
2161
// TODO: custom lower this so as to never even generate the noop.
// membarrier selection, keyed on its five i8 operands:
//   - any first four immediates with a last operand of 0  -> NOOP
//   - (0, 0, 0, 1) with a last operand of 1               -> SFENCE
//   - (1, 0, 0, 0) with a last operand of 1               -> LFENCE
//   - any first four immediates with a last operand of 1  -> MFENCE
def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls),
                      (i8 imm:$sl), (i8 imm:$ss),
                      (i8 0)),
          (NOOP)>;
def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)),
          (SFENCE)>;
def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)),
          (LFENCE)>;
def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls),
                      (i8 imm:$sl), (i8 imm:$ss),
                      (i8 1)),
          (MFENCE)>;
2169
// Alias instruction that maps the all-ones vector (v4i32 immAllOnesV) to
// pcmpeqd of a register with itself. It takes no inputs and is marked
// rematerializable.
let isReMaterializable = 1 in
  def V_SETALLONES :
    PDI<0x76, MRMInitReg,
        (outs VR128:$dst),
        (ins),
        "pcmpeqd\t$dst, $dst",
        [(set VR128:$dst, (v4i32 immAllOnesV))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002175
// FR64 to 128-bit vector conversion.
// Register form: scalar_to_vector of an FR64 value, typed v2f64.
def MOVSD2PDrr :
  SDI<0x10, MRMSrcReg,
      (outs VR128:$dst),
      (ins FR64:$src),
      "movsd\t{$src, $dst|$dst, $src}",
      [(set VR128:$dst,
            (v2f64 (scalar_to_vector FR64:$src)))]>;

// Memory form: the scalar is folded from a loadf64 of $src.
def MOVSD2PDrm :
  SDI<0x10, MRMSrcMem,
      (outs VR128:$dst),
      (ins f64mem:$src),
      "movsd\t{$src, $dst|$dst, $src}",
      [(set VR128:$dst,
            (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
2185
// GR32 / 32-bit memory to 128-bit vector: scalar_to_vector typed v4i32.
def MOVDI2PDIrr :
  PDI<0x6E, MRMSrcReg,
      (outs VR128:$dst),
      (ins GR32:$src),
      "movd\t{$src, $dst|$dst, $src}",
      [(set VR128:$dst,
            (v4i32 (scalar_to_vector GR32:$src)))]>;

// Memory form: the scalar is folded from a loadi32 of $src.
def MOVDI2PDIrm :
  PDI<0x6E, MRMSrcMem,
      (outs VR128:$dst),
      (ins i32mem:$src),
      "movd\t{$src, $dst|$dst, $src}",
      [(set VR128:$dst,
            (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
2194
// movd into an FR32 register: a bitconvert of a 32-bit integer value.
def MOVDI2SSrr :
  PDI<0x6E, MRMSrcReg,
      (outs FR32:$dst),
      (ins GR32:$src),
      "movd\t{$src, $dst|$dst, $src}",
      [(set FR32:$dst, (bitconvert GR32:$src))]>;

// Memory form: the integer is folded from a loadi32 of $src.
def MOVDI2SSrm :
  PDI<0x6E, MRMSrcMem,
      (outs FR32:$dst),
      (ins i32mem:$src),
      "movd\t{$src, $dst|$dst, $src}",
      [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
2202
// SSE2 instructions with XS prefix
// movq load: built on the plain I class with the XS prefix and the SSE2
// predicate attached explicitly; selects scalar_to_vector of a loadi64.
def MOVQI2PQIrm :
  I<0x7E, MRMSrcMem,
    (outs VR128:$dst),
    (ins i64mem:$src),
    "movq\t{$src, $dst|$dst, $src}",
    [(set VR128:$dst,
          (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
  XS, Requires<[HasSSE2]>;

// movq store: stores element 0 of a v2i64 register as an i64.
def MOVPQI2QImr :
  PDI<0xD6, MRMDestMem,
      (outs),
      (ins i64mem:$dst, VR128:$src),
      "movq\t{$src, $dst|$dst, $src}",
      [(store (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))),
              addr:$dst)]>;
2213
// FIXME: coalescing may not be able to eliminate this movss, because the src
// and dest register classes are different. We really want to write this
// pattern like this:
// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
//           (f32 FR32:$src)>;
// NOTE(review): the text above speaks of movss / f32, while the definitions
// below are the movsd / f64 forms -- presumably the same concern applies
// here; confirm.

// Register form: element 0 of a v2f64 register extracted into FR64.
def MOVPD2SDrr :
  SDI<0x10, MRMSrcReg,
      (outs FR64:$dst),
      (ins VR128:$src),
      "movsd\t{$src, $dst|$dst, $src}",
      [(set FR64:$dst,
            (vector_extract (v2f64 VR128:$src), (iPTR 0)))]>;

// Store form: element 0 of a v2f64 register stored as an f64.
def MOVPD2SDmr :
  SDI<0x11, MRMDestMem,
      (outs),
      (ins f64mem:$dst, VR128:$src),
      "movsd\t{$src, $dst|$dst, $src}",
      [(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
              addr:$dst)]>;
// Register form: element 0 of a v4i32 register extracted into a GR32.
def MOVPDI2DIrr :
  PDI<0x7E, MRMDestReg,
      (outs GR32:$dst),
      (ins VR128:$src),
      "movd\t{$src, $dst|$dst, $src}",
      [(set GR32:$dst,
            (vector_extract (v4i32 VR128:$src), (iPTR 0)))]>;

// Store form: element 0 of a v4i32 register stored as an i32.
def MOVPDI2DImr :
  PDI<0x7E, MRMDestMem,
      (outs),
      (ins i32mem:$dst, VR128:$src),
      "movd\t{$src, $dst|$dst, $src}",
      [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))),
              addr:$dst)]>;
2235
// movd out of an FR32 register: a bitconvert of the value to a 32-bit
// integer, either into a GR32 or straight to memory.
def MOVSS2DIrr :
  PDI<0x7E, MRMDestReg,
      (outs GR32:$dst),
      (ins FR32:$src),
      "movd\t{$src, $dst|$dst, $src}",
      [(set GR32:$dst, (bitconvert FR32:$src))]>;

def MOVSS2DImr :
  PDI<0x7E, MRMDestMem,
      (outs),
      (ins i32mem:$dst, FR32:$src),
      "movd\t{$src, $dst|$dst, $src}",
      [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
2242
2243
// Move to the lower bits of a VR128, leaving the upper bits alone.
// Three-operand (but two-address) aliases: $src1 is tied to $dst.
let Constraints = "$src1 = $dst" in {
  // No selection pattern; flagged as having no side effects.
  let neverHasSideEffects = 1 in
  def MOVLSD2PDrr :
    SDI<0x10, MRMSrcReg,
        (outs VR128:$dst),
        (ins VR128:$src1, FR64:$src2),
        "movsd\t{$src2, $dst|$dst, $src2}",
        []>;

  // Selected for a v2f64 vector_shuffle that satisfies MOVL_shuffle_mask.
  let AddedComplexity = 15 in
  def MOVLPDrr :
    SDI<0x10, MRMSrcReg,
        (outs VR128:$dst),
        (ins VR128:$src1, VR128:$src2),
        "movsd\t{$src2, $dst|$dst, $src2}",
        [(set VR128:$dst,
              (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
                                     MOVL_shuffle_mask)))]>;
}
2260
// Store / copy the lower 64 bits of an XMM register.
// Selected from int_x86_sse2_storel_dq on (address, value).
def MOVLQ128mr :
  PDI<0xD6, MRMDestMem,
      (outs),
      (ins i64mem:$dst, VR128:$src),
      "movq\t{$src, $dst|$dst, $src}",
      [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
2265
// Move to the lower bits of a VR128 and zero the upper bits.
// Loading from memory zeroes the upper bits automatically.
let AddedComplexity = 20 in
  def MOVZSD2PDrm :
    SDI<0x10, MRMSrcMem,
        (outs VR128:$dst),
        (ins f64mem:$src),
        "movsd\t{$src, $dst|$dst, $src}",
        [(set VR128:$dst,
              (v2f64 (X86vzmovl
                       (v2f64 (scalar_to_vector (loadf64 addr:$src))))))]>;

// Two further v2f64 shapes select the same instruction: X86vzmovl of a
// memopv2f64 load, and X86vzload.
def : Pat<(v2f64 (X86vzmovl (memopv2f64 addr:$src))),
          (MOVZSD2PDrm addr:$src)>;
def : Pat<(v2f64 (X86vzload addr:$src)),
          (MOVZSD2PDrm addr:$src)>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002278
// movd / movq to an XMM register zero-extends.
let AddedComplexity = 15 in {
  // 32-bit GPR source: X86vzmovl of a v4i32 scalar_to_vector.
  def MOVZDI2PDIrr :
    PDI<0x6E, MRMSrcReg,
        (outs VR128:$dst),
        (ins GR32:$src),
        "movd\t{$src, $dst|$dst, $src}",
        [(set VR128:$dst,
              (v4i32 (X86vzmovl
                       (v4i32 (scalar_to_vector GR32:$src)))))]>;

  // This is X86-64 only.
  // 64-bit GPR source: X86vzmovl of a v2i64 scalar_to_vector.
  def MOVZQI2PQIrr :
    RPDI<0x6E, MRMSrcReg,
         (outs VR128:$dst),
         (ins GR64:$src),
         "mov{d|q}\t{$src, $dst|$dst, $src}",
         [(set VR128:$dst,
               (v2i64 (X86vzmovl
                        (v2i64 (scalar_to_vector GR64:$src)))))]>;
}
2291
// Memory-source counterparts of the zero-extending movd / movq forms.
let AddedComplexity = 20 in {
  // 32-bit load: X86vzmovl of a v4i32 scalar_to_vector of a loadi32.
  def MOVZDI2PDIrm :
    PDI<0x6E, MRMSrcMem,
        (outs VR128:$dst),
        (ins i32mem:$src),
        "movd\t{$src, $dst|$dst, $src}",
        [(set VR128:$dst,
              (v4i32 (X86vzmovl
                       (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>;

  // 64-bit load: built on the plain I class, so the XS prefix and the SSE2
  // predicate are attached explicitly.
  def MOVZQI2PQIrm :
    I<0x7E, MRMSrcMem,
      (outs VR128:$dst),
      (ins i64mem:$src),
      "movq\t{$src, $dst|$dst, $src}",
      [(set VR128:$dst,
            (v2i64 (X86vzmovl
                     (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,
    XS, Requires<[HasSSE2]>;
}
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002305
// A v2i64 X86vzload is selected to the zero-extending movq load.
def : Pat<(v2i64 (X86vzload addr:$src)),
          (MOVZQI2PQIrm addr:$src)>;
2307
// Move from XMM to XMM and clear the upper 64 bits. Note: there is a bug in
// the IA32 document; movq xmm1, xmm2 does clear the high bits.
let AddedComplexity = 15 in
def MOVZPQILo2PQIrr :
  I<0x7E, MRMSrcReg,
    (outs VR128:$dst),
    (ins VR128:$src),
    "movq\t{$src, $dst|$dst, $src}",
    [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
  XS, Requires<[HasSSE2]>;
2315
// Memory-source form of the same move: X86vzmovl of a memopv2i64 load.
let AddedComplexity = 20 in
def MOVZPQILo2PQIrm :
  I<0x7E, MRMSrcMem,
    (outs VR128:$dst),
    (ins i128mem:$src),
    "movq\t{$src, $dst|$dst, $src}",
    [(set VR128:$dst,
          (v2i64 (X86vzmovl (memopv2i64 addr:$src))))]>,
  XS, Requires<[HasSSE2]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002322
2323//===----------------------------------------------------------------------===//
2324// SSE3 Instructions
2325//===----------------------------------------------------------------------===//
2326
// Move Instructions
// movshdup: a one-input v4f32 vector_shuffle (second input undef) that
// satisfies MOVSHDUP_shuffle_mask.
def MOVSHDUPrr :
  S3SI<0x16, MRMSrcReg,
       (outs VR128:$dst),
       (ins VR128:$src),
       "movshdup\t{$src, $dst|$dst, $src}",
       [(set VR128:$dst,
             (v4f32 (vector_shuffle VR128:$src, (undef),
                                    MOVSHDUP_shuffle_mask)))]>;

// Memory form: the shuffled input is a memopv4f32 load of $src.
def MOVSHDUPrm :
  S3SI<0x16, MRMSrcMem,
       (outs VR128:$dst),
       (ins f128mem:$src),
       "movshdup\t{$src, $dst|$dst, $src}",
       [(set VR128:$dst,
             (v4f32 (vector_shuffle (memopv4f32 addr:$src), (undef),
                                    MOVSHDUP_shuffle_mask)))]>;
2338
// movsldup: a one-input v4f32 vector_shuffle (second input undef) that
// satisfies MOVSLDUP_shuffle_mask.
def MOVSLDUPrr :
  S3SI<0x12, MRMSrcReg,
       (outs VR128:$dst),
       (ins VR128:$src),
       "movsldup\t{$src, $dst|$dst, $src}",
       [(set VR128:$dst,
             (v4f32 (vector_shuffle VR128:$src, (undef),
                                    MOVSLDUP_shuffle_mask)))]>;

// Memory form: the shuffled input is a memopv4f32 load of $src.
def MOVSLDUPrm :
  S3SI<0x12, MRMSrcMem,
       (outs VR128:$dst),
       (ins f128mem:$src),
       "movsldup\t{$src, $dst|$dst, $src}",
       [(set VR128:$dst,
             (v4f32 (vector_shuffle (memopv4f32 addr:$src), (undef),
                                    MOVSLDUP_shuffle_mask)))]>;
2349
// movddup: a one-input v2f64 vector_shuffle (second input undef) that
// satisfies SSE_splat_lo_mask.
def MOVDDUPrr :
  S3DI<0x12, MRMSrcReg,
       (outs VR128:$dst),
       (ins VR128:$src),
       "movddup\t{$src, $dst|$dst, $src}",
       [(set VR128:$dst,
             (v2f64 (vector_shuffle VR128:$src, (undef),
                                    SSE_splat_lo_mask)))]>;

// Memory form: takes an f64mem operand; the shuffled input is a
// scalar_to_vector of a loadf64.
def MOVDDUPrm :
  S3DI<0x12, MRMSrcMem,
       (outs VR128:$dst),
       (ins f64mem:$src),
       "movddup\t{$src, $dst|$dst, $src}",
       [(set VR128:$dst,
             (v2f64 (vector_shuffle (scalar_to_vector (loadf64 addr:$src)),
                                    (undef),
                                    SSE_splat_lo_mask)))]>;
2362
// Arithmetic
// addsubps / addsubpd, selected from the int_x86_sse3_addsub_* intrinsics.
// $src1 is tied to $dst (two-address form).
//
// The memory forms fold their second operand through memopv4f32 /
// memopv2f64, the same fragments the other folded 128-bit loads in this file
// use, instead of a plain `load`. A plain `load` puts no alignment
// requirement on the access being folded, whereas the 128-bit memory operand
// of these instructions must be 16-byte aligned.
// NOTE(review): this relies on the memop* fragments rejecting insufficiently
// aligned loads -- confirm against their PatFrag definitions.
let Constraints = "$src1 = $dst" in {
  def ADDSUBPSrr :
    S3DI<0xD0, MRMSrcReg,
         (outs VR128:$dst),
         (ins VR128:$src1, VR128:$src2),
         "addsubps\t{$src2, $dst|$dst, $src2}",
         [(set VR128:$dst,
               (int_x86_sse3_addsub_ps VR128:$src1, VR128:$src2))]>;
  def ADDSUBPSrm :
    S3DI<0xD0, MRMSrcMem,
         (outs VR128:$dst),
         (ins VR128:$src1, f128mem:$src2),
         "addsubps\t{$src2, $dst|$dst, $src2}",
         [(set VR128:$dst,
               (int_x86_sse3_addsub_ps VR128:$src1,
                                       (memopv4f32 addr:$src2)))]>;
  def ADDSUBPDrr :
    S3I<0xD0, MRMSrcReg,
        (outs VR128:$dst),
        (ins VR128:$src1, VR128:$src2),
        "addsubpd\t{$src2, $dst|$dst, $src2}",
        [(set VR128:$dst,
              (int_x86_sse3_addsub_pd VR128:$src1, VR128:$src2))]>;
  def ADDSUBPDrm :
    S3I<0xD0, MRMSrcMem,
        (outs VR128:$dst),
        (ins VR128:$src1, f128mem:$src2),
        "addsubpd\t{$src2, $dst|$dst, $src2}",
        [(set VR128:$dst,
              (int_x86_sse3_addsub_pd VR128:$src1,
                                      (memopv2f64 addr:$src2)))]>;
}
2386
// lddqu: selected from int_x86_sse3_ldu_dq; the intrinsic takes the address
// directly rather than a load node.
def LDDQUrm :
  S3DI<0xF0, MRMSrcMem,
       (outs VR128:$dst),
       (ins i128mem:$src),
       "lddqu\t{$src, $dst|$dst, $src}",
       [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
2390
// Horizontal ops
// S3D_Intrr - register/register S3DI instruction selected from a v4f32
// intrinsic. `o` is the opcode byte, `OpcodeStr` the mnemonic that is
// prefixed to the operand string, and `IntId` the intrinsic to match.
class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
  : S3DI<o, MRMSrcReg,
         (outs VR128:$dst),
         (ins VR128:$src1, VR128:$src2),
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         [(set VR128:$dst,
               (v4f32 (IntId VR128:$src1, VR128:$src2)))]>;
// S3D_Intrm - register/memory S3DI instruction selected from a v4f32
// intrinsic. `o` is the opcode byte, `OpcodeStr` the mnemonic that is
// prefixed to the operand string, and `IntId` the intrinsic to match.
//
// The second operand is folded through memopv4f32, the fragment the other
// folded v4f32 loads in this file use, instead of a plain `load`. A plain
// `load` puts no alignment requirement on the access being folded, whereas
// the 128-bit memory operand of these instructions must be 16-byte aligned.
// NOTE(review): this relies on memopv4f32 rejecting insufficiently aligned
// loads -- confirm against its PatFrag definition.
class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
  : S3DI<o, MRMSrcMem,
         (outs VR128:$dst),
         (ins VR128:$src1, f128mem:$src2),
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         [(set VR128:$dst,
               (v4f32 (IntId VR128:$src1, (memopv4f32 addr:$src2))))]>;
// S3_Intrr - register/register S3I instruction selected from a v2f64
// intrinsic. `o` is the opcode byte, `OpcodeStr` the mnemonic that is
// prefixed to the operand string, and `IntId` the intrinsic to match.
class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
  : S3I<o, MRMSrcReg,
        (outs VR128:$dst),
        (ins VR128:$src1, VR128:$src2),
        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
        [(set VR128:$dst,
              (v2f64 (IntId VR128:$src1, VR128:$src2)))]>;
// S3_Intrm - register/memory S3I instruction selected from a v2f64
// intrinsic. `o` is the opcode byte, `OpcodeStr` the mnemonic that is
// prefixed to the operand string, and `IntId` the intrinsic to match.
//
// The second operand is folded through memopv2f64, the fragment the other
// folded v2f64 loads in this file use, instead of a plain `load`. A plain
// `load` puts no alignment requirement on the access being folded, whereas
// the 128-bit memory operand of these instructions must be 16-byte aligned.
// NOTE(review): this relies on memopv2f64 rejecting insufficiently aligned
// loads -- confirm against its PatFrag definition.
class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
  : S3I<o, MRMSrcMem,
        (outs VR128:$dst),
        (ins VR128:$src1, f128mem:$src2),
        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
        [(set VR128:$dst,
              (v2f64 (IntId VR128:$src1, (memopv2f64 addr:$src2))))]>;
2408
// Horizontal add / subtract instances. The *PS forms use the S3D_* classes
// and the *PD forms the S3_* classes; $src1 is tied to $dst throughout.
let Constraints = "$src1 = $dst" in {
  // Horizontal add (opcode 0x7C).
  def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>;
  def HADDPSrm : S3D_Intrm<0x7C, "haddps", int_x86_sse3_hadd_ps>;
  def HADDPDrr : S3_Intrr <0x7C, "haddpd", int_x86_sse3_hadd_pd>;
  def HADDPDrm : S3_Intrm <0x7C, "haddpd", int_x86_sse3_hadd_pd>;

  // Horizontal subtract (opcode 0x7D).
  def HSUBPSrr : S3D_Intrr<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
  def HSUBPSrm : S3D_Intrm<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
  def HSUBPDrr : S3_Intrr <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
  def HSUBPDrm : S3_Intrm <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
}
2419
// Thread synchronization
// monitor / mwait have no explicit operands; their patterns name the
// physical registers EAX, ECX and EDX directly.
// NOTE(review): the architectural encodings are 0F 01 C8 (monitor) and
// 0F 01 C9 (mwait); confirm that opcode 0xC8 / 0xC9 with RawFrm and the TB
// prefix really produces those bytes.
def MONITOR :
  I<0xC8, RawFrm,
    (outs),
    (ins),
    "monitor",
    [(int_x86_sse3_monitor EAX, ECX, EDX)]>,
  TB, Requires<[HasSSE3]>;

def MWAIT :
  I<0xC9, RawFrm,
    (outs),
    (ins),
    "mwait",
    [(int_x86_sse3_mwait ECX, EAX)]>,
  TB, Requires<[HasSSE3]>;
2425
// vector_shuffle v1, <undef> <1, 1, 3, 3>
// v4i32 shuffles with this mask reuse the movshdup instructions; both
// patterns require SSE3.
let AddedComplexity = 15 in
def : Pat<(v4i32 (vector_shuffle VR128:$src,
                                 (undef),
                                 MOVSHDUP_shuffle_mask)),
          (MOVSHDUPrr VR128:$src)>,
      Requires<[HasSSE3]>;

// Same shuffle with the input folded from memory (memopv2i64 via bc_v4i32).
let AddedComplexity = 20 in
def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (memopv2i64 addr:$src)),
                                 (undef),
                                 MOVSHDUP_shuffle_mask)),
          (MOVSHDUPrm addr:$src)>,
      Requires<[HasSSE3]>;
2435
// vector_shuffle v1, <undef> <0, 0, 2, 2>
// v4i32 shuffles with this mask reuse the movsldup instructions; both
// patterns require SSE3.
let AddedComplexity = 15 in
  def : Pat<(v4i32 (vector_shuffle VR128:$src,
                                   (undef),
                                   MOVSLDUP_shuffle_mask)),
            (MOVSLDUPrr VR128:$src)>,
        Requires<[HasSSE3]>;

// Same shuffle with the input folded from memory (memopv2i64 via bc_v4i32).
let AddedComplexity = 20 in
  def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (memopv2i64 addr:$src)),
                                   (undef),
                                   MOVSLDUP_shuffle_mask)),
            (MOVSLDUPrm addr:$src)>,
        Requires<[HasSSE3]>;
2445
2446//===----------------------------------------------------------------------===//
2447// SSSE3 Instructions
2448//===----------------------------------------------------------------------===//
2449
/// SS3I_unop_rm_int_8 - Simple SSSE3 unary operator whose type is v*i8.
/// Expands to four records: rr64 / rm64 on VR64 using IntId64, and
/// rr128 / rm128 on VR128 using IntId128. The 128-bit records carry OpSize.
multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr,
                              Intrinsic IntId64, Intrinsic IntId128> {
  // 64-bit, register source.
  def rr64 :
    SS38I<opc, MRMSrcReg,
          (outs VR64:$dst),
          (ins VR64:$src),
          !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
          [(set VR64:$dst, (IntId64 VR64:$src))]>;

  // 64-bit, memory source: a memopv8i8 load passed through bitconvert.
  def rm64 :
    SS38I<opc, MRMSrcMem,
          (outs VR64:$dst),
          (ins i64mem:$src),
          !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
          [(set VR64:$dst,
                (IntId64 (bitconvert (memopv8i8 addr:$src))))]>;

  // 128-bit, register source.
  def rr128 :
    SS38I<opc, MRMSrcReg,
          (outs VR128:$dst),
          (ins VR128:$src),
          !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
          [(set VR128:$dst, (IntId128 VR128:$src))]>,
    OpSize;

  // 128-bit, memory source: a memopv16i8 load passed through bitconvert.
  def rm128 :
    SS38I<opc, MRMSrcMem,
          (outs VR128:$dst),
          (ins i128mem:$src),
          !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
          [(set VR128:$dst,
                (IntId128 (bitconvert (memopv16i8 addr:$src))))]>,
    OpSize;
}
2475
/// SS3I_unop_rm_int_16 - Simple SSSE3 unary operator whose type is v*i16.
/// Expands to four records: rr64 / rm64 on VR64 using IntId64, and
/// rr128 / rm128 on VR128 using IntId128. The 128-bit records carry OpSize.
multiclass SS3I_unop_rm_int_16<bits<8> opc, string OpcodeStr,
                               Intrinsic IntId64, Intrinsic IntId128> {
  // 64-bit, register source.
  def rr64 :
    SS38I<opc, MRMSrcReg,
          (outs VR64:$dst),
          (ins VR64:$src),
          !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
          [(set VR64:$dst, (IntId64 VR64:$src))]>;

  // 64-bit, memory source: a memopv4i16 load passed through bitconvert.
  def rm64 :
    SS38I<opc, MRMSrcMem,
          (outs VR64:$dst),
          (ins i64mem:$src),
          !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
          [(set VR64:$dst,
                (IntId64 (bitconvert (memopv4i16 addr:$src))))]>;

  // 128-bit, register source.
  def rr128 :
    SS38I<opc, MRMSrcReg,
          (outs VR128:$dst),
          (ins VR128:$src),
          !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
          [(set VR128:$dst, (IntId128 VR128:$src))]>,
    OpSize;

  // 128-bit, memory source: a memopv8i16 load passed through bitconvert.
  def rm128 :
    SS38I<opc, MRMSrcMem,
          (outs VR128:$dst),
          (ins i128mem:$src),
          !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
          [(set VR128:$dst,
                (IntId128 (bitconvert (memopv8i16 addr:$src))))]>,
    OpSize;
}
2504
/// SS3I_unop_rm_int_32 - Simple SSSE3 unary operator whose type is v*i32.
/// Expands to four records: rr64 / rm64 on VR64 using IntId64, and
/// rr128 / rm128 on VR128 using IntId128. The 128-bit records carry OpSize.
multiclass SS3I_unop_rm_int_32<bits<8> opc, string OpcodeStr,
                               Intrinsic IntId64, Intrinsic IntId128> {
  // 64-bit, register source.
  def rr64 :
    SS38I<opc, MRMSrcReg,
          (outs VR64:$dst),
          (ins VR64:$src),
          !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
          [(set VR64:$dst, (IntId64 VR64:$src))]>;

  // 64-bit, memory source: a memopv2i32 load passed through bitconvert.
  def rm64 :
    SS38I<opc, MRMSrcMem,
          (outs VR64:$dst),
          (ins i64mem:$src),
          !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
          [(set VR64:$dst,
                (IntId64 (bitconvert (memopv2i32 addr:$src))))]>;

  // 128-bit, register source.
  def rr128 :
    SS38I<opc, MRMSrcReg,
          (outs VR128:$dst),
          (ins VR128:$src),
          !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
          [(set VR128:$dst, (IntId128 VR128:$src))]>,
    OpSize;

  // 128-bit, memory source: a memopv4i32 load passed through bitconvert.
  def rm128 :
    SS38I<opc, MRMSrcMem,
          (outs VR128:$dst),
          (ins i128mem:$src),
          !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
          [(set VR128:$dst,
                (IntId128 (bitconvert (memopv4i32 addr:$src))))]>,
    OpSize;
}
2533
// pabsb / pabsw / pabsd: one instantiation of the unary multiclass per
// element width, each pairing the 64-bit intrinsic with its _128 variant.
defm PABSB : SS3I_unop_rm_int_8 <0x1C, "pabsb",
                                 int_x86_ssse3_pabs_b, int_x86_ssse3_pabs_b_128>;
defm PABSW : SS3I_unop_rm_int_16<0x1D, "pabsw",
                                 int_x86_ssse3_pabs_w, int_x86_ssse3_pabs_w_128>;
defm PABSD : SS3I_unop_rm_int_32<0x1E, "pabsd",
                                 int_x86_ssse3_pabs_d, int_x86_ssse3_pabs_d_128>;
2543
/// SS3I_binop_rm_int_8 - Simple SSSE3 binary operator whose type is v*i8.
/// Expands to rr64 / rm64 on VR64 using IntId64 and rr128 / rm128 on VR128
/// using IntId128, all with $src1 tied to $dst. The 128-bit records carry
/// OpSize. `Commutable` (default 0) sets isCommutable on the two
/// register/register records only.
let Constraints = "$src1 = $dst" in {
  multiclass SS3I_binop_rm_int_8<bits<8> opc, string OpcodeStr,
                                 Intrinsic IntId64, Intrinsic IntId128,
                                 bit Commutable = 0> {
    // 64-bit, register source.
    def rr64 :
      SS38I<opc, MRMSrcReg,
            (outs VR64:$dst),
            (ins VR64:$src1, VR64:$src2),
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> {
      let isCommutable = Commutable;
    }

    // 64-bit, memory source: a memopv8i8 load passed through bitconvert.
    def rm64 :
      SS38I<opc, MRMSrcMem,
            (outs VR64:$dst),
            (ins VR64:$src1, i64mem:$src2),
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            [(set VR64:$dst,
                  (IntId64 VR64:$src1,
                           (bitconvert (memopv8i8 addr:$src2))))]>;

    // 128-bit, register source.
    def rr128 :
      SS38I<opc, MRMSrcReg,
            (outs VR128:$dst),
            (ins VR128:$src1, VR128:$src2),
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
      OpSize {
      let isCommutable = Commutable;
    }

    // 128-bit, memory source: a memopv16i8 load passed through bitconvert.
    def rm128 :
      SS38I<opc, MRMSrcMem,
            (outs VR128:$dst),
            (ins VR128:$src1, i128mem:$src2),
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            [(set VR128:$dst,
                  (IntId128 VR128:$src1,
                            (bitconvert (memopv16i8 addr:$src2))))]>,
      OpSize;
  }
}
2577
/// SS3I_binop_rm_int_16 - Simple SSSE3 binary operator whose type is v*i16.
///
/// Two-address form: $src1 is tied to $dst, so only $src2 appears in the
/// assembly string. `Commutable` is forwarded to isCommutable on the
/// register-register forms only; the memory forms never set it.
let Constraints = "$src1 = $dst" in {
  multiclass SS3I_binop_rm_int_16<bits<8> opc, string OpcodeStr,
                                  Intrinsic IntId64, Intrinsic IntId128,
                                  bit Commutable = 0> {
    // 64-bit form, register second source.
    def rr64 : SS38I<opc, MRMSrcReg,
                     (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> {
      let isCommutable = Commutable;
    }

    // 64-bit form, memory second source (load folded via bitconvert).
    def rm64 : SS38I<opc, MRMSrcMem,
                     (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     [(set VR64:$dst,
                       (IntId64 VR64:$src1,
                                (bitconvert (memopv4i16 addr:$src2))))]>;

    // 128-bit form, register second source.
    def rr128 : SS38I<opc, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst,
                        (IntId128 VR128:$src1, VR128:$src2))]>,
                      OpSize {
      let isCommutable = Commutable;
    }

    // 128-bit form, memory second source (load folded via bitconvert).
    def rm128 : SS38I<opc, MRMSrcMem,
                      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst,
                        (IntId128 VR128:$src1,
                                  (bitconvert (memopv8i16 addr:$src2))))]>,
                      OpSize;
  }
}
2611
/// SS3I_binop_rm_int_32 - Simple SSSE3 binary operator whose type is v*i32.
///
/// Two-address form: $src1 is tied to $dst, so only $src2 appears in the
/// assembly string. `Commutable` is forwarded to isCommutable on the
/// register-register forms only; the memory forms never set it.
let Constraints = "$src1 = $dst" in {
  multiclass SS3I_binop_rm_int_32<bits<8> opc, string OpcodeStr,
                                  Intrinsic IntId64, Intrinsic IntId128,
                                  bit Commutable = 0> {
    // 64-bit form, register second source.
    def rr64 : SS38I<opc, MRMSrcReg,
                     (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> {
      let isCommutable = Commutable;
    }

    // 64-bit form, memory second source (load folded via bitconvert).
    def rm64 : SS38I<opc, MRMSrcMem,
                     (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     [(set VR64:$dst,
                       (IntId64 VR64:$src1,
                                (bitconvert (memopv2i32 addr:$src2))))]>;

    // 128-bit form, register second source.
    def rr128 : SS38I<opc, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst,
                        (IntId128 VR128:$src1, VR128:$src2))]>,
                      OpSize {
      let isCommutable = Commutable;
    }

    // 128-bit form, memory second source (load folded via bitconvert).
    def rm128 : SS38I<opc, MRMSrcMem,
                      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst,
                        (IntId128 VR128:$src1,
                                  (bitconvert (memopv4i32 addr:$src2))))]>,
                      OpSize;
  }
}
2645
// SSSE3 binary operations.
//
// The trailing `1` (Commutable) is passed only where swapping the two sources
// cannot change the result. It is deliberately NOT passed for:
//   - phadd* / phsub*: the horizontal results of $src1 land in the low half
//     of the destination and those of $src2 in the high half, so swapping the
//     operands swaps the halves.
//   - pmaddubsw: the first source is treated as unsigned bytes and the second
//     as signed bytes.
//   - pshufb / psign*: the two sources play different roles (data vs.
//     control).
defm PHADDW    : SS3I_binop_rm_int_16<0x01, "phaddw",
                                      int_x86_ssse3_phadd_w,
                                      int_x86_ssse3_phadd_w_128>;
defm PHADDD    : SS3I_binop_rm_int_32<0x02, "phaddd",
                                      int_x86_ssse3_phadd_d,
                                      int_x86_ssse3_phadd_d_128>;
defm PHADDSW   : SS3I_binop_rm_int_16<0x03, "phaddsw",
                                      int_x86_ssse3_phadd_sw,
                                      int_x86_ssse3_phadd_sw_128>;
defm PHSUBW    : SS3I_binop_rm_int_16<0x05, "phsubw",
                                      int_x86_ssse3_phsub_w,
                                      int_x86_ssse3_phsub_w_128>;
defm PHSUBD    : SS3I_binop_rm_int_32<0x06, "phsubd",
                                      int_x86_ssse3_phsub_d,
                                      int_x86_ssse3_phsub_d_128>;
defm PHSUBSW   : SS3I_binop_rm_int_16<0x07, "phsubsw",
                                      int_x86_ssse3_phsub_sw,
                                      int_x86_ssse3_phsub_sw_128>;
defm PMADDUBSW : SS3I_binop_rm_int_8 <0x04, "pmaddubsw",
                                      int_x86_ssse3_pmadd_ub_sw,
                                      int_x86_ssse3_pmadd_ub_sw_128>;
defm PMULHRSW  : SS3I_binop_rm_int_16<0x0B, "pmulhrsw",
                                      int_x86_ssse3_pmul_hr_sw,
                                      int_x86_ssse3_pmul_hr_sw_128, 1>;
defm PSHUFB    : SS3I_binop_rm_int_8 <0x00, "pshufb",
                                      int_x86_ssse3_pshuf_b,
                                      int_x86_ssse3_pshuf_b_128>;
defm PSIGNB    : SS3I_binop_rm_int_8 <0x08, "psignb",
                                      int_x86_ssse3_psign_b,
                                      int_x86_ssse3_psign_b_128>;
defm PSIGNW    : SS3I_binop_rm_int_16<0x09, "psignw",
                                      int_x86_ssse3_psign_w,
                                      int_x86_ssse3_psign_w_128>;
// psignd is opcode 0x0A; 0x09 is psignw.
defm PSIGND    : SS3I_binop_rm_int_32<0x0A, "psignd",
                                      int_x86_ssse3_psign_d,
                                      int_x86_ssse3_psign_d_128>;
2682
// palignr - two-address ($src1 tied to $dst) with a trailing immediate.
// The *rr forms take a register second source (MRMSrcReg); the *rm forms take
// a memory second source and therefore must use the MRMSrcMem format.
// NOTE(review): the immediate operand is declared i16imm on the 64-bit forms
// and i32imm on the 128-bit forms; left unchanged here - confirm against the
// encoding expected by SS3AI.
let Constraints = "$src1 = $dst" in {
  def PALIGNR64rr  : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
                           (ins VR64:$src1, VR64:$src2, i16imm:$src3),
                           "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                           [(set VR64:$dst,
                             (int_x86_ssse3_palign_r
                              VR64:$src1, VR64:$src2,
                              imm:$src3))]>;
  def PALIGNR64rm  : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
                           (ins VR64:$src1, i64mem:$src2, i16imm:$src3),
                           "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                           [(set VR64:$dst,
                             (int_x86_ssse3_palign_r
                              VR64:$src1,
                              (bitconvert (memopv2i32 addr:$src2)),
                              imm:$src3))]>;

  def PALIGNR128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
                           (ins VR128:$src1, VR128:$src2, i32imm:$src3),
                           "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                           [(set VR128:$dst,
                             (int_x86_ssse3_palign_r_128
                              VR128:$src1, VR128:$src2,
                              imm:$src3))]>, OpSize;
  def PALIGNR128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
                           (ins VR128:$src1, i128mem:$src2, i32imm:$src3),
                           "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                           [(set VR128:$dst,
                             (int_x86_ssse3_palign_r_128
                              VR128:$src1,
                              (bitconvert (memopv4i32 addr:$src2)),
                              imm:$src3))]>, OpSize;
}
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002716
2717//===----------------------------------------------------------------------===//
2718// Non-Instruction Patterns
2719//===----------------------------------------------------------------------===//
2720
// extload f32 -> f64.  The isel has a hack (PreprocessForFPConvert) that can
// introduce loads after dag combine.  Those loads are not folded into the
// fextend, so the load+fextend pair has to be matched explicitly here.
def : Pat<(fextend (loadf32 addr:$src)),
          (CVTSS2SDrm addr:$src)>, Requires<[HasSSE2]>;
2728
// bit_convert
// A bitconvert between any two of the 128-bit vector types held in VR128
// selects to the source register itself, retyped.  Grouped by result type.
let Predicates = [HasSSE2] in {
  // -> v2i64
  def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;

  // -> v4i32
  def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;

  // -> v8i16
  def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;

  // -> v16i8
  def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;

  // -> v4f32
  def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;

  // -> v2f64
  def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
}
2762
// Move scalar to XMM zero-extended.
// Each pattern zeroes a VR128 (V_SET0) and then moves the source into its low
// element with MOVS{S|D} / MOVLPS.
let AddedComplexity = 15, Predicates = [HasSSE2] in {
  // f64 scalar -> v2f64 with zeroed upper element.
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
            (MOVLSD2PDrr (V_SET0), FR64:$src)>;
  // f32 scalar -> v4f32 with zeroed upper elements.
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
            (MOVLSS2PSrr (V_SET0), FR32:$src)>;
  // Low element of an existing v4f32, upper elements zeroed.
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (MOVLPSrr (V_SET0), VR128:$src)>;
}
2774
// Splat v2f64 / v2i64: unpack a register with itself, low or high half
// depending on the shuffle mask.
let AddedComplexity = 10, Predicates = [HasSSE2] in {
  def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef),
                            SSE_splat_lo_mask:$sm),
            (UNPCKLPDrr VR128:$src, VR128:$src)>;
  def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef),
                            UNPCKH_shuffle_mask:$sm),
            (UNPCKHPDrr VR128:$src, VR128:$src)>;
  def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef),
                            SSE_splat_lo_mask:$sm),
            (PUNPCKLQDQrr VR128:$src, VR128:$src)>;
  def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef),
                            UNPCKH_shuffle_mask:$sm),
            (PUNPCKHQDQrr VR128:$src, VR128:$src)>;
}
2786
// Special unary SHUFPSrri case: the single source feeds both operands.
def : Pat<(v4f32 (vector_shuffle VR128:$src1, (undef),
                                 SHUFP_unary_shuffle_mask:$sm)),
          (SHUFPSrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>,
      Requires<[HasSSE1]>;

let Predicates = [HasSSE2] in {
  // Special unary SHUFPDrri case.
  def : Pat<(v2f64 (vector_shuffle VR128:$src1, (undef),
                                   SHUFP_unary_shuffle_mask:$sm)),
            (SHUFPDrri VR128:$src1, VR128:$src1,
                       SHUFP_unary_shuffle_mask:$sm)>;

  // Unary v4f32 shuffle with PSHUF* in order to fold a load.
  def : Pat<(vector_shuffle (bc_v4i32 (memopv4f32 addr:$src1)), (undef),
                            SHUFP_unary_shuffle_mask:$sm),
            (PSHUFDmi addr:$src1, SHUFP_unary_shuffle_mask:$sm)>;

  // Special binary v4i32 shuffle cases with SHUFPS (register, then memory).
  def : Pat<(v4i32 (vector_shuffle VR128:$src1, (v4i32 VR128:$src2),
                                   PSHUFD_binary_shuffle_mask:$sm)),
            (SHUFPSrri VR128:$src1, VR128:$src2,
                       PSHUFD_binary_shuffle_mask:$sm)>;
  def : Pat<(v4i32 (vector_shuffle VR128:$src1,
                                   (bc_v4i32 (memopv2i64 addr:$src2)),
                                   PSHUFD_binary_shuffle_mask:$sm)),
            (SHUFPSrmi VR128:$src1, addr:$src2,
                       PSHUFD_binary_shuffle_mask:$sm)>;

  // Special binary v2i64 shuffle cases using SHUFPDrri.
  def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
                                   SHUFP_shuffle_mask:$sm)),
            (SHUFPDrri VR128:$src1, VR128:$src2, SHUFP_shuffle_mask:$sm)>;

  // Special unary SHUFPDrri case (v2i64).
  def : Pat<(v2i64 (vector_shuffle VR128:$src1, (undef),
                                   SHUFP_unary_shuffle_mask:$sm)),
            (SHUFPDrri VR128:$src1, VR128:$src1,
                       SHUFP_unary_shuffle_mask:$sm)>;
}
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002821
// vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
// Unpack-low of a register with itself.  The integer forms (PUNPCKL*) operate
// on XMM registers and are SSE2 instructions, so every integer pattern is
// gated on HasSSE2 (the v4i32 one was previously gated on HasSSE1).
let AddedComplexity = 10 in {
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
                  UNPCKL_v_undef_shuffle_mask)),
          (UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef),
                  UNPCKL_v_undef_shuffle_mask)),
          (PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef),
                  UNPCKL_v_undef_shuffle_mask)),
          (PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
                  UNPCKL_v_undef_shuffle_mask)),
          (PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
}
2837
// vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
// Unpack-high of a register with itself.  The integer forms (PUNPCKH*)
// operate on XMM registers and are SSE2 instructions, so every integer
// pattern is gated on HasSSE2 (the v4i32 one was previously gated on HasSSE1).
let AddedComplexity = 10 in {
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
                  UNPCKH_v_undef_shuffle_mask)),
          (UNPCKHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef),
                  UNPCKH_v_undef_shuffle_mask)),
          (PUNPCKHBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef),
                  UNPCKH_v_undef_shuffle_mask)),
          (PUNPCKHWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
                  UNPCKH_v_undef_shuffle_mask)),
          (PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
}
2853
// Shuffles implemented with MOVLHPS / MOVHLPS.  None of these patterns carry
// a Requires<> predicate.
let AddedComplexity = 15 in {
  // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
  def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
                                   MOVHP_shuffle_mask)),
            (MOVLHPSrr VR128:$src1, VR128:$src2)>;

  // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
  def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
                                   MOVHLPS_shuffle_mask)),
            (MOVHLPSrr VR128:$src1, VR128:$src2)>;

  // vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS
  // (same register for both operands; v4f32 and v4i32 variants).
  def : Pat<(v4f32 (vector_shuffle VR128:$src1, (undef),
                                   MOVHLPS_v_undef_shuffle_mask)),
            (MOVHLPSrr VR128:$src1, VR128:$src1)>;
  def : Pat<(v4i32 (vector_shuffle VR128:$src1, (undef),
                                   MOVHLPS_v_undef_shuffle_mask)),
            (MOVHLPSrr VR128:$src1, VR128:$src1)>;
}
2873
// Fold a load into MOVLP{S,D} / MOVHP{S,D}.
// Each element type gets one MOVLP_shuffle_mask pattern and one
// MOVHP_shuffle_mask pattern.  The final v2i64 pattern used to repeat the
// MOVLP_shuffle_mask -> MOVLPDrm pattern verbatim, leaving the v2i64 MOVHP
// case unmatched; it now selects MOVHPDrm for MOVHP_shuffle_mask.
let AddedComplexity = 20 in {
// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
// vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS
def : Pat<(v4f32 (vector_shuffle VR128:$src1, (memopv4f32 addr:$src2),
                  MOVLP_shuffle_mask)),
          (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
def : Pat<(v2f64 (vector_shuffle VR128:$src1, (memopv2f64 addr:$src2),
                  MOVLP_shuffle_mask)),
          (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v4f32 (vector_shuffle VR128:$src1, (memopv4f32 addr:$src2),
                  MOVHP_shuffle_mask)),
          (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
def : Pat<(v2f64 (vector_shuffle VR128:$src1, (memopv2f64 addr:$src2),
                  MOVHP_shuffle_mask)),
          (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;

def : Pat<(v4i32 (vector_shuffle VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)),
                  MOVLP_shuffle_mask)),
          (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (vector_shuffle VR128:$src1, (memopv2i64 addr:$src2),
                  MOVLP_shuffle_mask)),
          (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)),
                  MOVHP_shuffle_mask)),
          (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
def : Pat<(v2i64 (vector_shuffle VR128:$src1, (memopv2i64 addr:$src2),
                  MOVHP_shuffle_mask)),
          (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
}
2903
let AddedComplexity = 15, Predicates = [HasSSE2] in {
  // Setting the lowest element in the vector.
  def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
                                   MOVL_shuffle_mask)),
            (MOVLPSrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
                                   MOVL_shuffle_mask)),
            (MOVLPDrr VR128:$src1, VR128:$src2)>;

  // vector_shuffle v1, v2 <4, 5, 2, 3> using MOVLPDrr (movsd)
  def : Pat<(v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
                                   MOVLP_shuffle_mask)),
            (MOVLPDrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
                                   MOVLP_shuffle_mask)),
            (MOVLPDrr VR128:$src1, VR128:$src2)>;
}
2921
// Set lowest element and zero upper elements.
// Only the vector_shuffle form receives AddedComplexity = 15; the X86vzmovl
// form that follows is outside the `let` and keeps the default complexity.
let AddedComplexity = 15 in {
  def : Pat<(v2f64 (vector_shuffle immAllZerosV_bc, VR128:$src,
                                   MOVL_shuffle_mask)),
            (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
}
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
          (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002929
// FIXME: Temporary workaround since 2-wide shuffle is broken.
// Map the 2-element SSE2 intrinsics directly onto their instructions.  Every
// result is given an explicit vector type (v2f64 for the *pd intrinsics,
// v2i64 for the *qdq intrinsics), including the PUNPCKLQDQrm form, which
// previously lacked the annotation its siblings carry.
def : Pat<(int_x86_sse2_movs_d VR128:$src1, VR128:$src2),
          (v2f64 (MOVLPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_loadh_pd VR128:$src1, addr:$src2),
          (v2f64 (MOVHPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_loadl_pd VR128:$src1, addr:$src2),
          (v2f64 (MOVLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_shuf_pd VR128:$src1, VR128:$src2, imm:$src3),
          (v2f64 (SHUFPDrri VR128:$src1, VR128:$src2, imm:$src3))>,
      Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_shuf_pd VR128:$src1, (load addr:$src2), imm:$src3),
          (v2f64 (SHUFPDrmi VR128:$src1, addr:$src2, imm:$src3))>,
      Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_unpckh_pd VR128:$src1, VR128:$src2),
          (v2f64 (UNPCKHPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_unpckh_pd VR128:$src1, (load addr:$src2)),
          (v2f64 (UNPCKHPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_unpckl_pd VR128:$src1, VR128:$src2),
          (v2f64 (UNPCKLPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_unpckl_pd VR128:$src1, (load addr:$src2)),
          (v2f64 (UNPCKLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_punpckh_qdq VR128:$src1, VR128:$src2),
          (v2i64 (PUNPCKHQDQrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_punpckh_qdq VR128:$src1, (load addr:$src2)),
          (v2i64 (PUNPCKHQDQrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_punpckl_qdq VR128:$src1, VR128:$src2),
          (v2i64 (PUNPCKLQDQrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_punpckl_qdq VR128:$src1, (load addr:$src2)),
          (v2i64 (PUNPCKLQDQrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
2959
// Some special case pandn patterns.
// (and (xor x, all-ones), y) selects PANDN.  The all-ones vector may be
// spelled as v4i32, v8i16 or v16i8 behind a bc_v2i64, so each spelling gets
// its own pattern, first for a register second operand, then for a folded
// load.
let Predicates = [HasSSE2] in {
  // Register second operand.
  def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
                        VR128:$src2)),
            (PANDNrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
                        VR128:$src2)),
            (PANDNrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
                        VR128:$src2)),
            (PANDNrr VR128:$src1, VR128:$src2)>;

  // Memory second operand.
  def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
                        (memopv2i64 addr:$src2))),
            (PANDNrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
                        (memopv2i64 addr:$src2))),
            (PANDNrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
                        (memopv2i64 addr:$src2))),
            (PANDNrm VR128:$src1, addr:$src2)>;
}
2980
// vector -> vector casts
// Generic v4i32 <-> v4f32 conversions select the intrinsic-form instructions.
let Predicates = [HasSSE2] in {
  def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
            (Int_CVTDQ2PSrr VR128:$src)>;
  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
            (Int_CVTTPS2DQrr VR128:$src)>;
}
2986
// Use movaps / movups for SSE integer load / store (one byte shorter).
// Aligned accesses select MOVAPS, everything else MOVUPS.

// v4i32 loads.
let Predicates = [HasSSE1] in {
  def : Pat<(alignedloadv4i32 addr:$src), (MOVAPSrm addr:$src)>;
  def : Pat<(loadv4i32 addr:$src),        (MOVUPSrm addr:$src)>;
}

let Predicates = [HasSSE2] in {
  // v2i64 loads.
  def : Pat<(alignedloadv2i64 addr:$src), (MOVAPSrm addr:$src)>;
  def : Pat<(loadv2i64 addr:$src),        (MOVUPSrm addr:$src)>;

  // Aligned stores of every 128-bit integer vector type.
  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;

  // Remaining (not known aligned) stores.
  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
}
Nate Begemanb2975562008-02-03 07:18:54 +00003013
3014//===----------------------------------------------------------------------===//
3015// SSE4.1 Instructions
3016//===----------------------------------------------------------------------===//
3017
/// sse41_fp_unop_rm - SSE4.1 floating-point unary operator that takes an
/// extra immediate operand.
///
/// Produces eight intrinsic-only instructions, a register and a memory form
/// for each of the ss / ps / sd / pd suffixes appended to OpcodeStr.  Each
/// suffix has its own opcode (opcss, opcps, opcsd, opcpd) and its own
/// intrinsic (F32Int, V4F32Int, F64Int, V2F64Int).  All forms carry OpSize.
multiclass sse41_fp_unop_rm<bits<8> opcss, bits<8> opcps,
                            bits<8> opcsd, bits<8> opcpd,
                            string OpcodeStr,
                            Intrinsic F32Int,
                            Intrinsic V4F32Int,
                            Intrinsic F64Int,
                            Intrinsic V2F64Int> {
  // Scalar single-precision intrinsic, register source.
  def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
                  (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
                  !strconcat(OpcodeStr,
                             "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR128:$dst, (F32Int VR128:$src1, imm:$src2))]>,
                OpSize;

  // Scalar single-precision intrinsic, memory source.
  def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
                  (outs VR128:$dst), (ins ssmem:$src1, i32i8imm:$src2),
                  !strconcat(OpcodeStr,
                             "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR128:$dst, (F32Int sse_load_f32:$src1, imm:$src2))]>,
                OpSize;

  // Packed single-precision intrinsic, register source.
  def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
                  (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
                  !strconcat(OpcodeStr,
                             "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR128:$dst, (V4F32Int VR128:$src1, imm:$src2))]>,
                OpSize;

  // Packed single-precision intrinsic, memory source.
  def PSm_Int : SS4AIi8<opcps, MRMSrcMem,
                  (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
                  !strconcat(OpcodeStr,
                             "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR128:$dst, (V4F32Int (load addr:$src1),imm:$src2))]>,
                OpSize;

  // Scalar double-precision intrinsic, register source.
  def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
                  (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
                  !strconcat(OpcodeStr,
                             "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR128:$dst, (F64Int VR128:$src1, imm:$src2))]>,
                OpSize;

  // Scalar double-precision intrinsic, memory source.
  def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
                  (outs VR128:$dst), (ins sdmem:$src1, i32i8imm:$src2),
                  !strconcat(OpcodeStr,
                             "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR128:$dst, (F64Int sse_load_f64:$src1, imm:$src2))]>,
                OpSize;

  // Packed double-precision intrinsic, register source.
  def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,
                  (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
                  !strconcat(OpcodeStr,
                             "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR128:$dst, (V2F64Int VR128:$src1, imm:$src2))]>,
                OpSize;

  // Packed double-precision intrinsic, memory source.
  def PDm_Int : SS4AIi8<opcpd, MRMSrcMem,
                  (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
                  !strconcat(OpcodeStr,
                             "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR128:$dst, (V2F64Int (load addr:$src1),imm:$src2))]>,
                OpSize;
}
3089
// FP round - roundss, roundps, roundsd, roundpd
// Opcodes are passed in ss, ps, sd, pd order.
defm ROUND : sse41_fp_unop_rm<0x0A, 0x08, 0x0B, 0x09, "round",
                              int_x86_sse41_round_ss,
                              int_x86_sse41_round_ps,
                              int_x86_sse41_round_sd,
                              int_x86_sse41_round_pd>;
Nate Begemaneb3f5432008-02-04 05:34:34 +00003094
// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
// Only 128-bit forms exist: a register-source and a memory-source variant,
// both selected through IntId128 and both carrying OpSize.
multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
                                 Intrinsic IntId128> {
  // Register source.
  def rr128 : SS48I<opc, MRMSrcReg,
                    (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (IntId128 VR128:$src))]>,
                    OpSize;

  // Memory source; the memopv8i16 load is folded through a bitconvert.
  def rm128 : SS48I<opc, MRMSrcMem,
                    (outs VR128:$dst), (ins i128mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                      (IntId128 (bitconvert (memopv8i16 addr:$src))))]>,
                    OpSize;
}
3109
// phminposuw: the only user of SS41I_unop_rm_int_v16 in this part of the file.
defm PHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "phminposuw",
                                        int_x86_sse41_phminposuw>;
3112
/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
///
/// Two-address, 128-bit only: $src1 is tied to $dst, so only $src2 appears
/// in the assembly string.  Both forms are selected through IntId128 and
/// carry OpSize.  `Commutable` is applied to the register-register form only.
let Constraints = "$src1 = $dst" in {
  multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
                                Intrinsic IntId128, bit Commutable = 0> {
    // Register second source.
    def rr : SS48I<opc, MRMSrcReg,
                   (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
                   OpSize {
      let isCommutable = Commutable;
    }

    // Memory second source; the memopv16i8 load is folded via bitconvert.
    def rm : SS48I<opc, MRMSrcMem,
                   (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   [(set VR128:$dst,
                     (IntId128 VR128:$src1,
                               (bitconvert (memopv16i8 addr:$src2))))]>,
                   OpSize;
  }
}
3132
// SSE4.1 binary operations selected purely through intrinsics.
// The final template argument is the Commutable bit; packusdw is the only
// one instantiated with 0.
defm PCMPEQQ  : SS41I_binop_rm_int<0x29, "pcmpeqq",  int_x86_sse41_pcmpeqq,  1>;
defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw, 0>;

// Packed integer minimum.
defm PMINSB   : SS41I_binop_rm_int<0x38, "pminsb",   int_x86_sse41_pminsb,   1>;
defm PMINSD   : SS41I_binop_rm_int<0x39, "pminsd",   int_x86_sse41_pminsd,   1>;
defm PMINUD   : SS41I_binop_rm_int<0x3B, "pminud",   int_x86_sse41_pminud,   1>;
defm PMINUW   : SS41I_binop_rm_int<0x3A, "pminuw",   int_x86_sse41_pminuw,   1>;

// Packed integer maximum.
defm PMAXSB   : SS41I_binop_rm_int<0x3C, "pmaxsb",   int_x86_sse41_pmaxsb,   1>;
defm PMAXSD   : SS41I_binop_rm_int<0x3D, "pmaxsd",   int_x86_sse41_pmaxsd,   1>;
defm PMAXUD   : SS41I_binop_rm_int<0x3F, "pmaxud",   int_x86_sse41_pmaxud,   1>;
defm PMAXUW   : SS41I_binop_rm_int<0x3E, "pmaxuw",   int_x86_sse41_pmaxuw,   1>;

defm PMULDQ   : SS41I_binop_rm_int<0x28, "pmuldq",   int_x86_sse41_pmuldq,   1>;
Nate Begeman72d802a2008-02-04 06:00:24 +00003155
Nate Begeman58057962008-02-09 01:38:08 +00003156
/// SS41I_binop_patint - Simple SSE 4.1 binary operator that can be selected
/// either from a generic DAG node (OpNode) or from an intrinsic (IntId128).
/// Four records are produced:
///   rr / rm         - match OpNode on v4i32 operands (register / memory).
///   rr_int / rm_int - match IntId128 (register / memory).
/// $src1 is tied to $dst.  Commutable is forwarded to isCommutable on the two
/// register-register forms only.
let Constraints = "$src1 = $dst" in {
  multiclass SS41I_binop_patint<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                Intrinsic IntId128, bit Commutable = 0> {
    def rr : SS48I<opc, MRMSrcReg,
                   (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   [(set VR128:$dst,
                         (OpNode (v4i32 VR128:$src1), VR128:$src2))]>,
             OpSize {
      let isCommutable = Commutable;
    }

    def rr_int : SS48I<opc, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                       !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                       [(set VR128:$dst,
                             (IntId128 VR128:$src1, VR128:$src2))]>,
                 OpSize {
      let isCommutable = Commutable;
    }

    def rm : SS48I<opc, MRMSrcMem,
                   (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   [(set VR128:$dst,
                         (OpNode VR128:$src1, (memopv4i32 addr:$src2)))]>,
             OpSize;

    def rm_int : SS48I<opc, MRMSrcMem,
                       (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
                       !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                       [(set VR128:$dst,
                             (IntId128 VR128:$src1,
                                       (memopv4i32 addr:$src2)))]>,
                 OpSize;
  }
}

// PMULLD is matched both by the generic 'mul' node and by its intrinsic.
defm PMULLD : SS41I_binop_patint<0x40, "pmulld", mul,
                                 int_x86_sse41_pmulld, 1>;
3190
3191
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate,
/// selected through the intrinsic IntId128.  $src1 is tied to $dst; $src3 is
/// the immediate.  Commutable is forwarded to isCommutable on the
/// register-register form only.
let Constraints = "$src1 = $dst" in {
  multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
                                 Intrinsic IntId128, bit Commutable = 0> {
    // Register, register, immediate.
    def rri : SS4AIi8<opc, MRMSrcReg,
                      (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
                      !strconcat(OpcodeStr,
                                 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                      [(set VR128:$dst,
                            (IntId128 VR128:$src1, VR128:$src2,
                                      imm:$src3))]>,
              OpSize {
      let isCommutable = Commutable;
    }

    // Register, memory, immediate.  The memory operand is a memopv16i8 load
    // that is bitconverted before being handed to the intrinsic.
    def rmi : SS4AIi8<opc, MRMSrcMem,
                      (outs VR128:$dst),
                      (ins VR128:$src1, i128mem:$src2, i32i8imm:$src3),
                      !strconcat(OpcodeStr,
                                 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                      [(set VR128:$dst,
                            (IntId128 VR128:$src1,
                                      (bitconvert (memopv16i8 addr:$src2)),
                                      imm:$src3))]>,
              OpSize;
  }
}
3215
// Instantiations of SS41I_binop_rmi_int: opcode, mnemonic, intrinsic and the
// Commutable flag.  Only the two dot-product instructions are marked
// commutable.
defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps, 0>;
defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd, 0>;
defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw, 0>;
defm DPPS    : SS41I_binop_rmi_int<0x40, "dpps",    int_x86_sse41_dpps,    1>;
defm DPPD    : SS41I_binop_rmi_int<0x41, "dppd",    int_x86_sse41_dppd,    1>;
defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw, 0>;
Nate Begeman58057962008-02-09 01:38:08 +00003228
Nate Begeman9a58b8a2008-02-09 23:46:37 +00003229
/// SS41I_ternary_int - SSE 4.1 ternary operator selected through the
/// intrinsic IntId.  The third operand is not an explicit instruction
/// operand: it is the fixed register XMM0, which is listed in Uses and
/// spelled out literally in the assembly string.  $src1 is tied to $dst.
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
  multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
    // Register-register form.
    def rr0 : SS48I<opc, MRMSrcReg,
                    (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                    !strconcat(OpcodeStr,
                               "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
                    [(set VR128:$dst,
                          (IntId VR128:$src1, VR128:$src2, XMM0))]>,
              OpSize;

    // Register-memory form; the memopv16i8 load is bitconverted before being
    // handed to the intrinsic.
    def rm0 : SS48I<opc, MRMSrcMem,
                    (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
                    !strconcat(OpcodeStr,
                               "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
                    [(set VR128:$dst,
                          (IntId VR128:$src1,
                                 (bitconvert (memopv16i8 addr:$src2)),
                                 XMM0))]>,
              OpSize;
  }
}

defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
3253
3254
/// SS41I_binop_rm_int8 - SSE 4.1 single-source intrinsic operator whose
/// memory form takes an i64mem operand.
/// NOTE(review): the rm pattern matches a full v4i32 load although the
/// operand is declared i64mem -- confirm this is the intended load width.
multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
  // Register source.
  def rr : SS48I<opc, MRMSrcReg,
                 (outs VR128:$dst), (ins VR128:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [(set VR128:$dst, (IntId VR128:$src))]>,
           OpSize;

  // Memory source.
  def rm : SS48I<opc, MRMSrcMem,
                 (outs VR128:$dst), (ins i64mem:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [(set VR128:$dst,
                       (IntId (bitconvert (v4i32 (load addr:$src)))))]>,
           OpSize;
}

defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>;
defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>;
defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>;
defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>;
defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>;
defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>;
3272
/// SS41I_binop_rm_int4 - SSE 4.1 single-source intrinsic operator whose
/// memory form takes an i32mem operand.
/// NOTE(review): the rm pattern matches a full v4i32 load although the
/// operand is declared i32mem -- confirm this is the intended load width.
multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
  // Register source.
  def rr : SS48I<opc, MRMSrcReg,
                 (outs VR128:$dst), (ins VR128:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [(set VR128:$dst, (IntId VR128:$src))]>,
           OpSize;

  // Memory source.
  def rm : SS48I<opc, MRMSrcMem,
                 (outs VR128:$dst), (ins i32mem:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [(set VR128:$dst,
                       (IntId (bitconvert (v4i32 (load addr:$src)))))]>,
           OpSize;
}

defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>;
defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>;
defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>;
defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>;
3288
/// SS41I_binop_rm_int2 - SSE 4.1 single-source intrinsic operator whose
/// memory form takes an i16mem operand.
/// NOTE(review): the rm pattern matches a full v4i32 load although the
/// operand is declared i16mem -- confirm this is the intended load width.
multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
  // Register source.
  def rr : SS48I<opc, MRMSrcReg,
                 (outs VR128:$dst), (ins VR128:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [(set VR128:$dst, (IntId VR128:$src))]>,
           OpSize;

  // Memory source.
  def rm : SS48I<opc, MRMSrcMem,
                 (outs VR128:$dst), (ins i16mem:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [(set VR128:$dst,
                       (IntId (bitconvert (v4i32 (load addr:$src)))))]>,
           OpSize;
}
3299
// Byte-to-quadword extending moves, built from SS41I_binop_rm_int2.
defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
// The zero-extending variant must print as "pmovzxbq"; it previously reused
// the sign-extending mnemonic "pmovsxbq", so the emitted assembly named the
// wrong instruction for opcode 0x32 / int_x86_sse41_pmovzxbq.
defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
3302
3303
/// SS41I_extract8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem.
///   rr - destination is a GR32 register; selected from X86pextrb on a v16i8
///        source with an immediate index.
///   mr - destination is an i8mem location; no selection pattern yet.
multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg,
                   (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set GR32:$dst,
                         (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>,
           OpSize;

  def mr : SS4AIi8<opc, MRMDestMem,
                   (outs), (ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   []>,
           OpSize;
  // FIXME: an AssertZext is in the way of writing the store pattern
  //   (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))),
  //          addr:$dst)
}

defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
Nate Begeman9a58b8a2008-02-09 23:46:37 +00003323
Nate Begemand77e59e2008-02-11 04:19:36 +00003324
/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination.
/// Only the memory-destination form is defined here, and it has no selection
/// pattern yet.
multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
  def mr : SS4AIi8<opc, MRMDestMem,
                   (outs), (ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   []>,
           OpSize;
  // FIXME: an AssertZext is in the way of writing the store pattern
  //   (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))),
  //          addr:$dst)
  // NOTE(review): the v16i8 above is probably meant to be v8i16 for a
  // 16-bit extract -- confirm when the pattern is actually written.
}

defm PEXTRW : SS41I_extract16<0x15, "pextrw">;
3338
3339
/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory
/// destination.  Both forms are selected from extractelt on a v4i32 source
/// with an immediate index; the memory form additionally stores the result.
multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg,
                   (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set GR32:$dst,
                         (extractelt (v4i32 VR128:$src1), imm:$src2))]>,
           OpSize;

  def mr : SS4AIi8<opc, MRMDestMem,
                   (outs), (ins i32mem:$dst, VR128:$src1, i32i8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
                           addr:$dst)]>,
           OpSize;
}

defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
Nate Begeman58057962008-02-09 01:38:08 +00003357
Nate Begemand77e59e2008-02-11 04:19:36 +00003358
/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
/// destination.  The v4f32 source is reinterpreted with bc_v4i32 and the
/// selected element is produced as an integer (rr) or stored (mr).
multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg,
                   (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set GR32:$dst,
                         (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
                                     imm:$src2))]>,
           OpSize;

  def mr : SS4AIi8<opc, MRMDestMem,
                   (outs), (ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
                                       imm:$src2),
                           addr:$dst)]>,
           OpSize;
}

defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
Nate Begeman9a58b8a2008-02-09 23:46:37 +00003378
/// SS41I_insert8 - SSE 4.1 insert of a byte, selected from X86pinsrb.
/// The inserted value comes from a GR32 register (rr) or from an i8mem
/// location via extloadi8 (rm); $src3 is the immediate.  $src1 is tied to
/// $dst.
let Constraints = "$src1 = $dst" in {
  multiclass SS41I_insert8<bits<8> opc, string OpcodeStr> {
    def rr : SS4AIi8<opc, MRMSrcReg,
                     (outs VR128:$dst),
                     (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
                     !strconcat(OpcodeStr,
                                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                     [(set VR128:$dst,
                           (X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>,
             OpSize;

    def rm : SS4AIi8<opc, MRMSrcMem,
                     (outs VR128:$dst),
                     (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
                     !strconcat(OpcodeStr,
                                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                     [(set VR128:$dst,
                           (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
                                      imm:$src3))]>,
             OpSize;
  }
}

defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
3398
/// SS41I_insert32 - SSE 4.1 insert of a 32-bit integer element, selected
/// from insertelt producing v4i32.  The inserted value comes from a GR32
/// register (rr) or from an i32mem location via loadi32 (rm); $src3 is the
/// immediate.  $src1 is tied to $dst.
let Constraints = "$src1 = $dst" in {
  multiclass SS41I_insert32<bits<8> opc, string OpcodeStr> {
    def rr : SS4AIi8<opc, MRMSrcReg,
                     (outs VR128:$dst),
                     (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
                     !strconcat(OpcodeStr,
                                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                     [(set VR128:$dst,
                           (v4i32 (insertelt VR128:$src1, GR32:$src2,
                                             imm:$src3)))]>,
             OpSize;

    def rm : SS4AIi8<opc, MRMSrcMem,
                     (outs VR128:$dst),
                     (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
                     !strconcat(OpcodeStr,
                                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                     [(set VR128:$dst,
                           (v4i32 (insertelt VR128:$src1,
                                             (loadi32 addr:$src2),
                                             imm:$src3)))]>,
             OpSize;
  }
}

defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
3419
/// SS41I_insertf32 - SSE 4.1 insert of a 32-bit fp element, selected from
/// X86insrtps.  The inserted value comes from an FR32 register (rr) or from
/// an f32mem location via loadf32 (rm); $src3 is the immediate.  $src1 is
/// tied to $dst.
let Constraints = "$src1 = $dst" in {
  multiclass SS41I_insertf32<bits<8> opc, string OpcodeStr> {
    def rr : SS4AIi8<opc, MRMSrcReg,
                     (outs VR128:$dst),
                     (ins VR128:$src1, FR32:$src2, i32i8imm:$src3),
                     !strconcat(OpcodeStr,
                                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                     [(set VR128:$dst,
                           (X86insrtps VR128:$src1, FR32:$src2, imm:$src3))]>,
             OpSize;

    def rm : SS4AIi8<opc, MRMSrcMem,
                     (outs VR128:$dst),
                     (ins VR128:$src1, f32mem:$src2, i32i8imm:$src3),
                     !strconcat(OpcodeStr,
                                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                     [(set VR128:$dst,
                           (X86insrtps VR128:$src1, (loadf32 addr:$src2),
                                       imm:$src3))]>,
             OpSize;
  }
}

defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
Nate Begeman0dd3cb52008-03-16 21:14:46 +00003439
// PTEST - register and memory forms.  Neither form has outputs or a
// selection pattern; both are declared to define EFLAGS.
let Defs = [EFLAGS] in {
  def PTESTrr : SS48I<0x17, MRMSrcReg,
                      (outs), (ins VR128:$src1, VR128:$src2),
                      "ptest \t{$src2, $src1|$src1, $src2}",
                      []>,
                OpSize;
  def PTESTrm : SS48I<0x17, MRMSrcMem,
                      (outs), (ins VR128:$src1, i128mem:$src2),
                      "ptest \t{$src2, $src1|$src1, $src2}",
                      []>,
                OpSize;
}
3446
// MOVNTDQA - 128-bit load from memory, selected from the
// int_x86_sse41_movntdqa intrinsic.
// The OpSize modifier was missing here although every other SS48I
// definition in this section carries it; MOVNTDQA is encoded with the
// operand-size prefix (66 0F 38 2A /r), so without it the wrong bytes
// would be emitted.
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem,
                       (outs VR128:$dst), (ins i128mem:$src),
                       "movntdqa\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                             (int_x86_sse41_movntdqa addr:$src))]>,
                 OpSize;