//====- X86InstrSSE.td - Describe the X86 Instruction Set -------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Evan Cheng and is distributed under the University
// of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 SSE instruction set, defining the instructions,
// and properties of the instructions which are needed for code generation,
// machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//


//===----------------------------------------------------------------------===//
// SSE specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Type profile with one result and two operands: operand 1 has the same type
// as the result, the result is floating point, and operand 2 is an integer.
def SDTX86FPShiftOp : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
                                           SDTCisFP<0>,
                                           SDTCisInt<2>]>;

// FP binary nodes.  The three bitwise nodes are additionally marked
// commutative and associative.
def X86fmin    : SDNode<"X86ISD::FMIN",   SDTFPBinOp>;
def X86fmax    : SDNode<"X86ISD::FMAX",   SDTFPBinOp>;
def X86fand    : SDNode<"X86ISD::FAND",   SDTFPBinOp,
                        [SDNPCommutative, SDNPAssociative]>;
def X86for     : SDNode<"X86ISD::FOR",    SDTFPBinOp,
                        [SDNPCommutative, SDNPAssociative]>;
def X86fxor    : SDNode<"X86ISD::FXOR",   SDTFPBinOp,
                        [SDNPCommutative, SDNPAssociative]>;

// FP unary nodes and the FP shift node.
def X86frsqrt  : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
def X86frcp    : SDNode<"X86ISD::FRCP",   SDTFPUnaryOp>;
def X86fsrl    : SDNode<"X86ISD::FSRL",   SDTX86FPShiftOp>;

// Compare nodes: both carry a chain and produce an output flag.
def X86comi    : SDNode<"X86ISD::COMI",   SDTX86CmpTest,
                        [SDNPHasChain, SDNPOutFlag]>;
def X86ucomi   : SDNode<"X86ISD::UCOMI",  SDTX86CmpTest,
                        [SDNPHasChain, SDNPOutFlag]>;

// Nodes with otherwise unconstrained type profiles: one result and one, two
// and three operands respectively.
def X86s2vec   : SDNode<"X86ISD::S2VEC",  SDTypeProfile<1, 1, []>, []>;
def X86pextrw  : SDNode<"X86ISD::PEXTRW", SDTypeProfile<1, 2, []>, []>;
def X86pinsrw  : SDNode<"X86ISD::PINSRW", SDTypeProfile<1, 3, []>, []>;

//===----------------------------------------------------------------------===//
// SSE 'Special' Instructions
//===----------------------------------------------------------------------===//

// IMPLICIT_DEF pseudo-instructions.  Each one matches an 'undef' value for
// the named register class and prints only an "#IMPLICIT_DEF" marker.
def IMPLICIT_DEF_VR128 : I<0, Pseudo, (outs VR128:$dst), (ins),
                           "#IMPLICIT_DEF $dst",
                           [(set VR128:$dst, (v4f32 (undef)))]>,
                         Requires<[HasSSE1]>;

// FR32 values are produced by SSE1-gated definitions in this file (e.g.
// FsFLD0SS requires only HasSSE1), so an undef FR32 must be selectable with
// SSE1 alone rather than requiring SSE2.
def IMPLICIT_DEF_FR32  : I<0, Pseudo, (outs FR32:$dst), (ins),
                           "#IMPLICIT_DEF $dst",
                           [(set FR32:$dst, (undef))]>,
                         Requires<[HasSSE1]>;

def IMPLICIT_DEF_FR64  : I<0, Pseudo, (outs FR64:$dst), (ins),
                           "#IMPLICIT_DEF $dst",
                           [(set FR64:$dst, (undef))]>,
                         Requires<[HasSSE2]>;

//===----------------------------------------------------------------------===//
// SSE Complex Patterns
//===----------------------------------------------------------------------===//

// These are 'extloads' from a scalar to the low element of a vector, zeroing
// the top elements.  These are used for the SSE 'ss' and 'sd' instruction
// forms.  Both are matched by the same C++ selector and yield four operands.
def sse_load_f32 : ComplexPattern<v4f32, 4, "SelectScalarSSELoad", [],
                                  [SDNPHasChain]>;
def sse_load_f64 : ComplexPattern<v2f64, 4, "SelectScalarSSELoad", [],
                                  [SDNPHasChain]>;

// Memory operands paired with the patterns above.  They are typed as vectors
// but printed with the f32/f64 memory-operand printers; the four
// sub-operands line up with the four operands of the complex patterns.
def ssmem : Operand<v4f32> {
  let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm);
  let PrintMethod = "printf32mem";
}
def sdmem : Operand<v2f64> {
  let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm);
  let PrintMethod = "printf64mem";
}

//===----------------------------------------------------------------------===//
// SSE pattern fragments
//===----------------------------------------------------------------------===//

// Plain 'load' constrained to a specific 128-bit vector result type.
def loadv4f32 : PatFrag<(ops node:$ptr),
                        (v4f32 (load node:$ptr))>;
def loadv2f64 : PatFrag<(ops node:$ptr),
                        (v2f64 (load node:$ptr))>;
def loadv4i32 : PatFrag<(ops node:$ptr),
                        (v4i32 (load node:$ptr))>;
def loadv2i64 : PatFrag<(ops node:$ptr),
                        (v2i64 (load node:$ptr))>;

// Like 'store', but always requires vector alignment: matches only
// non-truncating, unindexed stores whose alignment is at least 16 bytes.
def alignedstore : PatFrag<(ops node:$val, node:$ptr),
                           (st node:$val, node:$ptr), [{
  StoreSDNode *ST = dyn_cast<StoreSDNode>(N);
  if (!ST)
    return false;
  return !ST->isTruncatingStore() &&
         ST->getAddressingMode() == ISD::UNINDEXED &&
         ST->getAlignment() >= 16;
}]>;

// Like 'load', but always requires vector alignment: matches only
// non-extending, unindexed loads whose alignment is at least 16 bytes.
def alignedload : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
  if (!LD)
    return false;
  return LD->getExtensionType() == ISD::NON_EXTLOAD &&
         LD->getAddressingMode() == ISD::UNINDEXED &&
         LD->getAlignment() >= 16;
}]>;

// 'alignedload' constrained to a specific result type.
def alignedloadfsf32 : PatFrag<(ops node:$ptr),
                               (f32 (alignedload node:$ptr))>;
def alignedloadfsf64 : PatFrag<(ops node:$ptr),
                               (f64 (alignedload node:$ptr))>;
def alignedloadv4f32 : PatFrag<(ops node:$ptr),
                               (v4f32 (alignedload node:$ptr))>;
def alignedloadv2f64 : PatFrag<(ops node:$ptr),
                               (v2f64 (alignedload node:$ptr))>;
def alignedloadv4i32 : PatFrag<(ops node:$ptr),
                               (v4i32 (alignedload node:$ptr))>;
def alignedloadv2i64 : PatFrag<(ops node:$ptr),
                               (v2i64 (alignedload node:$ptr))>;

// Like 'load', but uses special alignment checks suitable for use in
// memory operands in most SSE instructions, which are required to
// be naturally aligned on some targets but not on others.
// FIXME: Actually implement support for targets that don't require the
//        alignment. This probably wants a subtarget predicate.
// Until then the predicate is the same 16-byte check as 'alignedload'.
def memop : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
  if (!LD)
    return false;
  return LD->getExtensionType() == ISD::NON_EXTLOAD &&
         LD->getAddressingMode() == ISD::UNINDEXED &&
         LD->getAlignment() >= 16;
}]>;

// 'memop' constrained to a specific result type.
def memopfsf32 : PatFrag<(ops node:$ptr),
                         (f32 (memop node:$ptr))>;
def memopfsf64 : PatFrag<(ops node:$ptr),
                         (f64 (memop node:$ptr))>;
def memopv4f32 : PatFrag<(ops node:$ptr),
                         (v4f32 (memop node:$ptr))>;
def memopv2f64 : PatFrag<(ops node:$ptr),
                         (v2f64 (memop node:$ptr))>;
def memopv4i32 : PatFrag<(ops node:$ptr),
                         (v4i32 (memop node:$ptr))>;
def memopv2i64 : PatFrag<(ops node:$ptr),
                         (v2i64 (memop node:$ptr))>;

// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
// 16-byte boundary.  'memop64' therefore matches non-extending, unindexed
// loads whose alignment is at least 8 bytes.
def memop64 : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
    return LD->getExtensionType() == ISD::NON_EXTLOAD &&
           LD->getAddressingMode() == ISD::UNINDEXED &&
           LD->getAlignment() >= 8;
  return false;
}]>;

// 64-bit (MMX-sized) vector types: 8-byte alignment is sufficient.
def memopv8i8  : PatFrag<(ops node:$ptr), (v8i8  (memop64 node:$ptr))>;
def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>;
def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>;

// 128-bit vector types: these must use the 16-byte 'memop' check like the
// other 128-bit memop fragments (memopv4f32, memopv2i64, ...).  Using
// 'memop64' here would let an only-8-byte-aligned load be folded into an
// instruction whose 128-bit memory operand needs vector alignment.
def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>;

// 'bitconvert' constrained to a specific 128-bit vector result type.
def bc_v4f32 : PatFrag<(ops node:$in),
                       (v4f32 (bitconvert node:$in))>;
def bc_v2f64 : PatFrag<(ops node:$in),
                       (v2f64 (bitconvert node:$in))>;
def bc_v16i8 : PatFrag<(ops node:$in),
                       (v16i8 (bitconvert node:$in))>;
def bc_v8i16 : PatFrag<(ops node:$in),
                       (v8i16 (bitconvert node:$in))>;
def bc_v4i32 : PatFrag<(ops node:$in),
                       (v4i32 (bitconvert node:$in))>;
def bc_v2i64 : PatFrag<(ops node:$in),
                       (v2i64 (bitconvert node:$in))>;

// Matches an f32 FP immediate that is exactly +0.0.
def fp32imm0 : PatLeaf<(f32 fpimm), [{ return N->isExactlyValue(+0.0); }]>;

// PSxLDQ_imm xform function: imm >> 3.
// NOTE(review): presumably converts a bit count into the byte count taken by
// the PSLLDQ/PSRLDQ instructions -- confirm against the users of this xform.
def PSxLDQ_imm : SDNodeXForm<imm, [{
  return getI32Imm(N->getValue() >> 3);
}]>;

// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
// SHUFP* etc. imm.
def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
  return getI8Imm(X86::getShuffleSHUFImmediate(N));
}]>;

// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
// PSHUFHW imm.
def SHUFFLE_get_pshufhw_imm : SDNodeXForm<build_vector, [{
  return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
}]>;

// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
// PSHUFLW imm.
def SHUFFLE_get_pshuflw_imm : SDNodeXForm<build_vector, [{
  return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
}]>;

// Shuffle-mask leaves.  Each one matches a build_vector mask accepted by the
// named X86:: predicate; where a third argument is given, the matched mask is
// converted to an instruction immediate by that xform.

def SSE_splat_mask : PatLeaf<(build_vector),
                             [{ return X86::isSplatMask(N); }],
                             SHUFFLE_get_shuf_imm>;

def SSE_splat_lo_mask : PatLeaf<(build_vector),
                                [{ return X86::isSplatLoMask(N); }]>;

def MOVHLPS_shuffle_mask : PatLeaf<(build_vector),
                                   [{ return X86::isMOVHLPSMask(N); }]>;

def MOVHLPS_v_undef_shuffle_mask
  : PatLeaf<(build_vector), [{ return X86::isMOVHLPS_v_undef_Mask(N); }]>;

def MOVHP_shuffle_mask : PatLeaf<(build_vector),
                                 [{ return X86::isMOVHPMask(N); }]>;

def MOVLP_shuffle_mask : PatLeaf<(build_vector),
                                 [{ return X86::isMOVLPMask(N); }]>;

def MOVL_shuffle_mask : PatLeaf<(build_vector),
                                [{ return X86::isMOVLMask(N); }]>;

def MOVSHDUP_shuffle_mask : PatLeaf<(build_vector),
                                    [{ return X86::isMOVSHDUPMask(N); }]>;

def MOVSLDUP_shuffle_mask : PatLeaf<(build_vector),
                                    [{ return X86::isMOVSLDUPMask(N); }]>;

def UNPCKL_shuffle_mask : PatLeaf<(build_vector),
                                  [{ return X86::isUNPCKLMask(N); }]>;

def UNPCKH_shuffle_mask : PatLeaf<(build_vector),
                                  [{ return X86::isUNPCKHMask(N); }]>;

def UNPCKL_v_undef_shuffle_mask
  : PatLeaf<(build_vector), [{ return X86::isUNPCKL_v_undef_Mask(N); }]>;

def UNPCKH_v_undef_shuffle_mask
  : PatLeaf<(build_vector), [{ return X86::isUNPCKH_v_undef_Mask(N); }]>;

def PSHUFD_shuffle_mask : PatLeaf<(build_vector),
                                  [{ return X86::isPSHUFDMask(N); }],
                                  SHUFFLE_get_shuf_imm>;

def PSHUFHW_shuffle_mask : PatLeaf<(build_vector),
                                   [{ return X86::isPSHUFHWMask(N); }],
                                   SHUFFLE_get_pshufhw_imm>;

def PSHUFLW_shuffle_mask : PatLeaf<(build_vector),
                                   [{ return X86::isPSHUFLWMask(N); }],
                                   SHUFFLE_get_pshuflw_imm>;

// Note the crossed predicates below: the unary SHUFP mask is checked with the
// PSHUFD predicate, and the binary PSHUFD mask with the SHUFP predicate.
def SHUFP_unary_shuffle_mask : PatLeaf<(build_vector),
                                       [{ return X86::isPSHUFDMask(N); }],
                                       SHUFFLE_get_shuf_imm>;

def SHUFP_shuffle_mask : PatLeaf<(build_vector),
                                 [{ return X86::isSHUFPMask(N); }],
                                 SHUFFLE_get_shuf_imm>;

def PSHUFD_binary_shuffle_mask : PatLeaf<(build_vector),
                                         [{ return X86::isSHUFPMask(N); }],
                                         SHUFFLE_get_shuf_imm>;

//===----------------------------------------------------------------------===//
// SSE scalar FP Instructions
//===----------------------------------------------------------------------===//

// CMOV* - Used to implement the SSE SELECT DAG operation.  Expanded by the
// scheduler into a branch sequence.  One pseudo exists per value type; each
// takes the true value, the false value and an 8-bit condition code.
let usesCustomDAGSchedInserter = 1 in {  // Expanded by the scheduler.
  def CMOV_FR32  : I<0, Pseudo, (outs FR32:$dst),
                     (ins FR32:$t, FR32:$f, i8imm:$cond),
                     "#CMOV_FR32 PSEUDO!",
                     [(set FR32:$dst,
                       (X86cmov FR32:$t, FR32:$f, imm:$cond))]>;

  def CMOV_FR64  : I<0, Pseudo, (outs FR64:$dst),
                     (ins FR64:$t, FR64:$f, i8imm:$cond),
                     "#CMOV_FR64 PSEUDO!",
                     [(set FR64:$dst,
                       (X86cmov FR64:$t, FR64:$f, imm:$cond))]>;

  def CMOV_V4F32 : I<0, Pseudo, (outs VR128:$dst),
                     (ins VR128:$t, VR128:$f, i8imm:$cond),
                     "#CMOV_V4F32 PSEUDO!",
                     [(set VR128:$dst,
                       (v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond)))]>;

  def CMOV_V2F64 : I<0, Pseudo, (outs VR128:$dst),
                     (ins VR128:$t, VR128:$f, i8imm:$cond),
                     "#CMOV_V2F64 PSEUDO!",
                     [(set VR128:$dst,
                       (v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond)))]>;

  def CMOV_V2I64 : I<0, Pseudo, (outs VR128:$dst),
                     (ins VR128:$t, VR128:$f, i8imm:$cond),
                     "#CMOV_V2I64 PSEUDO!",
                     [(set VR128:$dst,
                       (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond)))]>;
}

//===----------------------------------------------------------------------===//
// SSE1 Instructions
//===----------------------------------------------------------------------===//

// Move Instructions

// Register-to-register form; has no selection pattern.
def MOVSSrr : SSI<0x10, MRMSrcReg,
                  (outs FR32:$dst), (ins FR32:$src),
                  "movss\t{$src, $dst|$dst, $src}", []>;

// Scalar f32 load; flagged as a load and as rematerializable.
let isLoad = 1, isReMaterializable = 1 in {
def MOVSSrm : SSI<0x10, MRMSrcMem,
                  (outs FR32:$dst), (ins f32mem:$src),
                  "movss\t{$src, $dst|$dst, $src}",
                  [(set FR32:$dst, (loadf32 addr:$src))]>;
}

// Scalar f32 store.
def MOVSSmr : SSI<0x11, MRMDestMem,
                  (outs), (ins f32mem:$dst, FR32:$src),
                  "movss\t{$src, $dst|$dst, $src}",
                  [(store FR32:$src, addr:$dst)]>;

// Conversion instructions

// f32 -> i32 (fp_to_sint), register and memory source.
def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg,
                      (outs GR32:$dst), (ins FR32:$src),
                      "cvttss2si\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (fp_to_sint FR32:$src))]>;
def CVTTSS2SIrm : SSI<0x2C, MRMSrcMem,
                      (outs GR32:$dst), (ins f32mem:$src),
                      "cvttss2si\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;

// i32 -> f32 (sint_to_fp), register and memory source.
def CVTSI2SSrr  : SSI<0x2A, MRMSrcReg,
                      (outs FR32:$dst), (ins GR32:$src),
                      "cvtsi2ss\t{$src, $dst|$dst, $src}",
                      [(set FR32:$dst, (sint_to_fp GR32:$src))]>;
def CVTSI2SSrm  : SSI<0x2A, MRMSrcMem,
                      (outs FR32:$dst), (ins i32mem:$src),
                      "cvtsi2ss\t{$src, $dst|$dst, $src}",
                      [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;

// Match intrinsics which expect XMM operand(s).
def Int_CVTSS2SIrr : SSI<0x2D, MRMSrcReg,
                         (outs GR32:$dst), (ins VR128:$src),
                         "cvtss2si\t{$src, $dst|$dst, $src}",
                         [(set GR32:$dst,
                           (int_x86_sse_cvtss2si VR128:$src))]>;
def Int_CVTSS2SIrm : SSI<0x2D, MRMSrcMem,
                         (outs GR32:$dst), (ins f32mem:$src),
                         "cvtss2si\t{$src, $dst|$dst, $src}",
                         [(set GR32:$dst,
                           (int_x86_sse_cvtss2si (load addr:$src)))]>;

// Aliases for intrinsics
def Int_CVTTSS2SIrr : SSI<0x2C, MRMSrcReg,
                          (outs GR32:$dst), (ins VR128:$src),
                          "cvttss2si\t{$src, $dst|$dst, $src}",
                          [(set GR32:$dst,
                            (int_x86_sse_cvttss2si VR128:$src))]>;
def Int_CVTTSS2SIrm : SSI<0x2C, MRMSrcMem,
                          (outs GR32:$dst), (ins f32mem:$src),
                          "cvttss2si\t{$src, $dst|$dst, $src}",
                          [(set GR32:$dst,
                            (int_x86_sse_cvttss2si (load addr:$src)))]>;

// The cvtsi2ss intrinsic takes the destination vector as its first source,
// so these are two-address instructions.
let isTwoAddress = 1 in {
  def Int_CVTSI2SSrr : SSI<0x2A, MRMSrcReg,
                           (outs VR128:$dst),
                           (ins VR128:$src1, GR32:$src2),
                           "cvtsi2ss\t{$src2, $dst|$dst, $src2}",
                           [(set VR128:$dst,
                             (int_x86_sse_cvtsi2ss VR128:$src1,
                                                   GR32:$src2))]>;
  def Int_CVTSI2SSrm : SSI<0x2A, MRMSrcMem,
                           (outs VR128:$dst),
                           (ins VR128:$src1, i32mem:$src2),
                           "cvtsi2ss\t{$src2, $dst|$dst, $src2}",
                           [(set VR128:$dst,
                             (int_x86_sse_cvtsi2ss VR128:$src1,
                                                   (loadi32 addr:$src2)))]>;
}

// Comparison instructions

// Scalar compare with a condition-code operand spliced into the mnemonic.
// Two-address; neither form has a selection pattern.
let isTwoAddress = 1 in {
  def CMPSSrr : SSI<0xC2, MRMSrcReg,
                    (outs FR32:$dst),
                    (ins FR32:$src1, FR32:$src, SSECC:$cc),
                    "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
  def CMPSSrm : SSI<0xC2, MRMSrcMem,
                    (outs FR32:$dst),
                    (ins FR32:$src1, f32mem:$src, SSECC:$cc),
                    "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
}

// Scalar compares selected from X86cmp; they have no register result and
// are declared as defining EFLAGS.
let Defs = [EFLAGS] in {
def UCOMISSrr : PSI<0x2E, MRMSrcReg,
                    (outs), (ins FR32:$src1, FR32:$src2),
                    "ucomiss\t{$src2, $src1|$src1, $src2}",
                    [(X86cmp FR32:$src1, FR32:$src2)]>;
def UCOMISSrm : PSI<0x2E, MRMSrcMem,
                    (outs), (ins FR32:$src1, f32mem:$src2),
                    "ucomiss\t{$src2, $src1|$src1, $src2}",
                    [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>;
} // Defs = [EFLAGS]

// Aliases to match intrinsics which expect XMM operand(s).
let isTwoAddress = 1 in {
  def Int_CMPSSrr : SSI<0xC2, MRMSrcReg,
                        (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src, SSECC:$cc),
                        "cmp${cc}ss\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse_cmp_ss VR128:$src1,
                                              VR128:$src, imm:$cc))]>;
  def Int_CMPSSrm : SSI<0xC2, MRMSrcMem,
                        (outs VR128:$dst),
                        (ins VR128:$src1, f32mem:$src, SSECC:$cc),
                        "cmp${cc}ss\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse_cmp_ss VR128:$src1,
                                              (load addr:$src), imm:$cc))]>;
}

// Vector-register forms of the ordered/unordered scalar compares, selected
// from the X86ucomi / X86comi nodes.  All are declared as defining EFLAGS.
let Defs = [EFLAGS] in {
def Int_UCOMISSrr : PSI<0x2E, MRMSrcReg,
                        (outs), (ins VR128:$src1, VR128:$src2),
                        "ucomiss\t{$src2, $src1|$src1, $src2}",
                        [(X86ucomi (v4f32 VR128:$src1), VR128:$src2)]>;
def Int_UCOMISSrm : PSI<0x2E, MRMSrcMem,
                        (outs), (ins VR128:$src1, f128mem:$src2),
                        "ucomiss\t{$src2, $src1|$src1, $src2}",
                        [(X86ucomi (v4f32 VR128:$src1),
                                   (load addr:$src2))]>;

def Int_COMISSrr  : PSI<0x2F, MRMSrcReg,
                        (outs), (ins VR128:$src1, VR128:$src2),
                        "comiss\t{$src2, $src1|$src1, $src2}",
                        [(X86comi (v4f32 VR128:$src1), VR128:$src2)]>;
def Int_COMISSrm  : PSI<0x2F, MRMSrcMem,
                        (outs), (ins VR128:$src1, f128mem:$src2),
                        "comiss\t{$src2, $src1|$src1, $src2}",
                        [(X86comi (v4f32 VR128:$src1),
                                  (load addr:$src2))]>;
} // Defs = [EFLAGS]

// Aliases of packed SSE1 instructions for scalar use. These all have names that
// start with 'Fs'.

// Alias instructions that map fld0 to pxor for sse.
// NOTE(review): this is gated on HasSSE1 but is printed as 'pxor' and carries
// the OpSize prefix -- confirm that this form is available on SSE1-only
// targets, or whether an xorps-based zeroing idiom is needed there.
let isReMaterializable = 1 in {
def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins),
                 "pxor\t$dst, $dst",
                 [(set FR32:$dst, fp32imm0)]>,
               Requires<[HasSSE1]>, TB, OpSize;
}

// Alias instruction to do FR32 reg-to-reg copy using movaps. Upper bits are
// disregarded.
def FsMOVAPSrr : PSI<0x28, MRMSrcReg,
                     (outs FR32:$dst), (ins FR32:$src),
                     "movaps\t{$src, $dst|$dst, $src}", []>;

// Alias instruction to load FR32 from f128mem using movaps. Upper bits are
// disregarded.  Only matches loads that pass the 'alignedload' check.
let isLoad = 1 in {
def FsMOVAPSrm : PSI<0x28, MRMSrcMem,
                     (outs FR32:$dst), (ins f128mem:$src),
                     "movaps\t{$src, $dst|$dst, $src}",
                     [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
}

// Alias bitwise logical operations using SSE logical ops on packed FP values.

// Register-register forms: two-address and commutable.
let isTwoAddress = 1, isCommutable = 1 in {
  def FsANDPSrr : PSI<0x54, MRMSrcReg,
                      (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
                      "andps\t{$src2, $dst|$dst, $src2}",
                      [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>;
  def FsORPSrr  : PSI<0x56, MRMSrcReg,
                      (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
                      "orps\t{$src2, $dst|$dst, $src2}",
                      [(set FR32:$dst, (X86for FR32:$src1, FR32:$src2))]>;
  def FsXORPSrr : PSI<0x57, MRMSrcReg,
                      (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
                      "xorps\t{$src2, $dst|$dst, $src2}",
                      [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
}

// Register-memory forms and the and-not forms: two-address only.
let isTwoAddress = 1 in {
  // The memory operand is a full f128mem, so the folded load goes through
  // the 'memop' alignment check.
  def FsANDPSrm : PSI<0x54, MRMSrcMem,
                      (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
                      "andps\t{$src2, $dst|$dst, $src2}",
                      [(set FR32:$dst,
                        (X86fand FR32:$src1, (memopfsf32 addr:$src2)))]>;
  def FsORPSrm  : PSI<0x56, MRMSrcMem,
                      (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
                      "orps\t{$src2, $dst|$dst, $src2}",
                      [(set FR32:$dst,
                        (X86for FR32:$src1, (memopfsf32 addr:$src2)))]>;
  def FsXORPSrm : PSI<0x57, MRMSrcMem,
                      (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
                      "xorps\t{$src2, $dst|$dst, $src2}",
                      [(set FR32:$dst,
                        (X86fxor FR32:$src1, (memopfsf32 addr:$src2)))]>;

  // And-not has no selection pattern in either form.
  def FsANDNPSrr : PSI<0x55, MRMSrcReg,
                       (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
                       "andnps\t{$src2, $dst|$dst, $src2}", []>;
  def FsANDNPSrm : PSI<0x55, MRMSrcMem,
                       (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
                       "andnps\t{$src2, $dst|$dst, $src2}", []>;
}

/// basic_sse1_fp_binop_rm - SSE1 binops come in both scalar and vector forms.
///
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation.  This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a scalar)
/// and leaves the top elements undefined.
///
/// These three forms can each be reg+reg or reg+mem, so there are a total of
/// six "instructions".
///
/// Parameters: opc is the opcode byte, OpcodeStr the mnemonic stem (suffixed
/// with "ss" or "ps"), OpNode the DAG node for the scalar and vector forms,
/// F32Int the scalar intrinsic, and Commutable sets isCommutable on the
/// reg+reg forms.
///
let isTwoAddress = 1 in {
multiclass basic_sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, Intrinsic F32Int,
                                  bit Commutable = 0> {
  // Scalar operation, reg+reg.
  def SSrr : SSI<opc, MRMSrcReg,
                 (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
                 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                 [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> {
    let isCommutable = Commutable;
  }

  // Scalar operation, reg+mem.
  def SSrm : SSI<opc, MRMSrcMem,
                 (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
                 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                 [(set FR32:$dst,
                   (OpNode FR32:$src1, (load addr:$src2)))]>;

  // Vector operation, reg+reg.
  def PSrr : PSI<opc, MRMSrcReg,
                 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                 !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst,
                   (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
    let isCommutable = Commutable;
  }

  // Vector operation, reg+mem.
  def PSrm : PSI<opc, MRMSrcMem,
                 (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                 !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst,
                   (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;

  // Intrinsic operation, reg+reg.
  def SSrr_Int : SSI<opc, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                     !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (F32Int VR128:$src1, VR128:$src2))]> {
    let isCommutable = Commutable;
  }

  // Intrinsic operation, reg+mem.
  def SSrm_Int : SSI<opc, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
                     !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (F32Int VR128:$src1, sse_load_f32:$src2))]>;
}
}

// Arithmetic instructions.  add and mul are instantiated as commutable.
defm ADD : basic_sse1_fp_binop_rm<0x58, "add", fadd, int_x86_sse_add_ss, 1>;
defm MUL : basic_sse1_fp_binop_rm<0x59, "mul", fmul, int_x86_sse_mul_ss, 1>;
defm SUB : basic_sse1_fp_binop_rm<0x5C, "sub", fsub, int_x86_sse_sub_ss>;
defm DIV : basic_sse1_fp_binop_rm<0x5E, "div", fdiv, int_x86_sse_div_ss>;

517/// sse1_fp_binop_rm - Other SSE1 binops
518///
519/// This multiclass is like basic_sse1_fp_binop_rm, with the addition of
520/// instructions for a full-vector intrinsic form. Operations that map
521/// onto C operators don't use this form since they just use the plain
522/// vector form instead of having a separate vector intrinsic form.
523///
524/// This provides a total of eight "instructions".
525///
526let isTwoAddress = 1 in {
527multiclass sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
528 SDNode OpNode,
529 Intrinsic F32Int,
530 Intrinsic V4F32Int,
531 bit Commutable = 0> {
532
533 // Scalar operation, reg+reg.
Evan Chengb783fa32007-07-19 01:14:50 +0000534 def SSrr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +0000535 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000536 [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> {
537 let isCommutable = Commutable;
538 }
539
540 // Scalar operation, reg+mem.
Evan Chengb783fa32007-07-19 01:14:50 +0000541 def SSrm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +0000542 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000543 [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
544
545 // Vector operation, reg+reg.
Evan Chengb783fa32007-07-19 01:14:50 +0000546 def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +0000547 !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000548 [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
549 let isCommutable = Commutable;
550 }
551
552 // Vector operation, reg+mem.
Evan Chengb783fa32007-07-19 01:14:50 +0000553 def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +0000554 !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
Dan Gohman4a4f1512007-07-18 20:23:34 +0000555 [(set VR128:$dst, (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000556
557 // Intrinsic operation, reg+reg.
Evan Chengb783fa32007-07-19 01:14:50 +0000558 def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +0000559 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000560 [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]> {
561 let isCommutable = Commutable;
562 }
563
564 // Intrinsic operation, reg+mem.
Evan Chengb783fa32007-07-19 01:14:50 +0000565 def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +0000566 !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000567 [(set VR128:$dst, (F32Int VR128:$src1,
568 sse_load_f32:$src2))]>;
569
570 // Vector intrinsic operation, reg+reg.
Evan Chengb783fa32007-07-19 01:14:50 +0000571 def PSrr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +0000572 !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000573 [(set VR128:$dst, (V4F32Int VR128:$src1, VR128:$src2))]> {
574 let isCommutable = Commutable;
575 }
576
577 // Vector intrinsic operation, reg+mem.
Dan Gohmanc747be52007-08-02 21:06:40 +0000578 def PSrm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +0000579 !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000580 [(set VR128:$dst, (V4F32Int VR128:$src1, (load addr:$src2)))]>;
581}
582}
583
// Single-precision max and min. Both instantiate sse1_fp_binop_rm with the
// target-specific X86fmax / X86fmin nodes and the corresponding scalar and
// packed SSE intrinsics.
defm MAX : sse1_fp_binop_rm<0x5F, "max", X86fmax,
                            int_x86_sse_max_ss,
                            int_x86_sse_max_ps>;
defm MIN : sse1_fp_binop_rm<0x5D, "min", X86fmin,
                            int_x86_sse_min_ss,
                            int_x86_sse_min_ps>;
588
589//===----------------------------------------------------------------------===//
590// SSE packed FP Instructions
591
592// Move Instructions
// MOVAPS: register-to-register copy (no selection pattern), aligned v4f32
// load (flagged as a rematerializable load), and aligned v4f32 store.
def MOVAPSrr : PSI<0x28, MRMSrcReg,
                   (outs VR128:$dst), (ins VR128:$src),
                   "movaps\t{$src, $dst|$dst, $src}", []>;

let isLoad = 1, isReMaterializable = 1 in
def MOVAPSrm : PSI<0x28, MRMSrcMem,
                   (outs VR128:$dst), (ins f128mem:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (alignedloadv4f32 addr:$src))]>;

def MOVAPSmr : PSI<0x29, MRMDestMem,
                   (outs), (ins f128mem:$dst, VR128:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000603
// MOVUPS: register-to-register copy (no selection pattern), v4f32 load
// matched from loadv4f32, and v4f32 store matched from a plain store.
def MOVUPSrr : PSI<0x10, MRMSrcReg,
                   (outs VR128:$dst), (ins VR128:$src),
                   "movups\t{$src, $dst|$dst, $src}", []>;

let isLoad = 1 in
def MOVUPSrm : PSI<0x10, MRMSrcMem,
                   (outs VR128:$dst), (ins f128mem:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (loadv4f32 addr:$src))]>;

def MOVUPSmr : PSI<0x11, MRMDestMem,
                   (outs), (ins f128mem:$dst, VR128:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)]>;
613
// MOVUPS load and store selected from the loadu_ps / storeu_ps intrinsics
// rather than from generic load/store nodes.
let isLoad = 1 in
def MOVUPSrm_Int : PSI<0x10, MRMSrcMem,
                       (outs VR128:$dst), (ins f128mem:$src),
                       "movups\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>;

def MOVUPSmr_Int : PSI<0x11, MRMDestMem,
                       (outs), (ins f128mem:$dst, VR128:$src),
                       "movups\t{$src, $dst|$dst, $src}",
                       [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000622
// MOVLPS / MOVHPS from memory: shuffle $src1 with an f64 loaded from $src2
// (scalar_to_vector, then bitcast to v4f32) using the MOVLP / MOVHP shuffle
// masks. Both are two-address and carry AddedComplexity = 20.
let isTwoAddress = 1, AddedComplexity = 20 in {
  def MOVLPSrm : PSI<0x12, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                     "movlps\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (v4f32 (vector_shuffle
                               VR128:$src1,
                               (bc_v4f32 (v2f64 (scalar_to_vector
                                                 (loadf64 addr:$src2)))),
                               MOVLP_shuffle_mask)))]>;

  def MOVHPSrm : PSI<0x16, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                     "movhps\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (v4f32 (vector_shuffle
                               VR128:$src1,
                               (bc_v4f32 (v2f64 (scalar_to_vector
                                                 (loadf64 addr:$src2)))),
                               MOVHP_shuffle_mask)))]>;
} // isTwoAddress, AddedComplexity
641
// MOVLPS store: writes element 0 of the source, viewed as v2f64, to memory
// as an f64.
def MOVLPSmr : PSI<0x13, MRMDestMem,
                   (outs), (ins f64mem:$dst, VR128:$src),
                   "movlps\t{$src, $dst|$dst, $src}",
                   [(store
                     (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
                                          (iPTR 0))),
                     addr:$dst)]>;
646
// MOVHPS store. Extracting element 1 of a v2f64 is always custom lowered to
// an unpack-high-to-low followed by an extract of element 0, so the pattern
// below matches that shuffle + extract feeding an f64 store (and the
// non-store version isn't too horrible).
def MOVHPSmr : PSI<0x17, MRMDestMem,
                   (outs), (ins f64mem:$dst, VR128:$src),
                   "movhps\t{$src, $dst|$dst, $src}",
                   [(store
                     (f64 (vector_extract
                           (v2f64 (vector_shuffle
                                   (bc_v2f64 (v4f32 VR128:$src)),
                                   (undef),
                                   UNPCKH_shuffle_mask)),
                           (iPTR 0))),
                     addr:$dst)]>;
656
// MOVLHPS / MOVHLPS register forms, matched from v4f32 vector_shuffle with
// the MOVHP and MOVHLPS shuffle masks respectively. Both are two-address
// and carry AddedComplexity = 15.
let isTwoAddress = 1, AddedComplexity = 15 in {
  def MOVLHPSrr : PSI<0x16, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "movlhps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
                                               MOVHP_shuffle_mask)))]>;

  def MOVHLPSrr : PSI<0x12, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "movhlps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
                                               MOVHLPS_shuffle_mask)))]>;
} // isTwoAddress, AddedComplexity
672
673
674
675// Arithmetic
676
/// sse1_fp_unop_rm - SSE1 unops come in both scalar and vector forms.
///
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation.  This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a
/// scalar) and leaves the top elements undefined.
///
/// And, we have a special variant form for a full-vector intrinsic form.
///
/// These four forms can each have a reg or a mem operand, so there are a
/// total of eight "instructions":
///   SSr / SSm          - scalar OpNode on FR32.
///   PSr / PSm          - v4f32 OpNode on VR128.
///   SSr_Int / SSm_Int  - scalar intrinsic F32Int on VR128.
///   PSr_Int / PSm_Int  - full-vector intrinsic V4F32Int on VR128.
///
/// Parameters:
///   opc        - opcode byte shared by all eight records.
///   OpcodeStr  - mnemonic stem; "ss" or "ps" is appended.
///   OpNode     - DAG node matched by the non-intrinsic forms.
///   F32Int     - intrinsic matched by the scalar *_Int forms.
///   V4F32Int   - intrinsic matched by the vector *_Int forms.
///   Commutable - forwarded to isCommutable on the register forms.
///
multiclass sse1_fp_unop_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode,
                           Intrinsic F32Int,
                           Intrinsic V4F32Int,
                           bit Commutable = 0> {
  // Scalar operation, reg.
  def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                [(set FR32:$dst, (OpNode FR32:$src))]> {
    let isCommutable = Commutable;
  }

  // Scalar operation, mem.
  def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                [(set FR32:$dst, (OpNode (load addr:$src)))]>;

  // Vector operation, reg.
  def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]> {
    let isCommutable = Commutable;
  }

  // Vector operation, mem.
  def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>;

  // Intrinsic operation, reg.
  def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F32Int VR128:$src))]> {
    let isCommutable = Commutable;
  }

  // Intrinsic operation, mem.
  def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
                    !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;

  // Vector intrinsic operation, reg
  def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (V4F32Int VR128:$src))]> {
    let isCommutable = Commutable;
  }

  // Vector intrinsic operation, mem
  // Fold the memory operand through memopv4f32, exactly as PSm does, instead
  // of a bare `load`, so the intrinsic form never folds a load that the plain
  // vector form would refuse to fold.
  // NOTE(review): memopv4f32 is presumed to restrict folding to loads that
  // are safe as a 128-bit SSE memory operand - confirm against its PatFrag.
  def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
}
742
// Square root, instantiated with the generic fsqrt node.
defm SQRT  : sse1_fp_unop_rm<0x51, "sqrt", fsqrt,
                             int_x86_sse_sqrt_ss,
                             int_x86_sse_sqrt_ps>;

// Reciprocal square root and reciprocal approximations.  Note that these
// typically require refinement in order to obtain suitable precision.
defm RSQRT : sse1_fp_unop_rm<0x52, "rsqrt", X86frsqrt,
                             int_x86_sse_rsqrt_ss,
                             int_x86_sse_rsqrt_ps>;
defm RCP   : sse1_fp_unop_rm<0x53, "rcp", X86frcp,
                             int_x86_sse_rcp_ss,
                             int_x86_sse_rcp_ps>;
753
// Logical
// Bitwise AND / OR / XOR / AND-NOT on VR128, matched as v2i64 operations.
// Every record is two-address; only the reg+reg AND, OR and XOR forms are
// marked commutable.
let isTwoAddress = 1 in {
  let isCommutable = 1 in {
    def ANDPSrr : PSI<0x54, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "andps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v2i64 (and VR128:$src1, VR128:$src2)))]>;
    def ORPSrr  : PSI<0x56, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "orps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v2i64 (or VR128:$src1, VR128:$src2)))]>;
    def XORPSrr : PSI<0x57, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "xorps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (v2i64 (xor VR128:$src1, VR128:$src2)))]>;
  }

  // reg+mem forms: $src1 is bitcast from v4f32 to v2i64 and the memory
  // operand is folded through memopv2i64.
  def ANDPSrm : PSI<0x54, MRMSrcMem,
                    (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                    "andps\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (and (bc_v2i64 (v4f32 VR128:$src1)),
                           (memopv2i64 addr:$src2)))]>;
  def ORPSrm  : PSI<0x56, MRMSrcMem,
                    (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                    "orps\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (or (bc_v2i64 (v4f32 VR128:$src1)),
                          (memopv2i64 addr:$src2)))]>;
  def XORPSrm : PSI<0x57, MRMSrcMem,
                    (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                    "xorps\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (xor (bc_v2i64 (v4f32 VR128:$src1)),
                           (memopv2i64 addr:$src2)))]>;

  // AND-NOT: the pattern is (and (xor $src1, all-ones), $src2), i.e. the
  // first operand is complemented before the AND.
  def ANDNPSrr : PSI<0x55, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                     "andnps\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (v2i64 (and (xor VR128:$src1,
                                        (bc_v2i64 (v4i32 immAllOnesV))),
                                   VR128:$src2)))]>;
  def ANDNPSrm : PSI<0x55, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                     "andnps\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)),
                                        (bc_v2i64 (v4i32 immAllOnesV))),
                                   (memopv2i64 addr:$src2))))]>;
}
804
// Packed single-precision compare, selected from the cmp_ps intrinsic.
// $cc is an SSECC operand that is spliced into the mnemonic ("cmp${cc}ps")
// and matched as the intrinsic's immediate. Both forms are two-address.
let isTwoAddress = 1 in {
  def CMPPSrri : PSIi8<0xC2, MRMSrcReg,
                    (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
                    "cmp${cc}ps\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
                                       VR128:$src, imm:$cc))]>;
  // The 128-bit memory operand is folded through memopv4f32, like the other
  // packed f128mem forms in this file (e.g. SHUFPSrmi), rather than through
  // a bare `load`.
  // NOTE(review): memopv4f32 is presumed to restrict folding to loads that
  // are safe as a 128-bit SSE memory operand - confirm against its PatFrag.
  def CMPPSrmi : PSIi8<0xC2, MRMSrcMem,
                    (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
                    "cmp${cc}ps\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
                                       (memopv4f32 addr:$src), imm:$cc))]>;
}
817
// Shuffle and unpack instructions
// All records are two-address and are matched from v4f32 vector_shuffle
// nodes; the reg+mem forms fold their second operand through memopv4f32.
let isTwoAddress = 1 in {
  // SHUFPS: the shuffle mask is carried as the immediate $src3.
  let isConvertibleToThreeAddress = 1 in // Convert to pshufd
    def SHUFPSrri : PSIi8<0xC6, MRMSrcReg,
                          (outs VR128:$dst),
                          (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
                          "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                          [(set VR128:$dst,
                            (v4f32 (vector_shuffle
                                    VR128:$src1, VR128:$src2,
                                    SHUFP_shuffle_mask:$src3)))]>;
  def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem,
                        (outs VR128:$dst),
                        (ins VR128:$src1, f128mem:$src2, i32i8imm:$src3),
                        "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                        [(set VR128:$dst,
                          (v4f32 (vector_shuffle
                                  VR128:$src1, (memopv4f32 addr:$src2),
                                  SHUFP_shuffle_mask:$src3)))]>;

  // UNPCKHPS / UNPCKLPS carry AddedComplexity = 10.
  let AddedComplexity = 10 in {
    def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                         "unpckhps\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v4f32 (vector_shuffle
                                   VR128:$src1, VR128:$src2,
                                   UNPCKH_shuffle_mask)))]>;
    def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
                         (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                         "unpckhps\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v4f32 (vector_shuffle
                                   VR128:$src1, (memopv4f32 addr:$src2),
                                   UNPCKH_shuffle_mask)))]>;

    def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                         "unpcklps\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v4f32 (vector_shuffle
                                   VR128:$src1, VR128:$src2,
                                   UNPCKL_shuffle_mask)))]>;
    def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
                         (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                         "unpcklps\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (v4f32 (vector_shuffle
                                   VR128:$src1, (memopv4f32 addr:$src2),
                                   UNPCKL_shuffle_mask)))]>;
  } // AddedComplexity
} // isTwoAddress
870
// Mask creation
// MOVMSKPS / MOVMSKPD copy the sign-bit mask of a VR128 into a GR32 and are
// selected from the movmsk_ps / movmsk_pd intrinsics.
def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
                     "movmskps\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (int_x86_sse_movmsk_ps VR128:$src))]>;
// movmskpd is a packed-double (SSE2) instruction, so it is declared with PDI
// like the other packed-double records in this file (e.g. UCOMISDrr). Using
// PSI here would give it the same encoding as MOVMSKPSrr (same opcode 0x50,
// same class).
def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
                     "movmskpd\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (int_x86_sse2_movmsk_pd VR128:$src))]>;
878
// Prefetching loads.  All four share opcode 0x18 and differ only in the
// MRM<n>m form; none has a selection pattern.
// TODO: no intrinsics for these?
def PREFETCHT0  : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
                      "prefetcht0\t$src", []>;
def PREFETCHT1  : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
                      "prefetcht1\t$src", []>;
def PREFETCHT2  : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
                      "prefetcht2\t$src", []>;
def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
                      "prefetchnta\t$src", []>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000885
// Non-temporal store of a VR128, selected from the movnt_ps intrinsic.
def MOVNTPSmr : PSI<0x2B, MRMDestMem,
                    (outs), (ins i128mem:$dst, VR128:$src),
                    "movntps\t{$src, $dst|$dst, $src}",
                    [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
890
// Load, store, and memory fence
def SFENCE : PSI<0xAE, MRM7m, (outs), (ins),
                 "sfence", [(int_x86_sse_sfence)]>;

// MXCSR register: load from / store to a 32-bit memory location, selected
// from the ldmxcsr / stmxcsr intrinsics.
def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
                  "ldmxcsr\t$src",
                  [(int_x86_sse_ldmxcsr addr:$src)]>;
def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
                  "stmxcsr\t$dst",
                  [(int_x86_sse_stmxcsr addr:$dst)]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000899
// Alias instruction that materializes an all-zeros v4f32 vector by xor-ing
// the destination with itself; marked rematerializable.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
let isReMaterializable = 1 in
def V_SET0 : PSI<0x57, MRMInitReg,
                 (outs VR128:$dst), (ins),
                 "xorps\t$dst, $dst",
                 [(set VR128:$dst, (v4f32 immAllZerosV))]>;
906
// FR32 to 128-bit vector conversion: scalar_to_vector of an FR32 register
// or of an f32 loaded from memory.
def MOVSS2PSrr : SSI<0x10, MRMSrcReg,
                     (outs VR128:$dst), (ins FR32:$src),
                     "movss\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v4f32 (scalar_to_vector FR32:$src)))]>;
def MOVSS2PSrm : SSI<0x10, MRMSrcMem,
                     (outs VR128:$dst), (ins f32mem:$src),
                     "movss\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
916
// Extract element 0 of a v4f32 into an FR32 register, or store it to memory.
//
// FIXME: we may not be able to eliminate this movss with coalescing, because
// the src and dest register classes are different.  We really want to write
// this pattern like this:
// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
//           (f32 FR32:$src)>;
def MOVPS2SSrr : SSI<0x10, MRMSrcReg,
                     (outs FR32:$dst), (ins VR128:$src),
                     "movss\t{$src, $dst|$dst, $src}",
                     [(set FR32:$dst,
                       (vector_extract (v4f32 VR128:$src), (iPTR 0)))]>;
def MOVPS2SSmr : SSI<0x11, MRMDestMem,
                     (outs), (ins f32mem:$dst, VR128:$src),
                     "movss\t{$src, $dst|$dst, $src}",
                     [(store
                       (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
                       addr:$dst)]>;
930
931
// Move to lower bits of a VR128, leaving upper bits alone.
// Three operand (but two address) aliases.
let isTwoAddress = 1 in {
  // FR32 source; no selection pattern.
  def MOVLSS2PSrr : SSI<0x10, MRMSrcReg,
                        (outs VR128:$dst), (ins VR128:$src1, FR32:$src2),
                        "movss\t{$src2, $dst|$dst, $src2}", []>;

  // VR128 source, matched from a v4f32 shuffle with MOVL_shuffle_mask.
  let AddedComplexity = 15 in
    def MOVLPSrr : SSI<0x10, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                       "movss\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                         (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
                                                MOVL_shuffle_mask)))]>;
}
947
// Move to the lower bits of a VR128 while zeroing the upper bits.
// Loading from memory automatically zeroes the upper bits, so the pattern
// is a shuffle of an all-zeros vector with the loaded scalar.
let AddedComplexity = 20 in
def MOVZSS2PSrm : SSI<0x10, MRMSrcMem,
                      (outs VR128:$dst), (ins f32mem:$src),
                      "movss\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v4f32 (vector_shuffle
                                immAllZerosV,
                                (v4f32 (scalar_to_vector
                                        (loadf32 addr:$src))),
                                MOVL_shuffle_mask)))]>;
956
957
958//===----------------------------------------------------------------------===//
959// SSE2 Instructions
960//===----------------------------------------------------------------------===//
961
Dan Gohmanf17a25c2007-07-18 16:29:46 +0000962// Move Instructions
// MOVSD on FR64: register copy (no selection pattern), f64 load (flagged as
// a rematerializable load), and f64 store.
def MOVSDrr : SDI<0x10, MRMSrcReg,
                  (outs FR64:$dst), (ins FR64:$src),
                  "movsd\t{$src, $dst|$dst, $src}", []>;

let isLoad = 1, isReMaterializable = 1 in
def MOVSDrm : SDI<0x10, MRMSrcMem,
                  (outs FR64:$dst), (ins f64mem:$src),
                  "movsd\t{$src, $dst|$dst, $src}",
                  [(set FR64:$dst, (loadf64 addr:$src))]>;

def MOVSDmr : SDI<0x11, MRMDestMem,
                  (outs), (ins f64mem:$dst, FR64:$src),
                  "movsd\t{$src, $dst|$dst, $src}",
                  [(store FR64:$src, addr:$dst)]>;
972
// Conversion instructions

// f64 -> i32, matched from fp_to_sint.
def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg,
                      (outs GR32:$dst), (ins FR64:$src),
                      "cvttsd2si\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (fp_to_sint FR64:$src))]>;
def CVTTSD2SIrm : SDI<0x2C, MRMSrcMem,
                      (outs GR32:$dst), (ins f64mem:$src),
                      "cvttsd2si\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;

// f64 -> f32, matched from fround.
def CVTSD2SSrr  : SDI<0x5A, MRMSrcReg,
                      (outs FR32:$dst), (ins FR64:$src),
                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
                      [(set FR32:$dst, (fround FR64:$src))]>;
def CVTSD2SSrm  : SDI<0x5A, MRMSrcMem,
                      (outs FR32:$dst), (ins f64mem:$src),
                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
                      [(set FR32:$dst, (fround (loadf64 addr:$src)))]>;

// i32 -> f64, matched from sint_to_fp.
def CVTSI2SDrr  : SDI<0x2A, MRMSrcReg,
                      (outs FR64:$dst), (ins GR32:$src),
                      "cvtsi2sd\t{$src, $dst|$dst, $src}",
                      [(set FR64:$dst, (sint_to_fp GR32:$src))]>;
def CVTSI2SDrm  : SDI<0x2A, MRMSrcMem,
                      (outs FR64:$dst), (ins i32mem:$src),
                      "cvtsi2sd\t{$src, $dst|$dst, $src}",
                      [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
992
// SSE2 instructions with XS prefix
// f32 -> f64 extension. These are declared with the generic I class plus an
// explicit XS prefix and a HasSSE2 requirement; the memory form is matched
// from an extending f32 load.
def CVTSS2SDrr : I<0x5A, MRMSrcReg,
                   (outs FR64:$dst), (ins FR32:$src),
                   "cvtss2sd\t{$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (fextend FR32:$src))]>,
                 XS, Requires<[HasSSE2]>;
def CVTSS2SDrm : I<0x5A, MRMSrcMem,
                   (outs FR64:$dst), (ins f32mem:$src),
                   "cvtss2sd\t{$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (extloadf32 addr:$src))]>,
                 XS, Requires<[HasSSE2]>;
1002
// Match intrinsics which expect XMM operand(s).
def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg,
                         (outs GR32:$dst), (ins VR128:$src),
                         "cvtsd2si\t{$src, $dst|$dst, $src}",
                         [(set GR32:$dst,
                           (int_x86_sse2_cvtsd2si VR128:$src))]>;
def Int_CVTSD2SIrm : SDI<0x2D, MRMSrcMem,
                         (outs GR32:$dst), (ins f128mem:$src),
                         "cvtsd2si\t{$src, $dst|$dst, $src}",
                         [(set GR32:$dst,
                           (int_x86_sse2_cvtsd2si (load addr:$src)))]>;

// Aliases for intrinsics: same opcode (0x2C) as CVTTSD2SI, but taking a
// VR128 / f128mem operand and matched from the cvttsd2si intrinsic.
def Int_CVTTSD2SIrr : SDI<0x2C, MRMSrcReg,
                          (outs GR32:$dst), (ins VR128:$src),
                          "cvttsd2si\t{$src, $dst|$dst, $src}",
                          [(set GR32:$dst,
                            (int_x86_sse2_cvttsd2si VR128:$src))]>;
def Int_CVTTSD2SIrm : SDI<0x2C, MRMSrcMem,
                          (outs GR32:$dst), (ins f128mem:$src),
                          "cvttsd2si\t{$src, $dst|$dst, $src}",
                          [(set GR32:$dst,
                            (int_x86_sse2_cvttsd2si (load addr:$src)))]>;
1021
// Comparison instructions
// Scalar double compare on FR64. $cc is an SSECC operand spliced into the
// mnemonic; neither form has a selection pattern. Both are two-address.
let isTwoAddress = 1 in {
  def CMPSDrr : SDI<0xC2, MRMSrcReg,
                    (outs FR64:$dst),
                    (ins FR64:$src1, FR64:$src, SSECC:$cc),
                    "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
  def CMPSDrm : SDI<0xC2, MRMSrcMem,
                    (outs FR64:$dst),
                    (ins FR64:$src1, f64mem:$src, SSECC:$cc),
                    "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
}
1031
// UCOMISD on FR64, matched from X86cmp. No register outputs are declared.
def UCOMISDrr : PDI<0x2E, MRMSrcReg,
                    (outs), (ins FR64:$src1, FR64:$src2),
                    "ucomisd\t{$src2, $src1|$src1, $src2}",
                    [(X86cmp FR64:$src1, FR64:$src2)]>;
def UCOMISDrm : PDI<0x2E, MRMSrcMem,
                    (outs), (ins FR64:$src1, f64mem:$src2),
                    "ucomisd\t{$src2, $src1|$src1, $src2}",
                    [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>;
1038
// Aliases to match intrinsics which expect XMM operand(s).
// Scalar double compare on VR128, selected from the cmp_sd intrinsic with
// $cc as its immediate. Both forms are two-address.
let isTwoAddress = 1 in {
  def Int_CMPSDrr : SDI<0xC2, MRMSrcReg,
                        (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src, SSECC:$cc),
                        "cmp${cc}sd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse2_cmp_sd VR128:$src1,
                                               VR128:$src, imm:$cc))]>;
  def Int_CMPSDrm : SDI<0xC2, MRMSrcMem,
                        (outs VR128:$dst),
                        (ins VR128:$src1, f64mem:$src, SSECC:$cc),
                        "cmp${cc}sd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse2_cmp_sd VR128:$src1,
                                               (load addr:$src), imm:$cc))]>;
}
1052
// UCOMISD / COMISD on VR128 operands typed as v2f64, matched from the
// X86ucomi / X86comi nodes. No register outputs are declared.
def Int_UCOMISDrr : PDI<0x2E, MRMSrcReg,
                        (outs), (ins VR128:$src1, VR128:$src2),
                        "ucomisd\t{$src2, $src1|$src1, $src2}",
                        [(X86ucomi (v2f64 VR128:$src1),
                                   (v2f64 VR128:$src2))]>;
def Int_UCOMISDrm : PDI<0x2E, MRMSrcMem,
                        (outs), (ins VR128:$src1, f128mem:$src2),
                        "ucomisd\t{$src2, $src1|$src1, $src2}",
                        [(X86ucomi (v2f64 VR128:$src1),
                                   (load addr:$src2))]>;

def Int_COMISDrr : PDI<0x2F, MRMSrcReg,
                       (outs), (ins VR128:$src1, VR128:$src2),
                       "comisd\t{$src2, $src1|$src1, $src2}",
                       [(X86comi (v2f64 VR128:$src1),
                                 (v2f64 VR128:$src2))]>;
def Int_COMISDrm : PDI<0x2F, MRMSrcMem,
                       (outs), (ins VR128:$src1, f128mem:$src2),
                       "comisd\t{$src2, $src1|$src1, $src2}",
                       [(X86comi (v2f64 VR128:$src1),
                                 (load addr:$src2))]>;
1066
1067// Aliases of packed SSE2 instructions for scalar use. These all have names that
1068// start with 'Fs'.
1069
// Alias instruction that maps fld0 to pxor for SSE: materializes fpimm0 in
// an FR64 by xor-ing the destination with itself. Marked rematerializable;
// requires SSE2 and carries the TB and OpSize prefixes.
let isReMaterializable = 1 in
def FsFLD0SD : I<0xEF, MRMInitReg,
                 (outs FR64:$dst), (ins),
                 "pxor\t$dst, $dst",
                 [(set FR64:$dst, fpimm0)]>,
               Requires<[HasSSE2]>, TB, OpSize;
1075
// Alias instruction that performs an FR64 reg-to-reg copy using movapd.
// Upper bits are disregarded. No selection pattern.
def FsMOVAPDrr : PDI<0x28, MRMSrcReg,
                     (outs FR64:$dst), (ins FR64:$src),
                     "movapd\t{$src, $dst|$dst, $src}", []>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001080
// FsMOVAPDrm - Load an FR64 value from an f128mem operand using movapd.
// Selected from alignedloadfsf64; the upper bits are disregarded.
let isLoad = 1 in
def FsMOVAPDrm : PDI<0x28, MRMSrcMem,
                     (outs FR64:$dst), (ins f128mem:$src),
                     "movapd\t{$src, $dst|$dst, $src}",
                     [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001087
// Scalar (FR64) aliases of the packed-double bitwise logical instructions.
// All of them are two-address; only the reg+reg AND/OR/XOR forms are marked
// commutable. The ANDN aliases carry no selection pattern.
let isTwoAddress = 1 in {
  let isCommutable = 1 in {
    def FsANDPDrr : PDI<0x54, MRMSrcReg,
                        (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
                        "andpd\t{$src2, $dst|$dst, $src2}",
                        [(set FR64:$dst,
                          (X86fand FR64:$src1, FR64:$src2))]>;
    def FsORPDrr  : PDI<0x56, MRMSrcReg,
                        (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
                        "orpd\t{$src2, $dst|$dst, $src2}",
                        [(set FR64:$dst,
                          (X86for FR64:$src1, FR64:$src2))]>;
    def FsXORPDrr : PDI<0x57, MRMSrcReg,
                        (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
                        "xorpd\t{$src2, $dst|$dst, $src2}",
                        [(set FR64:$dst,
                          (X86fxor FR64:$src1, FR64:$src2))]>;
  }

  // Register + memory forms: the second operand is folded via memopfsf64.
  def FsANDPDrm : PDI<0x54, MRMSrcMem,
                      (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
                      "andpd\t{$src2, $dst|$dst, $src2}",
                      [(set FR64:$dst,
                        (X86fand FR64:$src1, (memopfsf64 addr:$src2)))]>;
  def FsORPDrm  : PDI<0x56, MRMSrcMem,
                      (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
                      "orpd\t{$src2, $dst|$dst, $src2}",
                      [(set FR64:$dst,
                        (X86for FR64:$src1, (memopfsf64 addr:$src2)))]>;
  def FsXORPDrm : PDI<0x57, MRMSrcMem,
                      (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
                      "xorpd\t{$src2, $dst|$dst, $src2}",
                      [(set FR64:$dst,
                        (X86fxor FR64:$src1, (memopfsf64 addr:$src2)))]>;

  // And-not aliases; no pattern is attached to either form.
  def FsANDNPDrr : PDI<0x55, MRMSrcReg,
                       (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
                       "andnpd\t{$src2, $dst|$dst, $src2}",
                       []>;
  def FsANDNPDrm : PDI<0x55, MRMSrcMem,
                       (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
                       "andnpd\t{$src2, $dst|$dst, $src2}",
                       []>;
}
1122
/// basic_sse2_fp_binop_rm - Two-address SSE2 floating-point binary operations.
///
/// For one opcode this multiclass produces three flavours:
///   * SD*     - scalar f64 operation on FR64 registers, selected from OpNode;
///   * PD*     - packed v2f64 operation on VR128 registers, selected from
///               OpNode;
///   * SD*_Int - the scalar intrinsic F64Int, which takes whole vectors
///               rather than scalars.
///
/// Each flavour has a reg+reg and a reg+mem variant, giving six records in
/// total. `Commutable` is forwarded to isCommutable on the reg+reg variants
/// only.
///
let isTwoAddress = 1 in {
multiclass basic_sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, Intrinsic F64Int,
                                  bit Commutable = 0> {
  // Scalar f64: register, register.
  def SDrr : SDI<opc, MRMSrcReg,
                 (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
                 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
                 [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> {
    let isCommutable = Commutable;
  }

  // Scalar f64: register, memory.
  def SDrm : SDI<opc, MRMSrcMem,
                 (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2),
                 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
                 [(set FR64:$dst,
                   (OpNode FR64:$src1, (load addr:$src2)))]>;

  // Packed v2f64: register, register.
  def PDrr : PDI<opc, MRMSrcReg,
                 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst,
                   (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
    let isCommutable = Commutable;
  }

  // Packed v2f64: register, memory (folded through memopv2f64).
  def PDrm : PDI<opc, MRMSrcMem,
                 (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst,
                   (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;

  // Scalar intrinsic: register, register.
  def SDrr_Int : SDI<opc, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                     !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (F64Int VR128:$src1, VR128:$src2))]> {
    let isCommutable = Commutable;
  }

  // Scalar intrinsic: register, memory (sdmem operand, sse_load_f64 match).
  def SDrm_Int : SDI<opc, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                     !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (F64Int VR128:$src1, sse_load_f64:$src2))]>;
}
}
1175
// Basic arithmetic. ADD and MUL pass Commutable = 1; SUB and DIV rely on the
// default of 0.
defm ADD : basic_sse2_fp_binop_rm<0x58, "add", fadd,
                                  int_x86_sse2_add_sd, 1>;
defm MUL : basic_sse2_fp_binop_rm<0x59, "mul", fmul,
                                  int_x86_sse2_mul_sd, 1>;
defm SUB : basic_sse2_fp_binop_rm<0x5C, "sub", fsub,
                                  int_x86_sse2_sub_sd>;
defm DIV : basic_sse2_fp_binop_rm<0x5E, "div", fdiv,
                                  int_x86_sse2_div_sd>;
1181
/// sse2_fp_binop_rm - Other SSE2 binops
///
/// This multiclass is like basic_sse2_fp_binop_rm, with the addition of
/// instructions for a full-vector intrinsic form (V2F64Int). Operations that
/// map onto C operators don't use this form since they just use the plain
/// vector form instead of having a separate vector intrinsic form.
///
/// Parameters:
///   opc        - opcode byte shared by every variant.
///   OpcodeStr  - mnemonic stem; "sd" or "pd" is appended per variant.
///   OpNode     - DAG node matched by the plain scalar and vector forms.
///   F64Int     - scalar intrinsic matched by the SD*_Int forms.
///   V2F64Int   - full-vector intrinsic matched by the PD*_Int forms.
///   Commutable - forwarded to isCommutable on the reg+reg variants.
///
/// This provides a total of eight "instructions".
///
let isTwoAddress = 1 in {
multiclass sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
                            SDNode OpNode,
                            Intrinsic F64Int,
                            Intrinsic V2F64Int,
                            bit Commutable = 0> {

  // Scalar operation, reg+reg.
  def SDrr : SDI<opc, MRMSrcReg,
                 (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
                 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
                 [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> {
    let isCommutable = Commutable;
  }

  // Scalar operation, reg+mem.
  def SDrm : SDI<opc, MRMSrcMem,
                 (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2),
                 !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
                 [(set FR64:$dst,
                   (OpNode FR64:$src1, (load addr:$src2)))]>;

  // Vector operation, reg+reg.
  def PDrr : PDI<opc, MRMSrcReg,
                 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst,
                   (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
    let isCommutable = Commutable;
  }

  // Vector operation, reg+mem.
  def PDrm : PDI<opc, MRMSrcMem,
                 (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                 !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
                 [(set VR128:$dst,
                   (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;

  // Intrinsic operation, reg+reg.
  def SDrr_Int : SDI<opc, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                     !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (F64Int VR128:$src1, VR128:$src2))]> {
    let isCommutable = Commutable;
  }

  // Intrinsic operation, reg+mem.
  def SDrm_Int : SDI<opc, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                     !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (F64Int VR128:$src1, sse_load_f64:$src2))]>;

  // Vector intrinsic operation, reg+reg.
  def PDrr_Int : PDI<opc, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                     !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (V2F64Int VR128:$src1, VR128:$src2))]> {
    let isCommutable = Commutable;
  }

  // Vector intrinsic operation, reg+mem.
  // The packed instruction's 128-bit memory operand must be 16-byte aligned,
  // so fold the load through memopv2f64 (as PDrm does) rather than through a
  // plain `load`, which would also match under-aligned addresses.
  def PDrm_Int : PDI<opc, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                     !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
                     [(set VR128:$dst,
                       (V2F64Int VR128:$src1,
                                 (memopv2f64 addr:$src2)))]>;
}
}
1248
// Maximum / minimum. Neither passes Commutable, so the default of 0 applies.
defm MAX : sse2_fp_binop_rm<0x5F, "max", X86fmax,
                            int_x86_sse2_max_sd,
                            int_x86_sse2_max_pd>;
defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin,
                            int_x86_sse2_min_sd,
                            int_x86_sse2_min_pd>;
1253
1254//===----------------------------------------------------------------------===//
1255// SSE packed FP Instructions
1256
1257// Move Instructions
// movapd register copy (no selection pattern) and load. The load form is
// selected from alignedloadv2f64 and is flagged rematerializable.
def MOVAPDrr : PDI<0x28, MRMSrcReg,
                   (outs VR128:$dst), (ins VR128:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   []>;
let isLoad = 1, isReMaterializable = 1 in
def MOVAPDrm : PDI<0x28, MRMSrcMem,
                   (outs VR128:$dst), (ins f128mem:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (alignedloadv2f64 addr:$src))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001264
// movapd store, selected from alignedstore of a v2f64 value.
def MOVAPDmr : PDI<0x29, MRMDestMem,
                   (outs), (ins f128mem:$dst, VR128:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001268
// movupd register copy (no pattern), load (loadv2f64) and store (store of a
// v2f64 value).
def MOVUPDrr : PDI<0x10, MRMSrcReg,
                   (outs VR128:$dst), (ins VR128:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   []>;
let isLoad = 1 in
def MOVUPDrm : PDI<0x10, MRMSrcMem,
                   (outs VR128:$dst), (ins f128mem:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (loadv2f64 addr:$src))]>;
def MOVUPDmr : PDI<0x11, MRMDestMem,
                   (outs), (ins f128mem:$dst, VR128:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(store (v2f64 VR128:$src), addr:$dst)]>;
1278
// movupd load/store selected from the int_x86_sse2_loadu_pd and
// int_x86_sse2_storeu_pd intrinsics.
def MOVUPDrm_Int : PDI<0x10, MRMSrcMem,
                       (outs VR128:$dst), (ins f128mem:$src),
                       "movupd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_loadu_pd addr:$src))]>;
def MOVUPDmr_Int : PDI<0x11, MRMDestMem,
                       (outs), (ins f128mem:$dst, VR128:$src),
                       "movupd\t{$src, $dst|$dst, $src}",
                       [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001286
// movlpd / movhpd loads. Each is matched as a v2f64 vector_shuffle of $src1
// with a scalar_to_vector of an f64 load, under the MOVLP / MOVHP shuffle
// mask respectively. Both are two-address and carry AddedComplexity = 20.
let isTwoAddress = 1 in {
  let AddedComplexity = 20 in {
    def MOVLPDrm : PDI<0x12, MRMSrcMem,
                       (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                       "movlpd\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                         (v2f64 (vector_shuffle
                                 VR128:$src1,
                                 (scalar_to_vector (loadf64 addr:$src2)),
                                 MOVLP_shuffle_mask)))]>;
    def MOVHPDrm : PDI<0x16, MRMSrcMem,
                       (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                       "movhpd\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                         (v2f64 (vector_shuffle
                                 VR128:$src1,
                                 (scalar_to_vector (loadf64 addr:$src2)),
                                 MOVHP_shuffle_mask)))]>;
  } // AddedComplexity
} // isTwoAddress
1305
// movlpd store: stores element 0 (as f64) extracted from a v2f64 register.
def MOVLPDmr : PDI<0x13, MRMDestMem,
                   (outs), (ins f64mem:$dst, VR128:$src),
                   "movlpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract (v2f64 VR128:$src),
                                                (iPTR 0))),
                           addr:$dst)]>;
1310
// movhpd store. v2f64 extract of element 1 is always custom lowered to an
// unpack-high-to-low followed by an extract of element 0 (so the non-store
// version isn't too horrible); the pattern below matches that shape: store of
// element 0 of an UNPCKH shuffle of $src with undef.
def MOVHPDmr : PDI<0x17, MRMDestMem,
                   (outs), (ins f64mem:$dst, VR128:$src),
                   "movhpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (v2f64 (vector_shuffle
                                         VR128:$src, (undef),
                                         UNPCKH_shuffle_mask)),
                                 (iPTR 0))),
                           addr:$dst)]>;
1319
// cvtdq2ps intrinsic forms. Declared through the plain `I` class with TB (no
// OpSize prefix) and an explicit HasSSE2 requirement. The memory form folds a
// memopv2i64 load through a bitconvert.
def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src),
                       "cvtdq2ps\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtdq2ps VR128:$src))]>,
                     TB, Requires<[HasSSE2]>;
def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem,
                       (outs VR128:$dst), (ins i128mem:$src),
                       "cvtdq2ps\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtdq2ps
                          (bitconvert (memopv2i64 addr:$src))))]>,
                     TB, Requires<[HasSSE2]>;
1330
// cvtdq2pd intrinsic forms, encoded with the XS prefix and requiring HasSSE2.
// The memory form takes an i64mem operand and matches a bitconverted
// memopv2i64 load.
def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src),
                       "cvtdq2pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtdq2pd VR128:$src))]>,
                     XS, Requires<[HasSSE2]>;
def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem,
                       (outs VR128:$dst), (ins i64mem:$src),
                       "cvtdq2pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtdq2pd
                          (bitconvert (memopv2i64 addr:$src))))]>,
                     XS, Requires<[HasSSE2]>;
1341
// cvtps2dq intrinsic forms (PDI class).
// NOTE(review): the memory form folds a plain `load` on an f128mem operand;
// confirm whether an alignment-checked fragment is wanted here.
def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg,
                         (outs VR128:$dst), (ins VR128:$src),
                         "cvtps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (int_x86_sse2_cvtps2dq VR128:$src))]>;
def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem,
                         (outs VR128:$dst), (ins f128mem:$src),
                         "cvtps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (int_x86_sse2_cvtps2dq (load addr:$src)))]>;
// cvttps2dq intrinsic forms, encoded with the XS prefix and requiring HasSSE2.
// NOTE(review): the memory form folds a plain `load` on an f128mem operand;
// confirm whether an alignment-checked fragment is wanted here.
def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg,
                        (outs VR128:$dst), (ins VR128:$src),
                        "cvttps2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse2_cvttps2dq VR128:$src))]>,
                      XS, Requires<[HasSSE2]>;
def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem,
                        (outs VR128:$dst), (ins f128mem:$src),
                        "cvttps2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse2_cvttps2dq (load addr:$src)))]>,
                      XS, Requires<[HasSSE2]>;
1359
// SSE2 packed instructions with XD prefix
// cvtpd2dq intrinsic forms; both require HasSSE2.
def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src),
                       "cvtpd2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtpd2dq VR128:$src))]>,
                     XD, Requires<[HasSSE2]>;
// The 128-bit memory operand of cvtpd2dq must be 16-byte aligned, so the load
// is folded through memopv2f64 instead of an unchecked `load`.
def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem,
                       (outs VR128:$dst), (ins f128mem:$src),
                       "cvtpd2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtpd2dq
                          (memopv2f64 addr:$src)))]>,
                     XD, Requires<[HasSSE2]>;
1370
// cvttpd2dq intrinsic forms (PDI class).
def Int_CVTTPD2DQrr : PDI<0xE6, MRMSrcReg,
                          (outs VR128:$dst), (ins VR128:$src),
                          "cvttpd2dq\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst,
                            (int_x86_sse2_cvttpd2dq VR128:$src))]>;
// The 128-bit memory operand of cvttpd2dq must be 16-byte aligned, so the
// load is folded through memopv2f64 instead of an unchecked `load`.
def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem,
                          (outs VR128:$dst), (ins f128mem:$src),
                          "cvttpd2dq\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst,
                            (int_x86_sse2_cvttpd2dq
                             (memopv2f64 addr:$src)))]>;
1378
// SSE2 instructions without OpSize prefix
// cvtps2pd intrinsic forms, declared through `I` with TB and HasSSE2.
def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src),
                       "cvtps2pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtps2pd VR128:$src))]>,
                     TB, Requires<[HasSSE2]>;
// The source is a memory operand (f64mem), so the encoding form has to be
// MRMSrcMem; MRMSrcReg would describe a register in the ModRM r/m field and
// does not match the operand list.
def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem,
                       (outs VR128:$dst), (ins f64mem:$src),
                       "cvtps2pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtps2pd (load addr:$src)))]>,
                     TB, Requires<[HasSSE2]>;
1389
// cvtpd2ps intrinsic forms (PDI class).
def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg,
                         (outs VR128:$dst), (ins VR128:$src),
                         "cvtpd2ps\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (int_x86_sse2_cvtpd2ps VR128:$src))]>;
// The source is a memory operand (f128mem), so the encoding form has to be
// MRMSrcMem rather than MRMSrcReg. The 128-bit memory operand of cvtpd2ps
// must also be 16-byte aligned, hence memopv2f64 instead of an unchecked
// `load`.
def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem,
                         (outs VR128:$dst), (ins f128mem:$src),
                         "cvtpd2ps\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (int_x86_sse2_cvtpd2ps
                            (memopv2f64 addr:$src)))]>;
1397
// Scalar conversion intrinsics that take XMM (VR128) operands. All six
// records are two-address. The cvtss2sd pair is declared through `I` with an
// XS prefix and an explicit HasSSE2 requirement; the others use SDI.
let isTwoAddress = 1 in {
  // int_x86_sse2_cvtsi2sd: 32-bit integer source, register or memory.
  def Int_CVTSI2SDrr : SDI<0x2A, MRMSrcReg,
                           (outs VR128:$dst), (ins VR128:$src1, GR32:$src2),
                           "cvtsi2sd\t{$src2, $dst|$dst, $src2}",
                           [(set VR128:$dst,
                             (int_x86_sse2_cvtsi2sd VR128:$src1,
                                                    GR32:$src2))]>;
  def Int_CVTSI2SDrm : SDI<0x2A, MRMSrcMem,
                           (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2),
                           "cvtsi2sd\t{$src2, $dst|$dst, $src2}",
                           [(set VR128:$dst,
                             (int_x86_sse2_cvtsi2sd VR128:$src1,
                                                    (loadi32 addr:$src2)))]>;

  // int_x86_sse2_cvtsd2ss.
  def Int_CVTSD2SSrr : SDI<0x5A, MRMSrcReg,
                           (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                           "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
                           [(set VR128:$dst,
                             (int_x86_sse2_cvtsd2ss VR128:$src1,
                                                    VR128:$src2))]>;
  def Int_CVTSD2SSrm : SDI<0x5A, MRMSrcMem,
                           (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                           "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
                           [(set VR128:$dst,
                             (int_x86_sse2_cvtsd2ss VR128:$src1,
                                                    (load addr:$src2)))]>;

  // int_x86_sse2_cvtss2sd.
  def Int_CVTSS2SDrr : I<0x5A, MRMSrcReg,
                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                         "cvtss2sd\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (int_x86_sse2_cvtss2sd VR128:$src1,
                                                  VR128:$src2))]>,
                       XS, Requires<[HasSSE2]>;
  def Int_CVTSS2SDrm : I<0x5A, MRMSrcMem,
                         (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
                         "cvtss2sd\t{$src2, $dst|$dst, $src2}",
                         [(set VR128:$dst,
                           (int_x86_sse2_cvtss2sd VR128:$src1,
                                                  (load addr:$src2)))]>,
                       XS, Requires<[HasSSE2]>;
}
1434
1435// Arithmetic
1436
/// sse2_fp_unop_rm - SSE2 unops come in both scalar and vector forms.
///
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation. This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a
/// scalar) and leaves the top elements undefined.
///
/// And, we have a special variant form for a full-vector intrinsic form.
///
/// Parameters:
///   opc        - opcode byte shared by every variant.
///   OpcodeStr  - mnemonic stem; "sd" or "pd" is appended per variant.
///   OpNode     - DAG node matched by the plain scalar and vector forms.
///   F64Int     - scalar intrinsic matched by the SD*_Int forms.
///   V2F64Int   - full-vector intrinsic matched by the PD*_Int forms.
///   Commutable - forwarded to isCommutable on the register variants.
///
/// These four forms can each have a reg or a mem operand, so there are a
/// total of eight "instructions".
///
multiclass sse2_fp_unop_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode,
                           Intrinsic F64Int,
                           Intrinsic V2F64Int,
                           bit Commutable = 0> {
  // Scalar operation, reg.
  def SDr : SDI<opc, MRMSrcReg,
                (outs FR64:$dst), (ins FR64:$src),
                !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                [(set FR64:$dst, (OpNode FR64:$src))]> {
    let isCommutable = Commutable;
  }

  // Scalar operation, mem.
  def SDm : SDI<opc, MRMSrcMem,
                (outs FR64:$dst), (ins f64mem:$src),
                !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                [(set FR64:$dst, (OpNode (load addr:$src)))]>;

  // Vector operation, reg.
  def PDr : PDI<opc, MRMSrcReg,
                (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]> {
    let isCommutable = Commutable;
  }

  // Vector operation, mem.
  def PDm : PDI<opc, MRMSrcMem,
                (outs VR128:$dst), (ins f128mem:$src),
                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>;

  // Intrinsic operation, reg.
  def SDr_Int : SDI<opc, MRMSrcReg,
                    (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F64Int VR128:$src))]> {
    let isCommutable = Commutable;
  }

  // Intrinsic operation, mem.
  def SDm_Int : SDI<opc, MRMSrcMem,
                    (outs VR128:$dst), (ins sdmem:$src),
                    !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;

  // Vector intrinsic operation, reg
  def PDr_Int : PDI<opc, MRMSrcReg,
                    (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (V2F64Int VR128:$src))]> {
    let isCommutable = Commutable;
  }

  // Vector intrinsic operation, mem
  // The packed instruction's 128-bit memory operand must be 16-byte aligned,
  // so fold the load through memopv2f64 (as PDm does) rather than through a
  // plain `load`, which would also match under-aligned addresses.
  def PDm_Int : PDI<opc, MRMSrcMem,
                    (outs VR128:$dst), (ins f128mem:$src),
                    !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                      (V2F64Int (memopv2f64 addr:$src)))]>;
}
1502
// Square root: fsqrt for the plain forms, the sqrt_sd / sqrt_pd intrinsics
// for the _Int forms.
defm SQRT : sse2_fp_unop_rm<0x51, "sqrt", fsqrt,
                            int_x86_sse2_sqrt_sd,
                            int_x86_sse2_sqrt_pd>;
1506
1507// There is no f64 version of the reciprocal approximation instructions.
1508
// Packed-double bitwise logical instructions. The patterns are expressed on
// v2i64: register operands are viewed through bc_v2i64, and memory operands
// are folded via memopv2i64. Every record is two-address; only the reg+reg
// AND/OR/XOR forms are commutable.
let isTwoAddress = 1 in {
  let isCommutable = 1 in {
    def ANDPDrr : PDI<0x54, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "andpd\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (and (bc_v2i64 (v2f64 VR128:$src1)),
                             (bc_v2i64 (v2f64 VR128:$src2))))]>;
    def ORPDrr  : PDI<0x56, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "orpd\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (or (bc_v2i64 (v2f64 VR128:$src1)),
                            (bc_v2i64 (v2f64 VR128:$src2))))]>;
    def XORPDrr : PDI<0x57, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "xorpd\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (xor (bc_v2i64 (v2f64 VR128:$src1)),
                             (bc_v2i64 (v2f64 VR128:$src2))))]>;
  }

  // Register + memory forms.
  def ANDPDrm : PDI<0x54, MRMSrcMem,
                    (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                    "andpd\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (and (bc_v2i64 (v2f64 VR128:$src1)),
                           (memopv2i64 addr:$src2)))]>;
  def ORPDrm  : PDI<0x56, MRMSrcMem,
                    (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                    "orpd\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (or (bc_v2i64 (v2f64 VR128:$src1)),
                          (memopv2i64 addr:$src2)))]>;
  def XORPDrm : PDI<0x57, MRMSrcMem,
                    (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
                    "xorpd\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (xor (bc_v2i64 (v2f64 VR128:$src1)),
                           (memopv2i64 addr:$src2)))]>;

  // And-not: the first operand is complemented (vnot) before the AND.
  def ANDNPDrr : PDI<0x55, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                     "andnpd\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
                            (bc_v2i64 (v2f64 VR128:$src2))))]>;
  def ANDNPDrm : PDI<0x55, MRMSrcMem,
                     (outs VR128:$dst), (ins VR128:$src1,f128mem:$src2),
                     "andnpd\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                       (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
                            (memopv2i64 addr:$src2)))]>;
}
1563
// Packed-double compare, selected from int_x86_sse2_cmp_pd. The condition
// code operand ($cc, SSECC) is spliced into the mnemonic as cmp${cc}pd and is
// matched as an immediate in the pattern.
let isTwoAddress = 1 in {
  def CMPPDrri : PDIi8<0xC2, MRMSrcReg,
                       (outs VR128:$dst),
                       (ins VR128:$src1, VR128:$src, SSECC:$cc),
                       "cmp${cc}pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cmp_pd VR128:$src1,
                                              VR128:$src, imm:$cc))]>;
  // The 128-bit memory operand of cmppd must be 16-byte aligned, so the load
  // is folded through memopv2f64 instead of an unchecked `load`.
  def CMPPDrmi : PDIi8<0xC2, MRMSrcMem,
                       (outs VR128:$dst),
                       (ins VR128:$src1, f128mem:$src, SSECC:$cc),
                       "cmp${cc}pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cmp_pd VR128:$src1,
                                              (memopv2f64 addr:$src),
                                              imm:$cc))]>;
}
1576
1577// Shuffle and unpack instructions
// Packed-double shuffle and unpack instructions. Every def matches a v2f64
// vector_shuffle of $src1 with $src2 (register or memopv2f64 load) under a
// specific shuffle-mask predicate.
1578let isTwoAddress = 1 in {
  // SHUFPD (0xC6): the shuffle mask is bound to the 8-bit immediate $src3.
1579 def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001580 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3),
Dan Gohman91888f02007-07-31 20:11:57 +00001581 "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001582 [(set VR128:$dst, (v2f64 (vector_shuffle
1583 VR128:$src1, VR128:$src2,
1584 SHUFP_shuffle_mask:$src3)))]>;
1585 def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001586 (outs VR128:$dst), (ins VR128:$src1,
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001587 f128mem:$src2, i8imm:$src3),
Dan Gohman91888f02007-07-31 20:11:57 +00001588 "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001589 [(set VR128:$dst,
1590 (v2f64 (vector_shuffle
Dan Gohman7dc19012007-08-02 21:17:01 +00001591 VR128:$src1, (memopv2f64 addr:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001592 SHUFP_shuffle_mask:$src3)))]>;
1593
  // The unpack patterns carry AddedComplexity = 10.
  // NOTE(review): presumably so they win over the SHUFPD patterns when a mask
  // satisfies both predicates - confirm.
1594 let AddedComplexity = 10 in {
  // UNPCKHPD (0x15): shuffles matching UNPCKH_shuffle_mask.
1595 def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001596 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001597 "unpckhpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001598 [(set VR128:$dst,
1599 (v2f64 (vector_shuffle
1600 VR128:$src1, VR128:$src2,
1601 UNPCKH_shuffle_mask)))]>;
1602 def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001603 (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001604 "unpckhpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001605 [(set VR128:$dst,
1606 (v2f64 (vector_shuffle
Dan Gohman7dc19012007-08-02 21:17:01 +00001607 VR128:$src1, (memopv2f64 addr:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001608 UNPCKH_shuffle_mask)))]>;
1609
  // UNPCKLPD (0x14): shuffles matching UNPCKL_shuffle_mask.
1610 def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001611 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001612 "unpcklpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001613 [(set VR128:$dst,
1614 (v2f64 (vector_shuffle
1615 VR128:$src1, VR128:$src2,
1616 UNPCKL_shuffle_mask)))]>;
1617 def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001618 (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001619 "unpcklpd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001620 [(set VR128:$dst,
1621 (v2f64 (vector_shuffle
Dan Gohman7dc19012007-08-02 21:17:01 +00001622 VR128:$src1, (memopv2f64 addr:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001623 UNPCKL_shuffle_mask)))]>;
1624 } // AddedComplexity
1625} // isTwoAddress
1626
1627
1628//===----------------------------------------------------------------------===//
1629// SSE integer instructions
1630
1631// Move Instructions
// 128-bit integer moves. MOVDQA uses the PDI class; MOVDQU uses the generic I
// class with an XS prefix and an explicit HasSSE2 requirement. Load is opcode
// 0x6F and store is 0x7F for both. All selection patterns below are either
// empty or commented out, so none of these defs is chosen by pattern matching
// from this file. The load forms are flagged isLoad = 1.
Evan Chengb783fa32007-07-19 01:14:50 +00001632def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001633 "movdqa\t{$src, $dst|$dst, $src}", []>;
Evan Cheng4e84e452007-08-30 05:49:43 +00001634let isLoad = 1 in
Evan Chengb783fa32007-07-19 01:14:50 +00001635def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001636 "movdqa\t{$src, $dst|$dst, $src}",
Evan Cheng51a49b22007-07-20 00:27:43 +00001637 [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001638def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001639 "movdqa\t{$src, $dst|$dst, $src}",
Evan Cheng51a49b22007-07-20 00:27:43 +00001640 [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
Evan Cheng4e84e452007-08-30 05:49:43 +00001641let isLoad = 1 in
Evan Chengb783fa32007-07-19 01:14:50 +00001642def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001643 "movdqu\t{$src, $dst|$dst, $src}",
Evan Cheng51a49b22007-07-20 00:27:43 +00001644 [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001645 XS, Requires<[HasSSE2]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001646def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001647 "movdqu\t{$src, $dst|$dst, $src}",
Evan Cheng51a49b22007-07-20 00:27:43 +00001648 [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001649 XS, Requires<[HasSSE2]>;
1650
Dan Gohman4a4f1512007-07-18 20:23:34 +00001651// Intrinsic forms of MOVDQU load and store
// MOVDQU forms selected from the int_x86_sse2_loadu_dq and
// int_x86_sse2_storeu_dq intrinsics. Opcodes, prefix (XS) and predicate
// (HasSSE2) are the same as the non-intrinsic MOVDQUrm/MOVDQUmr defs.
Evan Cheng4e84e452007-08-30 05:49:43 +00001652let isLoad = 1 in
Evan Chengb783fa32007-07-19 01:14:50 +00001653def MOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001654 "movdqu\t{$src, $dst|$dst, $src}",
Dan Gohman4a4f1512007-07-18 20:23:34 +00001655 [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>,
1656 XS, Requires<[HasSSE2]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001657def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00001658 "movdqu\t{$src, $dst|$dst, $src}",
Dan Gohman4a4f1512007-07-18 20:23:34 +00001659 [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
1660 XS, Requires<[HasSSE2]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001661
1662let isTwoAddress = 1 in {
1663
/// PDI_binop_rm_int - SSE2 binary operation selected from an intrinsic.
///   opc        - opcode byte shared by both forms.
///   OpcodeStr  - mnemonic; the operand string is appended with !strconcat.
///   IntId      - intrinsic matched by the selection pattern.
///   Commutable - copied into isCommutable on the rr form only (default 0).
/// Produces `rr` (register source) and `rm` (memory source, matched as a
/// bitconvert of a memopv2i64 load).
1664multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
1665 bit Commutable = 0> {
Evan Chengb783fa32007-07-19 01:14:50 +00001666 def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001667 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001668 [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]> {
1669 let isCommutable = Commutable;
1670 }
Evan Chengb783fa32007-07-19 01:14:50 +00001671 def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001672 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001673 [(set VR128:$dst, (IntId VR128:$src1,
Dan Gohman4a4f1512007-07-18 20:23:34 +00001674 (bitconvert (memopv2i64 addr:$src2))))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001675}
1676
/// PDI_binop_rmi_int - SSE2 intrinsic binary operation that also has an
/// immediate form (used below for the packed shifts).
///   opc       - opcode of the rr/rm forms.
///   opc2      - opcode of the ri form.
///   ImmForm   - instruction Format of the ri form.
///   OpcodeStr - mnemonic; the operand string is appended with !strconcat.
///   IntId     - intrinsic matched by all three patterns.
/// The ri pattern matches the intrinsic applied to a scalar_to_vector of an
/// i32 immediate.
1677multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
1678 string OpcodeStr, Intrinsic IntId> {
Evan Chengb783fa32007-07-19 01:14:50 +00001679 def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001680 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001681 [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001682 def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001683 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001684 [(set VR128:$dst, (IntId VR128:$src1,
Dan Gohman4a4f1512007-07-18 20:23:34 +00001685 (bitconvert (memopv2i64 addr:$src2))))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00001686 def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001687 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001688 [(set VR128:$dst, (IntId VR128:$src1,
1689 (scalar_to_vector (i32 imm:$src2))))]>;
1690}
1691
1692
1693/// PDI_binop_rm - Simple SSE2 binary operator.
///   opc        - opcode byte shared by both forms.
///   OpcodeStr  - mnemonic; the operand string is appended with !strconcat.
///   OpNode     - SDNode matched by the selection pattern.
///   OpVT       - vector type the result of OpNode is typed as.
///   Commutable - copied into isCommutable on the rr form only (default 0).
/// The rm form matches its memory operand as a bitconvert of a memopv2i64
/// load.
1694multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
1695 ValueType OpVT, bit Commutable = 0> {
Evan Chengb783fa32007-07-19 01:14:50 +00001696 def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001697 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001698 [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]> {
1699 let isCommutable = Commutable;
1700 }
Evan Chengb783fa32007-07-19 01:14:50 +00001701 def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001702 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001703 [(set VR128:$dst, (OpVT (OpNode VR128:$src1,
Dan Gohman4a4f1512007-07-18 20:23:34 +00001704 (bitconvert (memopv2i64 addr:$src2)))))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001705}
1706
1707/// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64.
1708///
1709/// FIXME: we could eliminate this and use PDI_binop_rm instead if tblgen knew
1710/// to collapse (bitconvert VT to VT) into its operand.
1711///
///   opc        - opcode byte shared by both forms.
///   OpcodeStr  - mnemonic; the operand string is appended with !strconcat.
///   OpNode     - SDNode matched by the selection pattern.
///   Commutable - copied into isCommutable on the rr form only (default 0).
/// Unlike PDI_binop_rm, the rm pattern uses the memopv2i64 load directly with
/// no bitconvert, and only the rr result is explicitly typed v2i64.
1712multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
1713 bit Commutable = 0> {
Evan Chengb783fa32007-07-19 01:14:50 +00001714 def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001715 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001716 [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]> {
1717 let isCommutable = Commutable;
1718 }
Evan Chengb783fa32007-07-19 01:14:50 +00001719 def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001720 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
Dan Gohman4a4f1512007-07-18 20:23:34 +00001721 [(set VR128:$dst, (OpNode VR128:$src1,(memopv2i64 addr:$src2)))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001722}
1723
1724} // isTwoAddress
1725
1726// 128-bit Integer Arithmetic
1727
// Packed integer add. The byte/word/dword forms match the generic `add` node
// at the given vector type; PADDQ uses the v2i64-specific multiclass. All are
// instantiated with Commutable = 1.
1728defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>;
1729defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>;
1730defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>;
1731defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>;
1732
// PADDS*/PADDUS* are selected from the int_x86_sse2_padds_* and
// int_x86_sse2_paddus_* intrinsics, also with Commutable = 1.
1733defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>;
1734defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>;
1735defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>;
1736defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>;
1737
// Packed integer subtract. The Commutable argument is left at its default
// (0) for every def in this group.
1738defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8>;
1739defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16>;
1740defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32>;
1741defm PSUBQ : PDI_binop_rm_v2i64<0xFB, "psubq", sub>;
1742
// PSUBS*/PSUBUS* are selected from the int_x86_sse2_psubs_* and
// int_x86_sse2_psubus_* intrinsics.
1743defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b>;
1744defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w>;
1745defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b>;
1746defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w>;
1747
// Packed multiply, multiply-add and average; every def here passes
// Commutable = 1. PMULLW matches the generic `mul` node at v8i16.
1748defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>;
1749
// The remaining defs are selected from SSE2 intrinsics.
1750defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>;
1751defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w , 1>;
1752defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>;
1753
1754defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>;
1755
1756defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>;
1757defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>;
1758
1759
// Packed integer min/max and sum of absolute differences, all selected from
// SSE2 intrinsics and instantiated with Commutable = 1.
1760defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>;
1761defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>;
1762defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>;
1763defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;
// PSADBW is encoded as 66 0F F6 /r. It was previously given 0xE0, which is
// PAVGB's opcode, so the code emitter would have produced a PAVGB encoding.
1764defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
1765
1766
// Packed shifts, selected from the int_x86_sse2_psll_*/psrl_*/psra_*
// intrinsics. The first opcode is for the rr/rm forms; the second opcode
// plus the MRMnr format is for the immediate (ri) form. The immediate forms
// share opcodes 0x71/0x72/0x73 across shift kinds and differ only in the
// format: MRM6r for left, MRM2r for logical right, MRM4r for arithmetic
// right.
1767defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw", int_x86_sse2_psll_w>;
1768defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld", int_x86_sse2_psll_d>;
1769defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq", int_x86_sse2_psll_q>;
1770
1771defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw", int_x86_sse2_psrl_w>;
1772defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld", int_x86_sse2_psrl_d>;
1773defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq", int_x86_sse2_psrl_q>;
1774
1775defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", int_x86_sse2_psra_w>;
1776defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", int_x86_sse2_psra_d>;
1777// PSRAQ doesn't exist in SSE[1-3].
1778
1779// 128-bit logical shifts.
// Whole-register shifts by an immediate. Both use opcode 0x73 and differ
// only in format (MRM7r for pslldq, MRM3r for psrldq). Their pattern lists
// are empty; they are selected through the standalone Pat<> records that
// reference PSLLDQri / PSRLDQri.
1780let isTwoAddress = 1 in {
1781 def PSLLDQri : PDIi8<0x73, MRM7r,
Evan Chengb783fa32007-07-19 01:14:50 +00001782 (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001783 "pslldq\t{$src2, $dst|$dst, $src2}", []>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001784 def PSRLDQri : PDIi8<0x73, MRM3r,
Evan Chengb783fa32007-07-19 01:14:50 +00001785 (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001786 "psrldq\t{$src2, $dst|$dst, $src2}", []>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001787 // PSRADQri doesn't exist in SSE[1-3].
1788}
1789
// Selection patterns (guarded by HasSSE2) that map the psll_dq / psrl_dq
// intrinsics and the X86fsrl node onto PSLLDQri / PSRLDQri. The immediate is
// passed through the PSxLDQ_imm transform, which is defined outside this
// section.
// NOTE(review): PSxLDQ_imm presumably rescales the shift amount (bits to
// bytes) - confirm against its definition.
1790let Predicates = [HasSSE2] in {
1791 def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
1792 (v2i64 (PSLLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
1793 def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
1794 (v2i64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
  // v2f64 X86fsrl is also implemented with PSRLDQri.
1795 def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
1796 (v2f64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
1797}
1798
1799// Logical
// Bitwise and/or/xor on v2i64, matching the generic and/or/xor nodes; all
// three are instantiated with Commutable = 1.
1800defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
1801defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or , 1>;
1802defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;
1803
// PANDN (0xDF): matches (and (vnot $src1), $src2) on v2i64, i.e. the first
// source is the one that is complemented. Written out by hand rather than
// through a multiclass.
1804let isTwoAddress = 1 in {
1805 def PANDNrr : PDI<0xDF, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001806 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001807 "pandn\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001808 [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
1809 VR128:$src2)))]>;
1810
  // Memory form: $src2 is a memopv2i64 load.
1811 def PANDNrm : PDI<0xDF, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001812 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001813 "pandn\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001814 [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
Dan Gohman7dc19012007-08-02 21:17:01 +00001815 (memopv2i64 addr:$src2))))]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001816}
1817
1818// SSE2 Integer comparison
// Packed integer equality / greater-than compares, selected from the
// int_x86_sse2_pcmpeq_* and int_x86_sse2_pcmpgt_* intrinsics. Commutable is
// left at its default (0) for all of them, including the pcmpeq forms.
1819defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b>;
1820defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w>;
1821defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d>;
1822defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>;
1823defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
1824defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
1825
1826// Pack instructions
// Pack instructions, selected from the int_x86_sse2_pack*_128 intrinsics;
// Commutable is left at its default (0).
1827defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128>;
1828defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>;
1829defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;
1830
1831// Shuffle and unpack instructions
// PSHUFD (0x70 with an 8-bit immediate): matches a v4i32 vector_shuffle of
// $src1 with undef whose mask satisfies PSHUFD_shuffle_mask; the mask is
// bound to the immediate $src2. These defs are not wrapped in isTwoAddress,
// and the asm string prints $src1 and $dst separately.
1832def PSHUFDri : PDIi8<0x70, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001833 (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001834 "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001835 [(set VR128:$dst, (v4i32 (vector_shuffle
1836 VR128:$src1, (undef),
1837 PSHUFD_shuffle_mask:$src2)))]>;
// Memory form: the source is a memopv2i64 load bitcast to v4i32.
1838def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001839 (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001840 "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001841 [(set VR128:$dst, (v4i32 (vector_shuffle
Dan Gohman4a4f1512007-07-18 20:23:34 +00001842 (bc_v4i32(memopv2i64 addr:$src1)),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001843 (undef),
1844 PSHUFD_shuffle_mask:$src2)))]>;
1845
1846// SSE2 with ImmT == Imm8 and XS prefix.
// PSHUFHW: same opcode byte (0x70) as PSHUFD, but declared with the Ii8
// class plus an XS prefix and an explicit HasSSE2 requirement. Matches a
// v8i16 vector_shuffle of $src1 with undef under PSHUFHW_shuffle_mask.
1847def PSHUFHWri : Ii8<0x70, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001848 (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001849 "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001850 [(set VR128:$dst, (v8i16 (vector_shuffle
1851 VR128:$src1, (undef),
1852 PSHUFHW_shuffle_mask:$src2)))]>,
1853 XS, Requires<[HasSSE2]>;
// Memory form: the source is a memopv2i64 load bitcast to v8i16.
1854def PSHUFHWmi : Ii8<0x70, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001855 (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001856 "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001857 [(set VR128:$dst, (v8i16 (vector_shuffle
Dan Gohman4a4f1512007-07-18 20:23:34 +00001858 (bc_v8i16 (memopv2i64 addr:$src1)),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001859 (undef),
1860 PSHUFHW_shuffle_mask:$src2)))]>,
1861 XS, Requires<[HasSSE2]>;
1862
1863// SSE2 with ImmT == Imm8 and XD prefix.
// PSHUFLW: opcode 0x70 via the Ii8 class with an XD prefix and an explicit
// HasSSE2 requirement. Matches a v8i16 vector_shuffle of $src1 with undef
// under PSHUFLW_shuffle_mask.
// NOTE(review): the immediate operand is i32i8imm here but i8imm on
// PSHUFD/PSHUFHW - confirm whether the difference is intentional.
1864def PSHUFLWri : Ii8<0x70, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001865 (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001866 "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001867 [(set VR128:$dst, (v8i16 (vector_shuffle
1868 VR128:$src1, (undef),
1869 PSHUFLW_shuffle_mask:$src2)))]>,
1870 XD, Requires<[HasSSE2]>;
// Memory form: the source is a memopv2i64 load bitcast to v8i16.
1871def PSHUFLWmi : Ii8<0x70, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001872 (outs VR128:$dst), (ins i128mem:$src1, i32i8imm:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001873 "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001874 [(set VR128:$dst, (v8i16 (vector_shuffle
Dan Gohman4a4f1512007-07-18 20:23:34 +00001875 (bc_v8i16 (memopv2i64 addr:$src1)),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001876 (undef),
1877 PSHUFLW_shuffle_mask:$src2)))]>,
1878 XD, Requires<[HasSSE2]>;
1879
1880
1881let isTwoAddress = 1 in {
// PUNPCKL{BW,WD,DQ,QDQ}: each matches a vector_shuffle of $src1 and $src2
// under UNPCKL_shuffle_mask at v16i8 / v8i16 / v4i32 / v2i64 respectively.
// The rm forms load $src2 with memopv2i64 and bitcast it to the element type
// (no bitcast is needed for the v2i64 form).
1882 def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001883 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001884 "punpcklbw\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001885 [(set VR128:$dst,
1886 (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
1887 UNPCKL_shuffle_mask)))]>;
1888 def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001889 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001890 "punpcklbw\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001891 [(set VR128:$dst,
1892 (v16i8 (vector_shuffle VR128:$src1,
Dan Gohman4a4f1512007-07-18 20:23:34 +00001893 (bc_v16i8 (memopv2i64 addr:$src2)),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001894 UNPCKL_shuffle_mask)))]>;
1895 def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001896 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001897 "punpcklwd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001898 [(set VR128:$dst,
1899 (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
1900 UNPCKL_shuffle_mask)))]>;
1901 def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001902 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001903 "punpcklwd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001904 [(set VR128:$dst,
1905 (v8i16 (vector_shuffle VR128:$src1,
Dan Gohman4a4f1512007-07-18 20:23:34 +00001906 (bc_v8i16 (memopv2i64 addr:$src2)),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001907 UNPCKL_shuffle_mask)))]>;
1908 def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001909 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001910 "punpckldq\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001911 [(set VR128:$dst,
1912 (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
1913 UNPCKL_shuffle_mask)))]>;
1914 def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001915 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001916 "punpckldq\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001917 [(set VR128:$dst,
1918 (v4i32 (vector_shuffle VR128:$src1,
Dan Gohman4a4f1512007-07-18 20:23:34 +00001919 (bc_v4i32 (memopv2i64 addr:$src2)),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001920 UNPCKL_shuffle_mask)))]>;
1921 def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001922 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001923 "punpcklqdq\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001924 [(set VR128:$dst,
1925 (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
1926 UNPCKL_shuffle_mask)))]>;
1927 def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001928 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001929 "punpcklqdq\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001930 [(set VR128:$dst,
1931 (v2i64 (vector_shuffle VR128:$src1,
Dan Gohman4a4f1512007-07-18 20:23:34 +00001932 (memopv2i64 addr:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001933 UNPCKL_shuffle_mask)))]>;
1934
// PUNPCKH{BW,WD,DQ,QDQ}: each matches a vector_shuffle of $src1 and $src2
// under UNPCKH_shuffle_mask at v16i8 / v8i16 / v4i32 / v2i64 respectively.
// The rm forms load $src2 with memopv2i64 and bitcast it to the element type
// (no bitcast is needed for the v2i64 form).
1935 def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001936 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001937 "punpckhbw\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001938 [(set VR128:$dst,
1939 (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
1940 UNPCKH_shuffle_mask)))]>;
1941 def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001942 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001943 "punpckhbw\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001944 [(set VR128:$dst,
1945 (v16i8 (vector_shuffle VR128:$src1,
Dan Gohman4a4f1512007-07-18 20:23:34 +00001946 (bc_v16i8 (memopv2i64 addr:$src2)),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001947 UNPCKH_shuffle_mask)))]>;
1948 def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001949 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001950 "punpckhwd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001951 [(set VR128:$dst,
1952 (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
1953 UNPCKH_shuffle_mask)))]>;
1954 def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001955 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001956 "punpckhwd\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001957 [(set VR128:$dst,
1958 (v8i16 (vector_shuffle VR128:$src1,
Dan Gohman4a4f1512007-07-18 20:23:34 +00001959 (bc_v8i16 (memopv2i64 addr:$src2)),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001960 UNPCKH_shuffle_mask)))]>;
1961 def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001962 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001963 "punpckhdq\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001964 [(set VR128:$dst,
1965 (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
1966 UNPCKH_shuffle_mask)))]>;
1967 def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001968 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001969 "punpckhdq\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001970 [(set VR128:$dst,
1971 (v4i32 (vector_shuffle VR128:$src1,
Dan Gohman4a4f1512007-07-18 20:23:34 +00001972 (bc_v4i32 (memopv2i64 addr:$src2)),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001973 UNPCKH_shuffle_mask)))]>;
1974 def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001975 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001976 "punpckhqdq\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001977 [(set VR128:$dst,
1978 (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
1979 UNPCKH_shuffle_mask)))]>;
1980 def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00001981 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001982 "punpckhqdq\t{$src2, $dst|$dst, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001983 [(set VR128:$dst,
1984 (v2i64 (vector_shuffle VR128:$src1,
Dan Gohman4a4f1512007-07-18 20:23:34 +00001985 (memopv2i64 addr:$src2),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001986 UNPCKH_shuffle_mask)))]>;
1987}
1988
1989// Extract / Insert
// PEXTRW (0xC5): selected from the X86pextrw node on a v8i16 source with an
// iPTR immediate index; the result is produced in a GR32 register.
1990def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001991 (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
Dan Gohman91888f02007-07-31 20:11:57 +00001992 "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001993 [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
1994 (iPTR imm:$src2)))]>;
// PINSRW (0xC4): selected from the X86pinsrw node, which takes the v8i16
// vector $src1, the value to insert ($src2) and an iPTR immediate index
// ($src3).
1995let isTwoAddress = 1 in {
  // Register form: the inserted value comes from a GR32.
1996 def PINSRWrri : PDIi8<0xC4, MRMSrcReg,
Evan Chengb783fa32007-07-19 01:14:50 +00001997 (outs VR128:$dst), (ins VR128:$src1,
Dan Gohmanf17a25c2007-07-18 16:29:46 +00001998 GR32:$src2, i32i8imm:$src3),
Dan Gohman91888f02007-07-31 20:11:57 +00001999 "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002000 [(set VR128:$dst,
2001 (v8i16 (X86pinsrw (v8i16 VR128:$src1),
2002 GR32:$src2, (iPTR imm:$src3))))]>;
  // Memory form: the inserted value is a 16-bit load any-extended to i32.
2003 def PINSRWrmi : PDIi8<0xC4, MRMSrcMem,
Evan Chengb783fa32007-07-19 01:14:50 +00002004 (outs VR128:$dst), (ins VR128:$src1,
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002005 i16mem:$src2, i32i8imm:$src3),
Dan Gohman91888f02007-07-31 20:11:57 +00002006 "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002007 [(set VR128:$dst,
2008 (v8i16 (X86pinsrw (v8i16 VR128:$src1),
2009 (i32 (anyext (loadi16 addr:$src2))),
2010 (iPTR imm:$src3))))]>;
2011}
2012
2013// Mask creation
// PMOVMSKB (0xD7): selected from int_x86_sse2_pmovmskb_128; takes a VR128
// source and produces its result in a GR32 register.
Evan Chengb783fa32007-07-19 01:14:50 +00002014def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00002015 "pmovmskb\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002016 [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
2017
2018// Conditional store
// MASKMOVDQU (0xF7): selected from int_x86_sse2_maskmov_dqu. It has no
// explicit outputs; EDI appears as the intrinsic's third operand in the
// pattern and is declared as an implicit use (Uses = [EDI]) rather than as
// an instruction operand.
Evan Cheng6e4d1d92007-09-11 19:55:27 +00002019let Uses = [EDI] in
Evan Chengb783fa32007-07-19 01:14:50 +00002020def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
Dan Gohman91888f02007-07-31 20:11:57 +00002021 "maskmovdqu\t{$mask, $src|$src, $mask}",
Evan Cheng6e4d1d92007-09-11 19:55:27 +00002022 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002023
2024// Non-temporal stores
// Non-temporal stores, each selected from its int_x86_sse2_movnt_*
// intrinsic. None has an explicit output; the destination is a memory
// operand.
// NOTE(review): MOVNTPDmr (packed double) uses i128mem while MOVNTDQmr
// (integer) uses f128mem - the operand classes look swapped relative to the
// data types; confirm whether this matters for the asm printer.
Evan Chengb783fa32007-07-19 01:14:50 +00002025def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00002026 "movntpd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002027 [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
Evan Chengb783fa32007-07-19 01:14:50 +00002028def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00002029 "movntdq\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002030 [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
// MOVNTI stores a GR32; it uses the generic I class with a TB prefix.
Evan Chengb783fa32007-07-19 01:14:50 +00002031def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00002032 "movnti\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002033 [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
2034 TB, Requires<[HasSSE2]>;
2035
2036// Flush cache
// CLFLUSH (0xAE, format MRM7m, TB prefix): selected from
// int_x86_sse2_clflush with a single memory operand.
Evan Chengb783fa32007-07-19 01:14:50 +00002037def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00002038 "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002039 TB, Requires<[HasSSE2]>;
2040
2041// Load, store, and memory fence
// LFENCE / MFENCE: operand-less instructions selected from the
// int_x86_sse2_lfence / int_x86_sse2_mfence intrinsics. Both share opcode
// 0xAE with CLFLUSH and are distinguished by format (MRM5m / MRM6m).
// NOTE(review): the formats are the memory (m) variants even though no
// memory operand is declared - confirm the emitter produces the intended
// encoding.
Evan Chengb783fa32007-07-19 01:14:50 +00002042def LFENCE : I<0xAE, MRM5m, (outs), (ins),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002043 "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
Evan Chengb783fa32007-07-19 01:14:50 +00002044def MFENCE : I<0xAE, MRM6m, (outs), (ins),
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002045 "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
2046
2047
2048// Alias instruction that maps an all-ones vector to pcmpeqd for sse.
2049// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
// V_SETALLONES: takes no inputs and matches immAllOnesV (typed v2f64 in the
// pattern). It prints as "pcmpeqd $dst, $dst", uses the MRMInitReg format,
// and is marked rematerializable.
2050let isReMaterializable = 1 in
Evan Chengb783fa32007-07-19 01:14:50 +00002051 def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins),
Dan Gohman91888f02007-07-31 20:11:57 +00002052 "pcmpeqd\t$dst, $dst",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002053 [(set VR128:$dst, (v2f64 immAllOnesV))]>;
2054
2055// FR64 to 128-bit vector conversion.
// movsd (0x10) used to build a v2f64 from a scalar: matches scalar_to_vector
// of an FR64 register (rr) or of an f64 load (rm).
Evan Chengb783fa32007-07-19 01:14:50 +00002056def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR64:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00002057 "movsd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002058 [(set VR128:$dst,
2059 (v2f64 (scalar_to_vector FR64:$src)))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00002060def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00002061 "movsd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002062 [(set VR128:$dst,
2063 (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
2064
// movd (0x6E) used to build a v4i32 from a scalar: matches scalar_to_vector
// of a GR32 register (rr) or of an i32 load (rm).
Evan Chengb783fa32007-07-19 01:14:50 +00002065def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00002066 "movd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002067 [(set VR128:$dst,
2068 (v4i32 (scalar_to_vector GR32:$src)))]>;
Evan Chengb783fa32007-07-19 01:14:50 +00002069def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00002070 "movd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002071 [(set VR128:$dst,
2072 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
2073
// movd (0x6E) used as a bit-pattern move into a scalar FP register: matches
// a bitconvert of a GR32 register (rr) or of an i32 load (rm), producing an
// FR32 result.
Evan Chengb783fa32007-07-19 01:14:50 +00002074def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00002075 "movd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002076 [(set FR32:$dst, (bitconvert GR32:$src))]>;
2077
Evan Chengb783fa32007-07-19 01:14:50 +00002078def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00002079 "movd\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002080 [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
2081
2082// SSE2 instructions with XS prefix
// movq load (0x7E via the generic I class with an XS prefix): matches
// scalar_to_vector of an i64 load, producing a v2i64.
Evan Chengb783fa32007-07-19 01:14:50 +00002083def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00002084 "movq\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002085 [(set VR128:$dst,
2086 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
2087 Requires<[HasSSE2]>;
// movq store (0xD6 via the PDI class, no XS prefix): stores element 0 of a
// v2i64 as an i64.
Evan Chengb783fa32007-07-19 01:14:50 +00002088def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
Dan Gohman91888f02007-07-31 20:11:57 +00002089 "movq\t{$src, $dst|$dst, $src}",
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002090 [(store (i64 (vector_extract (v2i64 VR128:$src),
2091 (iPTR 0))), addr:$dst)]>;
2092
// FIXME: we may not be able to eliminate this movsd with coalescing because
// the src and dest register classes are different. We really want to write
// this pattern like this:
// def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
//           (f64 FR64:$src)>;
// movsd used to extract element 0 of a v2f64: into an FR64 register (rr) or
// stored to an f64 memory operand (mr).
def MOVPD2SDrr : SDI<0x10, MRMSrcReg,
                     (outs FR64:$dst), (ins VR128:$src),
                     "movsd\t{$src, $dst|$dst, $src}",
                     [(set FR64:$dst,
                           (vector_extract (v2f64 VR128:$src), (iPTR 0)))]>;
def MOVPD2SDmr : SDI<0x11, MRMDestMem,
                     (outs), (ins f64mem:$dst, VR128:$src),
                     "movsd\t{$src, $dst|$dst, $src}",
                     [(store (f64 (vector_extract (v2f64 VR128:$src),
                                                  (iPTR 0))),
                             addr:$dst)]>;
// movd used to extract element 0 of a v4i32: into a GR32 register (rr) or
// stored to an i32 memory operand (mr).
def MOVPDI2DIrr : PDI<0x7E, MRMDestReg,
                      (outs GR32:$dst), (ins VR128:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst,
                            (vector_extract (v4i32 VR128:$src), (iPTR 0)))]>;
def MOVPDI2DImr : PDI<0x7E, MRMDestMem,
                      (outs), (ins i32mem:$dst, VR128:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(store (i32 (vector_extract (v4i32 VR128:$src),
                                                   (iPTR 0))),
                              addr:$dst)]>;
2114
// movd used as a bitconvert from an FR32 value to a 32-bit integer: into a
// GR32 register (rr) or stored to an i32 memory operand (mr).
def MOVSS2DIrr : PDI<0x7E, MRMDestReg,
                     (outs GR32:$dst), (ins FR32:$src),
                     "movd\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (bitconvert FR32:$src))]>;
def MOVSS2DImr : PDI<0x7E, MRMDestMem,
                     (outs), (ins i32mem:$dst, FR32:$src),
                     "movd\t{$src, $dst|$dst, $src}",
                     [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
2121
2122
// Move to lower bits of a VR128, leaving upper bits alone.
// Three operand (but two address) aliases.
let isTwoAddress = 1 in {
  // No selection pattern of its own; it is referenced explicitly from
  // patterns elsewhere in this file.
  def MOVLSD2PDrr : SDI<0x10, MRMSrcReg,
                        (outs VR128:$dst),
                        (ins VR128:$src1, FR64:$src2),
                        "movsd\t{$src2, $dst|$dst, $src2}",
                        []>;

  // v2f64 shuffle of $src1 and $src2 with MOVL_shuffle_mask.
  let AddedComplexity = 15 in
  def MOVLPDrr : SDI<0x10, MRMSrcReg,
                     (outs VR128:$dst),
                     (ins VR128:$src1, VR128:$src2),
                     "movsd\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst,
                           (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
                                                  MOVL_shuffle_mask)))]>;
}
2138
// Store / copy lower 64-bits of a XMM register.
// Selected for the int_x86_sse2_storel_dq intrinsic.
def MOVLQ128mr : PDI<0xD6, MRMDestMem,
                     (outs), (ins i64mem:$dst, VR128:$src),
                     "movq\t{$src, $dst|$dst, $src}",
                     [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
2143
// Move to lower bits of a VR128 and zeroing upper bits.
// Loading from memory automatically zeroing upper bits.
// Matches a MOVL shuffle of an all-zeros vector with a scalar f64 load.
let AddedComplexity = 20 in {
  def MOVZSD2PDrm : SDI<0x10, MRMSrcMem,
                        (outs VR128:$dst), (ins f64mem:$src),
                        "movsd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                              (v2f64 (vector_shuffle
                                      immAllZerosV,
                                      (v2f64 (scalar_to_vector
                                              (loadf64 addr:$src))),
                                      MOVL_shuffle_mask)))]>;
}
2154
// movd / movq to XMM register zero-extends
// Both forms match a MOVL shuffle of an all-zeros vector with a v4i32 built
// from a 32-bit scalar; the memory form gets the higher AddedComplexity.
let AddedComplexity = 15 in {
  def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg,
                         (outs VR128:$dst), (ins GR32:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                               (v4i32 (vector_shuffle
                                       immAllZerosV,
                                       (v4i32 (scalar_to_vector GR32:$src)),
                                       MOVL_shuffle_mask)))]>;
}
let AddedComplexity = 20 in {
  def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem,
                         (outs VR128:$dst), (ins i32mem:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                               (v4i32 (vector_shuffle
                                       immAllZerosV,
                                       (v4i32 (scalar_to_vector
                                               (loadi32 addr:$src))),
                                       MOVL_shuffle_mask)))]>;
}
2170
// Moving from XMM to XMM but still clear upper 64 bits.
// Both forms are selected for the int_x86_sse2_movl_dq intrinsic and are
// built directly on I<>, so the XS prefix and HasSSE2 predicate are explicit.
let AddedComplexity = 15 in {
  def MOVZQI2PQIrr : I<0x7E, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src),
                       "movq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                             (int_x86_sse2_movl_dq VR128:$src))]>,
                     XS, Requires<[HasSSE2]>;
}
let AddedComplexity = 20 in {
  def MOVZQI2PQIrm : I<0x7E, MRMSrcMem,
                       (outs VR128:$dst), (ins i64mem:$src),
                       "movq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                             (int_x86_sse2_movl_dq
                              (bitconvert (memopv2i64 addr:$src))))]>,
                     XS, Requires<[HasSSE2]>;
}
2183
2184
2185//===----------------------------------------------------------------------===//
2186// SSE3 Instructions
2187//===----------------------------------------------------------------------===//
2188
// Move Instructions
// movshdup: unary v4f32 shuffle selected by MOVSHDUP_shuffle_mask, from a
// register or from a 128-bit memory operand.
def MOVSHDUPrr : S3SI<0x16, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src),
                      "movshdup\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                            (v4f32 (vector_shuffle VR128:$src, (undef),
                                                   MOVSHDUP_shuffle_mask)))]>;
def MOVSHDUPrm : S3SI<0x16, MRMSrcMem,
                      (outs VR128:$dst), (ins f128mem:$src),
                      "movshdup\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                            (v4f32 (vector_shuffle (memopv4f32 addr:$src),
                                                   (undef),
                                                   MOVSHDUP_shuffle_mask)))]>;
2200
// movsldup: unary v4f32 shuffle selected by MOVSLDUP_shuffle_mask, from a
// register or from a 128-bit memory operand.
def MOVSLDUPrr : S3SI<0x12, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src),
                      "movsldup\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                            (v4f32 (vector_shuffle VR128:$src, (undef),
                                                   MOVSLDUP_shuffle_mask)))]>;
def MOVSLDUPrm : S3SI<0x12, MRMSrcMem,
                      (outs VR128:$dst), (ins f128mem:$src),
                      "movsldup\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                            (v4f32 (vector_shuffle (memopv4f32 addr:$src),
                                                   (undef),
                                                   MOVSLDUP_shuffle_mask)))]>;
2211
// movddup: unary v2f64 shuffle selected by SSE_splat_lo_mask. The memory
// form takes a 64-bit operand and matches a scalar f64 load promoted with
// scalar_to_vector.
def MOVDDUPrr : S3DI<0x12, MRMSrcReg,
                     (outs VR128:$dst), (ins VR128:$src),
                     "movddup\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                           (v2f64 (vector_shuffle VR128:$src, (undef),
                                                  SSE_splat_lo_mask)))]>;
def MOVDDUPrm : S3DI<0x12, MRMSrcMem,
                     (outs VR128:$dst), (ins f64mem:$src),
                     "movddup\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                           (v2f64 (vector_shuffle
                                   (scalar_to_vector (loadf64 addr:$src)),
                                   (undef),
                                   SSE_splat_lo_mask)))]>;
2224
// Arithmetic
// addsubps / addsubpd, selected for the int_x86_sse3_addsub_{ps,pd}
// intrinsics. Two-address: $dst is tied to $src1.
let isTwoAddress = 1 in {
  def ADDSUBPSrr : S3DI<0xD0, MRMSrcReg,
                        (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2),
                        "addsubps\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst,
                              (int_x86_sse3_addsub_ps VR128:$src1,
                                                      VR128:$src2))]>;
  def ADDSUBPSrm : S3DI<0xD0, MRMSrcMem,
                        (outs VR128:$dst),
                        (ins VR128:$src1, f128mem:$src2),
                        "addsubps\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst,
                              (int_x86_sse3_addsub_ps VR128:$src1,
                                                      (load addr:$src2)))]>;

  def ADDSUBPDrr : S3I<0xD0, MRMSrcReg,
                       (outs VR128:$dst),
                       (ins VR128:$src1, VR128:$src2),
                       "addsubpd\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                             (int_x86_sse3_addsub_pd VR128:$src1,
                                                     VR128:$src2))]>;
  def ADDSUBPDrm : S3I<0xD0, MRMSrcMem,
                       (outs VR128:$dst),
                       (ins VR128:$src1, f128mem:$src2),
                       "addsubpd\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                             (int_x86_sse3_addsub_pd VR128:$src1,
                                                     (load addr:$src2)))]>;
}
2248
// lddqu: 128-bit load selected for the int_x86_sse3_ldu_dq intrinsic.
def LDDQUrm : S3DI<0xF0, MRMSrcMem,
                   (outs VR128:$dst), (ins i128mem:$src),
                   "lddqu\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
2252
// Horizontal ops
// Helper classes for two-operand SSE3 intrinsic instructions. Each takes the
// opcode byte, the mnemonic, and the intrinsic to select:
//   S3D_Intrr / S3D_Intrm - built on S3DI, v4f32 result, reg / mem source.
//   S3_Intrr  / S3_Intrm  - built on S3I,  v2f64 result, reg / mem source.
class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
  : S3DI<o, MRMSrcReg,
         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         [(set VR128:$dst,
               (v4f32 (IntId VR128:$src1, VR128:$src2)))]>;
class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
  : S3DI<o, MRMSrcMem,
         (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         [(set VR128:$dst,
               (v4f32 (IntId VR128:$src1, (load addr:$src2))))]>;
class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
  : S3I<o, MRMSrcReg,
        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
        [(set VR128:$dst,
              (v2f64 (IntId VR128:$src1, VR128:$src2)))]>;
class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
  : S3I<o, MRMSrcMem,
        (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
        [(set VR128:$dst,
              (v2f64 (IntId VR128:$src1, (load addr:$src2))))]>;
2270
// Horizontal add / subtract, single (PS) and double (PD) precision, each in
// a register and a memory form. Two-address: $dst is tied to $src1.
let isTwoAddress = 1 in {
  // Horizontal add (opcode 0x7C).
  def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>;
  def HADDPSrm : S3D_Intrm<0x7C, "haddps", int_x86_sse3_hadd_ps>;
  def HADDPDrr : S3_Intrr <0x7C, "haddpd", int_x86_sse3_hadd_pd>;
  def HADDPDrm : S3_Intrm <0x7C, "haddpd", int_x86_sse3_hadd_pd>;

  // Horizontal subtract (opcode 0x7D).
  def HSUBPSrr : S3D_Intrr<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
  def HSUBPSrm : S3D_Intrm<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
  def HSUBPDrr : S3_Intrr <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
  def HSUBPDrm : S3_Intrm <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
}
2281
// Thread synchronization
// monitor / mwait take no explicit operands; their patterns name the fixed
// registers (EAX, ECX, EDX / ECX, EAX) passed to the intrinsics.
// NOTE(review): the architectural encodings are 0F 01 C8 (monitor) and
// 0F 01 C9 (mwait). Confirm that RawFrm + TB with opcode 0xC8 / 0xC9 emits
// the intermediate 0x01 byte; otherwise these encode as 0F C8 / 0F C9.
def MONITOR : I<0xC8, RawFrm, (outs), (ins),
                "monitor",
                [(int_x86_sse3_monitor EAX, ECX, EDX)]>,
              TB, Requires<[HasSSE3]>;
def MWAIT   : I<0xC9, RawFrm, (outs), (ins),
                "mwait",
                [(int_x86_sse3_mwait ECX, EAX)]>,
              TB, Requires<[HasSSE3]>;
2287
// Reuse the MOVSHDUP / MOVSLDUP instructions for the equivalent v4i32
// shuffles. The memory forms match a v2i64 memop reinterpreted as v4i32 and
// get the higher AddedComplexity.

// vector_shuffle v1, <undef> <1, 1, 3, 3>
let AddedComplexity = 15 in {
  def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
                                   MOVSHDUP_shuffle_mask)),
            (MOVSHDUPrr VR128:$src)>,
        Requires<[HasSSE3]>;
}
let AddedComplexity = 20 in {
  def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (memopv2i64 addr:$src)),
                                   (undef),
                                   MOVSHDUP_shuffle_mask)),
            (MOVSHDUPrm addr:$src)>,
        Requires<[HasSSE3]>;
}

// vector_shuffle v1, <undef> <0, 0, 2, 2>
let AddedComplexity = 15 in {
  def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
                                   MOVSLDUP_shuffle_mask)),
            (MOVSLDUPrr VR128:$src)>,
        Requires<[HasSSE3]>;
}
let AddedComplexity = 20 in {
  def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (memopv2i64 addr:$src)),
                                   (undef),
                                   MOVSLDUP_shuffle_mask)),
            (MOVSLDUPrm addr:$src)>,
        Requires<[HasSSE3]>;
}
2307
2308//===----------------------------------------------------------------------===//
2309// SSSE3 Instructions
2310//===----------------------------------------------------------------------===//
2311
Bill Wendling3b15d722007-08-11 09:52:53 +00002312// SSSE3 Instruction Templates:
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002313//
Bill Wendling98680292007-08-10 06:22:27 +00002314// SS38I - SSSE3 instructions with T8 prefix.
2315// SS3AI - SSSE3 instructions with TA prefix.
Bill Wendling3b15d722007-08-11 09:52:53 +00002316//
2317// Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit version
2318// uses the MMX registers. We put those instructions here because they better
2319// fit into the SSSE3 instruction category rather than the MMX category.
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002320
// SS38I - base class for SSSE3 instructions: a plain I<> instruction carrying
// the T8 prefix and predicated on HasSSSE3.
class SS38I<bits<8> o, Format F, dag outs, dag ins,
            string asm, list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern>,
    T8, Requires<[HasSSSE3]>;
// SS3AI - base class for SSSE3 instructions carrying the TA prefix and
// predicated on HasSSSE3.
//
// The instructions built on this class (PALIGNR*) take a trailing 8-bit
// immediate. Deriving from plain I<> declares no immediate, so the imm8 byte
// would be missing from the encoding; derive from Ii8 instead so that it is
// emitted.
// NOTE(review): Ii8 is assumed to be the generic "instruction with an 8-bit
// immediate" class declared alongside I<> in the X86 instruction definitions
// -- confirm it is in scope where this file is included.
class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern>
  : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSSE3]>;
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002327
/// SS3I_unop_rm_int_8 - Simple SSSE3 unary operator whose type is v*i8.
///
/// Expands to four instructions: rr64 / rm64 operate on VR64 (64-bit form)
/// and rr128 / rm128 operate on VR128 with the OpSize prefix (128-bit form).
/// The memory forms match a v8i8 / v16i8 memop passed through bitconvert.
///
/// These are deliberately NOT two-address: there is a single source operand
/// and the destination is written independently of it, so tying $dst to the
/// first input (which is a memory operand in the rm forms) would be wrong.
///
/// The Commutable parameter is accepted only for source compatibility with
/// existing users; a one-input operation has nothing to commute, so it is
/// ignored.
multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr,
                              Intrinsic IntId64, Intrinsic IntId128,
                              bit Commutable = 0> {
  def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR64:$dst, (IntId64 VR64:$src))]>;
  def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR64:$dst,
                     (IntId64 (bitconvert (memopv8i8 addr:$src))))]>;

  def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (IntId128 VR128:$src))]>,
                    OpSize;
  def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins i128mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                      (IntId128
                       (bitconvert (memopv16i8 addr:$src))))]>, OpSize;
}
2358
/// SS3I_unop_rm_int_16 - Simple SSSE3 unary operator whose type is v*i16.
///
/// Expands to four instructions: rr64 / rm64 operate on VR64 (64-bit form)
/// and rr128 / rm128 operate on VR128 with the OpSize prefix (128-bit form).
/// The memory forms match a v4i16 / v8i16 memop passed through bitconvert.
///
/// These are deliberately NOT two-address: there is a single source operand
/// and the destination is written independently of it, so tying $dst to the
/// first input (which is a memory operand in the rm forms) would be wrong.
///
/// The Commutable parameter is accepted only for source compatibility with
/// existing users; a one-input operation has nothing to commute, so it is
/// ignored.
multiclass SS3I_unop_rm_int_16<bits<8> opc, string OpcodeStr,
                               Intrinsic IntId64, Intrinsic IntId128,
                               bit Commutable = 0> {
  def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
                   (ins VR64:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR64:$dst, (IntId64 VR64:$src))]>;
  def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
                   (ins i64mem:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR64:$dst,
                     (IntId64
                      (bitconvert (memopv4i16 addr:$src))))]>;

  def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (IntId128 VR128:$src))]>,
                    OpSize;
  def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins i128mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                      (IntId128
                       (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
}
2392
/// SS3I_unop_rm_int_32 - Simple SSSE3 unary operator whose type is v*i32.
///
/// Expands to four instructions: rr64 / rm64 operate on VR64 (64-bit form)
/// and rr128 / rm128 operate on VR128 with the OpSize prefix (128-bit form).
/// The memory forms match a v2i32 / v4i32 memop passed through bitconvert.
///
/// These are deliberately NOT two-address: there is a single source operand
/// and the destination is written independently of it, so tying $dst to the
/// first input (which is a memory operand in the rm forms) would be wrong.
///
/// The Commutable parameter is accepted only for source compatibility with
/// existing users; a one-input operation has nothing to commute, so it is
/// ignored.
multiclass SS3I_unop_rm_int_32<bits<8> opc, string OpcodeStr,
                               Intrinsic IntId64, Intrinsic IntId128,
                               bit Commutable = 0> {
  def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
                   (ins VR64:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR64:$dst, (IntId64 VR64:$src))]>;
  def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
                   (ins i64mem:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR64:$dst,
                     (IntId64
                      (bitconvert (memopv2i32 addr:$src))))]>;

  def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (IntId128 VR128:$src))]>,
                    OpSize;
  def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins i128mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                      (IntId128
                       (bitconvert (memopv4i32 addr:$src))))]>, OpSize;
}
2426
// pabsb / pabsw / pabsd for byte, word and dword elements. Each defm expands
// to the 64-bit and 128-bit register and memory forms.
defm PABSB : SS3I_unop_rm_int_8 <0x1C, "pabsb", int_x86_ssse3_pabs_b,
                                 int_x86_ssse3_pabs_b_128>;
defm PABSW : SS3I_unop_rm_int_16<0x1D, "pabsw", int_x86_ssse3_pabs_w,
                                 int_x86_ssse3_pabs_w_128>;
defm PABSD : SS3I_unop_rm_int_32<0x1E, "pabsd", int_x86_ssse3_pabs_d,
                                 int_x86_ssse3_pabs_d_128>;
2436
/// SS3I_binop_rm_int_8 - Simple SSSE3 binary operator whose type is v*i8.
///
/// Expands to rr64 / rm64 (VR64) and rr128 / rm128 (VR128, OpSize prefix).
/// Two-address: $dst is tied to $src1. Only the register-register forms
/// honour the Commutable flag; the memory forms match a v8i8 / v16i8 memop
/// passed through bitconvert as the second operand.
let isTwoAddress = 1 in {
  multiclass SS3I_binop_rm_int_8<bits<8> opc, string OpcodeStr,
                                 Intrinsic IntId64, Intrinsic IntId128,
                                 bit Commutable = 0> {
    // 64-bit forms.
    def rr64 : SS38I<opc, MRMSrcReg,
                     (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     [(set VR64:$dst,
                           (IntId64 VR64:$src1, VR64:$src2))]> {
      let isCommutable = Commutable;
    }
    def rm64 : SS38I<opc, MRMSrcMem,
                     (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     [(set VR64:$dst,
                           (IntId64 VR64:$src1,
                                    (bitconvert
                                     (memopv8i8 addr:$src2))))]>;

    // 128-bit forms.
    def rr128 : SS38I<opc, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst,
                            (IntId128 VR128:$src1, VR128:$src2))]>,
                OpSize {
      let isCommutable = Commutable;
    }
    def rm128 : SS38I<opc, MRMSrcMem,
                      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst,
                            (IntId128 VR128:$src1,
                                      (bitconvert
                                       (memopv16i8 addr:$src2))))]>,
                OpSize;
  }
}
2470
/// SS3I_binop_rm_int_16 - Simple SSSE3 binary operator whose type is v*i16.
///
/// Expands to rr64 / rm64 (VR64) and rr128 / rm128 (VR128, OpSize prefix).
/// Two-address: $dst is tied to $src1. Only the register-register forms
/// honour the Commutable flag; the memory forms match a v4i16 / v8i16 memop
/// passed through bitconvert as the second operand.
let isTwoAddress = 1 in {
  multiclass SS3I_binop_rm_int_16<bits<8> opc, string OpcodeStr,
                                  Intrinsic IntId64, Intrinsic IntId128,
                                  bit Commutable = 0> {
    // 64-bit forms.
    def rr64 : SS38I<opc, MRMSrcReg,
                     (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     [(set VR64:$dst,
                           (IntId64 VR64:$src1, VR64:$src2))]> {
      let isCommutable = Commutable;
    }
    def rm64 : SS38I<opc, MRMSrcMem,
                     (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     [(set VR64:$dst,
                           (IntId64 VR64:$src1,
                                    (bitconvert
                                     (memopv4i16 addr:$src2))))]>;

    // 128-bit forms.
    def rr128 : SS38I<opc, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst,
                            (IntId128 VR128:$src1, VR128:$src2))]>,
                OpSize {
      let isCommutable = Commutable;
    }
    def rm128 : SS38I<opc, MRMSrcMem,
                      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst,
                            (IntId128 VR128:$src1,
                                      (bitconvert
                                       (memopv8i16 addr:$src2))))]>,
                OpSize;
  }
}
2504
/// SS3I_binop_rm_int_32 - Simple SSSE3 binary operator whose type is v*i32.
///
/// Expands to rr64 / rm64 (VR64) and rr128 / rm128 (VR128, OpSize prefix).
/// Two-address: $dst is tied to $src1. Only the register-register forms
/// honour the Commutable flag; the memory forms match a v2i32 / v4i32 memop
/// passed through bitconvert as the second operand.
let isTwoAddress = 1 in {
  multiclass SS3I_binop_rm_int_32<bits<8> opc, string OpcodeStr,
                                  Intrinsic IntId64, Intrinsic IntId128,
                                  bit Commutable = 0> {
    // 64-bit forms.
    def rr64 : SS38I<opc, MRMSrcReg,
                     (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     [(set VR64:$dst,
                           (IntId64 VR64:$src1, VR64:$src2))]> {
      let isCommutable = Commutable;
    }
    def rm64 : SS38I<opc, MRMSrcMem,
                     (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     [(set VR64:$dst,
                           (IntId64 VR64:$src1,
                                    (bitconvert
                                     (memopv2i32 addr:$src2))))]>;

    // 128-bit forms.
    def rr128 : SS38I<opc, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst,
                            (IntId128 VR128:$src1, VR128:$src2))]>,
                OpSize {
      let isCommutable = Commutable;
    }
    def rm128 : SS38I<opc, MRMSrcMem,
                      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                      [(set VR128:$dst,
                            (IntId128 VR128:$src1,
                                      (bitconvert
                                       (memopv4i32 addr:$src2))))]>,
                OpSize;
  }
}
2538
// Packed horizontal adds (word, dword, and saturating word).
//
// These must NOT be marked commutable: the low half of the result is formed
// from adjacent pairs of the first (tied) operand and the high half from
// adjacent pairs of the second operand, so swapping the operands swaps the
// halves of the result. The Commutable argument is therefore left at its
// default of 0.
defm PHADDW      : SS3I_binop_rm_int_16<0x01, "phaddw",
                                        int_x86_ssse3_phadd_w,
                                        int_x86_ssse3_phadd_w_128>;
defm PHADDD      : SS3I_binop_rm_int_32<0x02, "phaddd",
                                        int_x86_ssse3_phadd_d,
                                        int_x86_ssse3_phadd_d_128>;
defm PHADDSW     : SS3I_binop_rm_int_16<0x03, "phaddsw",
                                        int_x86_ssse3_phadd_sw,
                                        int_x86_ssse3_phadd_sw_128>;
// Packed horizontal subtracts (word, dword, and the "sw" word variant).
// Commutable is left at its default of 0.
defm PHSUBW  : SS3I_binop_rm_int_16<0x05, "phsubw", int_x86_ssse3_phsub_w,
                                    int_x86_ssse3_phsub_w_128>;
defm PHSUBD  : SS3I_binop_rm_int_32<0x06, "phsubd", int_x86_ssse3_phsub_d,
                                    int_x86_ssse3_phsub_d_128>;
defm PHSUBSW : SS3I_binop_rm_int_16<0x07, "phsubsw", int_x86_ssse3_phsub_sw,
                                    int_x86_ssse3_phsub_sw_128>;
// pmaddubsw multiplies the UNSIGNED bytes of the first (tied) operand by the
// SIGNED bytes of the second operand, so the operands are not
// interchangeable and the instruction must not be marked commutable.
defm PMADDUBSW   : SS3I_binop_rm_int_8 <0x04, "pmaddubsw",
                                        int_x86_ssse3_pmadd_ub_sw,
                                        int_x86_ssse3_pmadd_ub_sw_128>;
// pmulhrsw is the only one of these marked commutable (trailing 1).
defm PMULHRSW : SS3I_binop_rm_int_16<0x0B, "pmulhrsw",
                                     int_x86_ssse3_pmul_hr_sw,
                                     int_x86_ssse3_pmul_hr_sw_128, 1>;
// pshufb: byte shuffle; Commutable left at its default of 0.
defm PSHUFB   : SS3I_binop_rm_int_8 <0x00, "pshufb",
                                     int_x86_ssse3_pshuf_b,
                                     int_x86_ssse3_pshuf_b_128>;
// psignb / psignw: byte and word forms; Commutable left at its default of 0.
defm PSIGNB   : SS3I_binop_rm_int_8 <0x08, "psignb",
                                     int_x86_ssse3_psign_b,
                                     int_x86_ssse3_psign_b_128>;
defm PSIGNW   : SS3I_binop_rm_int_16<0x09, "psignw",
                                     int_x86_ssse3_psign_w,
                                     int_x86_ssse3_psign_w_128>;
// psignd is opcode 0F 38 0A. (0x09 is psignw; reusing it here would make
// psignd encode as psignw.)
defm PSIGND      : SS3I_binop_rm_int_32<0x0A, "psignd",
                                        int_x86_ssse3_psign_d,
                                        int_x86_ssse3_psign_d_128>;
2575
// palignr, in 64-bit (VR64) and 128-bit (VR128, OpSize prefix) forms, each
// with a register and a memory second source. Two-address: $dst is tied to
// $src1. $src3 is the immediate passed through to the intrinsic.
//
// The memory forms use MRMSrcMem (their second source is a memory operand,
// not a register), and the assembly string prints the $src3 immediate, which
// palignr requires as an operand.
let isTwoAddress = 1 in {
  def PALIGNR64rr  : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
                           (ins VR64:$src1, VR64:$src2, i16imm:$src3),
                           "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                           [(set VR64:$dst,
                             (int_x86_ssse3_palign_r
                              VR64:$src1, VR64:$src2,
                              imm:$src3))]>;
  def PALIGNR64rm  : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
                           (ins VR64:$src1, i64mem:$src2, i16imm:$src3),
                           "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                           [(set VR64:$dst,
                             (int_x86_ssse3_palign_r
                              VR64:$src1,
                              (bitconvert (memopv2i32 addr:$src2)),
                              imm:$src3))]>;

  def PALIGNR128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
                           (ins VR128:$src1, VR128:$src2, i32imm:$src3),
                           "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                           [(set VR128:$dst,
                             (int_x86_ssse3_palign_r_128
                              VR128:$src1, VR128:$src2,
                              imm:$src3))]>, OpSize;
  def PALIGNR128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
                           (ins VR128:$src1, i128mem:$src2, i32imm:$src3),
                           "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                           [(set VR128:$dst,
                             (int_x86_ssse3_palign_r_128
                              VR128:$src1,
                              (bitconvert (memopv4i32 addr:$src2)),
                              imm:$src3))]>, OpSize;
}
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002609
2610//===----------------------------------------------------------------------===//
2611// Non-Instruction Patterns
2612//===----------------------------------------------------------------------===//
2613
// 128-bit vector undefs, selected as IMPLICIT_DEF_VR128.
let Predicates = [HasSSE2] in {
  def : Pat<(v4f32 (undef)), (IMPLICIT_DEF_VR128)>;
  def : Pat<(v2f64 (undef)), (IMPLICIT_DEF_VR128)>;
  def : Pat<(v16i8 (undef)), (IMPLICIT_DEF_VR128)>;
  def : Pat<(v8i16 (undef)), (IMPLICIT_DEF_VR128)>;
  def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>;
  def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>;
}

// 128-bit vector all-zeros constants, selected as V_SET0.
let Predicates = [HasSSE2] in {
  def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
  def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
  def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
  def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
  def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
}

// 128-bit vector all-ones constants, selected as V_SETALLONES.
let Predicates = [HasSSE2] in {
  def : Pat<(v16i8 immAllOnesV), (V_SETALLONES)>;
  def : Pat<(v8i16 immAllOnesV), (V_SETALLONES)>;
  def : Pat<(v4i32 immAllOnesV), (V_SETALLONES)>;
  def : Pat<(v2i64 immAllOnesV), (V_SETALLONES)>;
}
// NOTE(review): this v4f32 pattern is the only one predicated on HasSSE1
// rather than HasSSE2 -- confirm V_SETALLONES is actually available with
// SSE1 alone.
def : Pat<(v4f32 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE1]>;

2635
Dan Gohmanf17a25c2007-07-18 16:29:46 +00002636
// Scalar to v8i16 / v16i8. The source may be a GR32, but only the lower 8 or
// 16-bits matter.
// Both element types are selected as a plain movd from the GR32.
let Predicates = [HasSSE2] in {
  def : Pat<(v8i16 (X86s2vec GR32:$src)), (MOVDI2PDIrr GR32:$src)>;
  def : Pat<(v16i8 (X86s2vec GR32:$src)), (MOVDI2PDIrr GR32:$src)>;
}
2643
// bit_convert
// A bitconvert between any two distinct 128-bit vector types is a no-op on
// the VR128 register: the result pattern is just the source register given
// the destination type. Patterns are grouped by destination type.
let Predicates = [HasSSE2] in {
  // -> v2i64
  def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;

  // -> v4i32
  def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;

  // -> v8i16
  def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;

  // -> v16i8
  def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;

  // -> v4f32
  def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;

  // -> v2f64
  def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
}
2677
// Move scalar to XMM zero-extended
// Each pattern matches a MOVL shuffle of an all-zeros vector with a vector
// built from a scalar.
let AddedComplexity = 15 in {
  let Predicates = [HasSSE2] in {
    // movd to XMM register zero-extends
    def : Pat<(v8i16 (vector_shuffle immAllZerosV,
                                     (v8i16 (X86s2vec GR32:$src)),
                                     MOVL_shuffle_mask)),
              (MOVZDI2PDIrr GR32:$src)>;
    def : Pat<(v16i8 (vector_shuffle immAllZerosV,
                                     (v16i8 (X86s2vec GR32:$src)),
                                     MOVL_shuffle_mask)),
              (MOVZDI2PDIrr GR32:$src)>;

    // Zeroing a VR128 then do a MOVS{S|D} to the lower bits.
    def : Pat<(v2f64 (vector_shuffle immAllZerosV,
                                     (v2f64 (scalar_to_vector FR64:$src)),
                                     MOVL_shuffle_mask)),
              (MOVLSD2PDrr (V_SET0), FR64:$src)>;
    def : Pat<(v4f32 (vector_shuffle immAllZerosV,
                                     (v4f32 (scalar_to_vector FR32:$src)),
                                     MOVL_shuffle_mask)),
              (MOVLSS2PSrr (V_SET0), FR32:$src)>;
  }
}
2695
// Splat v2f64 / v2i64
// A unary shuffle matching SSE_splat_lo_mask / UNPCKH_shuffle_mask is
// selected as an unpack-low / unpack-high of the register with itself.
let AddedComplexity = 10 in {
  let Predicates = [HasSSE2] in {
    // v2f64
    def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef),
                              SSE_splat_lo_mask:$sm),
              (UNPCKLPDrr VR128:$src, VR128:$src)>;
    def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef),
                              UNPCKH_shuffle_mask:$sm),
              (UNPCKHPDrr VR128:$src, VR128:$src)>;

    // v2i64
    def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef),
                              SSE_splat_lo_mask:$sm),
              (PUNPCKLQDQrr VR128:$src, VR128:$src)>;
    def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef),
                              UNPCKH_shuffle_mask:$sm),
              (PUNPCKHQDQrr VR128:$src, VR128:$src)>;
  }
}
2707
// Splat v4f32: SHUFPS of the register with itself, reusing the splat mask as
// the shuffle immediate.
def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef),
                          SSE_splat_mask:$sm),
          (SHUFPSrri VR128:$src, VR128:$src, SSE_splat_mask:$sm)>,
      Requires<[HasSSE1]>;
2712
// Unary v4f32 shuffle selected as SHUFPSrri with the same register supplied
// for both inputs.
// FIXME: when we want non two-address code, then we should use PSHUFD?
def : Pat<(vector_shuffle (v4f32 VR128:$src1), (undef),
                          SHUFP_unary_shuffle_mask:$sm),
          (SHUFPSrri VR128:$src1, VR128:$src1,
                     SHUFP_unary_shuffle_mask:$sm)>,
      Requires<[HasSSE1]>;
// Unary v2f64 shuffle selected as SHUFPDrri with the same register supplied
// for both inputs.
def : Pat<(vector_shuffle (v2f64 VR128:$src1), (undef),
                          SHUFP_unary_shuffle_mask:$sm),
          (SHUFPDrri VR128:$src1, VR128:$src1,
                     SHUFP_unary_shuffle_mask:$sm)>,
      Requires<[HasSSE2]>;
// Unary v4f32 shuffle whose input comes from memory: use PSHUF* so the load
// can be folded into the shuffle.
def : Pat<(vector_shuffle (memopv4f32 addr:$src1), (undef),
                          SHUFP_unary_shuffle_mask:$sm),
          (PSHUFDmi addr:$src1, SHUFP_unary_shuffle_mask:$sm)>,
      Requires<[HasSSE2]>;
// Binary v4i32 shuffles that can be handled by SHUFPS.
// Register / register form.
def : Pat<(vector_shuffle (v4i32 VR128:$src1), (v4i32 VR128:$src2),
                          PSHUFD_binary_shuffle_mask:$sm),
          (SHUFPSrri VR128:$src1, VR128:$src2,
                     PSHUFD_binary_shuffle_mask:$sm)>,
      Requires<[HasSSE2]>;
// Register / memory form: the second input is a v2i64 load bitcast to v4i32.
def : Pat<(vector_shuffle (v4i32 VR128:$src1),
                          (bc_v4i32 (memopv2i64 addr:$src2)),
                          PSHUFD_binary_shuffle_mask:$sm),
          (SHUFPSrmi VR128:$src1, addr:$src2,
                     PSHUFD_binary_shuffle_mask:$sm)>,
      Requires<[HasSSE2]>;
2738
// vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
// Selected as an unpack-low of the register with itself.
// UNPCKLPS is an SSE1 instruction, whereas the integer PUNPCKL* forms that
// operate on XMM registers were introduced with SSE2; the predicates below
// follow the instruction actually emitted.
let AddedComplexity = 10 in {
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
                  UNPCKL_v_undef_shuffle_mask)),
          (UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef),
                  UNPCKL_v_undef_shuffle_mask)),
          (PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef),
                  UNPCKL_v_undef_shuffle_mask)),
          (PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
                  UNPCKL_v_undef_shuffle_mask)),
          (PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
}
2754
// vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
// Selected as an unpack-high of the register with itself.
// UNPCKHPS is an SSE1 instruction, whereas the integer PUNPCKH* forms that
// operate on XMM registers were introduced with SSE2; the predicates below
// follow the instruction actually emitted.
let AddedComplexity = 10 in {
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
                  UNPCKH_v_undef_shuffle_mask)),
          (UNPCKHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef),
                  UNPCKH_v_undef_shuffle_mask)),
          (PUNPCKHBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef),
                  UNPCKH_v_undef_shuffle_mask)),
          (PUNPCKHWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
                  UNPCKH_v_undef_shuffle_mask)),
          (PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
}
2770
// Register-only shuffles selected as MOVLHPS / MOVHLPS.
let AddedComplexity = 15 in {
  // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
  def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
                                   MOVHP_shuffle_mask)),
            (MOVLHPSrr VR128:$src1, VR128:$src2)>;

  // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
  def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
                                   MOVHLPS_shuffle_mask)),
            (MOVHLPSrr VR128:$src1, VR128:$src2)>;

  // vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS; the single input is
  // passed as both operands.
  def : Pat<(v4f32 (vector_shuffle VR128:$src1, (undef),
                                   MOVHLPS_v_undef_shuffle_mask)),
            (MOVHLPSrr VR128:$src1, VR128:$src1)>;
  def : Pat<(v4i32 (vector_shuffle VR128:$src1, (undef),
                                   MOVHLPS_v_undef_shuffle_mask)),
            (MOVHLPSrr VR128:$src1, VR128:$src1)>;
}
2790
// Shuffles whose second input is a load, folded into MOVLP{S|D} / MOVHP{S|D}.
// Each element type gets one MOVLP (replace the low half) and one MOVHP
// (replace the high half) pattern.
let AddedComplexity = 20 in {
// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
// vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS
def : Pat<(v4f32 (vector_shuffle VR128:$src1, (memopv4f32 addr:$src2),
                  MOVLP_shuffle_mask)),
          (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
def : Pat<(v2f64 (vector_shuffle VR128:$src1, (memopv2f64 addr:$src2),
                  MOVLP_shuffle_mask)),
          (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v4f32 (vector_shuffle VR128:$src1, (memopv4f32 addr:$src2),
                  MOVHP_shuffle_mask)),
          (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
def : Pat<(v2f64 (vector_shuffle VR128:$src1, (memopv2f64 addr:$src2),
                  MOVHP_shuffle_mask)),
          (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;

// Integer counterparts of the four patterns above.
def : Pat<(v4i32 (vector_shuffle VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)),
                  MOVLP_shuffle_mask)),
          (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (vector_shuffle VR128:$src1, (memopv2i64 addr:$src2),
                  MOVLP_shuffle_mask)),
          (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)),
                  MOVHP_shuffle_mask)),
          (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
// v2i64 high-half replacement. This used to be a second copy of the v2i64
// MOVLP pattern, which left the MOVHP form without a pattern.
def : Pat<(v2i64 (vector_shuffle VR128:$src1, (memopv2i64 addr:$src2),
                  MOVHP_shuffle_mask)),
          (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
}
2820
// Register-only shuffles that overwrite the low part of $src1 with $src2.
let AddedComplexity = 15 in {
  // Setting the lowest element in the vector.
  def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
                                   MOVL_shuffle_mask)),
            (MOVLPSrr VR128:$src1, VR128:$src2)>,
        Requires<[HasSSE2]>;
  def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
                                   MOVL_shuffle_mask)),
            (MOVLPDrr VR128:$src1, VR128:$src2)>,
        Requires<[HasSSE2]>;

  // vector_shuffle v1, v2 <4, 5, 2, 3> using MOVLPDrr (movsd)
  def : Pat<(v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
                                   MOVLP_shuffle_mask)),
            (MOVLPDrr VR128:$src1, VR128:$src2)>,
        Requires<[HasSSE2]>;
  def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
                                   MOVLP_shuffle_mask)),
            (MOVLPDrr VR128:$src1, VR128:$src2)>,
        Requires<[HasSSE2]>;
}
2838
// Set the lowest element from a loaded f64 and zero the upper elements; the
// v2f64 shuffle result is viewed as v2i64 and selected as MOVZQI2PQIrm.
let AddedComplexity = 20 in
def : Pat<(bc_v2i64
            (vector_shuffle immAllZerosV,
                            (v2f64 (scalar_to_vector (loadf64 addr:$src))),
                            MOVL_shuffle_mask)),
          (MOVZQI2PQIrm addr:$src)>,
      Requires<[HasSSE2]>;
2845
// FIXME: Temporary workaround since 2-wide shuffle is broken.
// Map the two-element SSE2 shuffle intrinsics directly onto instructions.
// Every result is given an explicit vector type (v2f64 or v2i64).
// NOTE(review): the memory forms below match a plain `load`, while other
// load-folding patterns in this file use the memop* fragments - confirm
// whether that difference is intentional.
def : Pat<(int_x86_sse2_movs_d VR128:$src1, VR128:$src2),
          (v2f64 (MOVLPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_loadh_pd VR128:$src1, addr:$src2),
          (v2f64 (MOVHPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_loadl_pd VR128:$src1, addr:$src2),
          (v2f64 (MOVLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_shuf_pd VR128:$src1, VR128:$src2, imm:$src3),
          (v2f64 (SHUFPDrri VR128:$src1, VR128:$src2, imm:$src3))>,
      Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_shuf_pd VR128:$src1, (load addr:$src2), imm:$src3),
          (v2f64 (SHUFPDrmi VR128:$src1, addr:$src2, imm:$src3))>,
      Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_unpckh_pd VR128:$src1, VR128:$src2),
          (v2f64 (UNPCKHPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_unpckh_pd VR128:$src1, (load addr:$src2)),
          (v2f64 (UNPCKHPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_unpckl_pd VR128:$src1, VR128:$src2),
          (v2f64 (UNPCKLPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_unpckl_pd VR128:$src1, (load addr:$src2)),
          (v2f64 (UNPCKLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_punpckh_qdq VR128:$src1, VR128:$src2),
          (v2i64 (PUNPCKHQDQrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_punpckh_qdq VR128:$src1, (load addr:$src2)),
          (v2i64 (PUNPCKHQDQrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_punpckl_qdq VR128:$src1, VR128:$src2),
          (v2i64 (PUNPCKLQDQrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_punpckl_qdq VR128:$src1, (load addr:$src2)),
          (v2i64 (PUNPCKLQDQrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
2875
// Special-case pandn patterns: (and (xor x, all-ones), y) where the all-ones
// vector was built as v4i32, v8i16 or v16i8 and then bitcast to v2i64.

// Register second operand.
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
                      VR128:$src2)),
          (PANDNrr VR128:$src1, VR128:$src2)>,
      Requires<[HasSSE2]>;
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
                      VR128:$src2)),
          (PANDNrr VR128:$src1, VR128:$src2)>,
      Requires<[HasSSE2]>;
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
                      VR128:$src2)),
          (PANDNrr VR128:$src1, VR128:$src2)>,
      Requires<[HasSSE2]>;

// Memory second operand (folded v2i64 load).
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
                      (memopv2i64 addr:$src2))),
          (PANDNrm VR128:$src1, addr:$src2)>,
      Requires<[HasSSE2]>;
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
                      (memopv2i64 addr:$src2))),
          (PANDNrm VR128:$src1, addr:$src2)>,
      Requires<[HasSSE2]>;
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
                      (memopv2i64 addr:$src2))),
          (PANDNrm VR128:$src1, addr:$src2)>,
      Requires<[HasSSE2]>;
2896
// Use movaps / movups for SSE integer load / store (one byte shorter).
// Loads: the aligned fragments select MOVAPSrm, the plain ones MOVUPSrm.
def : Pat<(alignedloadv4i32 addr:$src), (MOVAPSrm addr:$src)>,
      Requires<[HasSSE1]>;
def : Pat<(loadv4i32 addr:$src), (MOVUPSrm addr:$src)>,
      Requires<[HasSSE1]>;
def : Pat<(alignedloadv2i64 addr:$src), (MOVAPSrm addr:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(loadv2i64 addr:$src), (MOVUPSrm addr:$src)>,
      Requires<[HasSSE2]>;
2906
// Integer vector stores also go through movaps / movups.
// Aligned stores select MOVAPSmr.
def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
          (MOVAPSmr addr:$dst, VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
          (MOVAPSmr addr:$dst, VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
          (MOVAPSmr addr:$dst, VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
          (MOVAPSmr addr:$dst, VR128:$src)>,
      Requires<[HasSSE2]>;
// Remaining stores select MOVUPSmr.
def : Pat<(store (v2i64 VR128:$src), addr:$dst),
          (MOVUPSmr addr:$dst, VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(store (v4i32 VR128:$src), addr:$dst),
          (MOVUPSmr addr:$dst, VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(store (v8i16 VR128:$src), addr:$dst),
          (MOVUPSmr addr:$dst, VR128:$src)>,
      Requires<[HasSSE2]>;
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
          (MOVUPSmr addr:$dst, VR128:$src)>,
      Requires<[HasSSE2]>;
Evan Cheng86ab7d32007-07-31 08:04:03 +00002923
// Extracting element 0 of a bitcast scalar_to_vector of a scalar FP load is
// just an integer load from the same address:
//   (vextract (v4i32 bc (v4f32 s2v (f32 load $addr))), 0) -> (i32 load $addr)
def : Pat<(vector_extract
            (bc_v4i32 (v4f32 (scalar_to_vector (loadf32 addr:$src)))),
            (iPTR 0)),
          (MOV32rm addr:$src)>,
      Requires<[HasSSE2]>;
// Same for f64 / i64; the 64-bit load is only available in 64-bit mode.
def : Pat<(vector_extract
            (bc_v2i64 (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
            (iPTR 0)),
          (MOV64rm addr:$src)>,
      Requires<[HasSSE2, In64BitMode]>;