blob: 2b84b6c46650cdcdcb737722ea8dbdd73ed46620 [file] [log] [blame]
// Collects the template arguments that are implied by a vector type
// (NumElts x EltVT), such as the write-mask register class. Multiclasses can
// then take one of these records as a single template argument instead of
// receiving each derived value separately.
class X86VectorVTInfo<int NumElts, ValueType EltVT, RegisterClass rc,
                      string suffix = ""> {
  // Vector register class, exactly as supplied by the user.
  RegisterClass RC = rc;

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // Mask register class matching the element count, e.g. VK16.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Write-mask variant of the mask register class, e.g. VK16WM.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // GPR register class that can hold the write mask; GR8 is used when there
  // are fewer than 8 elements. TableGen lacks !lt, so "NumElts < 8" is spelled
  // as "(NumElts >> 3) == 0".
  RegisterClass MRC =
    !cast<RegisterClass>("GR" # !if(!eq(!srl(NumElts, 3), 0), 8, NumElts));

  // The vector VT, e.g. v16i32.
  ValueType VT = !cast<ValueType>("v" # NumElts # EltVT);

  // Element type spelled as a string, e.g. "i32".
  string EltTypeName = !cast<string>(EltVT);

  // Size of the element type in bits, e.g. 32 for v16i32. Obtained by
  // stripping the "f"/"i" prefix from the element type name.
  string EltSize = !subst("i", "", !subst("f", "", EltTypeName));

  // "i" for integer types and "f" for floating-point types: whatever is left
  // of the element type name once the size is removed.
  string TypeVariantName = !subst(EltSize, "", EltTypeName);

  // Size of RC in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");

  // The corresponding float type, e.g. v16f32 for v16i32. Floating-point
  // vector types map to themselves.
  ValueType FloatVT = !if(!eq(TypeVariantName, "i"),
                          !cast<ValueType>("v" # NumElts # "f" # EltSize),
                          VT);

  // The string to specify embedded broadcast in assembly, e.g. {1to16}.
  string BroadcastStr = "{1to" # NumElts # "}";
}
49
// Type-info records for the 512-bit integer vector types; the last argument
// is the mnemonic suffix.
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
52
// Shared implementation behind AVX512_masking and AVX512_masking_3src.
// Emits three instructions per instantiation:
//   NAME    - unmasked form, pattern (set RC:$dst, RHS)
//   NAME#k  - merge-masking form, pattern (set RC:$dst, MaskingRHS)
//   NAME#kz - zero-masking form, selects between RHS and an all-zeros vector
multiclass AVX512_masking_common<bits<8> O, Format F, dag Outs, dag Ins,
                                 dag MaskingIns, dag ZeroMaskingIns,
                                 string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskingRHS, ValueType OpVT,
                                 RegisterClass RC, RegisterClass KRC,
                                 string MaskingConstraint = ""> {
  def NAME: AVX512<O, F, Outs, Ins,
                   OpcodeStr # " \t{" # AttSrcAsm # ", $dst|$dst, " #
                     IntelSrcAsm # "}",
                   [(set RC:$dst, RHS)]>;

  // Prefer over VMOV*rrk Pat<>
  let AddedComplexity = 20 in
  def NAME#k: AVX512<O, F, Outs, MaskingIns,
                     OpcodeStr # " \t{" # AttSrcAsm #
                       ", $dst {${mask}}|$dst {${mask}}, " #
                       IntelSrcAsm # "}",
                     [(set RC:$dst, MaskingRHS)]>,
              EVEX_K {
    // In case of the 3src subclass this is overridden with a let.
    string Constraints = MaskingConstraint;
  }

  // Prefer over VMOV*rrkz Pat<>
  let AddedComplexity = 30 in
  def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                      OpcodeStr # " \t{" # AttSrcAsm #
                        ", $dst {${mask}} {z}|$dst {${mask}} {z}, " #
                        IntelSrcAsm # "}",
                      [(set RC:$dst,
                            (vselect KRC:$mask, RHS,
                                     (OpVT (bitconvert
                                             (v16i32 immAllZerosV)))))]>,
               EVEX_KZ;
}
86
// Produces the unconditional (non-masking), the masking and the zero-masking
// variant of an instruction. For the masking variant the preserved vector
// elements come from a new dummy input operand, $src0, which is tied to $dst.
multiclass AVX512_masking<bits<8> O, Format F, dag Outs, dag Ins,
                          string OpcodeStr,
                          string AttSrcAsm, string IntelSrcAsm,
                          dag RHS, ValueType OpVT, RegisterClass RC,
                          RegisterClass KRC> :
  AVX512_masking_common<O, F, Outs,
                        // Unmasked inputs.
                        Ins,
                        // Masking inputs: pass-through value and mask first.
                        !con((ins RC:$src0, KRC:$mask), Ins),
                        // Zero-masking inputs: only the mask is prepended.
                        !con((ins KRC:$mask), Ins),
                        OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                        (vselect KRC:$mask, RHS, RC:$src0), OpVT, RC, KRC,
                        "$src0 = $dst">;
102
// Like AVX512_masking, except that one of the source operands ($src1) is
// already tied to $dst, so it doubles as the source of the preserved vector
// elements. NOTE: NonTiedIns (the ins dag) must not contain $src1; it is
// prepended here. No masking constraint is passed down, so the default ""
// applies.
multiclass AVX512_masking_3src<bits<8> O, Format F, dag Outs, dag NonTiedIns,
                               string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, ValueType OpVT,
                               RegisterClass RC, RegisterClass KRC> :
  AVX512_masking_common<O, F, Outs,
                        !con((ins RC:$src1), NonTiedIns),
                        // The masking and zero-masking forms take the same
                        // operands: $src1, the mask, then everything else.
                        !con((ins RC:$src1, KRC:$mask), NonTiedIns),
                        !con((ins RC:$src1, KRC:$mask), NonTiedIns),
                        OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                        (vselect KRC:$mask, RHS, RC:$src1), OpVT, RC, KRC>;
120
// Bitcasts between vector types of the same width. The source register is
// returned retyped, since no instruction is needed for the conversion.
// Every (destination, source) pair of distinct types is listed exactly once.
let Predicates = [HasAVX512] in {
  // Bitcasts between 512-bit vector types.
  def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
  def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
  def : Pat<(v8f64 (bitconvert (v32i16 VR512:$src))), (v8f64 VR512:$src)>;
  def : Pat<(v8f64 (bitconvert (v64i8 VR512:$src))), (v8f64 VR512:$src)>;
  def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v64i8 VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
  def : Pat<(v8i64 (bitconvert (v32i16 VR512:$src))), (v8i64 VR512:$src)>;
  def : Pat<(v8i64 (bitconvert (v64i8 VR512:$src))), (v8i64 VR512:$src)>;
  def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
  def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v8i64 VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v64i8 VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v8f64 VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v64i8 (bitconvert (v8i64 VR512:$src))), (v64i8 VR512:$src)>;
  def : Pat<(v64i8 (bitconvert (v16i32 VR512:$src))), (v64i8 VR512:$src)>;
  def : Pat<(v64i8 (bitconvert (v32i16 VR512:$src))), (v64i8 VR512:$src)>;
  def : Pat<(v64i8 (bitconvert (v8f64 VR512:$src))), (v64i8 VR512:$src)>;
  def : Pat<(v64i8 (bitconvert (v16f32 VR512:$src))), (v64i8 VR512:$src)>;

  // Bitcasts between 128-bit vector types.
  def : Pat<(v2i64 (bitconvert (v4i32 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v2f64 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v4f32 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v4f32 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v2i64 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v4i32 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v2i64 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 VR128X:$src))), (v2f64 VR128X:$src)>;

  // Bitcasts between 256-bit vector types.
  def : Pat<(v4f64 (bitconvert (v8f32 VR256X:$src))), (v4f64 VR256X:$src)>;
  def : Pat<(v4f64 (bitconvert (v8i32 VR256X:$src))), (v4f64 VR256X:$src)>;
  def : Pat<(v4f64 (bitconvert (v4i64 VR256X:$src))), (v4f64 VR256X:$src)>;
  def : Pat<(v4f64 (bitconvert (v16i16 VR256X:$src))), (v4f64 VR256X:$src)>;
  def : Pat<(v4f64 (bitconvert (v32i8 VR256X:$src))), (v4f64 VR256X:$src)>;
  def : Pat<(v8f32 (bitconvert (v8i32 VR256X:$src))), (v8f32 VR256X:$src)>;
  def : Pat<(v8f32 (bitconvert (v4i64 VR256X:$src))), (v8f32 VR256X:$src)>;
  def : Pat<(v8f32 (bitconvert (v4f64 VR256X:$src))), (v8f32 VR256X:$src)>;
  def : Pat<(v8f32 (bitconvert (v32i8 VR256X:$src))), (v8f32 VR256X:$src)>;
  def : Pat<(v8f32 (bitconvert (v16i16 VR256X:$src))), (v8f32 VR256X:$src)>;
  def : Pat<(v4i64 (bitconvert (v8f32 VR256X:$src))), (v4i64 VR256X:$src)>;
  def : Pat<(v4i64 (bitconvert (v8i32 VR256X:$src))), (v4i64 VR256X:$src)>;
  def : Pat<(v4i64 (bitconvert (v4f64 VR256X:$src))), (v4i64 VR256X:$src)>;
  def : Pat<(v4i64 (bitconvert (v32i8 VR256X:$src))), (v4i64 VR256X:$src)>;
  def : Pat<(v4i64 (bitconvert (v16i16 VR256X:$src))), (v4i64 VR256X:$src)>;
  def : Pat<(v32i8 (bitconvert (v4f64 VR256X:$src))), (v32i8 VR256X:$src)>;
  def : Pat<(v32i8 (bitconvert (v4i64 VR256X:$src))), (v32i8 VR256X:$src)>;
  def : Pat<(v32i8 (bitconvert (v8f32 VR256X:$src))), (v32i8 VR256X:$src)>;
  def : Pat<(v32i8 (bitconvert (v8i32 VR256X:$src))), (v32i8 VR256X:$src)>;
  def : Pat<(v32i8 (bitconvert (v16i16 VR256X:$src))), (v32i8 VR256X:$src)>;
  def : Pat<(v8i32 (bitconvert (v32i8 VR256X:$src))), (v8i32 VR256X:$src)>;
  def : Pat<(v8i32 (bitconvert (v16i16 VR256X:$src))), (v8i32 VR256X:$src)>;
  def : Pat<(v8i32 (bitconvert (v8f32 VR256X:$src))), (v8i32 VR256X:$src)>;
  def : Pat<(v8i32 (bitconvert (v4i64 VR256X:$src))), (v8i32 VR256X:$src)>;
  def : Pat<(v8i32 (bitconvert (v4f64 VR256X:$src))), (v8i32 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v8f32 VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v8i32 VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v4i64 VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v4f64 VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v32i8 VR256X:$src))), (v16i16 VR256X:$src)>;
}
220
//
// AVX-512: the VPXOR instruction writes zero to its upper part, so it is safe
// to use it to build zeros.
//

// Pseudo instruction producing an all-zeros 512-bit register. It has no
// inputs and an empty assembly string.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512] in
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                        [(set VR512:$dst, (v16f32 immAllZerosV))]>;
230
// Select the same zeroing pseudo for the remaining 512-bit all-zeros vector
// types listed here.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i64  immAllZerosV), (AVX512_512_SET0)>;
  def : Pat<(v16i32 immAllZerosV), (AVX512_512_SET0)>;
  def : Pat<(v8f64  immAllZerosV), (AVX512_512_SET0)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000236
237//===----------------------------------------------------------------------===//
238// AVX-512 - VECTOR INSERT
239//
240// -- 32x8 form --
Elena Demikhovskyf404e052014-01-05 14:21:07 +0000241let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000242def VINSERTF32x4rr : AVX512AIi8<0x18, MRMSrcReg, (outs VR512:$dst),
243 (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
244 "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
245 []>, EVEX_4V, EVEX_V512;
246let mayLoad = 1 in
247def VINSERTF32x4rm : AVX512AIi8<0x18, MRMSrcMem, (outs VR512:$dst),
248 (ins VR512:$src1, f128mem:$src2, i8imm:$src3),
249 "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
250 []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
251}
252
// -- 64x4 fp form --
// Register and memory forms of vinsertf64x4; both are pattern-less.
let hasSideEffects = 0 in
let ExeDomain = SSEPackedDouble in {
  def VINSERTF64x4rr : AVX512AIi8<0x1a, MRMSrcReg, (outs VR512:$dst),
            (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
            "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
            []>, EVEX_4V, EVEX_V512, VEX_W;

  // No pattern to infer the load from, so it is declared explicitly.
  let mayLoad = 1 in {
    def VINSERTF64x4rm : AVX512AIi8<0x1a, MRMSrcMem, (outs VR512:$dst),
            (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
            "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
            []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
  }
}
// -- 32x4 integer form --
// Register and memory forms of vinserti32x4; both are pattern-less.
let hasSideEffects = 0 in {
  def VINSERTI32x4rr : AVX512AIi8<0x38, MRMSrcReg, (outs VR512:$dst),
            (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
            "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
            []>, EVEX_4V, EVEX_V512;

  // No pattern to infer the load from, so it is declared explicitly.
  let mayLoad = 1 in {
    def VINSERTI32x4rm : AVX512AIi8<0x38, MRMSrcMem, (outs VR512:$dst),
            (ins VR512:$src1, i128mem:$src2, i8imm:$src3),
            "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
            []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
  }
}
277
// -- 64x4 integer form --
// Register and memory forms of vinserti64x4; both are pattern-less.
let hasSideEffects = 0 in {
  def VINSERTI64x4rr : AVX512AIi8<0x3a, MRMSrcReg, (outs VR512:$dst),
            (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
            "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
            []>, EVEX_4V, EVEX_V512, VEX_W;

  // No pattern to infer the load from, so it is declared explicitly.
  let mayLoad = 1 in {
    def VINSERTI64x4rm : AVX512AIi8<0x3a, MRMSrcMem, (outs VR512:$dst),
            (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
            "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
            []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
  }
}
290
// 128-bit subvector inserts into a 512-bit vector, register source.
// FP element types select VINSERTF32x4, integer element types VINSERTI32x4;
// the immediate is derived from the matched insert node ($ins).
def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (v4f32 VR128X:$src2),
                                  (iPTR imm)),
          (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (v2f64 VR128X:$src2),
                                  (iPTR imm)),
          (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (v2i64 VR128X:$src2),
                                  (iPTR imm)),
          (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), (v4i32 VR128X:$src2),
                                  (iPTR imm)),
          (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;

// Same inserts with the 128-bit value folded from memory.
def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (loadv4f32 addr:$src2),
                                  (iPTR imm)),
          (VINSERTF32x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1),
                                  (bc_v4i32 (loadv2i64 addr:$src2)),
                                  (iPTR imm)),
          (VINSERTI32x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (loadv2f64 addr:$src2),
                                  (iPTR imm)),
          (VINSERTF32x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (loadv2i64 addr:$src2),
                                  (iPTR imm)),
          (VINSERTI32x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;

317
// 256-bit subvector inserts into a 512-bit vector, register source.
// FP element types select VINSERTF64x4, integer element types VINSERTI64x4.
// All four patterns match on vinsert256_insert: the inserted value is a
// 256-bit VR256X register and the immediate is computed by
// INSERT_get_vinsert256_imm, so the 256-bit fragment is the one that applies
// (the integer patterns previously matched on vinsert128_insert).
def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (v8f32 VR256X:$src2),
           (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
            (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (v4f64 VR256X:$src2),
           (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
            (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (v4i64 VR256X:$src2),
           (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
            (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1), (v8i32 VR256X:$src2),
           (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
            (INSERT_get_vinsert256_imm VR512:$ins))>;

330
// 256-bit subvector inserts into a 512-bit vector with the inserted value
// folded from memory.
def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (loadv8f32 addr:$src2),
                                  (iPTR imm)),
          (VINSERTF64x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (loadv4f64 addr:$src2),
                                  (iPTR imm)),
          (VINSERTF64x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (loadv4i64 addr:$src2),
                                  (iPTR imm)),
          (VINSERTI64x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1),
                                  (bc_v8i32 (loadv4i64 addr:$src2)),
                                  (iPTR imm)),
          (VINSERTI64x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;

344
// vinsertps - insert f32 to XMM
// Register form: matches X86insertps directly on two XMM operands.
def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u32u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst,
            (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
      EVEX_4V;

// Memory form: the inserted value is a scalar f32 load placed into a vector.
def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u32u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst,
            (X86insertps VR128X:$src1,
                         (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                         imm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>;
357
358//===----------------------------------------------------------------------===//
359// AVX-512 VECTOR EXTRACT
360//---
Elena Demikhovskyf404e052014-01-05 14:21:07 +0000361let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000362// -- 32x4 form --
363def VEXTRACTF32x4rr : AVX512AIi8<0x19, MRMDestReg, (outs VR128X:$dst),
364 (ins VR512:$src1, i8imm:$src2),
365 "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
366 []>, EVEX, EVEX_V512;
367def VEXTRACTF32x4mr : AVX512AIi8<0x19, MRMDestMem, (outs),
368 (ins f128mem:$dst, VR512:$src1, i8imm:$src2),
369 "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
370 []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;
371
372// -- 64x4 form --
373def VEXTRACTF64x4rr : AVX512AIi8<0x1b, MRMDestReg, (outs VR256X:$dst),
374 (ins VR512:$src1, i8imm:$src2),
375 "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
376 []>, EVEX, EVEX_V512, VEX_W;
377let mayStore = 1 in
378def VEXTRACTF64x4mr : AVX512AIi8<0x1b, MRMDestMem, (outs),
379 (ins f256mem:$dst, VR512:$src1, i8imm:$src2),
380 "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
381 []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
382}
383
// Integer extracts. As with the FP forms, the defs are pattern-less and
// hasSideEffects is cleared, so both memory-destination forms state mayStore
// explicitly.
let hasSideEffects = 0 in {
// -- 32x4 form --
def VEXTRACTI32x4rr : AVX512AIi8<0x39, MRMDestReg, (outs VR128X:$dst),
            (ins VR512:$src1, i8imm:$src2),
            "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
            []>, EVEX, EVEX_V512;
// Previously missing mayStore, unlike VEXTRACTI64x4mr below.
let mayStore = 1 in
def VEXTRACTI32x4mr : AVX512AIi8<0x39, MRMDestMem, (outs),
            (ins i128mem:$dst, VR512:$src1, i8imm:$src2),
            "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
            []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;

// -- 64x4 form --
def VEXTRACTI64x4rr : AVX512AIi8<0x3b, MRMDestReg, (outs VR256X:$dst),
            (ins VR512:$src1, i8imm:$src2),
            "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
            []>, EVEX, EVEX_V512, VEX_W;
let mayStore = 1 in
def VEXTRACTI64x4mr : AVX512AIi8<0x3b, MRMDestMem, (outs),
            (ins i256mem:$dst, VR512:$src1, i8imm:$src2),
            "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
            []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
}
406
// 128-bit subvector extracts from a 512-bit vector. FP element types select
// VEXTRACTF32x4rr and integer element types select VEXTRACTI32x4rr. The
// immediate is derived from the matched extract node ($ext).
def : Pat<(vextract128_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
          (v4f32 (VEXTRACTF32x4rr VR512:$src1,
                  (EXTRACT_get_vextract128_imm VR128X:$ext)))>;

// The v4i32 case names its source type explicitly and uses the integer
// instruction, in line with the v2i64 pattern below (it previously left the
// source untyped and selected the FP-domain VEXTRACTF32x4rr).
def : Pat<(vextract128_extract:$ext (v16i32 VR512:$src1), (iPTR imm)),
          (v4i32 (VEXTRACTI32x4rr VR512:$src1,
                  (EXTRACT_get_vextract128_imm VR128X:$ext)))>;

def : Pat<(vextract128_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
          (v2f64 (VEXTRACTF32x4rr VR512:$src1,
                  (EXTRACT_get_vextract128_imm VR128X:$ext)))>;

def : Pat<(vextract128_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
          (v2i64 (VEXTRACTI32x4rr VR512:$src1,
                  (EXTRACT_get_vextract128_imm VR128X:$ext)))>;

422
423
// 256-bit subvector extracts from a 512-bit vector. FP element types select
// VEXTRACTF64x4rr, integer element types VEXTRACTI64x4rr.
def : Pat<(vextract256_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
          (v8f32 (VEXTRACTF64x4rr
                    VR512:$src1,
                    (EXTRACT_get_vextract256_imm VR256X:$ext)))>;

def : Pat<(vextract256_extract:$ext (v16i32 VR512:$src1), (iPTR imm)),
          (v8i32 (VEXTRACTI64x4rr
                    VR512:$src1,
                    (EXTRACT_get_vextract256_imm VR256X:$ext)))>;

def : Pat<(vextract256_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
          (v4f64 (VEXTRACTF64x4rr
                    VR512:$src1,
                    (EXTRACT_get_vextract256_imm VR256X:$ext)))>;

def : Pat<(vextract256_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
          (v4i64 (VEXTRACTI64x4rr
                    VR512:$src1,
                    (EXTRACT_get_vextract256_imm VR256X:$ext)))>;

439
// zmm -> ymm
// Extracting the 256-bit subvector at index 0 of a 512-bit vector is a
// plain subregister copy (sub_ymm); no instruction is needed.
def : Pat<(v8i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
          (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm))>;
def : Pat<(v8f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
          (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm))>;
def : Pat<(v4i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
          (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm))>;
def : Pat<(v4f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
          (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm))>;
450
// zmm -> xmm
// Likewise, the 128-bit subvector at index 0 is read through sub_xmm.
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
          (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
          (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
460
461
// xmm -> zmm
// Inserting a 128-bit subvector at index 0 of an undef 512-bit vector is a
// subregister copy that needs no instruction. It is expressed in two steps:
// the xmm value goes into an undefined ymm (sub_xmm), which in turn goes into
// an undefined zmm (sub_ymm).
def : Pat<(insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG
             (v8i64 (IMPLICIT_DEF)),
             (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             sub_ymm)>;
def : Pat<(insert_subvector undef, (v2f64 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG
             (v8f64 (IMPLICIT_DEF)),
             (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             sub_ymm)>;
def : Pat<(insert_subvector undef, (v4i32 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG
             (v16i32 (IMPLICIT_DEF)),
             (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             sub_ymm)>;
def : Pat<(insert_subvector undef, (v4f32 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG
             (v16f32 (IMPLICIT_DEF)),
             (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             sub_ymm)>;
480
// ymm -> zmm
// Inserting a 256-bit subvector at index 0 of an undef 512-bit vector is a
// single subregister insert through sub_ymm.
def : Pat<(insert_subvector undef, (v4i64 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)),  VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v4f64 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)),  VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
489
// vextractps - extract 32 bits from XMM
// Register form: the selected element, viewed as i32, lands in a GR32.
def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
      (ins VR128X:$src1, u32u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set GR32:$dst,
            (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
      EVEX;

// Memory form: the selected element is stored to $dst.
def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
      (ins f32mem:$dst, VR128X:$src1, u32u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
              addr:$dst)]>,
      EVEX, EVEX_CD8<32, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000502
503//===---------------------------------------------------------------------===//
504// AVX-512 BROADCAST
505//---
506multiclass avx512_fp_broadcast<bits<8> opc, string OpcodeStr,
507 RegisterClass DestRC,
508 RegisterClass SrcRC, X86MemOperand x86memop> {
509 def rr : AVX5128I<opc, MRMSrcReg, (outs DestRC:$dst), (ins SrcRC:$src),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +0000510 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000511 []>, EVEX;
512 def rm : AVX5128I<opc, MRMSrcMem, (outs DestRC:$dst), (ins x86memop:$src),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +0000513 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),[]>, EVEX;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000514}
// Single-precision broadcast into a 512-bit register.
let ExeDomain = SSEPackedSingle in
defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss", VR512,
                                         VR128X, f32mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;

// Double-precision broadcast into a 512-bit register.
let ExeDomain = SSEPackedDouble in
defm VBROADCASTSDZ : avx512_fp_broadcast<0x19, "vbroadcastsd", VR512,
                                         VR128X, f64mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
526
// Broadcast of a scalar FP load selects the memory form directly.
def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))),
          (VBROADCASTSSZrm addr:$src)>;
def : Pat<(v8f64  (X86VBroadcast (loadf64 addr:$src))),
          (VBROADCASTSDZrm addr:$src)>;

// The broadcast intrinsics taking an address map to the same instructions.
def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
          (VBROADCASTSSZrm addr:$src)>;
def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
          (VBROADCASTSDZrm addr:$src)>;

536
// Integer broadcast from a general-purpose register into a 512-bit vector.
// Zrr is the unmasked form; Zkrr is the zero-masking form that additionally
// takes a mask operand. Neither carries a selection pattern.
multiclass avx512_int_broadcast_reg<bits<8> opc, string OpcodeStr,
                                    RegisterClass SrcRC, RegisterClass KRC> {
  def Zrr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins SrcRC:$src),
                     OpcodeStr # " \t{$src, $dst|$dst, $src}",
                     []>, EVEX, EVEX_V512;
  def Zkrr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst),
                      (ins KRC:$mask, SrcRC:$src),
                      OpcodeStr #
                        " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                      []>, EVEX, EVEX_V512, EVEX_KZ;
}
548
// 32-bit and 64-bit GPR broadcasts share opcode 0x7C; the 64-bit variant
// additionally sets VEX_W.
defm VPBROADCASTDr : avx512_int_broadcast_reg<0x7C, "vpbroadcastd",
                                              GR32, VK16WM>;
defm VPBROADCASTQr : avx512_int_broadcast_reg<0x7C, "vpbroadcastq",
                                              GR64, VK8WM>,
                     VEX_W;
552
// Zero-extending a mask to a vector: zero-masked broadcast of the constant 1
// under that mask.
def : Pat<(v16i32 (X86vzext VK16WM:$mask)),
          (VPBROADCASTDrZkrr VK16WM:$mask, (i32 (MOV32ri 0x1)))>;

def : Pat<(v8i64 (X86vzext VK8WM:$mask)),
          (VPBROADCASTQrZkrr VK8WM:$mask, (i64 (MOV64ri 0x1)))>;

// Plain and masked broadcasts of a GPR value.
def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))),
          (VPBROADCASTDrZrr GR32:$src)>;
def : Pat<(v16i32 (X86VBroadcastm VK16WM:$mask, (i32 GR32:$src))),
          (VPBROADCASTDrZkrr VK16WM:$mask, GR32:$src)>;
def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))),
          (VPBROADCASTQrZrr GR64:$src)>;
def : Pat<(v8i64 (X86VBroadcastm VK8WM:$mask, (i64 GR64:$src))),
          (VPBROADCASTQrZkrr VK8WM:$mask, GR64:$src)>;

// Unmasked broadcast intrinsics.
def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_i32_512 (i32 GR32:$src))),
          (VPBROADCASTDrZrr GR32:$src)>;
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_i64_512 (i64 GR64:$src))),
          (VPBROADCASTQrZrr GR64:$src)>;

// Masked broadcast intrinsics with an all-zeros pass-through operand. The
// GPR mask is copied into the write-mask register class first.
def : Pat<(v16i32 (int_x86_avx512_mask_pbroadcast_d_gpr_512
                     (i32 GR32:$src),
                     (v16i32 immAllZerosV), (i16 GR16:$mask))),
          (VPBROADCASTDrZkrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
                             GR32:$src)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pbroadcast_q_gpr_512
                    (i64 GR64:$src),
                    (bc_v8i64 (v16i32 immAllZerosV)), (i8 GR8:$mask))),
          (VPBROADCASTQrZkrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
                             GR64:$src)>;

579
// Integer broadcast whose source is either the low element of a VR128X
// register or a scalar loaded through ld_frag.
//   rr  / rm  - unmasked, selected from X86VBroadcast.
//   krr / krm - masked (EVEX_KZ, "{z}" in the asm string), selected from
//               X86VBroadcastm.
multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
                                   X86MemOperand x86memop, PatFrag ld_frag,
                                   RegisterClass DstRC, ValueType OpVT,
                                   ValueType SrcVT, RegisterClass KRC> {
  // Register-source forms.
  def rr : AVX5128I<opc, MRMSrcReg,
                    (outs DstRC:$dst),
                    (ins VR128X:$src),
                    OpcodeStr # " \t{$src, $dst|$dst, $src}",
                    [(set DstRC:$dst,
                       (OpVT (X86VBroadcast (SrcVT VR128X:$src))))]>,
           EVEX;
  def krr : AVX5128I<opc, MRMSrcReg,
                     (outs DstRC:$dst),
                     (ins KRC:$mask, VR128X:$src),
                     OpcodeStr #
                       " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                     [(set DstRC:$dst,
                        (OpVT (X86VBroadcastm KRC:$mask,
                                              (SrcVT VR128X:$src))))]>,
            EVEX, EVEX_KZ;

  // Memory-source forms.
  let mayLoad = 1 in {
    def rm : AVX5128I<opc, MRMSrcMem,
                      (outs DstRC:$dst),
                      (ins x86memop:$src),
                      OpcodeStr # " \t{$src, $dst|$dst, $src}",
                      [(set DstRC:$dst,
                         (OpVT (X86VBroadcast (ld_frag addr:$src))))]>,
             EVEX;
    def krm : AVX5128I<opc, MRMSrcMem,
                       (outs DstRC:$dst),
                       (ins KRC:$mask, x86memop:$src),
                       OpcodeStr #
                         " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                       [(set DstRC:$dst,
                          (OpVT (X86VBroadcastm KRC:$mask,
                                                (ld_frag addr:$src))))]>,
              EVEX, EVEX_KZ;
  }
}
608
// 512-bit vpbroadcastd / vpbroadcastq from an XMM register or a scalar load.
defm VPBROADCASTDZ : avx512_int_broadcast_rm<0x58, "vpbroadcastd",
                                             i32mem, loadi32, VR512,
                                             v16i32, v4i32, VK16WM>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq",
                                             i64mem, loadi64, VR512,
                                             v8i64, v2i64, VK8WM>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
615
// Sub-vector integer broadcast from memory into VR512.
//   rm  - unmasked load form.
//   krm - masked load form (EVEX_KZ, "{z}" in the asm string).
// Both records have empty pattern lists; ld_frag is accepted but not
// referenced inside this multiclass.
multiclass avx512_int_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                          X86MemOperand x86memop,
                                          PatFrag ld_frag,
                                          RegisterClass KRC> {
  let mayLoad = 1 in {
    def rm : AVX5128I<opc, MRMSrcMem,
                      (outs VR512:$dst),
                      (ins x86memop:$src),
                      OpcodeStr # " \t{$src, $dst|$dst, $src}",
                      []>,
             EVEX;
    def krm : AVX5128I<opc, MRMSrcMem,
                       (outs VR512:$dst),
                       (ins KRC:$mask, x86memop:$src),
                       OpcodeStr #
                         " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                       []>,
              EVEX, EVEX_KZ;
  }
}
630
// Sub-vector broadcasts from memory: four 32-bit elements (128 bits) and
// four 64-bit elements (256 bits) replicated across a 512-bit register.
defm VBROADCASTI32X4 : avx512_int_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                                                      i128mem, loadv2i64,
                                                      VK16WM>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
// The 64-bit variant produces 8 x i64 lanes, so it takes the 8-lane write
// mask class (VK8WM), matching the other 64-bit-element instructions in this
// file, rather than the 16-lane class.
defm VBROADCASTI64X4 : avx512_int_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                                                      i256mem, loadv4i64,
                                                      VK8WM>,
                       VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
637
// Integer broadcast intrinsics with an XMM source.
def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))),
          (VPBROADCASTDZrr VR128X:$src)>;
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
          (VPBROADCASTQZrr VR128X:$src)>;

// Floating-point broadcast node with an XMM source.
def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))),
          (VBROADCASTSSZrr VR128X:$src)>;
def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))),
          (VBROADCASTSDZrr VR128X:$src)>;

// Floating-point broadcast intrinsics with an XMM source.
def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))),
          (VBROADCASTSSZrr VR128X:$src)>;
def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
          (VBROADCASTSDZrr VR128X:$src)>;

// Fallback for the case where the load node used by the load-folding
// patterns has additional users, which prevents those patterns from being
// selected: broadcast from the scalar FP register instead.
def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
          (VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
          (VBROADCASTSDZrr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
659
660
// 256-bit masked integer broadcast from memory: run the 512-bit masked
// instruction with the 8-lane mask moved into VK16WM, then take the low
// YMM half of the result.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))),
            (EXTRACT_SUBREG
               (v16i32 (VPBROADCASTDZkrm
                          (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
                          addr:$src)),
               sub_ymm)>;
}
667//===----------------------------------------------------------------------===//
668// AVX-512 BROADCAST MASK TO VECTOR REGISTER
669//---
670
// Broadcast a mask register (KRC) into a vector register (DstRC).
//
// The single `rr` record has no selection pattern. OpVT and SrcVT are part of
// the interface but are not referenced inside this multiclass.
//
// Encoding note: the vector destination is encoded in ModRM.reg and the mask
// source in ModRM.r/m (the "RM" operand encoding of VPBROADCASTM), which is
// the MRMSrcReg form. MRMDestReg would place the destination in ModRM.r/m and
// swap the two operands in the emitted bytes.
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 RegisterClass DstRC, RegisterClass KRC,
                                 ValueType OpVT, ValueType SrcVT> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs DstRC:$dst), (ins KRC:$src),
                      !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                      []>, EVEX;
}
678
// Mask-to-vector broadcasts, guarded by the HasCDI predicate.
let Predicates = [HasCDI] in {
  defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                               VR512, VK16, v16i32, v16i1>,
                         EVEX_V512;
  defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                               VR512, VK8, v8i64, v8i1>,
                         EVEX_V512, VEX_W;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000685
686//===----------------------------------------------------------------------===//
687// AVX-512 - VPERM
688//
689// -- immediate form --
// Permute controlled by an 8-bit immediate.
//   ri - register source, selected from (OpNode RC:$src1, imm).
//   mi - memory source loaded through mem_frag, same node.
multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                           SDNode OpNode, PatFrag mem_frag,
                           X86MemOperand x86memop, ValueType OpVT> {
  def ri : AVX512AIi8<opc, MRMSrcReg,
                      (outs RC:$dst),
                      (ins RC:$src1, i8imm:$src2),
                      OpcodeStr #
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set RC:$dst,
                         (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
           EVEX;

  def mi : AVX512AIi8<opc, MRMSrcMem,
                      (outs RC:$dst),
                      (ins x86memop:$src1, i8imm:$src2),
                      OpcodeStr #
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set RC:$dst,
                         (OpVT (OpNode (mem_frag addr:$src1),
                                       (i8 imm:$src2))))]>,
           EVEX;
}
708
// Immediate-controlled 64-bit element permutes (integer and double).
defm VPERMQZ  : avx512_perm_imm<0x00, "vpermq", VR512, X86VPermi,
                                memopv8i64, i512mem, v8i64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
let ExeDomain = SSEPackedDouble in
defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", VR512, X86VPermi,
                                memopv8f64, f512mem, v8f64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
714
715// -- VPERM - register form --
// Two-operand permute selected from X86VPermv.
//   rr - both operands in registers.
//   rm - second operand loaded through mem_frag.
multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                       PatFrag mem_frag, X86MemOperand x86memop,
                       ValueType OpVT> {
  def rr : AVX5128I<opc, MRMSrcReg,
                    (outs RC:$dst),
                    (ins RC:$src1, RC:$src2),
                    OpcodeStr #
                      " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set RC:$dst,
                       (OpVT (X86VPermv RC:$src1, RC:$src2)))]>,
           EVEX_4V;

  def rm : AVX5128I<opc, MRMSrcMem,
                    (outs RC:$dst),
                    (ins RC:$src1, x86memop:$src2),
                    OpcodeStr #
                      " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set RC:$dst,
                       (OpVT (X86VPermv RC:$src1,
                                        (mem_frag addr:$src2))))]>,
           EVEX_4V;
}
734
// Register-controlled permutes for 32/64-bit integer and FP elements.
defm VPERMDZ  : avx512_perm<0x36, "vpermd", VR512, memopv16i32, i512mem,
                            v16i32>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMQZ  : avx512_perm<0x36, "vpermq", VR512, memopv8i64, i512mem,
                            v8i64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
let ExeDomain = SSEPackedSingle in
defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, memopv16f32, f512mem,
                            v16f32>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
let ExeDomain = SSEPackedDouble in
defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem,
                            v8f64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
745
746// -- VPERM2I - 3 source operands form --
// Three-source permute. $src1 is tied to $dst for every record.
//   rr   / rm   - unmasked; selected from (OpNode $src1, $src2, $src3).
//   rrk  / rmk  - merge-masked (EVEX_K); selected from a vselect between the
//                 permute result and $src1.
//   rrkz / rmkz - zero-masked (EVEX_KZ); selected from a vselect between the
//                 permute result and an all-zeros vector.
// The rm* forms load $src3 through mem_frag.
multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC,
                          PatFrag mem_frag, X86MemOperand x86memop,
                          SDNode OpNode, ValueType OpVT, RegisterClass KRC> {
let Constraints = "$src1 = $dst" in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2, RC:$src3),
                   !strconcat(OpcodeStr,
                       " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>,
                    EVEX_4V;

  def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
                   !strconcat(OpcodeStr,
                       " \t{$src3, $src2, $dst {${mask}}|"
                       "$dst {${mask}}, $src2, $src3}"),
                   [(set RC:$dst, (OpVT (vselect KRC:$mask,
                                           (OpNode RC:$src1, RC:$src2,
                                              RC:$src3),
                                           RC:$src1)))]>,
                    EVEX_4V, EVEX_K;

  // The AT&T half of this string must not end in a space before '|';
  // otherwise the printed AT&T form carries trailing whitespace, unlike the
  // rmkz form.
  let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
    def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
                   !strconcat(OpcodeStr,
                       " \t{$src3, $src2, $dst {${mask}} {z}|",
                       "$dst {${mask}} {z}, $src2, $src3}"),
                   [(set RC:$dst, (OpVT (vselect KRC:$mask,
                                           (OpNode RC:$src1, RC:$src2,
                                              RC:$src3),
                                           (OpVT (bitconvert
                                              (v16i32 immAllZerosV))))))]>,
                    EVEX_4V, EVEX_KZ;

  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2, x86memop:$src3),
                   !strconcat(OpcodeStr,
                    " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (OpNode RC:$src1, RC:$src2,
                      (mem_frag addr:$src3))))]>, EVEX_4V;

  def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
                   !strconcat(OpcodeStr,
                    " \t{$src3, $src2, $dst {${mask}}|"
                    "$dst {${mask}}, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (vselect KRC:$mask,
                                    (OpNode RC:$src1, RC:$src2,
                                       (mem_frag addr:$src3)),
                                    RC:$src1)))]>,
                    EVEX_4V, EVEX_K;

  let AddedComplexity = 10 in // Prefer over the rrkz variant
    def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
                   !strconcat(OpcodeStr,
                    " \t{$src3, $src2, $dst {${mask}} {z}|"
                    "$dst {${mask}} {z}, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (vselect KRC:$mask,
                                    (OpNode RC:$src1, RC:$src2,
                                            (mem_frag addr:$src3)),
                                    (OpVT (bitconvert
                                       (v16i32 immAllZerosV))))))]>,
                    EVEX_4V, EVEX_KZ;
  }
}
// vpermi2{d,q,ps,pd}: three-source permutes selected from X86VPermiv3.
defm VPERMI2D  : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32,
                                  i512mem, X86VPermiv3, v16i32, VK16WM>,
                 EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q  : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64,
                                  i512mem, X86VPermiv3, v8i64, VK8WM>,
                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32,
                                  i512mem, X86VPermiv3, v16f32, VK16WM>,
                 EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64,
                                  i512mem, X86VPermiv3, v8f64, VK8WM>,
                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000830
// vpermt2<Suffix>: instantiates avx512_perm_3src under the "vpermt2" mnemonic
// prefix and adds selection patterns for the matching
// int_x86_avx512_mask_vpermt_<Suffix>_512 intrinsic.
//
// In the intrinsic the index vector comes first, while the instruction takes
// it as its second register operand, so $idx and $src1 swap positions.
multiclass avx512_perm_table_3src<bits<8> opc, string Suffix,
                                  RegisterClass RC, PatFrag mem_frag,
                                  X86MemOperand x86memop, SDNode OpNode,
                                  ValueType OpVT, RegisterClass KRC,
                                  ValueType MaskVT, RegisterClass MRC> :
    avx512_perm_3src<opc, "vpermt2"##Suffix, RC, mem_frag, x86memop, OpNode,
                     OpVT, KRC> {
  // Mask operand of -1: use the unmasked register form.
  def : Pat<(OpVT (!cast<Intrinsic>(
                      "int_x86_avx512_mask_vpermt_"##Suffix##"_512")
                   VR512:$idx, VR512:$src1, VR512:$src2, -1)),
            (!cast<Instruction>(NAME#rr)
               VR512:$src1, VR512:$idx, VR512:$src2)>;

  // Mask in a GPR (MRC): copy it into KRC and use the merge-masked form.
  def : Pat<(OpVT (!cast<Intrinsic>(
                      "int_x86_avx512_mask_vpermt_"##Suffix##"_512")
                   VR512:$idx, VR512:$src1, VR512:$src2, MRC:$mask)),
            (!cast<Instruction>(NAME#rrk)
               VR512:$src1,
               (MaskVT (COPY_TO_REGCLASS MRC:$mask, KRC)),
               VR512:$idx, VR512:$src2)>;
}
846
// vpermt2{d,q,ps,pd}: table-form three-source permutes selected from
// X86VPermv3, with the intrinsic mask arriving in GR16 / GR8.
defm VPERMT2D  : avx512_perm_table_3src<0x7E, "d", VR512, memopv16i32,
                                        i512mem, X86VPermv3, v16i32,
                                        VK16WM, v16i1, GR16>,
                 EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q  : avx512_perm_table_3src<0x7E, "q", VR512, memopv8i64,
                                        i512mem, X86VPermv3, v8i64,
                                        VK8WM, v8i1, GR8>,
                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2PS : avx512_perm_table_3src<0x7F, "ps", VR512, memopv16f32,
                                        i512mem, X86VPermv3, v16f32,
                                        VK16WM, v16i1, GR16>,
                 EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_table_3src<0x7F, "pd", VR512, memopv8f64,
                                        i512mem, X86VPermv3, v8f64,
                                        VK8WM, v8i1, GR8>,
                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

Elena Demikhovsky299cf5112014-04-29 09:09:15 +0000859
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000860//===----------------------------------------------------------------------===//
861// AVX-512 - BLEND using mask
862//
// Mask-controlled blend of two vectors.
//   rr - register form; selected from (OpNode mask, $src2, $src1). Note that
//        the pattern lists $src2 before $src1.
//   rm - memory form; marked mayLoad and carries no selection pattern.
//        mem_frag is accepted but not referenced inside this multiclass.
multiclass avx512_blendmask<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, RegisterClass RC,
                            X86MemOperand x86memop, PatFrag mem_frag,
                            SDNode OpNode, ValueType vt> {
  def rr : AVX5128I<opc, MRMSrcReg,
                    (outs RC:$dst),
                    (ins KRC:$mask, RC:$src1, RC:$src2),
                    OpcodeStr #
                      " \t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}",
                    [(set RC:$dst,
                       (OpNode KRC:$mask, (vt RC:$src2), (vt RC:$src1)))]>,
           EVEX_4V, EVEX_K;

  let mayLoad = 1 in
  def rm : AVX5128I<opc, MRMSrcMem,
                    (outs RC:$dst),
                    (ins KRC:$mask, RC:$src1, x86memop:$src2),
                    OpcodeStr #
                      " \t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}",
                    []>,
           EVEX_4V, EVEX_K;
}
880
// Floating-point masked blends, selected from vselect.
let ExeDomain = SSEPackedSingle in
defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps", VK16WM, VR512,
                                   f512mem, memopv16f32, vselect, v16f32>,
                  EVEX_CD8<32, CD8VF>, EVEX_V512;
let ExeDomain = SSEPackedDouble in
defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd", VK8WM, VR512,
                                   f512mem, memopv8f64, vselect, v8f64>,
                  VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;

// FP blend intrinsics: the mask arrives in a GPR and is copied into the
// write-mask register class expected by the instruction.
def : Pat<(v16f32 (int_x86_avx512_mask_blend_ps_512
                     (v16f32 VR512:$src1),
                     (v16f32 VR512:$src2),
                     (i16 GR16:$mask))),
          (VBLENDMPSZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
                        VR512:$src1, VR512:$src2)>;

def : Pat<(v8f64 (int_x86_avx512_mask_blend_pd_512
                    (v8f64 VR512:$src1),
                    (v8f64 VR512:$src2),
                    (i8 GR8:$mask))),
          (VBLENDMPDZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
                        VR512:$src1, VR512:$src2)>;
901
// Integer masked blends, selected from vselect.
// NOTE(review): these pass f512mem although the element types are integer;
// the sibling integer instructions in this file use i512mem - confirm
// whether that is intentional.
defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd", VK16WM, VR512,
                                   f512mem, memopv16i32, vselect, v16i32>,
                  EVEX_CD8<32, CD8VF>, EVEX_V512;

defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq", VK8WM, VR512,
                                   f512mem, memopv8i64, vselect, v8i64>,
                  VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000911
// Integer blend intrinsics: the mask arrives in a GPR and is copied into the
// write-mask register class. VPBLENDM{D,Q}Zrr declare their mask operand as
// VK16WM / VK8WM, so copy straight into those classes (as the FP blend
// patterns do) instead of the plain VK16 / VK8 classes.
def : Pat<(v16i32 (int_x86_avx512_mask_blend_d_512 (v16i32 VR512:$src1),
                 (v16i32 VR512:$src2), (i16 GR16:$mask))),
        (VPBLENDMDZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
         VR512:$src1, VR512:$src2)>;

def : Pat<(v8i64 (int_x86_avx512_mask_blend_q_512 (v8i64 VR512:$src1),
                 (v8i64 VR512:$src2), (i8 GR8:$mask))),
        (VPBLENDMQZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
         VR512:$src1, VR512:$src2)>;
921
// 256-bit vselect: place both YMM operands in ZMM registers, run the 512-bit
// blend with the 8-lane mask moved into VK16WM, and take the low YMM half of
// the result. $src2 is passed before $src1 to the blend instruction.
let Predicates = [HasAVX512] in {
  def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask),
                            (v8f32 VR256X:$src1),
                            (v8f32 VR256X:$src2))),
            (EXTRACT_SUBREG
               (v16f32 (VBLENDMPSZrr
                  (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
                  (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
                  (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))),
               sub_ymm)>;

  def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask),
                            (v8i32 VR256X:$src1),
                            (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG
               (v16i32 (VPBLENDMDZrr
                  (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
                  (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
                  (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))),
               sub_ymm)>;
}
Elena Demikhovsky47fc44e2013-12-16 13:52:35 +0000937//===----------------------------------------------------------------------===//
938// Compare Instructions
939//===----------------------------------------------------------------------===//
940
941// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
// Scalar FP compare producing a VK1 mask result (opcode 0xC2).
//   rr / rm           - condition code taken as a CC operand; selected from
//                       OpNode.
//   rri_alt / rmi_alt - assembler-only forms taking the condition as a plain
//                       i8 immediate, printed with asm_alt; no patterns.
// NOTE(review): the memory forms use IIC_SSE_ALU_F32P_RM while the register
// forms use IIC_SSE_ALU_F32S_RR - confirm the packed itinerary is intended.
multiclass avx512_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
                             Operand CC, SDNode OpNode, ValueType VT,
                             PatFrag ld_frag, string asm, string asm_alt> {
  def rr : AVX512Ii8<0xC2, MRMSrcReg,
                     (outs VK1:$dst),
                     (ins RC:$src1, RC:$src2, CC:$cc),
                     asm,
                     [(set VK1:$dst,
                        (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
                     IIC_SSE_ALU_F32S_RR>,
           EVEX_4V;

  def rm : AVX512Ii8<0xC2, MRMSrcMem,
                     (outs VK1:$dst),
                     (ins RC:$src1, x86memop:$src2, CC:$cc),
                     asm,
                     [(set VK1:$dst,
                        (OpNode (VT RC:$src1), (ld_frag addr:$src2),
                                imm:$cc))],
                     IIC_SSE_ALU_F32P_RM>,
           EVEX_4V;

  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512Ii8<0xC2, MRMSrcReg,
                            (outs VK1:$dst),
                            (ins RC:$src1, RC:$src2, i8imm:$cc),
                            asm_alt, [], IIC_SSE_ALU_F32S_RR>,
                  EVEX_4V;

    def rmi_alt : AVX512Ii8<0xC2, MRMSrcMem,
                            (outs VK1:$dst),
                            (ins RC:$src1, x86memop:$src2, i8imm:$cc),
                            asm_alt, [], IIC_SSE_ALU_F32P_RM>,
                  EVEX_4V;
  }
}
962
// Scalar single/double compares into a mask register, selected from
// X86cmpms.
let Predicates = [HasAVX512] in {
  defm VCMPSSZ : avx512_cmp_scalar<
                   FR32X, f32mem, AVXCC, X86cmpms, f32, loadf32,
                   "vcmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   "vcmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
                 XS;
  defm VCMPSDZ : avx512_cmp_scalar<
                   FR64X, f64mem, AVXCC, X86cmpms, f64, loadf64,
                   "vcmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   "vcmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
                 XD, VEX_W;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000973
// Packed integer compare with a fixed predicate, writing a KRC mask result.
//   rr - both operands in registers.
//   rm - second operand loaded through memop_frag.
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
                              RegisterClass KRC, RegisterClass RC,
                              X86MemOperand x86memop, PatFrag memop_frag,
                              SDNode OpNode, ValueType vt> {
  def rr : AVX512BI<opc, MRMSrcReg,
                    (outs KRC:$dst),
                    (ins RC:$src1, RC:$src2),
                    OpcodeStr #
                      " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set KRC:$dst,
                       (OpNode (vt RC:$src1), (vt RC:$src2)))],
                    IIC_SSE_ALU_F32P_RR>,
           EVEX_4V;

  def rm : AVX512BI<opc, MRMSrcMem,
                    (outs KRC:$dst),
                    (ins RC:$src1, x86memop:$src2),
                    OpcodeStr #
                      " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set KRC:$dst,
                       (OpNode (vt RC:$src1), (memop_frag addr:$src2)))],
                    IIC_SSE_ALU_F32P_RM>,
           EVEX_4V;
}
988
// Packed integer equality compares.
defm VPCMPEQDZ : avx512_icmp_packed<0x76, "vpcmpeqd", VK16, VR512, i512mem,
                                    memopv16i32, X86pcmpeqm, v16i32>,
                 EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPCMPEQQZ : avx512_icmp_packed<0x29, "vpcmpeqq", VK8, VR512, i512mem,
                                    memopv8i64, X86pcmpeqm, v8i64>,
                 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Packed integer greater-than compares.
defm VPCMPGTDZ : avx512_icmp_packed<0x66, "vpcmpgtd", VK16, VR512, i512mem,
                                    memopv16i32, X86pcmpgtm, v16i32>,
                 EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPCMPGTQZ : avx512_icmp_packed<0x37, "vpcmpgtq", VK8, VR512, i512mem,
                                    memopv8i64, X86pcmpgtm, v8i64>,
                 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001002
// 256-bit integer compares: place both YMM operands in ZMM registers, run
// the 512-bit compare, and copy the resulting mask into VK8.
def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
          (COPY_TO_REGCLASS
             (VPCMPGTDZrr
                (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
                (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))),
             VK8)>;

def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
          (COPY_TO_REGCLASS
             (VPCMPEQDZrr
                (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
                (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))),
             VK8)>;
1012
// Packed integer compare with a condition-code operand, writing a KRC mask.
//   rri / rmi           - condition code printed as part of the mnemonic
//                         ("vpcmp${cc}<Suffix>"); selected from OpNode.
//   rri_alt / rmi_alt   - assembler-only forms with the condition as an
//                         explicit i8 immediate operand.
//   rrik_alt / rmik_alt - as above, with a WMRC:$mask operand (EVEX_K).
multiclass avx512_icmp_cc<bits<8> opc, RegisterClass WMRC, RegisterClass KRC,
                          RegisterClass RC, X86MemOperand x86memop,
                          PatFrag memop_frag, SDNode OpNode, ValueType vt,
                          Operand CC, string Suffix> {
  def rri : AVX512AIi8<opc, MRMSrcReg,
                       (outs KRC:$dst),
                       (ins RC:$src1, RC:$src2, CC:$cc),
                       "vpcmp${cc}" # Suffix #
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set KRC:$dst,
                          (OpNode (vt RC:$src1), (vt RC:$src2), imm:$cc))],
                       IIC_SSE_ALU_F32P_RR>,
            EVEX_4V;

  def rmi : AVX512AIi8<opc, MRMSrcMem,
                       (outs KRC:$dst),
                       (ins RC:$src1, x86memop:$src2, CC:$cc),
                       "vpcmp${cc}" # Suffix #
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set KRC:$dst,
                          (OpNode (vt RC:$src1), (memop_frag addr:$src2),
                                  imm:$cc))],
                       IIC_SSE_ALU_F32P_RM>,
            EVEX_4V;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512AIi8<opc, MRMSrcReg,
                             (outs KRC:$dst),
                             (ins RC:$src1, RC:$src2, i8imm:$cc),
                             "vpcmp" # Suffix #
                               "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                             [], IIC_SSE_ALU_F32P_RR>,
                  EVEX_4V;

    def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
                              (outs KRC:$dst),
                              (ins WMRC:$mask, RC:$src1, RC:$src2, i8imm:$cc),
                              "vpcmp" # Suffix #
                                "\t{$cc, $src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2, $cc}",
                              [], IIC_SSE_ALU_F32P_RR>,
                   EVEX_4V, EVEX_K;

    def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
                             (outs KRC:$dst),
                             (ins RC:$src1, x86memop:$src2, i8imm:$cc),
                             "vpcmp" # Suffix #
                               "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                             [], IIC_SSE_ALU_F32P_RM>,
                  EVEX_4V;

    def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
                              (outs KRC:$dst),
                              (ins WMRC:$mask, RC:$src1, x86memop:$src2,
                                   i8imm:$cc),
                              "vpcmp" # Suffix #
                                "\t{$cc, $src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2, $cc}",
                              [], IIC_SSE_ALU_F32P_RM>,
                   EVEX_4V, EVEX_K;
  }
}
1052
// 32-bit element compares, selected from X86cmpm / X86cmpmu.
defm VPCMPDZ  : avx512_icmp_cc<0x1F, VK16WM, VK16, VR512, i512mem,
                               memopv16i32, X86cmpm, v16i32, AVXCC, "d">,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPCMPUDZ : avx512_icmp_cc<0x1E, VK16WM, VK16, VR512, i512mem,
                               memopv16i32, X86cmpmu, v16i32, AVXCC, "ud">,
                EVEX_V512, EVEX_CD8<32, CD8VF>;

// 64-bit element compares, selected from X86cmpm / X86cmpmu.
defm VPCMPQZ  : avx512_icmp_cc<0x1F, VK8WM, VK8, VR512, i512mem,
                               memopv8i64, X86cmpm, v8i64, AVXCC, "q">,
                VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
defm VPCMPUQZ : avx512_icmp_cc<0x1E, VK8WM, VK8, VR512, i512mem,
                               memopv8i64, X86cmpmu, v8i64, AVXCC, "uq">,
                VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001066
Adam Nemet905832b2014-06-26 00:21:12 +00001067// avx512_cmp_packed - compare packed instructions
// Packed FP compare (opcode 0xC2) writing a KRC mask result.
//   rri     - register form; condition code printed in the mnemonic;
//             selected from X86cmpm.
//   rrib    - register form printed with "{sae}" and tagged EVEX_B; no
//             pattern.
//   rmi     - memory form; selected from X86cmpm with a memop load.
//   rri_alt / rmi_alt - assembler-only forms taking the condition as an
//             explicit i8 immediate operand.
multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
                           X86MemOperand x86memop, ValueType vt,
                           string suffix, Domain d> {
  def rri : AVX512PIi8<0xC2, MRMSrcReg,
             (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
             !strconcat("vcmp${cc}", suffix,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set KRC:$dst, (X86cmpm (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>;
  def rrib: AVX512PIi8<0xC2, MRMSrcReg,
             (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
     !strconcat("vcmp${cc}", suffix,
                " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
                [], d>, EVEX_B;
  // The condition code is already part of the "vcmp${cc}" mnemonic, so the
  // Intel-syntax operand list must not repeat it as a trailing ", $cc"
  // (the rri form above has the same operand list without it).
  def rmi : AVX512PIi8<0xC2, MRMSrcMem,
             (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc),
              !strconcat("vcmp${cc}", suffix,
                         " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set KRC:$dst,
              (X86cmpm (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512PIi8<0xC2, MRMSrcReg,
               (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
              !strconcat("vcmp", suffix,
                        " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
    def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem,
               (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
              !strconcat("vcmp", suffix,
                        " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
  }
}
1100
// 512-bit packed single / double compares into a mask register.
defm VCMPPSZ : avx512_cmp_packed<VK16, VR512, f512mem, v16f32,
                                 "ps", SSEPackedSingle>,
               PS, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VCMPPDZ : avx512_cmp_packed<VK8, VR512, f512mem, v8f64,
                                 "pd", SSEPackedDouble>,
               PD, EVEX_4V, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
1107
// 256-bit mask compares are selected through the 512-bit instructions: each
// YMM operand is placed in the low half of a ZMM register via SUBREG_TO_REG,
// and the resulting mask is copied into the VK8 class.

// v8f32 ordered/unordered compare.
def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
          (COPY_TO_REGCLASS
            (VCMPPSZrri
              (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
              imm:$cc),
            VK8)>;

// v8i32 signed compare.
def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
          (COPY_TO_REGCLASS
            (VPCMPDZrri
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
              imm:$cc),
            VK8)>;

// v8i32 unsigned compare.
def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
          (COPY_TO_REGCLASS
            (VPCMPUDZrri
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
              imm:$cc),
            VK8)>;
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00001123
// Unmasked (all-ones mask) 512-bit compare intrinsics. The rounding operand
// picks the instruction form: FROUND_NO_EXC selects the "rrib" variant,
// FROUND_CURRENT selects the plain "rri" variant. The mask result is handed
// back to the caller in a GPR class of matching width.
def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512
                 (v16f32 VR512:$src1), (v16f32 VR512:$src2),
                 imm:$cc, (i16 -1), FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
            (VCMPPSZrrib VR512:$src1, VR512:$src2, (I8Imm imm:$cc)),
            GR16)>;

def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512
                (v8f64 VR512:$src1), (v8f64 VR512:$src2),
                imm:$cc, (i8 -1), FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
            (VCMPPDZrrib VR512:$src1, VR512:$src2, (I8Imm imm:$cc)),
            GR8)>;

def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512
                 (v16f32 VR512:$src1), (v16f32 VR512:$src2),
                 imm:$cc, (i16 -1), FROUND_CURRENT)),
          (COPY_TO_REGCLASS
            (VCMPPSZrri VR512:$src1, VR512:$src2, (I8Imm imm:$cc)),
            GR16)>;

def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512
                (v8f64 VR512:$src1), (v8f64 VR512:$src2),
                imm:$cc, (i8 -1), FROUND_CURRENT)),
          (COPY_TO_REGCLASS
            (VCMPPDZrri VR512:$src1, VR512:$src2, (I8Imm imm:$cc)),
            GR8)>;
1147
// Mask register moves. One instantiation yields three instructions:
//   kk - mask register to mask register (no selection pattern)
//   km - mask register loaded from memory; selects a scalar load of type
//        `ivt` bitconverted to the mask vector type `vvt`
//   mk - mask register stored to memory (no selection pattern)
// Copies between GPRs and mask registers come from avx512_mask_mov_gpr.
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                           string OpcodeStr, RegisterClass KRC,
                           ValueType vvt, ValueType ivt,
                           X86MemOperand x86memop> {
  let hasSideEffects = 0 in
  def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
             OpcodeStr # " \t{$src, $dst|$dst, $src}", []>;

  let hasSideEffects = 0, mayLoad = 1 in
  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
             OpcodeStr # " \t{$src, $dst|$dst, $src}",
             [(set KRC:$dst, (vvt (bitconvert (ivt (load addr:$src)))))]>;

  let hasSideEffects = 0, mayStore = 1 in
  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
             OpcodeStr # " \t{$src, $dst|$dst, $src}", []>;
}
1168
// Moves between a general-purpose register class (GRC) and a mask register
// class (KRC):
//   kr - GPR to mask register
//   rk - mask register to GPR
// Neither form carries a selection pattern; users reference them explicitly.
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                               string OpcodeStr,
                               RegisterClass KRC, RegisterClass GRC> {
  let hasSideEffects = 0 in
  def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
             OpcodeStr # " \t{$src, $dst|$dst, $src}", []>;

  let hasSideEffects = 0 in
  def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
             OpcodeStr # " \t{$src, $dst|$dst, $src}", []>;
}
1179
// KMOV{B,W,D,Q}: mask moves for 8/16/32/64-bit masks.
// The byte form is gated on DQI, the word form on base AVX-512, and the
// dword/qword forms on BWI. The GPR side of the byte/word/dword forms is a
// 32-bit register; only the qword form uses GR64.
let Predicates = [HasDQI] in
defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8, i8mem>,
             avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
             VEX, PD;

let Predicates = [HasAVX512] in
defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16,
                             i16mem>,
             avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
             VEX, PS;

let Predicates = [HasBWI] in {
  // The reg/mem and GPR forms use different prefixes, hence separate defms.
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32,
                               i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;

  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64,
                               i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, XD, VEX_W;
}
1205
// Bitconverts between scalar integers in GPRs and mask vectors.
// KMOVB/KMOVW operate on a 32-bit GPR, so narrower scalars are widened with
// SUBREG_TO_REG on the way in and narrowed with EXTRACT_SUBREG on the way out.
let Predicates = [HasDQI] in {
  def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
            (KMOVBkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit))>;
  def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
            (EXTRACT_SUBREG (KMOVBrk VK8:$src), sub_8bit)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
            (KMOVWkr (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit))>;
  def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
            (EXTRACT_SUBREG (KMOVWrk VK16:$src), sub_16bit)>;
}

// 32- and 64-bit masks map directly onto GR32/GR64.
let Predicates = [HasBWI] in {
  def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), (KMOVDkr GR32:$src)>;
  def : Pat<(i32 (bitconvert (v32i1 VK32:$src))), (KMOVDrk VK32:$src)>;

  def : Pat<(v64i1 (bitconvert (i64 GR64:$src))), (KMOVQkr GR64:$src)>;
  def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), (KMOVQrk VK64:$src)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001227
// Load/store kreg
// With DQI an 8-bit mask is stored directly using the byte-sized KMOVB.
let Predicates = [HasDQI] in
def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
          (KMOVBmk addr:$dst, VK8:$src)>;
// Mask load/store patterns for baseline AVX-512, where the only mask<->memory
// move available is the 16-bit KMOVW.
//
// KMOVW with a memory operand always transfers 16 bits. It is therefore only
// used for the genuine 16-bit store below. Byte-sized accesses (v8i1 stored
// as i8, i8 loaded as v8i1, and i1 loads) go through a GPR with MOV8mr /
// MOVZX32rm8 so exactly one byte is touched. Using KMOVW for them would
// overwrite the byte following the destination on a store, and on a load
// would read one byte past the object (which may fault at a page boundary)
// and put unrelated memory into the upper mask bits.
let Predicates = [HasAVX512] in {
  def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
            (KMOVWmk addr:$dst, VK16:$src)>;

  // v8i1 store: move the mask to a GPR and store only its low byte.
  def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
            (MOV8mr addr:$dst,
              (EXTRACT_SUBREG
                (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
                sub_8bit))>;

  // i1 load: zero-extending byte load, then move into a mask register.
  def : Pat<(i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVWkr (MOVZX32rm8 addr:$src)), VK1)>;

  // v8i1 load from an i8: same byte-sized load; upper mask bits are zero.
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (KMOVWkr (MOVZX32rm8 addr:$src)), VK8)>;
}
// 32- and 64-bit mask stores use the natively sized KMOVD/KMOVQ (BWI).
let Predicates = [HasBWI] in {
  def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst),
            (KMOVDmk addr:$dst, VK32:$src)>;

  def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst),
            (KMOVQmk addr:$dst, VK64:$src)>;
}
Elena Demikhovskyc5f67262013-12-17 08:33:15 +00001251
// Conversions between scalar integers and the single-bit mask class VK1.
let Predicates = [HasAVX512] in {
  // trunc iN -> i1: get the value into a 32-bit GPR, keep bit 0 with AND,
  // then move it into a mask register.
  def : Pat<(i1 (trunc (i64 GR64:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr (AND32ri (EXTRACT_SUBREG $src, sub_32bit), (i32 1))),
              VK1)>;

  def : Pat<(i1 (trunc (i32 GR32:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr (AND32ri $src, (i32 1))),
              VK1)>;

  def : Pat<(i1 (trunc (i8 GR8:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr
                (AND32ri (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit),
                         (i32 1))),
              VK1)>;

  def : Pat<(i1 (trunc (i16 GR16:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr
                (AND32ri (SUBREG_TO_REG (i32 0), $src, sub_16bit),
                         (i32 1))),
              VK1)>;

  // zext i1 -> iN: move the mask into a GPR and clear everything but bit 0,
  // then adjust to the requested width.
  def : Pat<(i32 (zext VK1:$src)),
            (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;

  def : Pat<(i8 (zext VK1:$src)),
            (EXTRACT_SUBREG
              (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
              sub_8bit)>;

  def : Pat<(i64 (zext VK1:$src)),
            (AND64ri8
              (SUBREG_TO_REG (i64 0),
                             (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)),
                             sub_32bit),
              (i64 1))>;

  def : Pat<(i16 (zext VK1:$src)),
            (EXTRACT_SUBREG
              (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
              sub_16bit)>;

  // scalar_to_vector from i1 is a pure register-class change.
  def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
            (COPY_TO_REGCLASS VK1:$src, VK16)>;
  def : Pat<(v8i1 (scalar_to_vector VK1:$src)),
            (COPY_TO_REGCLASS VK1:$src, VK8)>;
}
// scalar_to_vector from i1 into the wide (BWI-only) mask types; like the
// narrower cases this is just a register-class change.
let Predicates = [HasBWI] in {
  def : Pat<(v32i1 (scalar_to_vector VK1:$src)),
            (COPY_TO_REGCLASS VK1:$src, VK32)>;

  def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
            (COPY_TO_REGCLASS VK1:$src, VK64)>;
}
1293
1294
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
let Predicates = [HasAVX512] in {
  // GR from/to 8-bit mask without native support: use the 16-bit KMOVW and
  // re-class the mask register to/from VK8.
  def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
              VK8)>;

  def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
            (EXTRACT_SUBREG
              (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
              sub_8bit)>;

  // Extracting element 0 of a mask is a register-class change to VK1.
  def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK16:$src, VK1)>;
  def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK8:$src, VK1)>;
}

// Element-0 extraction for the BWI-only mask widths.
let Predicates = [HasBWI] in {
  def : Pat<(i1 (X86Vextract VK32:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK32:$src, VK1)>;
  def : Pat<(i1 (X86Vextract VK64:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK64:$src, VK1)>;
}
1318
// Mask unary operation
// - KNOT
// Defines a single register-to-register instruction "rr" that applies OpNode
// to a mask register of class KRC, available under predicate `prd`.
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            Predicate prd> {
  let Predicates = [prd] in
  def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
             OpcodeStr # " \t{$src, $dst|$dst, $src}",
             [(set KRC:$dst, (OpNode KRC:$src))]>;
}
1329
// Instantiates a mask unary operation for every mask width. The mnemonic
// gets a b/w/d/q suffix; each width carries its own feature predicate and
// prefix/VEX.W combination.
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode> {
  defm B : avx512_mask_unop<opc, OpcodeStr # "b", VK8, OpNode, HasDQI>,
           VEX, PD;
  defm W : avx512_mask_unop<opc, OpcodeStr # "w", VK16, OpNode, HasAVX512>,
           VEX, PS;
  defm D : avx512_mask_unop<opc, OpcodeStr # "d", VK32, OpNode, HasBWI>,
           VEX, PD, VEX_W;
  defm Q : avx512_mask_unop<opc, OpcodeStr # "q", VK64, OpNode, HasBWI>,
           VEX, PS, VEX_W;
}

defm KNOT : avx512_mask_unop_all<0x44, "knot", not>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001343
// Maps the 16-bit mask unary intrinsic int_x86_avx512_<IntName>_w onto the
// <InstName>Wrr instruction. The intrinsic works on i16 values, so the
// operand is re-classed GR16 -> VK16 and the result VK16 -> GR16.
multiclass avx512_mask_unop_int<string IntName, string InstName> {
  let Predicates = [HasAVX512] in
  def : Pat<(!cast<Intrinsic>("int_x86_avx512_" # IntName # "_w")
              (i16 GR16:$src)),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstName # "Wrr")
                (v16i1 (COPY_TO_REGCLASS GR16:$src, VK16))),
              GR16)>;
}

defm : avx512_mask_unop_int<"knot", "KNOT">;
1352
// xor with an all-ones mask is a mask NOT; select the KNOT of matching width.
let Predicates = [HasDQI] in
def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)), (KNOTBrr VK8:$src1)>;

let Predicates = [HasAVX512] in
def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)), (KNOTWrr VK16:$src1)>;

let Predicates = [HasBWI] in {
  def : Pat<(xor VK32:$src1, (v32i1 immAllOnesV)), (KNOTDrr VK32:$src1)>;
  def : Pat<(xor VK64:$src1, (v64i1 immAllOnesV)), (KNOTQrr VK64:$src1)>;
}

// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
let Predicates = [HasAVX512] in {
  def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
            (COPY_TO_REGCLASS
              (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)),
              VK8)>;

  def : Pat<(not VK8:$src),
            (COPY_TO_REGCLASS
              (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)),
              VK8)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001371
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
// Defines one three-operand register instruction "rr" that applies OpNode to
// two mask registers of class KRC, available under predicate `prd`.
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC, SDPatternOperator OpNode,
                             Predicate prd> {
  let Predicates = [prd] in
  def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
             OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
             [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>;
}
1383
// Instantiates a mask binary operation for every mask width. The mnemonic
// gets a b/w/d/q suffix; each width carries its own feature predicate and
// prefix/VEX.W combination.
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode> {
  defm B : avx512_mask_binop<opc, OpcodeStr # "b", VK8, OpNode, HasDQI>,
           VEX_4V, VEX_L, PD;
  defm W : avx512_mask_binop<opc, OpcodeStr # "w", VK16, OpNode, HasAVX512>,
           VEX_4V, VEX_L, PS;
  defm D : avx512_mask_binop<opc, OpcodeStr # "d", VK32, OpNode, HasBWI>,
           VEX_4V, VEX_L, VEX_W, PD;
  defm Q : avx512_mask_binop<opc, OpcodeStr # "q", VK64, OpNode, HasBWI>,
           VEX_4V, VEX_L, VEX_W, PS;
}
1395
// Helper fragments for the two compound mask operations:
//   andn(a, b) = (~a) & b
//   xnor(a, b) = ~(a ^ b)
def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;

// Operand order is irrelevant for and/or/xnor/xor.
let isCommutable = 1 in {
  defm KAND  : avx512_mask_binop_all<0x41, "kand",  and>;
  defm KOR   : avx512_mask_binop_all<0x45, "kor",   or>;
  defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor>;
  defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor>;
}

// andn inverts only its first operand, so it must not be commuted.
let isCommutable = 0 in
defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001407
// Logic on single-bit masks: re-class both VK1 operands to VK16, apply the
// 16-bit mask instruction, and re-class the result back to VK1.
def : Pat<(xor VK1:$src1, VK1:$src2),
          (COPY_TO_REGCLASS
            (KXORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
                     (COPY_TO_REGCLASS VK1:$src2, VK16)),
            VK1)>;

def : Pat<(or VK1:$src1, VK1:$src2),
          (COPY_TO_REGCLASS
            (KORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
                    (COPY_TO_REGCLASS VK1:$src2, VK16)),
            VK1)>;

def : Pat<(and VK1:$src1, VK1:$src2),
          (COPY_TO_REGCLASS
            (KANDWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
                     (COPY_TO_REGCLASS VK1:$src2, VK16)),
            VK1)>;
1419
// Maps the 16-bit mask binary intrinsic int_x86_avx512_<IntName>_w onto the
// <InstName>Wrr instruction. The intrinsic works on i16 values, so both
// operands are re-classed GR16 -> VK16 and the result VK16 -> GR16.
multiclass avx512_mask_binop_int<string IntName, string InstName> {
  let Predicates = [HasAVX512] in
  def : Pat<(!cast<Intrinsic>("int_x86_avx512_" # IntName # "_w")
              (i16 GR16:$src1), (i16 GR16:$src2)),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstName # "Wrr")
                (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
                (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))),
              GR16)>;
}

defm : avx512_mask_binop_int<"kand",  "KAND">;
defm : avx512_mask_binop_int<"kandn", "KANDN">;
defm : avx512_mask_binop_int<"kor",   "KOR">;
defm : avx512_mask_binop_int<"kxnor", "KXNOR">;
defm : avx512_mask_binop_int<"kxor",  "KXOR">;
Elena Demikhovskye382c3f2013-12-10 13:53:10 +00001434
// With AVX-512, 8-bit mask is promoted to 16-bit mask.
// Selects a binary operation on VK8 operands by re-classing them to VK16,
// applying the given 16-bit instruction, and re-classing the result to VK8.
multiclass avx512_binop_pat<SDPatternOperator OpNode, Instruction Inst> {
  let Predicates = [HasAVX512] in
  def : Pat<(OpNode VK8:$src1, VK8:$src2),
            (COPY_TO_REGCLASS
              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                    (COPY_TO_REGCLASS VK8:$src2, VK16)),
              VK8)>;
}

defm : avx512_binop_pat<and,  KANDWrr>;
defm : avx512_binop_pat<andn, KANDNWrr>;
defm : avx512_binop_pat<or,   KORWrr>;
defm : avx512_binop_pat<xnor, KXNORWrr>;
defm : avx512_binop_pat<xor,  KXORWrr>;
1449
// Mask unpacking
// Defines the register form "rr" taking two mask registers of class KRC.
// No selection pattern is attached here; users match it explicitly.
multiclass avx512_mask_unpck<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC> {
  let Predicates = [HasAVX512] in
  def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
             OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}", []>;
}
1458
// Byte-to-word flavour of mask unpack ("bw" suffix), operating on VK16.
multiclass avx512_mask_unpck_bw<bits<8> opc, string OpcodeStr> {
  defm BW : avx512_mask_unpck<opc, OpcodeStr # "bw", VK16>,
            VEX_4V, VEX_L, PD;
}

defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">;

// Concatenating two v8i1 masks into a v16i1. Note that the instruction
// operands are given in the opposite order from the concat_vectors operands.
def : Pat<(v16i1 (concat_vectors (v8i1 VK8:$src1), (v8i1 VK8:$src2))),
          (KUNPCKBWrr (COPY_TO_REGCLASS VK8:$src2, VK16),
                      (COPY_TO_REGCLASS VK8:$src1, VK16))>;
1468
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001469
// Maps the intrinsic int_x86_avx512_<IntName>_bw onto <InstName>BWrr.
// The intrinsic works on i16 values, so both operands are re-classed
// GR16 -> VK16 and the result VK16 -> GR16.
multiclass avx512_mask_unpck_int<string IntName, string InstName> {
  let Predicates = [HasAVX512] in
  def : Pat<(!cast<Intrinsic>("int_x86_avx512_" # IntName # "_bw")
              (i16 GR16:$src1), (i16 GR16:$src2)),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstName # "BWrr")
                (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
                (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))),
              GR16)>;
}

defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001479
// Mask bit testing
// Defines the register form "rr": it has no register outputs and instead
// defines EFLAGS from OpNode applied to two mask registers of class KRC.
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode> {
  let Defs = [EFLAGS], Predicates = [HasAVX512] in
  def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
             OpcodeStr # " \t{$src2, $src1|$src1, $src2}",
             [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>;
}
1488
// 16-bit ("w" suffix) flavour of the mask test operation.
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  defm W : avx512_mask_testop<opc, OpcodeStr # "w", VK16, OpNode>,
           VEX, PS;
}

defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;

// Comparing an i1 against zero: KORTEST the value with itself, after
// re-classing VK1 to the VK16 class the instruction expects.
def : Pat<(X86cmp VK1:$src1, (i1 0)),
          (KORTESTWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
                      (COPY_TO_REGCLASS VK1:$src1, VK16))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001499
// Mask shift
// Defines the register/immediate form "ri": a mask register of class KRC
// shifted by an 8-bit immediate, selected from OpNode.
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode> {
  let Predicates = [HasAVX512] in
  def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, i8imm:$imm),
               OpcodeStr # " \t{$imm, $src, $dst|$dst, $src, $imm}",
               [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>;
}
1509
// 16-bit ("w" suffix) flavour of the mask shift operation.
// Only opc1 is consumed by this multiclass; opc2 is accepted but not used.
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode> {
  defm W : avx512_mask_shiftop<opc1, OpcodeStr # "w", VK16, OpNode>,
           VEX, TAPD, VEX_W;
}

defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86vsrli>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001518
// Mask setting all 0s or 1s
// Defines a pseudo instruction (empty asm string, no inputs) that produces
// the constant `Val` of type VT in a KRC register. It is flagged as
// rematerializable and as cheap as a move.
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
  let Predicates = [HasAVX512],
      isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1 in
  def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                 [(set KRC:$dst, (VT Val))]>;
}
1526
// Instantiates the constant-mask pseudo for the 8-bit (B) and 16-bit (W)
// mask classes.
multiclass avx512_mask_setop_w<PatFrag Val> {
  defm B : avx512_mask_setop<VK8,  v8i1,  Val>;
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
}

defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
1534
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// Constant masks (and i1 constants) are produced from the 16-bit KSET
// pseudos and re-classed to the requested mask class.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;

  def : Pat<(i1 0),  (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(i1 1),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
  def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSET1W), VK1)>;
}

// Low half of a v16i1 and insertion of a v8i1 at index 0 are pure
// register-class changes.
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0))),
          (v8i1 (COPY_TO_REGCLASS VK16:$src, VK8))>;

def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))),
          (v16i1 (COPY_TO_REGCLASS VK8:$src, VK16))>;

// High half of a v16i1: shift it down by 8 first.
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
          (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;

// Shifts of 8-bit masks go through the 16-bit shift instructions.
def : Pat<(v8i1 (X86vshli VK8:$src, (i8 imm:$imm))),
          (v8i1 (COPY_TO_REGCLASS
                  (KSHIFTLWri (COPY_TO_REGCLASS VK8:$src, VK16),
                              (I8Imm $imm)),
                  VK8))>;

def : Pat<(v8i1 (X86vsrli VK8:$src, (i8 imm:$imm))),
          (v8i1 (COPY_TO_REGCLASS
                  (KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16),
                              (I8Imm $imm)),
                  VK8))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001557//===----------------------------------------------------------------------===//
1558// AVX-512 - Aligned and unaligned load and store
1559//
1560
// Vector move/load forms for one register class RC with write-mask class KRC:
//   rr    - register move, no pattern
//   rrkz  - zero-masked register move, no pattern
//   rm    - load through ld_frag; foldable, rematerializable when
//           IsReMaterializable is set
//   rrk   - merge-masked register move; selects a vselect between $src1 and
//           the pass-through $src0, which is tied to $dst
//   rmk   - merge-masked load; same as rrk with the selected value loaded
//   rmkz  - zero-masked load; vselect against an all-zeros vector of type zvt
// The three masked forms with patterns get AddedComplexity = 20.
multiclass avx512_load<bits<8> opc, string OpcodeStr, PatFrag ld_frag,
                       RegisterClass KRC, RegisterClass RC,
                       ValueType vt, ValueType zvt, X86MemOperand memop,
                       Domain d, bit IsReMaterializable = 1> {
  let hasSideEffects = 0 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [], d>,
           EVEX;

  let hasSideEffects = 0 in
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
                      (ins KRC:$mask, RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                 "${dst} {${mask}} {z}, $src}"),
                      [], d>,
             EVEX, EVEX_KZ;

  let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable,
      SchedRW = [WriteLoad] in
  def rm : AVX512PI<opc, MRMSrcMem, (outs RC:$dst), (ins memop:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set RC:$dst, (vt (bitconvert (ld_frag addr:$src))))],
                    d>,
           EVEX;

  let AddedComplexity = 20 in {
    let Constraints = "$src0 = $dst", hasSideEffects = 0 in
    def rrk : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
                       (ins RC:$src0, KRC:$mask, RC:$src1),
                       !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                  "${dst} {${mask}}, $src1}"),
                       [(set RC:$dst,
                          (vt (vselect KRC:$mask,
                                       (vt RC:$src1),
                                       (vt RC:$src0))))],
                       d>,
              EVEX, EVEX_K;

    let Constraints = "$src0 = $dst", hasSideEffects = 0,
        mayLoad = 1, SchedRW = [WriteLoad] in
    def rmk : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
                       (ins RC:$src0, KRC:$mask, memop:$src1),
                       !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                  "${dst} {${mask}}, $src1}"),
                       [(set RC:$dst,
                          (vt (vselect KRC:$mask,
                                       (vt (bitconvert (ld_frag addr:$src1))),
                                       (vt RC:$src0))))],
                       d>,
              EVEX, EVEX_K;

    let mayLoad = 1, SchedRW = [WriteLoad] in
    def rmkz : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
                        (ins KRC:$mask, memop:$src),
                        !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                   "${dst} {${mask}} {z}, $src}"),
                        [(set RC:$dst,
                           (vt (vselect KRC:$mask,
                                        (vt (bitconvert (ld_frag addr:$src))),
                                        (vt (bitconvert (zvt immAllZerosV))))))],
                        d>,
               EVEX, EVEX_KZ;
  }
}
1614
// Instantiates avx512_load for the 512-, 256- and 128-bit vector lengths.
// Record names are assembled from strings:
//   ld_pat            - load fragment name prefix
//   elty, elsz        - element kind ("f"/"i") and element size in bits
//   vsz512/256/128    - element count at each vector length
// The 512-bit form only requires `prd`; the narrower forms also need VLX.
// For the 256/128-bit forms, a non-"f" element type uses the v4i64/v2i64
// load fragment regardless of the element size.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, string ld_pat,
                          string elty, string elsz, string vsz512,
                          string vsz256, string vsz128, Domain d,
                          Predicate prd, bit IsReMaterializable = 1> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr,
                       !cast<PatFrag>(ld_pat # "v" # vsz512 # elty # elsz),
                       !cast<RegisterClass>("VK" # vsz512 # "WM"),
                       VR512,
                       !cast<ValueType>("v" # vsz512 # elty # elsz),
                       v16i32,
                       !cast<X86MemOperand>(elty # "512mem"),
                       d, IsReMaterializable>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_load<opc, OpcodeStr,
                            !cast<PatFrag>(ld_pat #
                                           !if(!eq(elty, "f"),
                                               "v" # vsz256 # elty # elsz,
                                               "v4i64")),
                            !cast<RegisterClass>("VK" # vsz256 # "WM"),
                            VR256X,
                            !cast<ValueType>("v" # vsz256 # elty # elsz),
                            v8i32,
                            !cast<X86MemOperand>(elty # "256mem"),
                            d, IsReMaterializable>,
                EVEX_V256;

    defm Z128 : avx512_load<opc, OpcodeStr,
                            !cast<PatFrag>(ld_pat #
                                           !if(!eq(elty, "f"),
                                               "v" # vsz128 # elty # elsz,
                                               "v2i64")),
                            !cast<RegisterClass>("VK" # vsz128 # "WM"),
                            VR128X,
                            !cast<ValueType>("v" # vsz128 # elty # elsz),
                            v4i32,
                            !cast<X86MemOperand>(elty # "128mem"),
                            d, IsReMaterializable>,
                EVEX_V128;
  }
}
1645
1646
// Vector store forms for one register class RC with write-mask class KRC:
//   rr_alt / rrk_alt / rrkz_alt - register-destination (MRMDestReg)
//       encodings of the move, marked isAsmParserOnly and without patterns;
//       rrk_alt ties $src1 to $dst
//   mr  - store through st_frag
//   mrk - masked store, no pattern
multiclass avx512_store<bits<8> opc, string OpcodeStr, PatFrag st_frag,
                        ValueType OpVT, RegisterClass KRC, RegisterClass RC,
                        X86MemOperand memop, Domain d> {
  let isAsmParserOnly = 1, hasSideEffects = 0 in
  def rr_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst), (ins RC:$src),
                        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                        [], d>,
               EVEX;

  let isAsmParserOnly = 1, hasSideEffects = 0,
      Constraints = "$src1 = $dst" in
  def rrk_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
                         (ins RC:$src1, KRC:$mask, RC:$src2),
                         !strconcat(OpcodeStr,
                         "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
                         [], d>,
                EVEX, EVEX_K;

  let isAsmParserOnly = 1, hasSideEffects = 0 in
  def rrkz_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
                          (ins KRC:$mask, RC:$src),
                          !strconcat(OpcodeStr,
                          "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
                          [], d>,
                 EVEX, EVEX_KZ;

  let mayStore = 1 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (OpVT RC:$src), addr:$dst)], d>,
           EVEX;

  let mayStore = 1 in
  def mrk : AVX512PI<opc, MRMDestMem, (outs),
                     (ins memop:$dst, KRC:$mask, RC:$src),
                     !strconcat(OpcodeStr,
                     "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
                     [], d>,
            EVEX, EVEX_K;
}
1677
Robert Khasanov7ca7df02014-08-04 14:35:15 +00001678
// Instantiates avx512_store for the 512-, 256- and 128-bit vector lengths.
// Record names are assembled from strings:
//   st_pat + st_suff_{512,256,128} - store fragment name for each length
//   elty, elsz                     - element kind and element size in bits
//   vsz512/256/128                 - element count at each vector length
// The 512-bit form only requires `prd`; the narrower forms also need VLX.
multiclass avx512_store_vl<bits<8> opc, string OpcodeStr, string st_pat,
                           string st_suff_512, string st_suff_256,
                           string st_suff_128, string elty, string elsz,
                           string vsz512, string vsz256, string vsz128,
                           Domain d, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr,
                        !cast<PatFrag>(st_pat # st_suff_512),
                        !cast<ValueType>("v" # vsz512 # elty # elsz),
                        !cast<RegisterClass>("VK" # vsz512 # "WM"),
                        VR512,
                        !cast<X86MemOperand>(elty # "512mem"), d>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr,
                             !cast<PatFrag>(st_pat # st_suff_256),
                             !cast<ValueType>("v" # vsz256 # elty # elsz),
                             !cast<RegisterClass>("VK" # vsz256 # "WM"),
                             VR256X,
                             !cast<X86MemOperand>(elty # "256mem"), d>,
                EVEX_V256;

    defm Z128 : avx512_store<opc, OpcodeStr,
                             !cast<PatFrag>(st_pat # st_suff_128),
                             !cast<ValueType>("v" # vsz128 # elty # elsz),
                             !cast<RegisterClass>("VK" # vsz128 # "WM"),
                             VR128X,
                             !cast<X86MemOperand>(elty # "128mem"), d>,
                EVEX_V128;
  }
}
1702
// Floating-point vector moves. Each defm combines the load forms (opcode
// 0x28 aligned / 0x10 unaligned) with the matching store forms (0x29 / 0x11)
// at all three vector lengths.

// Aligned, single precision.
defm VMOVAPS : avx512_load_vl<0x28, "vmovaps", "alignedload", "f", "32",
                              "16", "8", "4", SSEPackedSingle, HasAVX512>,
               avx512_store_vl<0x29, "vmovaps", "alignedstore",
                               "512", "256", "", "f", "32", "16", "8", "4",
                               SSEPackedSingle, HasAVX512>,
               PS, EVEX_CD8<32, CD8VF>;

// Aligned, double precision.
defm VMOVAPD : avx512_load_vl<0x28, "vmovapd", "alignedload", "f", "64",
                              "8", "4", "2", SSEPackedDouble, HasAVX512>,
               avx512_store_vl<0x29, "vmovapd", "alignedstore",
                               "512", "256", "", "f", "64", "8", "4", "2",
                               SSEPackedDouble, HasAVX512>,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned, single precision.
defm VMOVUPS : avx512_load_vl<0x10, "vmovups", "load", "f", "32",
                              "16", "8", "4", SSEPackedSingle, HasAVX512>,
               avx512_store_vl<0x11, "vmovups", "store", "", "", "", "f", "32",
                               "16", "8", "4", SSEPackedSingle, HasAVX512>,
               PS, EVEX_CD8<32, CD8VF>;

// Unaligned, double precision. The load forms are instantiated with
// IsReMaterializable = 0.
defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", "load", "f", "64",
                              "8", "4", "2", SSEPackedDouble, HasAVX512, 0>,
               avx512_store_vl<0x11, "vmovupd", "store", "", "", "", "f", "64",
                               "8", "4", "2", SSEPackedDouble, HasAVX512>,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;
1728
// Lower the 512-bit masked unaligned FP load/store intrinsics. The mask
// arrives in a GPR (GR8 for 8 x f64, GR16 for 16 x f32) and is copied into
// the matching write-mask register class before use.

// Masked load with an all-zeros pass-through -> zero-masking load (rmkz).
def : Pat<(v8f64 (int_x86_avx512_mask_loadu_pd_512
                     addr:$ptr, (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
          (VMOVUPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                        addr:$ptr)>;

def : Pat<(v16f32 (int_x86_avx512_mask_loadu_ps_512
                      addr:$ptr, (bc_v16f32 (v16i32 immAllZerosV)),
                      GR16:$mask)),
          (VMOVUPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                        addr:$ptr)>;

// Masked store -> masked store instruction (mrk).
def : Pat<(int_x86_avx512_mask_storeu_ps_512
              addr:$ptr, (v16f32 VR512:$src), GR16:$mask),
          (VMOVUPSZmrk addr:$ptr,
                       (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                       VR512:$src)>;

def : Pat<(int_x86_avx512_mask_storeu_pd_512
              addr:$ptr, (v8f64 VR512:$src), GR8:$mask),
          (VMOVUPDZmrk addr:$ptr,
                       (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                       VR512:$src)>;
Elena Demikhovsky1f3ed412013-10-22 09:19:28 +00001745
// Packed integer moves. Same construction as the FP moves: load forms from
// avx512_load_vl plus store forms from avx512_store_vl. The aligned variants
// select alignedstore512 / alignedstore256 / alignedstore; the unaligned
// variants use plain "store" at every width.

// Aligned, 32-bit and 64-bit element granularity.
defm VMOVDQA32 :
    avx512_load_vl<0x6F, "vmovdqa32", "alignedload", "i", "32",
                   "16", "8", "4", SSEPackedInt, HasAVX512>,
    avx512_store_vl<0x7F, "vmovdqa32", "alignedstore", "512", "256", "",
                    "i", "32", "16", "8", "4", SSEPackedInt, HasAVX512>,
    PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 :
    avx512_load_vl<0x6F, "vmovdqa64", "alignedload", "i", "64",
                   "8", "4", "2", SSEPackedInt, HasAVX512>,
    avx512_store_vl<0x7F, "vmovdqa64", "alignedstore", "512", "256", "",
                    "i", "64", "8", "4", "2", SSEPackedInt, HasAVX512>,
    PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned, 8-bit and 16-bit element granularity (predicated on HasBWI).
defm VMOVDQU8 :
    avx512_load_vl<0x6F, "vmovdqu8", "load", "i", "8",
                   "64", "32", "16", SSEPackedInt, HasBWI>,
    avx512_store_vl<0x7F, "vmovdqu8", "store", "", "", "",
                    "i", "8", "64", "32", "16", SSEPackedInt, HasBWI>,
    XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 :
    avx512_load_vl<0x6F, "vmovdqu16", "load", "i", "16",
                   "32", "16", "8", SSEPackedInt, HasBWI>,
    avx512_store_vl<0x7F, "vmovdqu16", "store", "", "", "",
                    "i", "16", "32", "16", "8", SSEPackedInt, HasBWI>,
    XD, VEX_W, EVEX_CD8<16, CD8VF>;

// Unaligned, 32-bit and 64-bit element granularity.
defm VMOVDQU32 :
    avx512_load_vl<0x6F, "vmovdqu32", "load", "i", "32",
                   "16", "8", "4", SSEPackedInt, HasAVX512>,
    avx512_store_vl<0x7F, "vmovdqu32", "store", "", "", "",
                    "i", "32", "16", "8", "4", SSEPackedInt, HasAVX512>,
    XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 :
    avx512_load_vl<0x6F, "vmovdqu64", "load", "i", "64",
                   "8", "4", "2", SSEPackedInt, HasAVX512>,
    avx512_store_vl<0x7F, "vmovdqu64", "store", "", "", "",
                    "i", "64", "8", "4", "2", SSEPackedInt, HasAVX512>,
    XS, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky1f3ed412013-10-22 09:19:28 +00001783
// Lower the 512-bit masked unaligned integer load/store intrinsics. As with
// the FP versions, the GPR mask (GR16 for 16 x i32, GR8 for 8 x i64) is
// copied into the corresponding write-mask register class.

// Masked load with an all-zeros pass-through -> zero-masking load (rmkz).
def : Pat<(v16i32 (int_x86_avx512_mask_loadu_d_512
                      addr:$ptr, (v16i32 immAllZerosV), GR16:$mask)),
          (VMOVDQU32Zrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                          addr:$ptr)>;

def : Pat<(v8i64 (int_x86_avx512_mask_loadu_q_512
                     addr:$ptr, (bc_v8i64 (v16i32 immAllZerosV)), GR8:$mask)),
          (VMOVDQU64Zrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                          addr:$ptr)>;

// Masked store -> masked store instruction (mrk).
def : Pat<(int_x86_avx512_mask_storeu_d_512
              addr:$ptr, (v16i32 VR512:$src), GR16:$mask),
          (VMOVDQU32Zmrk addr:$ptr,
                         (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                         VR512:$src)>;

def : Pat<(int_x86_avx512_mask_storeu_q_512
              addr:$ptr, (v8i64 VR512:$src), GR8:$mask),
          (VMOVDQU64Zmrk addr:$ptr,
                         (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                         VR512:$src)>;
1800
// vselect against an all-zeros vector is a zero-masked register move.
// When the zero vector is the "true" operand the mask is inverted first with
// KNOTWrr.
let AddedComplexity = 20 in {
  // v8i64: select(mask, src, 0).
  def : Pat<(v8i64 (vselect VK8WM:$mask,
                            (v8i64 VR512:$src),
                            (bc_v8i64 (v16i32 immAllZerosV)))),
            (VMOVDQU64Zrrkz VK8WM:$mask, VR512:$src)>;

  // v8i64: select(mask, 0, src). The 8-bit mask is copied to VK16 for
  // KNOTWrr and the result is copied back to VK8.
  // NOTE(review): the result refers to $mask as VK8 while the source pattern
  // binds it as VK8WM - kept exactly as written.
  def : Pat<(v8i64 (vselect VK8WM:$mask,
                            (bc_v8i64 (v16i32 immAllZerosV)),
                            (v8i64 VR512:$src))),
            (VMOVDQU64Zrrkz
                (COPY_TO_REGCLASS
                    (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)), VK8),
                VR512:$src)>;

  // v16i32: select(mask, src, 0).
  def : Pat<(v16i32 (vselect VK16WM:$mask,
                             (v16i32 VR512:$src),
                             (v16i32 immAllZerosV))),
            (VMOVDQU32Zrrkz VK16WM:$mask, VR512:$src)>;

  // v16i32: select(mask, 0, src).
  def : Pat<(v16i32 (vselect VK16WM:$mask,
                             (v16i32 immAllZerosV),
                             (v16i32 VR512:$src))),
            (VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
}
Robert Khasanov7ca7df02014-08-04 14:35:15 +00001819
// Moves between general-purpose registers / memory and the low element of an
// XMM register (vmovd / vmovq), plus the i64 <-> f64 bitcast forms.
//

// GR32 -> low dword of XMM.
def VMOVDI2PDIZrr :
    AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
             "vmovd\t{$src, $dst|$dst, $src}",
             [(set VR128X:$dst, (v4i32 (scalar_to_vector GR32:$src)))],
             IIC_SSE_MOVDQ>,
    EVEX, VEX_LIG;

// 32-bit memory -> low dword of XMM.
def VMOVDI2PDIZrm :
    AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
             "vmovd\t{$src, $dst|$dst, $src}",
             [(set VR128X:$dst,
                   (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
             IIC_SSE_MOVDQ>,
    EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;

// GR64 -> low qword of XMM.
def VMOV64toPQIZrr :
    AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
             "vmovq\t{$src, $dst|$dst, $src}",
             [(set VR128X:$dst, (v2i64 (scalar_to_vector GR64:$src)))],
             IIC_SSE_MOVDQ>,
    EVEX, VEX_W, VEX_LIG;

// i64 <-> f64 bitcasts between GR64 and FR64; marked isCodeGenOnly.
let isCodeGenOnly = 1 in {
  def VMOV64toSDZrr :
      AVX512BI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
               "vmovq\t{$src, $dst|$dst, $src}",
               [(set FR64:$dst, (bitconvert GR64:$src))],
               IIC_SSE_MOVDQ>,
      EVEX, VEX_W, Sched<[WriteMove]>;

  def VMOVSDto64Zrr :
      AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
               "vmovq\t{$src, $dst|$dst, $src}",
               [(set GR64:$dst, (bitconvert FR64:$src))],
               IIC_SSE_MOVDQ>,
      EVEX, VEX_W, Sched<[WriteMove]>;
}

// FR64 bit pattern stored to 64-bit memory.
def VMOVSDto64Zmr :
    AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
             "vmovq\t{$src, $dst|$dst, $src}",
             [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
             IIC_SSE_MOVDQ>,
    EVEX, VEX_W, Sched<[WriteStore]>, EVEX_CD8<64, CD8VT1>;
1852
// i32 -> f32 bitcast: move a doubleword from GR32 or 32-bit memory into an
// FR32X scalar register. Both forms are isCodeGenOnly.
//
let isCodeGenOnly = 1 in {
  def VMOVDI2SSZrr :
      AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
               "vmovd\t{$src, $dst|$dst, $src}",
               [(set FR32X:$dst, (bitconvert GR32:$src))],
               IIC_SSE_MOVDQ>,
      EVEX, VEX_LIG;

  def VMOVDI2SSZrm :
      AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
               "vmovd\t{$src, $dst|$dst, $src}",
               [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
               IIC_SSE_MOVDQ>,
      EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001866
// Extract element 0 of a v4i32 XMM register into GR32 or 32-bit memory.
//
def VMOVPDI2DIZrr :
    AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
             "vmovd\t{$src, $dst|$dst, $src}",
             [(set GR32:$dst,
                   (vector_extract (v4i32 VR128X:$src), (iPTR 0)))],
             IIC_SSE_MOVD_ToGP>,
    EVEX, VEX_LIG;

def VMOVPDI2DIZmr :
    AVX512BI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128X:$src),
             "vmovd\t{$src, $dst|$dst, $src}",
             [(store (i32 (vector_extract (v4i32 VR128X:$src), (iPTR 0))),
                     addr:$dst)],
             IIC_SSE_MOVDQ>,
    EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
1880
// Extract element 0 of a v2i64 XMM register into GR64 or 64-bit memory.
// Both definitions require HasAVX512 and In64BitMode.
//
def VMOVPQIto64Zrr :
    I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
      "vmovq\t{$src, $dst|$dst, $src}",
      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src), (iPTR 0)))],
      IIC_SSE_MOVD_ToGP>,
    PD, EVEX, VEX_LIG, VEX_W,
    Requires<[HasAVX512, In64BitMode]>;

// The memory-destination form uses opcode 0xD6 rather than 0x7E.
def VMOVPQIto64Zmr :
    I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
      "vmovq\t{$src, $dst|$dst, $src}",
      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)), addr:$dst)],
      IIC_SSE_MOVDQ>,
    EVEX, PD, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>,
    Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
1897
// f32 -> i32 bitcast: move the bits of an FR32X scalar into GR32 or 32-bit
// memory. Both forms are isCodeGenOnly.
//
let isCodeGenOnly = 1 in {
  def VMOVSS2DIZrr :
      AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32X:$src),
               "vmovd\t{$src, $dst|$dst, $src}",
               [(set GR32:$dst, (bitconvert FR32X:$src))],
               IIC_SSE_MOVD_ToGP>,
      EVEX, VEX_LIG;

  def VMOVSS2DIZmr :
      AVX512BI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32X:$src),
               "vmovd\t{$src, $dst|$dst, $src}",
               [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
               IIC_SSE_MOVDQ>,
      EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001912
// Load a quadword from 64-bit memory into the low element of a v2i64 XMM
// register.
// NOTE(review): unlike the neighbouring vmovd/vmovq definitions, no itinerary
// argument is passed here - confirm whether that is deliberate.
//
def VMOVQI2PQIZrm :
    AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i64mem:$src),
             "vmovq\t{$src, $dst|$dst, $src}",
             [(set VR128X:$dst,
                   (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
    EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
1921
1922//===----------------------------------------------------------------------===//
1923// AVX-512 MOVSS, MOVSD
1924//===----------------------------------------------------------------------===//
1925
// Scalar move forms shared by the SS and SD instantiations.
//   rr  : OpNode(src1, scalar_to_vector(src2)) -> VR128X
//   rrk : write-masked register form (VK1WM mask, $src1 tied to $dst); it
//         carries no selection pattern
//   rm  : scalar load through mem_pat into RC
//   mr  : scalar store of RC
// All four are defined with hasSideEffects = 0.
multiclass avx512_move_scalar <string asm, RegisterClass RC,
                              SDNode OpNode, ValueType vt,
                              X86MemOperand x86memop, PatFrag mem_pat> {
  let hasSideEffects = 0 in {
    def rr : SI<0x10, MRMSrcReg,
                (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2),
                asm # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(set VR128X:$dst,
                      (vt (OpNode VR128X:$src1,
                                  (scalar_to_vector RC:$src2))))],
                IIC_SSE_MOV_S_RR>,
             EVEX_4V, VEX_LIG;

    let Constraints = "$src1 = $dst" in {
      def rrk : SI<0x10, MRMSrcReg,
                   (outs VR128X:$dst),
                   (ins VR128X:$src1, VK1WM:$mask, RC:$src2, RC:$src3),
                   asm # " \t{$src3, $src2, $dst {${mask}}|$dst {${mask}}, $src2, $src3}",
                   [], IIC_SSE_MOV_S_RR>,
                EVEX_4V, VEX_LIG, EVEX_K;
    }

    def rm : SI<0x10, MRMSrcMem,
                (outs RC:$dst), (ins x86memop:$src),
                asm # " \t{$src, $dst|$dst, $src}",
                [(set RC:$dst, (mem_pat addr:$src))],
                IIC_SSE_MOV_S_RM>,
             EVEX, VEX_LIG;

    def mr : SI<0x11, MRMDestMem,
                (outs), (ins x86memop:$dst, RC:$src),
                asm # " \t{$src, $dst|$dst, $src}",
                [(store RC:$src, addr:$dst)],
                IIC_SSE_MOV_S_MR>,
             EVEX, VEX_LIG;
  } // hasSideEffects = 0
}
1951
// Single-precision scalar move, in the SSEPackedSingle execution domain.
let ExeDomain = SSEPackedSingle in {
  defm VMOVSSZ : avx512_move_scalar<"movss", FR32X, X86Movss, v4f32,
                                    f32mem, loadf32>,
                 XS, EVEX_CD8<32, CD8VT1>;
}

// Double-precision scalar move, in the SSEPackedDouble execution domain.
let ExeDomain = SSEPackedDouble in {
  defm VMOVSDZ : avx512_move_scalar<"movsd", FR64X, X86Movsd, v2f64,
                                    f64mem, loadf64>,
                 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
}
1959
// Scalar select on a one-bit mask is lowered to the masked scalar move:
// $src2 (the "false" value) is copied into VR128X as the tied first operand,
// $src1 (the "true" value) is passed as the last operand, the remaining
// scalar operand is IMPLICIT_DEF, and the result is copied back to the
// scalar register class.
def : Pat<(f32 (X86select VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS
              (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
                          VK1WM:$mask, (f32 (IMPLICIT_DEF)), FR32X:$src1),
              FR32X)>;

def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS
              (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
                          VK1WM:$mask, (f64 (IMPLICIT_DEF)), FR64X:$src1),
              FR64X)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001967
// Register-register scalar moves using the store opcode (0x11, MRMDestReg).
// They exist for the disassembler only: pattern-less, isCodeGenOnly, and
// ForceDisassemble.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
  def VMOVSSZrr_REV :
      SI<0x11, MRMDestReg,
         (outs VR128X:$dst), (ins VR128X:$src1, FR32X:$src2),
         "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
         [], IIC_SSE_MOV_S_RR>,
      XS, EVEX_4V, VEX_LIG;

  def VMOVSDZrr_REV :
      SI<0x11, MRMDestReg,
         (outs VR128X:$dst), (ins VR128X:$src1, FR64X:$src2),
         "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
         [], IIC_SSE_MOV_S_RR>,
      XD, EVEX_4V, VEX_LIG, VEX_W;
}
1981
// Selection patterns that map zero-extending moves, scalar loads, low-element
// extracts and MOVSS/MOVSD-style shuffles onto the AVX-512 scalar moves.
let Predicates = [HasAVX512] in {
  let AddedComplexity = 15 in {
    // Zero-extending move of a scalar into an XMM register: merge the scalar
    // into the low element of a zeroed register (V_SET0).
    def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))),
              (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)>;
    def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
              (VMOVSSZrr (v4f32 (V_SET0)),
                         (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
    def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
              (VMOVSSZrr (v4i32 (V_SET0)),
                         (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
    def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
              (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)>;

    // 256-bit source: keep the low 32-bit element and clear everything above.
    def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
              (SUBREG_TO_REG (i32 0),
                  (VMOVSSZrr (v4f32 (V_SET0)),
                             (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)),
                  sub_xmm)>;
    def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
              (SUBREG_TO_REG (i32 0),
                  (VMOVSSZrr (v4i32 (V_SET0)),
                             (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)),
                  sub_xmm)>;
  }

  let AddedComplexity = 20 in {
    // Scalar f32 loads, with or without an explicit zero-extension, select
    // the rm form; its result is copied into VR128X.
    def : Pat<(v4f32 (X86vzmovl
                         (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
              (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
    def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
              (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
    def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
              (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;

    // The same for scalar f64 loads.
    def : Pat<(v2f64 (X86vzmovl
                         (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
              (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
    def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
              (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
    def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
              (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
    def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
              (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
    def : Pat<(v2f64 (X86vzload addr:$src)),
              (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;

    // The load patterns above in the shape they take for 256-bit types: the
    // 128-bit load result is placed in sub_xmm via SUBREG_TO_REG.
    def : Pat<(v8i32 (X86vzmovl
                         (insert_subvector undef,
                             (v4i32 (scalar_to_vector (loadi32 addr:$src))),
                             (iPTR 0)))),
              (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
    def : Pat<(v8f32 (X86vzmovl
                         (insert_subvector undef,
                             (v4f32 (scalar_to_vector (loadf32 addr:$src))),
                             (iPTR 0)))),
              (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
    def : Pat<(v4f64 (X86vzmovl
                         (insert_subvector undef,
                             (v2f64 (scalar_to_vector (loadf64 addr:$src))),
                             (iPTR 0)))),
              (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
  }

  // 256-bit zero-extending scalar inserts from a register or a 64-bit load.
  def : Pat<(v8f32 (X86vzmovl
                       (insert_subvector undef,
                           (v4f32 (scalar_to_vector FR32X:$src)),
                           (iPTR 0)))),
            (SUBREG_TO_REG (i32 0),
                (v4f32 (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)),
                sub_xmm)>;
  def : Pat<(v4f64 (X86vzmovl
                       (insert_subvector undef,
                           (v2f64 (scalar_to_vector FR64X:$src)),
                           (iPTR 0)))),
            (SUBREG_TO_REG (i64 0),
                (v2f64 (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)),
                sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl
                       (insert_subvector undef,
                           (v2i64 (scalar_to_vector (loadi64 addr:$src))),
                           (iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;

  // 256-bit source: keep the low 64-bit element and clear everything above.
  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
                (VMOVSDZrr (v2f64 (V_SET0)),
                           (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)),
                sub_xmm)>;

  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
                (VMOVSDZrr (v2i64 (V_SET0)),
                           (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)),
                sub_xmm)>;

  // Store of element 0 extracted from a vector -> scalar store.
  def : Pat<(store (f32 (vector_extract (v4f32 VR128X:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVSSZmr addr:$dst,
                       (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
  def : Pat<(store (f64 (vector_extract (v2f64 VR128X:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVSDZmr addr:$dst,
                       (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X))>;

  // X86Movss shuffles on 128-bit vectors.
  def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
            (VMOVSSZrr (v4i32 VR128X:$src1),
                       (COPY_TO_REGCLASS (v4i32 VR128X:$src2), FR32X))>;
  def : Pat<(v4f32 (X86Movss VR128X:$src1, VR128X:$src2)),
            (VMOVSSZrr (v4f32 VR128X:$src1),
                       (COPY_TO_REGCLASS (v4f32 VR128X:$src2), FR32X))>;

  // X86Movss shuffles on 256-bit vectors, performed on the sub_xmm halves.
  def : Pat<(v8i32 (X86Movss VR256X:$src1, VR256X:$src2)),
            (SUBREG_TO_REG (i32 0),
                (VMOVSSZrr (EXTRACT_SUBREG (v8i32 VR256X:$src1), sub_xmm),
                           (EXTRACT_SUBREG (v8i32 VR256X:$src2), sub_xmm)),
                sub_xmm)>;
  def : Pat<(v8f32 (X86Movss VR256X:$src1, VR256X:$src2)),
            (SUBREG_TO_REG (i32 0),
                (VMOVSSZrr (EXTRACT_SUBREG (v8f32 VR256X:$src1), sub_xmm),
                           (EXTRACT_SUBREG (v8f32 VR256X:$src2), sub_xmm)),
                sub_xmm)>;

  // X86Movsd shuffles on 128-bit vectors.
  def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1,
                       (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
  def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1,
                       (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
  def : Pat<(v4f32 (X86Movsd VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1,
                       (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
  def : Pat<(v4i32 (X86Movsd VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1,
                       (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;

  // X86Movsd shuffles on 256-bit vectors, performed on the sub_xmm halves.
  def : Pat<(v4i64 (X86Movsd VR256X:$src1, VR256X:$src2)),
            (SUBREG_TO_REG (i32 0),
                (VMOVSDZrr (EXTRACT_SUBREG (v4i64 VR256X:$src1), sub_xmm),
                           (EXTRACT_SUBREG (v4i64 VR256X:$src2), sub_xmm)),
                sub_xmm)>;
  def : Pat<(v4f64 (X86Movsd VR256X:$src1, VR256X:$src2)),
            (SUBREG_TO_REG (i32 0),
                (VMOVSDZrr (EXTRACT_SUBREG (v4f64 VR256X:$src1), sub_xmm),
                           (EXTRACT_SUBREG (v4f64 VR256X:$src2), sub_xmm)),
                sub_xmm)>;

  // X86Movlpd / X86Movlps on registers also select the scalar-double move.
  def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1,
                       (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
  def : Pat<(v2i64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1,
                       (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
  def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1,
                       (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
  def : Pat<(v4i32 (X86Movlps VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1,
                       (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
}
2122
// vmovq with zero-extension of the low quadword (X86vzmovl on v2i64).

// Register source.
let AddedComplexity = 15 in {
  def VMOVZPQILo2PQIZrr :
      AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src),
                "vmovq\t{$src, $dst|$dst, $src}",
                [(set VR128X:$dst,
                      (v2i64 (X86vzmovl (v2i64 VR128X:$src))))],
                IIC_SSE_MOVQ_RR>,
      EVEX, VEX_W;
}

// Memory source; the operand is declared as i128mem and matched through
// loadv2i64.
let AddedComplexity = 20 in {
  def VMOVZPQILo2PQIZrm :
      AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), (ins i128mem:$src),
                "vmovq\t{$src, $dst|$dst, $src}",
                [(set VR128X:$dst,
                      (v2i64 (X86vzmovl (loadv2i64 addr:$src))))],
                IIC_SSE_MOVDQ>,
      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
}
2139
// Zero-extending vmovd / vmovq selection patterns.
let Predicates = [HasAVX512] in {
  // The 128-bit movd/movq instructions selected here are used directly for
  // X86vzmovl, i.e. they are relied on to zero the bits above the element
  // they write.
  let AddedComplexity = 20 in {
    def : Pat<(v4i32 (X86vzmovl
                         (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
              (VMOV64toPQIZrr GR64:$src)>;
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
              (VMOVDI2PDIZrr GR32:$src)>;

    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
              (VMOVZPQILo2PQIZrm addr:$src)>;
    def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
              (VMOVZPQILo2PQIZrr VR128X:$src)>;
    def : Pat<(v2i64 (X86vzload addr:$src)),
              (VMOVZPQILo2PQIZrm addr:$src)>;
  }

  // 256-bit scalar_to_vector + zero-extension is matched with the regular
  // 128-bit instructions, placed in sub_xmm via SUBREG_TO_REG.
  def : Pat<(v8i32 (X86vzmovl
                       (insert_subvector undef,
                           (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl
                       (insert_subvector undef,
                           (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
}
2170
// Inserting a GPR into element 0 of an all-zeros or undef 512-bit vector is
// a vmovd/vmovq into the low XMM, widened with SUBREG_TO_REG.

// Base vector is all zeros.
def : Pat<(v16i32 (X86Vinsert (v16i32 immAllZerosV), GR32:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;

def : Pat<(v8i64 (X86Vinsert (bc_v8i64 (v16i32 immAllZerosV)),
                             GR64:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;

// Base vector is undef.
def : Pat<(v16i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;

def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
2182
2183//===----------------------------------------------------------------------===//
Adam Nemet7f62b232014-06-10 16:39:53 +00002184// AVX-512 - Non-temporals
2185//===----------------------------------------------------------------------===//
// vmovntdqa loads, all scheduled as WriteLoad. Only the 512-bit form has a
// selection pattern (int_x86_avx512_movntdqa); the 256-bit and 128-bit forms
// are pattern-less and additionally require HasVLX.
let SchedRW = [WriteLoad] in {
  def VMOVNTDQAZrm :
      AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst), (ins i512mem:$src),
               "vmovntdqa\t{$src, $dst|$dst, $src}",
               [(set VR512:$dst, (int_x86_avx512_movntdqa addr:$src))],
               SSEPackedInt>,
      EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;

  let Predicates = [HasAVX512, HasVLX] in {
    def VMOVNTDQAZ256rm :
        AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst), (ins i256mem:$src),
                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                 [], SSEPackedInt>,
        EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

    def VMOVNTDQAZ128rm :
        AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst), (ins i128mem:$src),
                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                 [], SSEPackedInt>,
        EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
  }
}
2207
// One non-temporal store instruction (mr): stores RC:$src, typed as OpVT,
// through st_frag. It is marked mayStore, scheduled as WriteStore, and given
// AddedComplexity = 400.
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, PatFrag st_frag,
                        ValueType OpVT, RegisterClass RC, X86MemOperand memop,
                        Domain d, InstrItinClass itin = IIC_SSE_MOVNT> {
  let SchedRW = [WriteStore], mayStore = 1, AddedComplexity = 400 in {
    def mr : AVX512PI<opc, MRMDestMem,
                      (outs), (ins memop:$dst, RC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      [(st_frag (OpVT RC:$src), addr:$dst)],
                      d, itin>,
             EVEX;
  }
}
2217
// Instantiate avx512_movnt once per vector length.
//   "v" # vszN # elty # elsz    -> the stored ValueType
//   elty # "{512,256,128}mem"   -> the memory operand
// Z is guarded by prd alone; Z256 and Z128 additionally require HasVLX.
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr, PatFrag st_frag,
                           string elty, string elsz, string vsz512,
                           string vsz256, string vsz128, Domain d,
                           Predicate prd, InstrItinClass itin = IIC_SSE_MOVNT> {
  // 512-bit form.
  let Predicates = [prd] in {
    defm Z : avx512_movnt<opc, OpcodeStr, st_frag,
                          !cast<ValueType>("v" # vsz512 # elty # elsz),
                          VR512,
                          !cast<X86MemOperand>(elty # "512mem"),
                          d, itin>,
             EVEX_V512;
  }

  // 256-bit and 128-bit forms.
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, st_frag,
                             !cast<ValueType>("v" # vsz256 # elty # elsz),
                             VR256X,
                             !cast<X86MemOperand>(elty # "256mem"),
                             d, itin>,
                EVEX_V256;

    defm Z128 : avx512_movnt<opc, OpcodeStr, st_frag,
                             !cast<ValueType>("v" # vsz128 # elty # elsz),
                             VR128X,
                             !cast<X86MemOperand>(elty # "128mem"),
                             d, itin>,
                EVEX_V128;
  }
}
2240
// Non-temporal stores; all three match alignednontemporalstore.

// Integer, stored as vectors of i64.
defm VMOVNTDQ :
    avx512_movnt_vl<0xE7, "vmovntdq", alignednontemporalstore,
                    "i", "64", "8", "4", "2", SSEPackedInt, HasAVX512>,
    PD, EVEX_CD8<64, CD8VF>;

// Packed double.
defm VMOVNTPD :
    avx512_movnt_vl<0x2B, "vmovntpd", alignednontemporalstore,
                    "f", "64", "8", "4", "2", SSEPackedDouble, HasAVX512>,
    PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Packed single.
defm VMOVNTPS :
    avx512_movnt_vl<0x2B, "vmovntps", alignednontemporalstore,
                    "f", "32", "16", "8", "4", SSEPackedSingle, HasAVX512>,
    PS, EVEX_CD8<32, CD8VF>;
2252
Adam Nemet7f62b232014-06-10 16:39:53 +00002253//===----------------------------------------------------------------------===//
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002254// AVX-512 - Integer arithmetic
2255//
// Packed integer binary operation with selection patterns on every form.
//
// Three source flavours are defined: register (rr*), full-vector load (rm*)
// and scalar load broadcast through X86VBroadcast (rmb*). Each flavour comes
// unmasked, merge-masked ("k": $src0 is tied to $dst and selected for
// masked-off elements) and zero-masked ("kz": masked-off elements select
// immAllZerosV).
//
//   OpVT          - vector type the patterns operate on.
//   KRC / RC      - mask and vector register classes.
//   memop_frag    - load fragment for the full-vector memory forms.
//   scalar_mfrag  - load fragment for the broadcast forms.
//   BrdcstStr     - text appended to the memory operand of broadcast forms.
//   IsCommutable  - applied to the unmasked rr form only.
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           ValueType OpVT, RegisterClass KRC, RegisterClass RC,
                           PatFrag memop_frag, X86MemOperand x86memop,
                           PatFrag scalar_mfrag, X86MemOperand x86scalar_mop,
                           string BrdcstStr, OpndItins itins,
                           bit IsCommutable = 0> {
  // --- Register / register -------------------------------------------------
  let isCommutable = IsCommutable in
  def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
             !strconcat(OpcodeStr,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst,
                   (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
             itins.rr>,
           EVEX_4V;

  let AddedComplexity = 30 in {
    let Constraints = "$src0 = $dst" in
    def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                (ins RC:$src0, KRC:$mask, RC:$src1, RC:$src2),
                !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}}",
                           "|$dst {${mask}}, $src1, $src2}"),
                [(set RC:$dst,
                      (OpVT (vselect KRC:$mask,
                                     (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
                                     RC:$src0)))],
                itins.rr>,
              EVEX_4V, EVEX_K;

    def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                 (ins KRC:$mask, RC:$src1, RC:$src2),
                 !strconcat(OpcodeStr,
                   " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
                 [(set RC:$dst,
                       (OpVT (vselect KRC:$mask,
                                      (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
                                      (OpVT immAllZerosV))))],
                 itins.rr>,
               EVEX_4V, EVEX_KZ;
  }

  let mayLoad = 1 in {
    // --- Register / full-vector memory -------------------------------------
    def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86memop:$src2),
               !strconcat(OpcodeStr,
                          " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set RC:$dst,
                     (OpVT (OpNode (OpVT RC:$src1),
                                   (memop_frag addr:$src2))))],
               itins.rm>,
             EVEX_4V;

    let AddedComplexity = 30 in {
      let Constraints = "$src0 = $dst" in
      def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                  (ins RC:$src0, KRC:$mask, RC:$src1, x86memop:$src2),
                  !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}}",
                             "|$dst {${mask}}, $src1, $src2}"),
                  [(set RC:$dst,
                        (OpVT (vselect KRC:$mask,
                                       (OpNode (OpVT RC:$src1),
                                               (memop_frag addr:$src2)),
                                       RC:$src0)))],
                  itins.rm>,
                EVEX_4V, EVEX_K;

      def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                   (ins KRC:$mask, RC:$src1, x86memop:$src2),
                   !strconcat(OpcodeStr,
                              " \t{$src2, $src1, $dst {${mask}} {z}",
                              "|$dst {${mask}} {z}, $src1, $src2}"),
                   [(set RC:$dst,
                         (OpVT (vselect KRC:$mask,
                                        (OpNode (OpVT RC:$src1),
                                                (memop_frag addr:$src2)),
                                        (OpVT immAllZerosV))))],
                   itins.rm>,
                 EVEX_4V, EVEX_KZ;
    }

    // --- Register / broadcast scalar memory --------------------------------
    def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                (ins RC:$src1, x86scalar_mop:$src2),
                !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                           ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr,
                           "}"),
                [(set RC:$dst,
                      (OpNode RC:$src1,
                              (OpVT (X86VBroadcast
                                      (scalar_mfrag addr:$src2)))))],
                itins.rm>,
              EVEX_4V, EVEX_B;

    let AddedComplexity = 30 in {
      let Constraints = "$src0 = $dst" in
      def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src0, KRC:$mask, RC:$src1, x86scalar_mop:$src2),
                   !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                     ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
                     BrdcstStr, "}"),
                   [(set RC:$dst,
                         (OpVT (vselect KRC:$mask,
                                        (OpNode (OpVT RC:$src1),
                                                (OpVT (X86VBroadcast
                                                  (scalar_mfrag addr:$src2)))),
                                        RC:$src0)))],
                   itins.rm>,
                 EVEX_4V, EVEX_B, EVEX_K;

      def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                    (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
                    !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                      ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
                      BrdcstStr, "}"),
                    [(set RC:$dst,
                          (OpVT (vselect KRC:$mask,
                                         (OpNode (OpVT RC:$src1),
                                                 (OpVT (X86VBroadcast
                                                   (scalar_mfrag addr:$src2)))),
                                         (OpVT immAllZerosV))))],
                    itins.rm>,
                  EVEX_4V, EVEX_B, EVEX_KZ;
    }
  }
}
Elena Demikhovskybb2f6b72014-03-27 09:45:08 +00002345
// Packed integer binary operation without selection patterns: every def has
// an empty pattern list, so instructions are matched by separate Pat<>
// records (or only by the assembler).
//
// Defines the same nine forms as avx512_binop_rm: register (rr*), full-vector
// load (rm*) and scalar-broadcast load (rmb*), each unmasked, masked ("k")
// and zero-masked ("kz").
//
//   DstVT / SrcVT - accepted for callers but not referenced in the body.
//   IsCommutable  - applied to rr, rrk and rrkz.
//
// NOTE(review): unlike avx512_binop_rm, the "k" forms here take no $src0
// pass-through operand tied to $dst -- confirm before using them for codegen.
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, ValueType DstVT,
                            ValueType SrcVT, RegisterClass KRC, RegisterClass RC,
                            PatFrag memop_frag, X86MemOperand x86memop,
                            PatFrag scalar_mfrag, X86MemOperand x86scalar_mop,
                            string BrdcstStr, OpndItins itins, bit IsCommutable = 0> {
  let isCommutable = IsCommutable in
  {
    // The unmasked form carries the same itinerary as its masked siblings.
    def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [], itins.rr>, EVEX_4V;
    def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
               (ins KRC:$mask, RC:$src1, RC:$src2),
               !strconcat(OpcodeStr,
                  " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
               [], itins.rr>, EVEX_4V, EVEX_K;
    def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                (ins KRC:$mask, RC:$src1, RC:$src2),
                !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
                    "|$dst {${mask}} {z}, $src1, $src2}"),
                [], itins.rr>, EVEX_4V, EVEX_KZ;
  }
  let mayLoad = 1 in {
    // The unmasked load form likewise uses the memory itinerary.
    def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
              (ins RC:$src1, x86memop:$src2),
              !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [], itins.rm>, EVEX_4V;
    def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
               (ins KRC:$mask, RC:$src1, x86memop:$src2),
               !strconcat(OpcodeStr,
                   " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
               [], itins.rm>, EVEX_4V, EVEX_K;
    def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                (ins KRC:$mask, RC:$src1, x86memop:$src2),
                !strconcat(OpcodeStr,
                    " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
                [], itins.rm>, EVEX_4V, EVEX_KZ;
    def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86scalar_mop:$src2),
               !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                          ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
               [], itins.rm>, EVEX_4V, EVEX_B;
    def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
                !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                           ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
                           BrdcstStr, "}"),
                [], itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
    def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
                 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                            ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
                            BrdcstStr, "}"),
                 [], itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
  }
}
2402
// 512-bit integer add / subtract / low multiply. The 32-bit element forms use
// VK16WM masks and a {1to16} broadcast; the 64-bit element forms use VK8WM,
// {1to8} and VEX_W. The last template argument is IsCommutable.
defm VPADDDZ  : avx512_binop_rm<0xFE, "vpaddd", add, v16i32, VK16WM, VR512,
                                memopv16i32, i512mem, loadi32, i32mem,
                                "{1to16}", SSE_INTALU_ITINS_P, 1>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPSUBDZ  : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VK16WM, VR512,
                                memopv16i32, i512mem, loadi32, i32mem,
                                "{1to16}", SSE_INTALU_ITINS_P, 0>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VK16WM, VR512,
                                memopv16i32, i512mem, loadi32, i32mem,
                                "{1to16}", SSE_INTALU_ITINS_P, 1>,
                T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPADDQZ  : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VK8WM, VR512,
                                memopv8i64, i512mem, loadi64, i64mem,
                                "{1to8}", SSE_INTALU_ITINS_P, 1>,
                EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W;

defm VPSUBQZ  : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VK8WM, VR512,
                                memopv8i64, i512mem, loadi64, i64mem,
                                "{1to8}", SSE_INTALU_ITINS_P, 0>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002422
// Widening 32x32 -> 64-bit multiplies (signed and unsigned). They are built
// from avx512_binop_rm2, which attaches no selection patterns. Both use the
// integer-multiply itinerary so the signed form is scheduled like its
// unsigned twin.
defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTMUL_ITINS_P, 1>, T8PD, EVEX_V512,
                   EVEX_CD8<64, CD8VF>, VEX_W;

defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTMUL_ITINS_P, 1>, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002431
// Select the X86pmuludq node to the unmasked register form.
def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
          (VPMULUDQZrr VR512:$src1, VR512:$src2)>;

// Masked multiply intrinsics called with a zero pass-through vector and an
// all-ones (i8 -1) mask select to the unmasked register forms.
def : Pat<(v8i64 (int_x86_avx512_mask_pmulu_dq_512
                   (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                   (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512
                   (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                   (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMULDQZrr VR512:$src1, VR512:$src2)>;
2441
// 512-bit integer min / max, signed and unsigned, for 32- and 64-bit elements.
// Integer min and max give the same result with the operands swapped, so all
// eight definitions pass IsCommutable = 1 (the 64-bit forms now match the
// 32-bit ones).
defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky199c8232013-10-27 08:18:37 +00002477
// Masked integer min/max intrinsics called with a zero pass-through vector
// and an all-ones mask (i16 -1 for 16 elements, i8 -1 for 8 elements) select
// to the unmasked register forms.
def : Pat<(v16i32 (int_x86_avx512_mask_pmaxs_d_512
                    (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                    (v16i32 immAllZerosV), (i16 -1))),
          (VPMAXSDZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v16i32 (int_x86_avx512_mask_pmaxu_d_512
                    (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                    (v16i32 immAllZerosV), (i16 -1))),
          (VPMAXUDZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pmaxs_q_512
                   (v8i64 VR512:$src1), (v8i64 VR512:$src2),
                   (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMAXSQZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pmaxu_q_512
                   (v8i64 VR512:$src1), (v8i64 VR512:$src2),
                   (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMAXUQZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v16i32 (int_x86_avx512_mask_pmins_d_512
                    (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                    (v16i32 immAllZerosV), (i16 -1))),
          (VPMINSDZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v16i32 (int_x86_avx512_mask_pminu_d_512
                    (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                    (v16i32 immAllZerosV), (i16 -1))),
          (VPMINUDZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pmins_q_512
                   (v8i64 VR512:$src1), (v8i64 VR512:$src2),
                   (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMINSQZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pminu_q_512
                   (v8i64 VR512:$src1), (v8i64 VR512:$src2),
                   (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMINUQZrr VR512:$src1, VR512:$src2)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002502//===----------------------------------------------------------------------===//
2503// AVX-512 - Unpack Instructions
2504//===----------------------------------------------------------------------===//
2505
// Floating-point unpack: a register-register form (rr) and a register-memory
// form (rm). The memory pattern wraps the loaded value in a bitconvert, so
// mem_frag may produce a different vector type than vt. The complete asm
// string is supplied by the caller.
multiclass avx512_unpack_fp<bits<8> opc, SDNode OpNode, ValueType vt,
                            PatFrag mem_frag, RegisterClass RC,
                            X86MemOperand x86memop, string asm, Domain d> {
  def rr : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
                    asm,
                    [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))],
                    d>,
           EVEX_4V;

  def rm : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
                    (ins RC:$src1, x86memop:$src2),
                    asm,
                    [(set RC:$dst,
                          (vt (OpNode RC:$src1,
                                      (bitconvert (mem_frag addr:$src2)))))],
                    d>,
           EVEX_4V;
}
2522
// 512-bit high / low floating-point unpacks. All four pass memopv8f64 as the
// load fragment; the multiclass bitconverts the loaded value to the result
// type.
defm VUNPCKHPSZ : avx512_unpack_fp<0x15, X86Unpckh, v16f32, memopv8f64, VR512,
                    f512mem,
                    "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    SSEPackedSingle>,
                  PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VUNPCKHPDZ : avx512_unpack_fp<0x15, X86Unpckh, v8f64, memopv8f64, VR512,
                    f512mem,
                    "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    SSEPackedDouble>,
                  PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VUNPCKLPSZ : avx512_unpack_fp<0x14, X86Unpckl, v16f32, memopv8f64, VR512,
                    f512mem,
                    "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    SSEPackedSingle>,
                  PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VUNPCKLPDZ : avx512_unpack_fp<0x14, X86Unpckl, v8f64, memopv8f64, VR512,
                    f512mem,
                    "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    SSEPackedDouble>,
                  PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002535
// Integer unpack: register-register (rr) and register-memory (rm) forms, both
// using the IIC_SSE_UNPCK itinerary. The memory pattern bitconverts the value
// produced by memop_frag.
multiclass avx512_unpack_int<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             ValueType OpVT, RegisterClass RC,
                             PatFrag memop_frag, X86MemOperand x86memop> {
  def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
             !strconcat(OpcodeStr,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst,
                   (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
             IIC_SSE_UNPCK>,
           EVEX_4V;

  def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
             (ins RC:$src1, x86memop:$src2),
             !strconcat(OpcodeStr,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst,
                   (OpVT (OpNode (OpVT RC:$src1),
                                 (bitconvert (memop_frag addr:$src2)))))],
             IIC_SSE_UNPCK>,
           EVEX_4V;
}
// 512-bit integer unpacks: low / high for 32-bit (dq) and 64-bit (qdq)
// elements. The 64-bit element forms add VEX_W.
defm VPUNPCKLDQZ  : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32,
                                      VR512, memopv16i32, i512mem>,
                    EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64,
                                      VR512, memopv8i64, i512mem>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPUNPCKHDQZ  : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32,
                                      VR512, memopv16i32, i512mem>,
                    EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64,
                                      VR512, memopv8i64, i512mem>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2563//===----------------------------------------------------------------------===//
2564// AVX-512 - PSHUFD
2565//
2566
// Shuffle controlled by an 8-bit immediate. "ri" takes the vector from a
// register, "mi" loads it through mem_frag; in both, $src2 is the immediate.
multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                            SDNode OpNode, PatFrag mem_frag,
                            X86MemOperand x86memop, ValueType OpVT> {
  def ri : AVX512Ii8<opc, MRMSrcReg, (outs RC:$dst),
             (ins RC:$src1, i8imm:$src2),
             !strconcat(OpcodeStr,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst, (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
           EVEX;

  def mi : AVX512Ii8<opc, MRMSrcMem, (outs RC:$dst),
             (ins x86memop:$src1, i8imm:$src2),
             !strconcat(OpcodeStr,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst,
                   (OpVT (OpNode (mem_frag addr:$src1), (i8 imm:$src2))))]>,
           EVEX;
}
2585
// Immediate-controlled 512-bit shuffles.
defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32,
                      i512mem, v16i32>, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;

let ExeDomain = SSEPackedSingle in
defm VPERMILPSZ : avx512_pshuf_imm<0x04, "vpermilps", VR512, X86VPermilp,
                      memopv16f32, i512mem, v16f32>, TAPD, EVEX_V512,
                      EVEX_CD8<32, CD8VF>;
// vpermilpd operates on 64-bit elements (VEX_W, v8f64), so its compressed
// displacement is declared with a 64-bit element size, like the other VEX_W
// definitions in this file.
let ExeDomain = SSEPackedDouble in
defm VPERMILPDZ : avx512_pshuf_imm<0x05, "vpermilpd", VR512, X86VPermilp,
                      memopv8f64, i512mem, v8f64>, TAPD, EVEX_V512,
                      VEX_W, EVEX_CD8<64, CD8VF>;
2597
// X86VPermilp on integer vector types reuses the floating-point register
// forms: v16i32 maps to VPERMILPS and v8i64 maps to VPERMILPD.
def : Pat<(v16i32 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
          (VPERMILPSZri VR512:$src1, imm:$imm)>;
def : Pat<(v8i64 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
          (VPERMILPDZri VR512:$src1, imm:$imm)>;
2602
2603//===----------------------------------------------------------------------===//
2604// AVX-512 Logical Instructions
2605//===----------------------------------------------------------------------===//
2606
// 512-bit bitwise AND / OR / XOR / ANDN for 32-bit (d) and 64-bit (q)
// elements. AND, OR and XOR pass IsCommutable = 1; ANDN passes 0.
defm VPANDDZ  : avx512_binop_rm<0xDB, "vpandd", and, v16i32, VK16WM, VR512,
                                memopv16i32, i512mem, loadi32, i32mem,
                                "{1to16}", SSE_BIT_ITINS_P, 1>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPANDQZ  : avx512_binop_rm<0xDB, "vpandq", and, v8i64, VK8WM, VR512,
                                memopv8i64, i512mem, loadi64, i64mem,
                                "{1to8}", SSE_BIT_ITINS_P, 1>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPORDZ   : avx512_binop_rm<0xEB, "vpord", or, v16i32, VK16WM, VR512,
                                memopv16i32, i512mem, loadi32, i32mem,
                                "{1to16}", SSE_BIT_ITINS_P, 1>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPORQZ   : avx512_binop_rm<0xEB, "vporq", or, v8i64, VK8WM, VR512,
                                memopv8i64, i512mem, loadi64, i64mem,
                                "{1to8}", SSE_BIT_ITINS_P, 1>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPXORDZ  : avx512_binop_rm<0xEF, "vpxord", xor, v16i32, VK16WM, VR512,
                                memopv16i32, i512mem, loadi32, i32mem,
                                "{1to16}", SSE_BIT_ITINS_P, 1>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPXORQZ  : avx512_binop_rm<0xEF, "vpxorq", xor, v8i64, VK8WM, VR512,
                                memopv8i64, i512mem, loadi64, i64mem,
                                "{1to8}", SSE_BIT_ITINS_P, 1>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPANDNDZ : avx512_binop_rm<0xDF, "vpandnd", X86andnp, v16i32, VK16WM,
                                VR512, memopv16i32, i512mem, loadi32, i32mem,
                                "{1to16}", SSE_BIT_ITINS_P, 0>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VK8WM,
                                VR512, memopv8i64, i512mem, loadi64, i64mem,
                                "{1to8}", SSE_BIT_ITINS_P, 0>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002631
2632//===----------------------------------------------------------------------===//
2633// AVX-512 FP arithmetic
2634//===----------------------------------------------------------------------===//
2635
// Scalar FP binary operation. Instantiates sse12_fp_scalar twice: a
// single-precision form (suffix "ss", FR32X, f32mem) and a double-precision
// form (suffix "sd", FR64X, f64mem, VEX_W).
multiclass avx512_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          SizeItins itins> {
  defm SSZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), OpNode, FR32X,
                             f32mem, itins.s, 0>,
             XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

  defm SDZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), OpNode, FR64X,
                             f64mem, itins.d, 0>,
             XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
2645
// Scalar FP arithmetic. Only add and mul are commutable.
let isCommutable = 1 in {
defm VADD : avx512_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>;
defm VMUL : avx512_binop_s<0x59, "mul", fmul, SSE_ALU_ITINS_S>;
}
// x86 MIN/MAX return the second source operand when either input is a NaN or
// when both inputs are zeros of opposite sign, so swapping the operands can
// change the result. X86fmin / X86fmax must therefore not be commuted, just
// like sub and div.
let isCommutable = 0 in {
defm VMIN : avx512_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>;
defm VMAX : avx512_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>;
defm VSUB : avx512_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>;
defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>;
}
2656
// Packed FP binary operation.
//
// Selection patterns exist only on the unmasked forms: rr (register), rm
// (full-vector load) and rmb (scalar load broadcast via X86VBroadcast). The
// masked ("k") and zero-masked ("kz") variants of each have empty pattern
// lists.
//
//   KRC        - mask register class for the k / kz forms.
//   vt         - vector value type used in the rr and rmb patterns.
//   BrdcstStr  - text appended to the memory operand of broadcast forms.
//   commutable - isCommutable for rr, rrk and rrkz.
//
// NOTE(review): the "k" forms take no pass-through operand tied to $dst --
// confirm before attaching merge-masking patterns to them.
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass KRC,
                           RegisterClass RC, ValueType vt,
                           X86MemOperand x86memop, PatFrag mem_frag,
                           X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
                           string BrdcstStr,
                           Domain d, OpndItins itins, bit commutable> {
  let isCommutable = commutable in {
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
       EVEX_4V;

    // No space before '|': the AT&T alternative must not end in a trailing
    // blank, matching every other masked asm string in this file.
    def rrk: PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
       !strconcat(OpcodeStr,
           " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
       [], itins.rr, d>, EVEX_4V, EVEX_K;

    def rrkz: PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
       !strconcat(OpcodeStr,
           " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
       [], itins.rr, d>, EVEX_4V, EVEX_KZ;
  }

  let mayLoad = 1 in {
    def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
          itins.rm, d>, EVEX_4V;

    def rmb : PI<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86scalar_mop:$src2),
       !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
           ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
       [(set RC:$dst, (OpNode RC:$src1,
                       (vt (X86VBroadcast (scalar_mfrag addr:$src2)))))],
       itins.rm, d>, EVEX_4V, EVEX_B;

    // rmk / rmkz use the same " \t" mnemonic separator as the other forms.
    def rmk : PI<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
           " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
       [], itins.rm, d>, EVEX_4V, EVEX_K;

    def rmkz : PI<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
           " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
       [], itins.rm, d>, EVEX_4V, EVEX_KZ;

    def rmbk : PI<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
           " \t{${src2}", BrdcstStr,
           ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}", BrdcstStr, "}"),
       [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_K;

    def rmbkz : PI<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
           " \t{${src2}", BrdcstStr,
           ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
           BrdcstStr, "}"),
       [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_KZ;
  }
}
2719
// 512-bit packed FP add and multiply; both are instantiated as commutable.
defm VADDPSZ : avx512_fp_packed<0x58, "addps", fadd, VK16WM, VR512, v16f32,
                                f512mem, memopv16f32, f32mem, loadf32,
                                "{1to16}", SSEPackedSingle,
                                SSE_ALU_ITINS_P.s, 1>,
               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

defm VADDPDZ : avx512_fp_packed<0x58, "addpd", fadd, VK8WM, VR512, v8f64,
                                f512mem, memopv8f64, f64mem, loadf64,
                                "{1to8}", SSEPackedDouble,
                                SSE_ALU_ITINS_P.d, 1>,
               EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VK16WM, VR512, v16f32,
                                f512mem, memopv16f32, f32mem, loadf32,
                                "{1to16}", SSEPackedSingle,
                                SSE_ALU_ITINS_P.s, 1>,
               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm VMULPDZ : avx512_fp_packed<0x59, "mulpd", fmul, VK8WM, VR512, v8f64,
                                f512mem, memopv8f64, f64mem, loadf64,
                                "{1to8}", SSEPackedDouble,
                                SSE_ALU_ITINS_P.d, 1>,
               EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002736
// 512-bit packed FP min and max.
// x86 MIN/MAX return the second source operand when either input is a NaN or
// when both inputs are zeros of opposite sign, so the result depends on
// operand order. These definitions therefore pass commutable = 0.
defm VMINPSZ : avx512_fp_packed<0x5D, "minps", X86fmin, VK16WM, VR512, v16f32, f512mem,
                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
                   SSE_ALU_ITINS_P.s, 0>,
                   EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm VMAXPSZ : avx512_fp_packed<0x5F, "maxps", X86fmax, VK16WM, VR512, v16f32, f512mem,
                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
                   SSE_ALU_ITINS_P.s, 0>,
                   EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

defm VMINPDZ : avx512_fp_packed<0x5D, "minpd", X86fmin, VK8WM, VR512, v8f64, f512mem,
                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
                   SSE_ALU_ITINS_P.d, 0>,
                   EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VMAXPDZ : avx512_fp_packed<0x5F, "maxpd", X86fmax, VK8WM, VR512, v8f64, f512mem,
                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
                   SSE_ALU_ITINS_P.d, 0>,
                   EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002754
// 512-bit packed FP subtract and divide; neither is commutable.
defm VSUBPSZ : avx512_fp_packed<0x5C, "subps", fsub, VK16WM, VR512, v16f32,
                                f512mem, memopv16f32, f32mem, loadf32,
                                "{1to16}", SSEPackedSingle,
                                SSE_ALU_ITINS_P.s, 0>,
               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm VDIVPSZ : avx512_fp_packed<0x5E, "divps", fdiv, VK16WM, VR512, v16f32,
                                f512mem, memopv16f32, f32mem, loadf32,
                                "{1to16}", SSEPackedSingle,
                                SSE_ALU_ITINS_P.s, 0>,
               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

defm VSUBPDZ : avx512_fp_packed<0x5C, "subpd", fsub, VK8WM, VR512, v8f64,
                                f512mem, memopv8f64, f64mem, loadf64,
                                "{1to8}", SSEPackedDouble,
                                SSE_ALU_ITINS_P.d, 0>,
               EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VK8WM, VR512, v8f64,
                                f512mem, memopv8f64, f64mem, loadf64,
                                "{1to8}", SSEPackedDouble,
                                SSE_ALU_ITINS_P.d, 0>,
               EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002770
// Masked FP min/max intrinsics called with a zero pass-through vector, an
// all-ones mask and FROUND_CURRENT select to the unmasked register forms.
def : Pat<(v16f32 (int_x86_avx512_mask_max_ps_512
                    (v16f32 VR512:$src1), (v16f32 VR512:$src2),
                    (bc_v16f32 (v16i32 immAllZerosV)),
                    (i16 -1), FROUND_CURRENT)),
          (VMAXPSZrr VR512:$src1, VR512:$src2)>;

def : Pat<(v8f64 (int_x86_avx512_mask_max_pd_512
                   (v8f64 VR512:$src1), (v8f64 VR512:$src2),
                   (bc_v8f64 (v16i32 immAllZerosV)),
                   (i8 -1), FROUND_CURRENT)),
          (VMAXPDZrr VR512:$src1, VR512:$src2)>;

def : Pat<(v16f32 (int_x86_avx512_mask_min_ps_512
                    (v16f32 VR512:$src1), (v16f32 VR512:$src2),
                    (bc_v16f32 (v16i32 immAllZerosV)),
                    (i16 -1), FROUND_CURRENT)),
          (VMINPSZrr VR512:$src1, VR512:$src2)>;

def : Pat<(v8f64 (int_x86_avx512_mask_min_pd_512
                   (v8f64 VR512:$src1), (v8f64 VR512:$src2),
                   (bc_v8f64 (v16i32 immAllZerosV)),
                   (i8 -1), FROUND_CURRENT)),
          (VMINPDZrr VR512:$src1, VR512:$src2)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002790//===----------------------------------------------------------------------===//
2791// AVX-512 VPTESTM instructions
2792//===----------------------------------------------------------------------===//
2793
// Vector test producing a mask register.
//   opc / OpcodeStr - opcode byte and mnemonic.
//   KRC             - mask register class written by the instruction.
//   RC / vt         - vector register class and value type of the sources.
//   x86memop        - memory operand used by the rm form.
//   memop_frag      - load fragment used in the rm pattern (wrapped in a
//                     bitconvert).
//   OpNode          - SDNode applied to the two sources.
// Defines `rr` (both sources in registers) and `rm` (second source in memory).
2794multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2795 RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
2796 SDNode OpNode, ValueType vt> {
Elena Demikhovskya30e4372014-02-05 07:05:03 +00002797 def rr : AVX512PI<opc, MRMSrcReg,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002798 (outs KRC:$dst), (ins RC:$src1, RC:$src2),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00002799 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Elena Demikhovskya30e4372014-02-05 07:05:03 +00002800 [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))],
2801 SSEPackedInt>, EVEX_4V;
2802 def rm : AVX512PI<opc, MRMSrcMem,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002803 (outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00002804 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002805 [(set KRC:$dst, (OpNode (vt RC:$src1),
Elena Demikhovskya30e4372014-02-05 07:05:03 +00002806 (bitconvert (memop_frag addr:$src2))))], SSEPackedInt>, EVEX_4V;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002807}
2808
// 512-bit VPTESTM (X86testm) instantiations, dword and qword element sizes.
// These are integer-vector instructions, so the memory operand is i512mem,
// matching the other integer 512-bit definitions in this file (the shift
// instructions) rather than the floating-point f512mem operand.
defm VPTESTMDZ  : avx512_vptest<0x27, "vptestmd", VK16, VR512, i512mem,
                              memopv16i32, X86testm, v16i32>, T8PD, EVEX_V512,
                              EVEX_CD8<32, CD8VF>;
defm VPTESTMQZ  : avx512_vptest<0x27, "vptestmq", VK8, VR512, i512mem,
                              memopv8i64, X86testm, v8i64>, T8PD, EVEX_V512, VEX_W,
                              EVEX_CD8<64, CD8VF>;

// 512-bit VPTESTNM (X86testnm) instantiations; same opcode byte as VPTESTM but
// with the T8XS prefix class.
// NOTE(review): these are gated on HasCDI here. Confirm against the current
// Intel SDM whether VPTESTNMD/Q should instead be available with the base
// AVX-512 feature set.
let Predicates = [HasCDI] in {
defm VPTESTNMDZ  : avx512_vptest<0x27, "vptestnmd", VK16, VR512, i512mem,
                              memopv16i32, X86testnm, v16i32>, T8XS, EVEX_V512,
                              EVEX_CD8<32, CD8VF>;
defm VPTESTNMQZ  : avx512_vptest<0x27, "vptestnmq", VK8, VR512, i512mem,
                              memopv8i64, X86testnm, v8i64>, T8XS, EVEX_V512, VEX_W,
                              EVEX_CD8<64, CD8VF>;
}
2824
// Select VPTESTMD/VPTESTMQ (register forms) for the 512-bit ptestm intrinsics
// when the mask operand is all ones, then copy the mask-register result into
// the GPR class matching the intrinsic's integer result type (GR16 / GR8).
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00002825def : Pat <(i16 (int_x86_avx512_mask_ptestm_d_512 (v16i32 VR512:$src1),
2826 (v16i32 VR512:$src2), (i16 -1))),
2827 (COPY_TO_REGCLASS (VPTESTMDZrr VR512:$src1, VR512:$src2), GR16)>;
2828
2829def : Pat <(i8 (int_x86_avx512_mask_ptestm_q_512 (v8i64 VR512:$src1),
2830 (v8i64 VR512:$src2), (i8 -1))),
Elena Demikhovsky3ebfe112014-02-23 14:28:35 +00002831 (COPY_TO_REGCLASS (VPTESTMQZrr VR512:$src1, VR512:$src2), GR8)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002832//===----------------------------------------------------------------------===//
2833// AVX-512 Shift instructions
2834//===----------------------------------------------------------------------===//
// Vector shift by an 8-bit immediate.
//   ImmFormR / ImmFormM - instruction formats for the register and memory
//                         source forms.
//   OpNode              - shift node taking (vector, i8 immediate).
//   RC / vt             - vector register class and value type.
//   x86memop / mem_frag - memory operand and load fragment for the mi form.
//   KRC                 - mask register class for the masked forms.
// Defines ri / mi with selection patterns and rik / mik (EVEX_K, extra
// $mask input) with empty pattern lists.
2835multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
2836 string OpcodeStr, SDNode OpNode, RegisterClass RC,
2837 ValueType vt, X86MemOperand x86memop, PatFrag mem_frag,
2838 RegisterClass KRC> {
2839 def ri : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
Lang Hames27839932013-10-21 17:51:24 +00002840 (ins RC:$src1, i8imm:$src2),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00002841 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Lang Hames27839932013-10-21 17:51:24 +00002842 [(set RC:$dst, (vt (OpNode RC:$src1, (i8 imm:$src2))))],
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002843 SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
// Masked register form: no selection pattern is attached.
2844 def rik : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
Lang Hames27839932013-10-21 17:51:24 +00002845 (ins KRC:$mask, RC:$src1, i8imm:$src2),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002846 !strconcat(OpcodeStr,
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00002847 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002848 [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
2849 def mi: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
Lang Hames27839932013-10-21 17:51:24 +00002850 (ins x86memop:$src1, i8imm:$src2),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00002851 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002852 [(set RC:$dst, (OpNode (mem_frag addr:$src1),
Lang Hames27839932013-10-21 17:51:24 +00002853 (i8 imm:$src2)))], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
// Masked memory form: no selection pattern is attached.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002854 def mik: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
Lang Hames27839932013-10-21 17:51:24 +00002855 (ins KRC:$mask, x86memop:$src1, i8imm:$src2),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002856 !strconcat(OpcodeStr,
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00002857 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002858 [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
2859}
2860
// Vector shift whose count comes from a 128-bit source (VR128X or i128mem).
//   OpNode  - shift node taking (vector, SrcVT count vector).
//   RC / vt - register class and value type of the shifted vector.
//   SrcVT   - value type of the 128-bit count operand.
//   bc_frag - fragment applied to the memopv2i64 load in the rm pattern.
//   KRC     - mask register class for the masked forms.
// Defines rr / rm with selection patterns and rrk / rmk (EVEX_K, extra $mask
// input) with empty pattern lists.
2861multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
2862 RegisterClass RC, ValueType vt, ValueType SrcVT,
2863 PatFrag bc_frag, RegisterClass KRC> {
2864 // src2 is always 128-bit
2865 def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2866 (ins RC:$src1, VR128X:$src2),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00002867 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002868 [(set RC:$dst, (vt (OpNode RC:$src1, (SrcVT VR128X:$src2))))],
2869 SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
2870 def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2871 (ins KRC:$mask, RC:$src1, VR128X:$src2),
2872 !strconcat(OpcodeStr,
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00002873 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002874 [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
2875 def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2876 (ins RC:$src1, i128mem:$src2),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00002877 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002878 [(set RC:$dst, (vt (OpNode RC:$src1,
2879 (bc_frag (memopv2i64 addr:$src2)))))],
2880 SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
2881 def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2882 (ins KRC:$mask, RC:$src1, i128mem:$src2),
2883 !strconcat(OpcodeStr,
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00002884 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002885 [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
2886}
2887
// 512-bit shift instantiations. Each name is defm'd twice: once from
// avx512_shift_rmi (count is an immediate; ModRM /2, /6 or /4 via the MRMnr /
// MRMnm formats, EVEX_CD8 form CD8VF) and once from avx512_shift_rrm (count in
// a 128-bit operand, EVEX_CD8 form CD8VQ). Dword variants use v16i32 / VK16WM;
// qword variants use v8i64 / VK8WM and add VEX_W.
2888defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
2889 VR512, v16i32, i512mem, memopv16i32, VK16WM>,
2890 EVEX_V512, EVEX_CD8<32, CD8VF>;
2891defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl,
2892 VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
2893 EVEX_CD8<32, CD8VQ>;
2894
2895defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
2896 VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
2897 EVEX_CD8<64, CD8VF>, VEX_W;
2898defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl,
2899 VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
2900 EVEX_CD8<64, CD8VQ>, VEX_W;
2901
2902defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
2903 VR512, v16i32, i512mem, memopv16i32, VK16WM>, EVEX_V512,
2904 EVEX_CD8<32, CD8VF>;
2905defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl,
2906 VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
2907 EVEX_CD8<32, CD8VQ>;
2908
2909defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
2910 VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
2911 EVEX_CD8<64, CD8VF>, VEX_W;
2912defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl,
2913 VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
2914 EVEX_CD8<64, CD8VQ>, VEX_W;
2915
2916defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
2917 VR512, v16i32, i512mem, memopv16i32, VK16WM>,
2918 EVEX_V512, EVEX_CD8<32, CD8VF>;
2919defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra,
2920 VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
2921 EVEX_CD8<32, CD8VQ>;
2922
// The qword arithmetic shifts reuse the dword opcode bytes (0x72 /4 and 0xE2)
// and differ from VPSRADZ only by VEX_W and the 64-bit element types.
2923defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
2924 VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
2925 EVEX_CD8<64, CD8VF>, VEX_W;
2926defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra,
2927 VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
2928 EVEX_CD8<64, CD8VQ>, VEX_W;
2929
2930//===-------------------------------------------------------------------===//
2931// Variable Bit Shifts
2932//===-------------------------------------------------------------------===//
// Variable shift: both the shifted value and the shift counts are full
// vectors of type vt.
//   OpNode              - node applied to (RC:$src1, counts).
//   x86memop / mem_frag - memory operand and load fragment for the counts in
//                         the rm form.
// Defines rr and rm, both with selection patterns.
2933multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
2934 RegisterClass RC, ValueType vt,
2935 X86MemOperand x86memop, PatFrag mem_frag> {
2936 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
2937 (ins RC:$src1, RC:$src2),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00002938 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002939 [(set RC:$dst,
2940 (vt (OpNode RC:$src1, (vt RC:$src2))))]>,
2941 EVEX_4V;
2942 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
2943 (ins RC:$src1, x86memop:$src2),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00002944 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002945 [(set RC:$dst,
2946 (vt (OpNode RC:$src1, (mem_frag addr:$src2))))]>,
2947 EVEX_4V;
2948}
2949
// 512-bit variable-shift instantiations. They are matched directly from the
// generic shl / srl / sra nodes. Dword and qword variants of each shift share
// an opcode byte and are distinguished by VEX_W and the element type.
2950defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32,
2951 i512mem, memopv16i32>, EVEX_V512,
2952 EVEX_CD8<32, CD8VF>;
2953defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64,
2954 i512mem, memopv8i64>, EVEX_V512, VEX_W,
2955 EVEX_CD8<64, CD8VF>;
2956defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32,
2957 i512mem, memopv16i32>, EVEX_V512,
2958 EVEX_CD8<32, CD8VF>;
2959defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64,
2960 i512mem, memopv8i64>, EVEX_V512, VEX_W,
2961 EVEX_CD8<64, CD8VF>;
2962defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32,
2963 i512mem, memopv16i32>, EVEX_V512,
2964 EVEX_CD8<32, CD8VF>;
2965defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64,
2966 i512mem, memopv8i64>, EVEX_V512, VEX_W,
2967 EVEX_CD8<64, CD8VF>;
2968
2969//===----------------------------------------------------------------------===//
2970// AVX-512 - MOVDDUP
2971//===----------------------------------------------------------------------===//
2972
// MOVDDUP with a fixed opcode byte of 0x12.
//   RC / VT               - register class and value type of the result.
//   x86memop / memop_frag - memory operand and load fragment for the rm form.
// Defines rr and rm, both selected from X86Movddup.
2973multiclass avx512_movddup<string OpcodeStr, RegisterClass RC, ValueType VT,
2974 X86MemOperand x86memop, PatFrag memop_frag> {
2975def rr : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00002976 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002977 [(set RC:$dst, (VT (X86Movddup RC:$src)))]>, EVEX;
2978def rm : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00002979 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002980 [(set RC:$dst,
2981 (VT (X86Movddup (memop_frag addr:$src))))]>, EVEX;
2982}
2983
// 512-bit MOVDDUP on v8f64, plus a pattern that folds a scalar f64 load feeding
// X86Movddup into the memory form.
// NOTE(review): VMOVDDUPZrm is declared with an f512mem operand, while the
// pattern below only has a 64-bit scalar load (loadf64) - confirm that
// selecting the 512-bit memory form for it is intended.
2984defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, memopv8f64>,
2985 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
2986def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))),
2987 (VMOVDDUPZrm addr:$src)>;
2988
Elena Demikhovsky0a74b7d2013-11-14 11:29:27 +00002989//===---------------------------------------------------------------------===//
2990// Replicate Single FP - MOVSHDUP and MOVSLDUP
2991//===---------------------------------------------------------------------===//
// Single-operand replicate instruction (used for MOVSHDUP / MOVSLDUP).
//   op / OpNode / OpcodeStr - opcode byte, selection node and mnemonic.
//   vt / RC                 - value type and register class of the vector.
//   mem_frag / x86memop     - load fragment and memory operand for rm.
// Defines rr and rm; rm is explicitly marked mayLoad.
2992multiclass avx512_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
2993 ValueType vt, RegisterClass RC, PatFrag mem_frag,
2994 X86MemOperand x86memop> {
2995 def rr : AVX512XSI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00002996 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
Elena Demikhovsky0a74b7d2013-11-14 11:29:27 +00002997 [(set RC:$dst, (vt (OpNode RC:$src)))]>, EVEX;
2998 let mayLoad = 1 in
2999 def rm : AVX512XSI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003000 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
Elena Demikhovsky0a74b7d2013-11-14 11:29:27 +00003001 [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>, EVEX;
3002}
3003
// 512-bit MOVSHDUP (0x16) and MOVSLDUP (0x12) on v16f32. The trailing patterns
// select the same instructions when the X86Movshdup / X86Movsldup node is
// typed v16i32, for both register and memopv16i32 sources.
3004defm VMOVSHDUPZ : avx512_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
3005 v16f32, VR512, memopv16f32, f512mem>, EVEX_V512,
3006 EVEX_CD8<32, CD8VF>;
3007defm VMOVSLDUPZ : avx512_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
3008 v16f32, VR512, memopv16f32, f512mem>, EVEX_V512,
3009 EVEX_CD8<32, CD8VF>;
3010
3011def : Pat<(v16i32 (X86Movshdup VR512:$src)), (VMOVSHDUPZrr VR512:$src)>;
3012def : Pat<(v16i32 (X86Movshdup (memopv16i32 addr:$src))),
3013 (VMOVSHDUPZrm addr:$src)>;
3014def : Pat<(v16i32 (X86Movsldup VR512:$src)), (VMOVSLDUPZrr VR512:$src)>;
3015def : Pat<(v16i32 (X86Movsldup (memopv16i32 addr:$src))),
3016 (VMOVSLDUPZrm addr:$src)>;
3017
3018//===----------------------------------------------------------------------===//
3019// Move Low to High and High to Low packed FP Instructions
3020//===----------------------------------------------------------------------===//
// EVEX-encoded register-register MOVLHPS (0x16) and MOVHLPS (0x12) on VR128X.
// Both carry a v4f32 selection pattern and the IIC_SSE_MOV_LH itinerary.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003021def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
3022 (ins VR128X:$src1, VR128X:$src2),
Elena Demikhovskycf088092013-12-11 14:31:04 +00003023 "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003024 [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))],
3025 IIC_SSE_MOV_LH>, EVEX_4V;
3026def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
3027 (ins VR128X:$src1, VR128X:$src2),
Elena Demikhovskycf088092013-12-11 14:31:04 +00003028 "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003029 [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))],
3030 IIC_SSE_MOV_LH>, EVEX_4V;
3031
// Under HasAVX512, also select VMOVLHPSZrr / VMOVHLPSZrr when the
// X86Movlhps / X86Movhlps node has an integer vector type (v4i32, and v2i64
// for MOVLHPS only).
Craig Topperdbe8b7d2013-09-27 07:20:47 +00003032let Predicates = [HasAVX512] in {
3033 // MOVLHPS patterns
3034 def : Pat<(v4i32 (X86Movlhps VR128X:$src1, VR128X:$src2)),
3035 (VMOVLHPSZrr VR128X:$src1, VR128X:$src2)>;
3036 def : Pat<(v2i64 (X86Movlhps VR128X:$src1, VR128X:$src2)),
3037 (VMOVLHPSZrr (v2i64 VR128X:$src1), VR128X:$src2)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003038
Craig Topperdbe8b7d2013-09-27 07:20:47 +00003039 // MOVHLPS patterns
3040 def : Pat<(v4i32 (X86Movhlps VR128X:$src1, VR128X:$src2)),
3041 (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>;
3042}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003043
3044//===----------------------------------------------------------------------===//
3045// FMA - Fused Multiply Operations
3046//
// Packed FMA, 213 operand order, with $src1 tied to $dst.
//   RC / OpVT                    - vector register class and value type.
//   x86memop / mem_frag          - full-vector memory operand and load fragment.
//   x86scalar_mop / scalar_mfrag - scalar memory operand and load fragment for
//                                  the broadcast form.
//   BrdcstStr                    - text appended to ${src3} in the broadcast
//                                  assembly string.
//   OpNode                       - FMA node applied to ($src1, $src2, $src3).
//   KRC                          - mask register class passed on to
//                                  AVX512_masking_3src.
// Defines: `r` (register forms generated through AVX512_masking_3src), `m`
// (third operand loaded from memory) and `mb` (third operand is an
// X86VBroadcast of a scalar load, EVEX_B).
3047let Constraints = "$src1 = $dst" in {
3048multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr,
3049 RegisterClass RC, X86MemOperand x86memop,
3050 PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
Adam Nemet2e91ee52014-08-14 17:13:19 +00003051 string BrdcstStr, SDNode OpNode, ValueType OpVT,
3052 RegisterClass KRC> {
3053 defm r: AVX512_masking_3src<opc, MRMSrcReg, (outs RC:$dst),
3054 (ins RC:$src2, RC:$src3),
3055 OpcodeStr, "$src3, $src2", "$src2, $src3",
3056 (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)), OpVT, RC, KRC>,
3057 AVX512FMA3Base;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003058
// Only `m` is covered by this brace-less `let`; `mb` below is outside it.
3059 let mayLoad = 1 in
3060 def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3061 (ins RC:$src1, RC:$src2, x86memop:$src3),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003062 !strconcat(OpcodeStr, " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003063 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2,
3064 (mem_frag addr:$src3))))]>;
3065 def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3066 (ins RC:$src1, RC:$src2, x86scalar_mop:$src3),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003067 !strconcat(OpcodeStr, " \t{${src3}", BrdcstStr,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003068 ", $src2, $dst|$dst, $src2, ${src3}", BrdcstStr, "}"),
3069 [(set RC:$dst, (OpNode RC:$src1, RC:$src2,
3070 (OpVT (X86VBroadcast (scalar_mfrag addr:$src3)))))]>, EVEX_B;
3071}
3072} // Constraints = "$src1 = $dst"
3073
// 512-bit single-precision 213-form packed FMA instantiations (v16f32, VK16WM,
// "{1to16}" broadcast string) in the SSEPackedSingle execution domain.
3074let ExeDomain = SSEPackedSingle in {
3075 defm VFMADD213PSZ : avx512_fma3p_rm<0xA8, "vfmadd213ps", VR512, f512mem,
3076 memopv16f32, f32mem, loadf32, "{1to16}",
Adam Nemet2e91ee52014-08-14 17:13:19 +00003077 X86Fmadd, v16f32, VK16WM>, EVEX_V512,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003078 EVEX_CD8<32, CD8VF>;
3079 defm VFMSUB213PSZ : avx512_fma3p_rm<0xAA, "vfmsub213ps", VR512, f512mem,
3080 memopv16f32, f32mem, loadf32, "{1to16}",
Adam Nemet2e91ee52014-08-14 17:13:19 +00003081 X86Fmsub, v16f32, VK16WM>, EVEX_V512,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003082 EVEX_CD8<32, CD8VF>;
3083 defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", VR512, f512mem,
3084 memopv16f32, f32mem, loadf32, "{1to16}",
Adam Nemet2e91ee52014-08-14 17:13:19 +00003085 X86Fmaddsub, v16f32, VK16WM>,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003086 EVEX_V512, EVEX_CD8<32, CD8VF>;
3087 defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", VR512, f512mem,
3088 memopv16f32, f32mem, loadf32, "{1to16}",
Adam Nemet2e91ee52014-08-14 17:13:19 +00003089 X86Fmsubadd, v16f32, VK16WM>,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003090 EVEX_V512, EVEX_CD8<32, CD8VF>;
3091 defm VFNMADD213PSZ : avx512_fma3p_rm<0xAC, "vfnmadd213ps", VR512, f512mem,
3092 memopv16f32, f32mem, loadf32, "{1to16}",
Adam Nemet2e91ee52014-08-14 17:13:19 +00003093 X86Fnmadd, v16f32, VK16WM>, EVEX_V512,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003094 EVEX_CD8<32, CD8VF>;
3095 defm VFNMSUB213PSZ : avx512_fma3p_rm<0xAE, "vfnmsub213ps", VR512, f512mem,
3096 memopv16f32, f32mem, loadf32, "{1to16}",
Adam Nemet2e91ee52014-08-14 17:13:19 +00003097 X86Fnmsub, v16f32, VK16WM>, EVEX_V512,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003098 EVEX_CD8<32, CD8VF>;
3099}
// 512-bit double-precision 213-form packed FMA instantiations (v8f64, VK8WM,
// "{1to8}" broadcast string, VEX_W) in the SSEPackedDouble execution domain.
// Opcode bytes match the single-precision definitions of the same operation.
3100let ExeDomain = SSEPackedDouble in {
3101 defm VFMADD213PDZ : avx512_fma3p_rm<0xA8, "vfmadd213pd", VR512, f512mem,
3102 memopv8f64, f64mem, loadf64, "{1to8}",
Adam Nemet2e91ee52014-08-14 17:13:19 +00003103 X86Fmadd, v8f64, VK8WM>, EVEX_V512,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003104 VEX_W, EVEX_CD8<64, CD8VF>;
3105 defm VFMSUB213PDZ : avx512_fma3p_rm<0xAA, "vfmsub213pd", VR512, f512mem,
3106 memopv8f64, f64mem, loadf64, "{1to8}",
Adam Nemet2e91ee52014-08-14 17:13:19 +00003107 X86Fmsub, v8f64, VK8WM>, EVEX_V512, VEX_W,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003108 EVEX_CD8<64, CD8VF>;
3109 defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", VR512, f512mem,
3110 memopv8f64, f64mem, loadf64, "{1to8}",
Adam Nemet2e91ee52014-08-14 17:13:19 +00003111 X86Fmaddsub, v8f64, VK8WM>,
3112 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003113 defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", VR512, f512mem,
3114 memopv8f64, f64mem, loadf64, "{1to8}",
Adam Nemet2e91ee52014-08-14 17:13:19 +00003115 X86Fmsubadd, v8f64, VK8WM>,
3116 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003117 defm VFNMADD213PDZ : avx512_fma3p_rm<0xAC, "vfnmadd213pd", VR512, f512mem,
3118 memopv8f64, f64mem, loadf64, "{1to8}",
Adam Nemet2e91ee52014-08-14 17:13:19 +00003119 X86Fnmadd, v8f64, VK8WM>, EVEX_V512, VEX_W,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003120 EVEX_CD8<64, CD8VF>;
3121 defm VFNMSUB213PDZ : avx512_fma3p_rm<0xAE, "vfnmsub213pd", VR512, f512mem,
3122 memopv8f64, f64mem, loadf64, "{1to8}",
Adam Nemet2e91ee52014-08-14 17:13:19 +00003123 X86Fnmsub, v8f64, VK8WM>, EVEX_V512, VEX_W,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003124 EVEX_CD8<64, CD8VF>;
3125}
3126
// Packed FMA, 132 operand order, memory forms only, with $src1 tied to $dst.
// The memory operand is $src2 and sits in the SECOND position of OpNode
// (OpNode $src1, mem, $src3); the register operand $src3 is third.
// Parameters mirror avx512_fma3p_rm, without a mask register class.
// Defines `m` (full-vector load) and `mb` (X86VBroadcast of a scalar load,
// EVEX_B).
3127let Constraints = "$src1 = $dst" in {
3128multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr,
3129 RegisterClass RC, X86MemOperand x86memop,
3130 PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
3131 string BrdcstStr, SDNode OpNode, ValueType OpVT> {
// Only `m` is covered by this brace-less `let`; `mb` below is outside it.
3132 let mayLoad = 1 in
3133 def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3134 (ins RC:$src1, RC:$src3, x86memop:$src2),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003135 !strconcat(OpcodeStr, " \t{$src2, $src3, $dst|$dst, $src3, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003136 [(set RC:$dst, (OpVT (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3)))]>;
3137 def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3138 (ins RC:$src1, RC:$src3, x86scalar_mop:$src2),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003139 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003140 ", $src3, $dst|$dst, $src3, ${src2}", BrdcstStr, "}"),
3141 [(set RC:$dst, (OpNode RC:$src1,
3142 (OpVT (X86VBroadcast (scalar_mfrag addr:$src2))), RC:$src3))]>, EVEX_B;
3143}
3144} // Constraints = "$src1 = $dst"
3145
3146
// 512-bit single-precision 132-form packed FMA instantiations (memory forms
// only) in the SSEPackedSingle execution domain.
3147let ExeDomain = SSEPackedSingle in {
3148 defm VFMADD132PSZ : avx512_fma3p_m132<0x98, "vfmadd132ps", VR512, f512mem,
3149 memopv16f32, f32mem, loadf32, "{1to16}",
3150 X86Fmadd, v16f32>, EVEX_V512,
3151 EVEX_CD8<32, CD8VF>;
3152 defm VFMSUB132PSZ : avx512_fma3p_m132<0x9A, "vfmsub132ps", VR512, f512mem,
3153 memopv16f32, f32mem, loadf32, "{1to16}",
3154 X86Fmsub, v16f32>, EVEX_V512,
3155 EVEX_CD8<32, CD8VF>;
3156 defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", VR512, f512mem,
3157 memopv16f32, f32mem, loadf32, "{1to16}",
3158 X86Fmaddsub, v16f32>,
3159 EVEX_V512, EVEX_CD8<32, CD8VF>;
3160 defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", VR512, f512mem,
3161 memopv16f32, f32mem, loadf32, "{1to16}",
3162 X86Fmsubadd, v16f32>,
3163 EVEX_V512, EVEX_CD8<32, CD8VF>;
3164 defm VFNMADD132PSZ : avx512_fma3p_m132<0x9C, "vfnmadd132ps", VR512, f512mem,
3165 memopv16f32, f32mem, loadf32, "{1to16}",
3166 X86Fnmadd, v16f32>, EVEX_V512,
3167 EVEX_CD8<32, CD8VF>;
3168 defm VFNMSUB132PSZ : avx512_fma3p_m132<0x9E, "vfnmsub132ps", VR512, f512mem,
3169 memopv16f32, f32mem, loadf32, "{1to16}",
3170 X86Fnmsub, v16f32>, EVEX_V512,
3171 EVEX_CD8<32, CD8VF>;
3172}
// 512-bit double-precision 132-form packed FMA instantiations (memory forms
// only, VEX_W) in the SSEPackedDouble execution domain.
3173let ExeDomain = SSEPackedDouble in {
3174 defm VFMADD132PDZ : avx512_fma3p_m132<0x98, "vfmadd132pd", VR512, f512mem,
3175 memopv8f64, f64mem, loadf64, "{1to8}",
3176 X86Fmadd, v8f64>, EVEX_V512,
3177 VEX_W, EVEX_CD8<64, CD8VF>;
3178 defm VFMSUB132PDZ : avx512_fma3p_m132<0x9A, "vfmsub132pd", VR512, f512mem,
3179 memopv8f64, f64mem, loadf64, "{1to8}",
3180 X86Fmsub, v8f64>, EVEX_V512, VEX_W,
3181 EVEX_CD8<64, CD8VF>;
3182 defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", VR512, f512mem,
3183 memopv8f64, f64mem, loadf64, "{1to8}",
3184 X86Fmaddsub, v8f64>, EVEX_V512, VEX_W,
3185 EVEX_CD8<64, CD8VF>;
3186 defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", VR512, f512mem,
3187 memopv8f64, f64mem, loadf64, "{1to8}",
3188 X86Fmsubadd, v8f64>, EVEX_V512, VEX_W,
3189 EVEX_CD8<64, CD8VF>;
3190 defm VFNMADD132PDZ : avx512_fma3p_m132<0x9C, "vfnmadd132pd", VR512, f512mem,
3191 memopv8f64, f64mem, loadf64, "{1to8}",
3192 X86Fnmadd, v8f64>, EVEX_V512, VEX_W,
3193 EVEX_CD8<64, CD8VF>;
3194 defm VFNMSUB132PDZ : avx512_fma3p_m132<0x9E, "vfnmsub132pd", VR512, f512mem,
3195 memopv8f64, f64mem, loadf64, "{1to8}",
3196 X86Fnmsub, v8f64>, EVEX_V512, VEX_W,
3197 EVEX_CD8<64, CD8VF>;
3198}
3199
3200// Scalar FMA
let Constraints = "$src1 = $dst" in {
// Scalar FMA (213 operand order) with $src1 tied to $dst.
//   opc / OpcodeStr - opcode byte and mnemonic.
//   OpNode          - FMA node; the patterns apply it as
//                     (OpNode $src2, $src1, $src3).
//   RC / OpVT       - scalar FP register class and value type.
//   x86memop        - memory operand for the third source of the `m` form.
//   memop           - not referenced by this multiclass; kept so existing
//                     instantiations keep the same argument list.
//   mem_frag        - scalar load fragment used in the `m` pattern.
// Defines `r` (all-register, commutable) and `m` (third source from memory).
multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                 RegisterClass RC, ValueType OpVT,
                 X86MemOperand x86memop, Operand memop,
                 PatFrag mem_frag> {
  let isCommutable = 1 in
  def r     : AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2, RC:$src3),
                   !strconcat(OpcodeStr,
                              " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
  // The memory source is a scalar (mem_frag is loadf32 / loadf64 at the
  // instantiation sites), so use the caller-supplied scalar memory operand
  // instead of a hard-coded 128-bit one.
  let mayLoad = 1 in
  def m     : AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2, x86memop:$src3),
                   !strconcat(OpcodeStr,
                              " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (OpNode RC:$src2, RC:$src1,
                            (mem_frag addr:$src3))))]>;
}

} // Constraints = "$src1 = $dst"
3224
// Scalar 213-form FMA instantiations. SS variants use FR32X / f32 / f32mem /
// loadf32 with EVEX_CD8<32, CD8VT1>; SD variants use FR64X / f64 / f64mem /
// loadf64 and add VEX_W with EVEX_CD8<64, CD8VT1>. SS and SD variants of the
// same operation share an opcode byte.
Elena Demikhovskycf088092013-12-11 14:31:04 +00003225defm VFMADDSSZ : avx512_fma3s_rm<0xA9, "vfmadd213ss", X86Fmadd, FR32X,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003226 f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
Elena Demikhovskycf088092013-12-11 14:31:04 +00003227defm VFMADDSDZ : avx512_fma3s_rm<0xA9, "vfmadd213sd", X86Fmadd, FR64X,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003228 f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskycf088092013-12-11 14:31:04 +00003229defm VFMSUBSSZ : avx512_fma3s_rm<0xAB, "vfmsub213ss", X86Fmsub, FR32X,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003230 f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
Elena Demikhovskycf088092013-12-11 14:31:04 +00003231defm VFMSUBSDZ : avx512_fma3s_rm<0xAB, "vfmsub213sd", X86Fmsub, FR64X,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003232 f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskycf088092013-12-11 14:31:04 +00003233defm VFNMADDSSZ : avx512_fma3s_rm<0xAD, "vfnmadd213ss", X86Fnmadd, FR32X,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003234 f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
Elena Demikhovskycf088092013-12-11 14:31:04 +00003235defm VFNMADDSDZ : avx512_fma3s_rm<0xAD, "vfnmadd213sd", X86Fnmadd, FR64X,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003236 f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskycf088092013-12-11 14:31:04 +00003237defm VFNMSUBSSZ : avx512_fma3s_rm<0xAF, "vfnmsub213ss", X86Fnmsub, FR32X,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003238 f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
Elena Demikhovskycf088092013-12-11 14:31:04 +00003239defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd", X86Fnmsub, FR64X,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003240 f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
3241
3242//===----------------------------------------------------------------------===//
3243// AVX-512 Scalar convert from sign integer to float/double
3244//===----------------------------------------------------------------------===//
3245
// Scalar integer-to-FP conversion.
//   SrcRC    - integer register class of the value being converted.
//   DstRC    - FP register class of the result; also the class of the extra
//              $src1 input.
//   x86memop - memory operand for the rm form.
//   asm      - mnemonic text placed in front of the operand string.
// Defines rr and rm with empty pattern lists and hasSideEffects = 0; rm is
// additionally marked mayLoad.
3246multiclass avx512_vcvtsi<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
3247 X86MemOperand x86memop, string asm> {
Elena Demikhovskyf404e052014-01-05 14:21:07 +00003248let hasSideEffects = 0 in {
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003249 def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003250 !strconcat(asm," \t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003251 EVEX_4V;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003252 let mayLoad = 1 in
3253 def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
3254 (ins DstRC:$src1, x86memop:$src),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003255 !strconcat(asm," \t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003256 EVEX_4V;
Elena Demikhovskyf404e052014-01-05 14:21:07 +00003257} // hasSideEffects = 0
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003258}
// Integer-to-scalar-FP conversions under HasAVX512: signed forms use opcode
// 0x2A, unsigned forms use 0x7B. 64-bit integer sources add VEX_W.
// Because avx512_vcvtsi attaches no patterns, selection is done by the
// explicit Pat records below for sint_to_fp / uint_to_fp, from either a loaded
// integer or a GR32/GR64 register. Every pattern supplies IMPLICIT_DEF for the
// instruction's $src1 input.
Andrew Trick15a47742013-10-09 05:11:10 +00003259let Predicates = [HasAVX512] in {
Elena Demikhovskycf088092013-12-11 14:31:04 +00003260defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}">,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003261 XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
Elena Demikhovskycf088092013-12-11 14:31:04 +00003262defm VCVTSI642SSZ : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}">,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003263 XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskycf088092013-12-11 14:31:04 +00003264defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}">,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003265 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
Elena Demikhovskycf088092013-12-11 14:31:04 +00003266defm VCVTSI642SDZ : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}">,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003267 XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
3268
3269def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
3270 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
3271def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003272 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003273def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
3274 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
3275def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003276 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003277
3278def : Pat<(f32 (sint_to_fp GR32:$src)),
3279 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
3280def : Pat<(f32 (sint_to_fp GR64:$src)),
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003281 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003282def : Pat<(f64 (sint_to_fp GR32:$src)),
3283 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
3284def : Pat<(f64 (sint_to_fp GR64:$src)),
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003285 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
3286
// Unsigned counterparts: same structure with opcode 0x7B and uint_to_fp.
Elena Demikhovskycf088092013-12-11 14:31:04 +00003287defm VCVTUSI2SSZ : avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003288 XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
Elena Demikhovskycf088092013-12-11 14:31:04 +00003289defm VCVTUSI642SSZ : avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003290 XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskycf088092013-12-11 14:31:04 +00003291defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003292 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
Elena Demikhovskycf088092013-12-11 14:31:04 +00003293defm VCVTUSI642SDZ : avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}">,
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003294 XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
3295
3296def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
3297 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
3298def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
3299 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
3300def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
3301 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
3302def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
3303 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
3304
3305def : Pat<(f32 (uint_to_fp GR32:$src)),
3306 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
3307def : Pat<(f32 (uint_to_fp GR64:$src)),
3308 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
3309def : Pat<(f64 (uint_to_fp GR32:$src)),
3310 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
3311def : Pat<(f64 (uint_to_fp GR64:$src)),
3312 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
Andrew Trick15a47742013-10-09 05:11:10 +00003313}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003314
3315//===----------------------------------------------------------------------===//
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003316// AVX-512 Scalar convert from float/double to integer
3317//===----------------------------------------------------------------------===//
// Scalar FP -> integer conversion selected through an intrinsic.
//   opc      - opcode byte
//   SrcRC    - source register class
//   DstRC    - destination register class
//   Int      - intrinsic matched by the register form
//   memop    - memory operand used by the load form
//   mem_cpat - accepted for callers; not referenced in this multiclass
//   asm      - mnemonic, prefixed to the operand string
// The "rm" form has no selection pattern.
multiclass avx512_cvt_s_int<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                            Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
                            string asm> {
  let hasSideEffects = 0 in {
    def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
                asm # " \t{$src, $dst|$dst, $src}",
                [(set DstRC:$dst, (Int SrcRC:$src))]>,
             EVEX, VEX_LIG, Requires<[HasAVX512]>;
    let mayLoad = 1 in {
      def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
                  asm # " \t{$src, $dst|$dst, $src}", []>,
               EVEX, VEX_LIG, Requires<[HasAVX512]>;
    }
  } // hasSideEffects = 0
}
3332let Predicates = [HasAVX512] in {
3333// Convert float/double to signed/unsigned int 32/64
// Single-precision source.  The GR64-destination variants additionally
// carry VEX_W.
defm VCVTSS2SIZ    : avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse_cvtss2si,
                                      ssmem, sse_load_f32, "cvtss2si">,
                     XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z  : avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse_cvtss2si64,
                                      ssmem, sse_load_f32, "cvtss2si">,
                     XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ   : avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtss2usi,
                                      ssmem, sse_load_f32, "cvtss2usi">,
                     XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z : avx512_cvt_s_int<0x79, VR128X, GR64,
                                      int_x86_avx512_cvtss2usi64, ssmem,
                                      sse_load_f32, "cvtss2usi">,
                     XS, VEX_W, EVEX_CD8<32, CD8VT1>;

// Double-precision source.
defm VCVTSD2SIZ    : avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si,
                                      sdmem, sse_load_f64, "cvtsd2si">,
                     XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z  : avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse2_cvtsd2si64,
                                      sdmem, sse_load_f64, "cvtsd2si">,
                     XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ   : avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtsd2usi,
                                      sdmem, sse_load_f64, "cvtsd2usi">,
                     XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z : avx512_cvt_s_int<0x79, VR128X, GR64,
                                      int_x86_avx512_cvtsd2usi64, sdmem,
                                      sse_load_f64, "cvtsd2usi">,
                     XD, VEX_W, EVEX_CD8<64, CD8VT1>;
3360
let isCodeGenOnly = 1 in {
  // Intrinsic forms of signed integer -> scalar float/double (opcode 0x2A).
  defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
            int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
            SSE_CVT_Scalar, 0>, XS, EVEX_4V;
  defm Int_VCVTSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
            int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}",
            SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
  defm Int_VCVTSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
            int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}",
            SSE_CVT_Scalar, 0>, XD, EVEX_4V;
  defm Int_VCVTSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
            int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
            SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;

  // Intrinsic forms of unsigned integer -> scalar float/double.  These use
  // opcode 0x7B, the same opcode as the VCVTUSI2SSZ/VCVTUSI2SDZ definitions;
  // 0x2A is the signed cvtsi2ss/cvtsi2sd opcode and would make the unsigned
  // intrinsics encode as the signed conversion.
  defm Int_VCVTUSI2SSZ : sse12_cvt_sint_3addr<0x7B, GR32, VR128X,
            int_x86_avx512_cvtusi2ss, i32mem, loadi32, "cvtusi2ss{l}",
            SSE_CVT_Scalar, 0>, XS, EVEX_4V;
  defm Int_VCVTUSI2SS64Z : sse12_cvt_sint_3addr<0x7B, GR64, VR128X,
            int_x86_avx512_cvtusi642ss, i64mem, loadi64, "cvtusi2ss{q}",
            SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
  defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x7B, GR32, VR128X,
            int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}",
            SSE_CVT_Scalar, 0>, XD, EVEX_4V;
  defm Int_VCVTUSI2SD64Z : sse12_cvt_sint_3addr<0x7B, GR64, VR128X,
            int_x86_avx512_cvtusi642sd, i64mem, loadi64, "cvtusi2sd{q}",
            SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
} // isCodeGenOnly = 1
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003388
3389// Convert float/double to signed/unsigned int 32/64 with truncation
let isCodeGenOnly = 1 in {
  // Truncating conversions to signed integer (opcode 0x2C).
  defm Int_VCVTTSS2SIZ    : avx512_cvt_s_int<0x2C, VR128X, GR32,
                                int_x86_sse_cvttss2si, ssmem, sse_load_f32,
                                "cvttss2si">,
                            XS, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSS2SI64Z  : avx512_cvt_s_int<0x2C, VR128X, GR64,
                                int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
                                "cvttss2si">,
                            XS, VEX_W, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSD2SIZ    : avx512_cvt_s_int<0x2C, VR128X, GR32,
                                int_x86_sse2_cvttsd2si, sdmem, sse_load_f64,
                                "cvttsd2si">,
                            XD, EVEX_CD8<64, CD8VT1>;
  defm Int_VCVTTSD2SI64Z  : avx512_cvt_s_int<0x2C, VR128X, GR64,
                                int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
                                "cvttsd2si">,
                            XD, VEX_W, EVEX_CD8<64, CD8VT1>;

  // Truncating conversions to unsigned integer (opcode 0x78).
  defm Int_VCVTTSS2USIZ   : avx512_cvt_s_int<0x78, VR128X, GR32,
                                int_x86_avx512_cvttss2usi, ssmem, sse_load_f32,
                                "cvttss2usi">,
                            XS, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSS2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
                                int_x86_avx512_cvttss2usi64, ssmem,
                                sse_load_f32, "cvttss2usi">,
                            XS, VEX_W, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSD2USIZ   : avx512_cvt_s_int<0x78, VR128X, GR32,
                                int_x86_avx512_cvttsd2usi,
                                sdmem, sse_load_f64, "cvttsd2usi">,
                            XD, EVEX_CD8<64, CD8VT1>;
  defm Int_VCVTTSD2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
                                int_x86_avx512_cvttsd2usi64, sdmem,
                                sse_load_f64, "cvttsd2usi">,
                            XD, VEX_W, EVEX_CD8<64, CD8VT1>;
} // isCodeGenOnly = 1
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003421
// Scalar conversion selected through a generic SDNode.
//   opc      - opcode byte
//   SrcRC    - source register class
//   DstRC    - destination register class
//   OpNode   - DAG node matched by both forms
//   x86memop - memory operand of the load form
//   ld_frag  - load fragment folded into the "rm" form
//   asm      - mnemonic, prefixed to the operand string
multiclass avx512_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                        SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
                        string asm> {
  // Register source.
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
              asm # " \t{$src, $dst|$dst, $src}",
              [(set DstRC:$dst, (OpNode SrcRC:$src))]>,
           EVEX;
  // Memory source.
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
              asm # " \t{$src, $dst|$dst, $src}",
              [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>,
           EVEX;
}
3432
// Truncating scalar FP -> integer conversions matched from fp_to_sint /
// fp_to_uint.  The GR64-destination variants additionally carry VEX_W.
defm VCVTTSS2SIZ    : avx512_cvt_s<0x2C, FR32X, GR32, fp_to_sint, f32mem,
                                   loadf32, "cvttss2si">,
                      XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USIZ   : avx512_cvt_s<0x78, FR32X, GR32, fp_to_uint, f32mem,
                                   loadf32, "cvttss2usi">,
                      XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z  : avx512_cvt_s<0x2C, FR32X, GR64, fp_to_sint, f32mem,
                                   loadf32, "cvttss2si">,
                      XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z : avx512_cvt_s<0x78, FR32X, GR64, fp_to_uint, f32mem,
                                   loadf32, "cvttss2usi">,
                      XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ    : avx512_cvt_s<0x2C, FR64X, GR32, fp_to_sint, f64mem,
                                   loadf64, "cvttsd2si">,
                      XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USIZ   : avx512_cvt_s<0x78, FR64X, GR32, fp_to_uint, f64mem,
                                   loadf64, "cvttsd2usi">,
                      XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z  : avx512_cvt_s<0x2C, FR64X, GR64, fp_to_sint, f64mem,
                                   loadf64, "cvttsd2si">,
                      XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z : avx512_cvt_s<0x78, FR64X, GR64, fp_to_uint, f64mem,
                                   loadf64, "cvttsd2usi">,
                      XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskycf088092013-12-11 14:31:04 +00003457} // HasAVX512
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003458//===----------------------------------------------------------------------===//
// AVX-512 Convert from float to double and back
3460//===----------------------------------------------------------------------===//
// Three-operand scalar float <-> double conversions.  None of these records
// carries a selection pattern; they are selected through separate Pat
// definitions.
let hasSideEffects = 0 in {
  // Convert scalar single to scalar double
  def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg, (outs FR64X:$dst),
                       (ins FR32X:$src1, FR32X:$src2),
                       "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       []>,
                     EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
  let mayLoad = 1 in {
    def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem, (outs FR64X:$dst),
                         (ins FR32X:$src1, f32mem:$src2),
                         "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         []>,
                       EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
                       EVEX_CD8<32, CD8VT1>;
  }

  // Convert scalar double to scalar single
  def VCVTSD2SSZrr : AVX512XDI<0x5A, MRMSrcReg, (outs FR32X:$dst),
                       (ins FR64X:$src1, FR64X:$src2),
                       "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       []>,
                     EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>;
  let mayLoad = 1 in {
    def VCVTSD2SSZrm : AVX512XDI<0x5A, MRMSrcMem, (outs FR32X:$dst),
                         (ins FR64X:$src1, f64mem:$src2),
                         "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         []>,
                       EVEX_4V, VEX_LIG, VEX_W,
                       Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>;
  }
}
3485
let Predicates = [HasAVX512] in {
  // f32 -> f64 extension of a register; the source is used for both inputs.
  def : Pat<(f64 (fextend FR32X:$src)),
            (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>;
  // f32 -> f64 extension of a loaded value.
  def : Pat<(fextend (loadf32 addr:$src)),
            (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
}

// Extending load: fold the load when optimizing for size ...
let Predicates = [HasAVX512, OptForSize] in
def : Pat<(extloadf32 addr:$src),
          (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>;

// ... and use a separate VMOVSSZrm load when optimizing for speed.
let Predicates = [HasAVX512, OptForSpeed] in
def : Pat<(extloadf32 addr:$src),
          (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>;

// f64 -> f32 rounding of a register; the source is used for both inputs.
let Predicates = [HasAVX512] in
def : Pat<(f32 (fround FR64X:$src)),
          (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>;
3501
// Packed conversion with an extra "rrb" form taking an AVX512RC operand.
//   opc      - opcode byte
//   asm      - mnemonic, prefixed to the operand string
//   SrcRC    - source register class
//   DstRC    - destination register class
//   OpNode   - DAG node matched by the "rr" and "rm" forms
//   mem_frag - load fragment folded (through a bitconvert) into "rm"
//   x86memop - memory operand of the "rm" form
//   OpVT     - result value type
//   InVT     - source value type
//   d        - execution domain passed to AVX512PI
multiclass avx512_vcvt_fp_with_rc<bits<8> opc, string asm, RegisterClass SrcRC,
                                  RegisterClass DstRC, SDNode OpNode,
                                  PatFrag mem_frag, X86MemOperand x86memop,
                                  ValueType OpVT, ValueType InVT, Domain d> {
  let hasSideEffects = 0 in {
    // Register source.
    def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
                      asm # " \t{$src, $dst|$dst, $src}",
                      [(set DstRC:$dst,
                        (OpVT (OpNode (InVT SrcRC:$src))))], d>,
             EVEX;
    // Register source plus $rc operand; no selection pattern.
    def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst),
                       (ins SrcRC:$src, AVX512RC:$rc),
                       asm # " \t{$rc, $src, $dst|$dst, $src, $rc}",
                       [], d>,
              EVEX, EVEX_B, EVEX_RC;
    // Memory source.
    let mayLoad = 1 in {
      def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
                        asm # " \t{$src, $dst|$dst, $src}",
                        [(set DstRC:$dst,
                          (OpVT (OpNode (InVT (bitconvert
                                               (mem_frag addr:$src))))))], d>,
               EVEX;
    }
  } // hasSideEffects = 0
}
3521
// Packed conversion with register ("rr") and memory ("rm") source forms.
//   opc      - opcode byte
//   asm      - mnemonic, prefixed to the operand string
//   SrcRC    - source register class
//   DstRC    - destination register class
//   OpNode   - DAG node matched by both forms
//   mem_frag - load fragment folded (through a bitconvert) into "rm"
//   x86memop - memory operand of the "rm" form
//   OpVT     - result value type
//   InVT     - source value type
//   d        - execution domain passed to AVX512PI
multiclass avx512_vcvt_fp<bits<8> opc, string asm, RegisterClass SrcRC,
                          RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
                          X86MemOperand x86memop, ValueType OpVT,
                          ValueType InVT, Domain d> {
  let hasSideEffects = 0 in {
    // Register source.
    def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
                      asm # " \t{$src, $dst|$dst, $src}",
                      [(set DstRC:$dst,
                        (OpVT (OpNode (InVT SrcRC:$src))))], d>,
             EVEX;
    // Memory source.
    let mayLoad = 1 in {
      def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
                        asm # " \t{$src, $dst|$dst, $src}",
                        [(set DstRC:$dst,
                          (OpVT (OpNode (InVT (bitconvert
                                               (mem_frag addr:$src))))))], d>,
               EVEX;
    }
  } // hasSideEffects = 0
}
3538
// Packed double -> single (v8f64 -> v8f32), with a rounding-control form.
defm VCVTPD2PSZ : avx512_vcvt_fp_with_rc<0x5A, "vcvtpd2ps", VR512, VR256X, fround,
                                memopv8f64, f512mem, v8f32, v8f64,
                                SSEPackedSingle>, EVEX_V512, VEX_W, PD,
                                EVEX_CD8<64, CD8VF>;

// Packed single -> double (v8f32 -> v8f64).
// The (v8f64 (extloadv8f32 ...)) -> VCVTPS2PDZrm pattern is not repeated
// here: an identical pattern already exists in the "Predicates = [HasAVX512]"
// block that follows the packed fp->int conversion definitions.
defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend,
                                memopv4f64, f256mem, v8f64, v8f32,
                                SSEPackedDouble>, EVEX_V512, PS,
                                EVEX_CD8<32, CD8VH>;

// Masked cvtpd2ps intrinsic with an all-zero pass-through and an all-ones
// mask: FROUND_CURRENT selects the plain register form ...
def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src),
                   (bc_v8f32(v8i32 immAllZerosV)), (i8 -1), (i32 FROUND_CURRENT))),
                   (VCVTPD2PSZrr VR512:$src)>;

// ... any other immediate is forwarded as $rc to the "rrb" form.
def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src),
                   (bc_v8f32(v8i32 immAllZerosV)), (i8 -1), imm:$rc)),
                   (VCVTPD2PSZrrb VR512:$src, imm:$rc)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003558
3559//===----------------------------------------------------------------------===//
// AVX-512 Vector convert from signed integer to float/double
3561//===----------------------------------------------------------------------===//
3562
// Signed integer -> packed single (v16i32 -> v16f32), with a $rc form.
defm VCVTDQ2PSZ   : avx512_vcvt_fp_with_rc<0x5B, "vcvtdq2ps", VR512, VR512,
                        sint_to_fp, memopv8i64, i512mem, v16f32, v16i32,
                        SSEPackedSingle>,
                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

// Signed integer -> packed double (v8i32 -> v8f64).
defm VCVTDQ2PDZ   : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512,
                        sint_to_fp, memopv4i64, i256mem, v8f64, v8i32,
                        SSEPackedDouble>,
                    EVEX_V512, XS, EVEX_CD8<32, CD8VH>;

// Truncating packed single -> signed integer (v16f32 -> v16i32).
defm VCVTTPS2DQZ  : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512,
                        fp_to_sint, memopv16f32, f512mem, v16i32, v16f32,
                        SSEPackedSingle>,
                    EVEX_V512, XS, EVEX_CD8<32, CD8VF>;

// Truncating packed double -> signed integer (v8f64 -> v8i32).
defm VCVTTPD2DQZ  : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X,
                        fp_to_sint, memopv8f64, f512mem, v8i32, v8f64,
                        SSEPackedDouble>,
                    EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Truncating packed single -> unsigned integer (v16f32 -> v16i32).
defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512,
                        fp_to_uint, memopv16f32, f512mem, v16i32, v16f32,
                        SSEPackedSingle>,
                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

// cvttps2udq (src, 0, mask-all-ones, sae-current)
def : Pat<(v16i32 (int_x86_avx512_mask_cvttps2udq_512 (v16f32 VR512:$src),
                    (v16i32 immAllZerosV), (i16 -1), FROUND_CURRENT)),
          (VCVTTPS2UDQZrr VR512:$src)>;

// Truncating packed double -> unsigned integer (v8f64 -> v8i32).
defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X,
                        fp_to_uint, memopv8f64, f512mem, v8i32, v8f64,
                        SSEPackedDouble>,
                    EVEX_V512, PS, VEX_W, EVEX_CD8<64, CD8VF>;

// cvttpd2udq (src, 0, mask-all-ones, sae-current)
def : Pat<(v8i32 (int_x86_avx512_mask_cvttpd2udq_512 (v8f64 VR512:$src),
                   (v8i32 immAllZerosV), (i8 -1), FROUND_CURRENT)),
          (VCVTTPD2UDQZrr VR512:$src)>;
3602
// Unsigned integer -> packed double (v8i32 -> v8f64).  The source is an
// integer vector, so the memory form uses the integer operand class
// (i256mem), consistent with the signed VCVTDQ2PDZ definition.
defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp,
                                 memopv4i64, i256mem, v8f64, v8i32,
                                 SSEPackedDouble>, EVEX_V512, XS,
                                 EVEX_CD8<32, CD8VH>;

// Unsigned integer -> packed single (v16i32 -> v16f32), with a $rc form.
// Uses i512mem for the integer source, consistent with VCVTDQ2PSZ.
defm VCVTUDQ2PSZ : avx512_vcvt_fp_with_rc<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp,
                                 memopv16i32, i512mem, v16f32, v16i32,
                                 SSEPackedSingle>, EVEX_V512, XD,
                                 EVEX_CD8<32, CD8VF>;
3612
// 128/256-bit unsigned conversions are performed with the 512-bit
// instruction: the source is placed in a 512-bit register with SUBREG_TO_REG
// and the result is taken from the matching sub-register.
def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
          (EXTRACT_SUBREG
            (v16i32 (VCVTTPS2UDQZrr
              (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))),
            sub_ymm)>;

def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
          (EXTRACT_SUBREG
            (v16i32 (VCVTTPS2UDQZrr
              (v16f32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))),
            sub_xmm)>;

def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
          (EXTRACT_SUBREG
            (v16f32 (VCVTUDQ2PSZrr
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))),
            sub_ymm)>;

def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG
            (v16f32 (VCVTUDQ2PSZrr
              (v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))),
            sub_xmm)>;

def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG
            (v8f64 (VCVTUDQ2PDZrr
              (v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))),
            sub_ymm)>;

// Masked intrinsics with an all-zero pass-through and an all-ones mask.
// The *2ps forms forward the immediate as $rc to the "rrb" instruction.
def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512 (v16i32 VR512:$src),
                    (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)),
          (VCVTDQ2PSZrrb VR512:$src, imm:$rc)>;
def : Pat<(v8f64 (int_x86_avx512_mask_cvtdq2pd_512 (v8i32 VR256X:$src),
                   (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
          (VCVTDQ2PDZrr VR256X:$src)>;
def : Pat<(v16f32 (int_x86_avx512_mask_cvtudq2ps_512 (v16i32 VR512:$src),
                    (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)),
          (VCVTUDQ2PSZrrb VR512:$src, imm:$rc)>;
def : Pat<(v8f64 (int_x86_avx512_mask_cvtudq2pd_512 (v8i32 VR256X:$src),
                   (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
          (VCVTUDQ2PDZrr VR256X:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003645
// Packed FP -> integer conversion with "rr", "rrb" (extra AVX512RC operand)
// and "rm" forms.  None of the forms carries a selection pattern.
//   opc      - opcode byte
//   asm      - mnemonic, prefixed to the operand string
//   SrcRC    - source register class
//   DstRC    - destination register class
//   mem_frag - accepted for callers; not referenced in this multiclass
//   x86memop - memory operand of the "rm" form
//   d        - execution domain passed to AVX512PI
multiclass avx512_vcvt_fp2int<bits<8> opc, string asm, RegisterClass SrcRC,
                              RegisterClass DstRC, PatFrag mem_frag,
                              X86MemOperand x86memop, Domain d> {
  let hasSideEffects = 0 in {
    def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
                      asm # " \t{$src, $dst|$dst, $src}",
                      [], d>,
             EVEX;
    def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst),
                       (ins SrcRC:$src, AVX512RC:$rc),
                       asm # " \t{$rc, $src, $dst|$dst, $src, $rc}",
                       [], d>,
              EVEX, EVEX_B, EVEX_RC;
    let mayLoad = 1 in {
      def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
                        asm # " \t{$src, $dst|$dst, $src}",
                        [], d>,
               EVEX;
    }
  } // hasSideEffects = 0
}
3662
// Packed FP -> signed integer.
defm VCVTPS2DQZ  : avx512_vcvt_fp2int<0x5B, "vcvtps2dq", VR512, VR512,
                                      memopv16f32, f512mem, SSEPackedSingle>,
                   PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VCVTPD2DQZ  : avx512_vcvt_fp2int<0xE6, "vcvtpd2dq", VR512, VR256X,
                                      memopv8f64, f512mem, SSEPackedDouble>,
                   XD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// Masked intrinsics (all-zero pass-through, all-ones mask) select the "rrb"
// form and forward the immediate as $rc.
def : Pat<(v16i32 (int_x86_avx512_mask_cvtps2dq_512 (v16f32 VR512:$src),
                    (v16i32 immAllZerosV), (i16 -1), imm:$rc)),
          (VCVTPS2DQZrrb VR512:$src, imm:$rc)>;

def : Pat<(v8i32 (int_x86_avx512_mask_cvtpd2dq_512 (v8f64 VR512:$src),
                   (v8i32 immAllZerosV), (i8 -1), imm:$rc)),
          (VCVTPD2DQZrrb VR512:$src, imm:$rc)>;

// Packed FP -> unsigned integer.
defm VCVTPS2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtps2udq", VR512, VR512,
                                      memopv16f32, f512mem, SSEPackedSingle>,
                   PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VCVTPD2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtpd2udq", VR512, VR256X,
                                      memopv8f64, f512mem, SSEPackedDouble>,
                   VEX_W, PS, EVEX_V512, EVEX_CD8<64, CD8VF>;

def : Pat<(v16i32 (int_x86_avx512_mask_cvtps2udq_512 (v16f32 VR512:$src),
                    (v16i32 immAllZerosV), (i16 -1), imm:$rc)),
          (VCVTPS2UDQZrrb VR512:$src, imm:$rc)>;

def : Pat<(v8i32 (int_x86_avx512_mask_cvtpd2udq_512 (v8f64 VR512:$src),
                   (v8i32 immAllZerosV), (i8 -1), imm:$rc)),
          (VCVTPD2UDQZrrb VR512:$src, imm:$rc)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003692
// Fold loads into the packed float <-> double conversions.
let Predicates = [HasAVX512] in {
  // Round a loaded v8f64 down to v8f32.
  def : Pat<(v8f32 (fround (loadv8f64 addr:$src))), (VCVTPD2PSZrm addr:$src)>;
  // Extending load of v8f32 to v8f64.
  def : Pat<(v8f64 (extloadv8f32 addr:$src)), (VCVTPS2PDZrm addr:$src)>;
}
3699
Elena Demikhovskydd0794e2013-10-24 07:16:35 +00003700//===----------------------------------------------------------------------===//
3701// Half precision conversion instructions
3702//===----------------------------------------------------------------------===//
// vcvtph2ps (opcode 0x13) with register and memory source forms.  Neither
// form carries a selection pattern.
//   destRC   - destination register class
//   srcRC    - source register class of the "rr" form
//   x86memop - memory operand of the "rm" form
multiclass avx512_cvtph2ps<RegisterClass destRC, RegisterClass srcRC,
                           X86MemOperand x86memop> {
  def rr : AVX5128I<0x13, MRMSrcReg, (outs destRC:$dst), (ins srcRC:$src),
                    "vcvtph2ps\t{$src, $dst|$dst, $src}", []>,
           EVEX;
  // Only the load form is explicitly marked side-effect free.
  let hasSideEffects = 0, mayLoad = 1 in {
    def rm : AVX5128I<0x13, MRMSrcMem, (outs destRC:$dst), (ins x86memop:$src),
                      "vcvtph2ps\t{$src, $dst|$dst, $src}", []>,
             EVEX;
  }
}
3712
// vcvtps2ph (opcode 0x1D) with register ("rr") and memory ("mr") destination
// forms; $src2 is an immediate operand.  Neither form carries a selection
// pattern.
//   destRC   - destination register class of the "rr" form
//   srcRC    - source register class
//   x86memop - memory operand written by the "mr" form
multiclass avx512_cvtps2ph<RegisterClass destRC, RegisterClass srcRC,
                           X86MemOperand x86memop> {
  def rr : AVX512AIi8<0x1D, MRMDestReg, (outs destRC:$dst),
                      (ins srcRC:$src1, i32i8imm:$src2),
                      "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      []>,
           EVEX;
  // Only the store form is explicitly marked side-effect free.
  let hasSideEffects = 0, mayStore = 1 in {
    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
                        (ins x86memop:$dst, srcRC:$src1, i32i8imm:$src2),
                        "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        []>,
             EVEX;
  }
}
3724
// 512-bit half <-> single precision conversions.
defm VCVTPH2PSZ : avx512_cvtph2ps<VR512, VR256X, f256mem>,
                  EVEX_V512, EVEX_CD8<32, CD8VH>;
defm VCVTPS2PHZ : avx512_cvtps2ph<VR256X, VR512, f256mem>,
                  EVEX_V512, EVEX_CD8<32, CD8VH>;

// Masked intrinsics with an all-zero pass-through and an all-ones mask.
def : Pat<(v16i16 (int_x86_avx512_mask_vcvtps2ph_512 (v16f32 VR512:$src),
                    imm:$rc, (bc_v16i16(v8i32 immAllZerosV)), (i16 -1))),
          (VCVTPS2PHZrr VR512:$src, imm:$rc)>;

def : Pat<(v16f32 (int_x86_avx512_mask_vcvtph2ps_512 (v16i16 VR256X:$src),
                    (bc_v16f32(v16i32 immAllZerosV)), (i16 -1),
                    (i32 FROUND_CURRENT))),
          (VCVTPH2PSZrr VR256X:$src)>;
3737
// Scalar compares that write EFLAGS.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  // Unordered compares matched from X86cmp on scalar registers.
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
                                 "ucomiss">,
                   PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
                                 "ucomisd">,
                   PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

  // Ordered compares; the selection pattern is forced to be empty.
  let Pattern = []<dag> in {
    defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load,
                                  "comiss">,
                    PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load,
                                  "comisd">,
                    PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }

  // Vector-register forms matched from X86ucomi / X86comi.
  let isCodeGenOnly = 1 in {
    defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem,
                                       load, "ucomiss">,
                         PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem,
                                       load, "ucomisd">,
                         PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

    defm Int_VCOMISSZ  : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem,
                                       load, "comiss">,
                         PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm Int_VCOMISDZ  : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem,
                                       load, "comisd">,
                         PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}
3769
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
///   opc       - opcode byte
///   OpcodeStr - mnemonic, prefixed to the operand string
///   RC        - register class of the destination and both sources
///   x86memop  - memory operand of the "rm" form
/// Neither form carries a selection pattern.
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                         X86MemOperand x86memop> {
  let hasSideEffects = 0 in {
    // Register second source.
    def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                      (ins RC:$src1, RC:$src2),
                      OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      []>,
             EVEX_4V;
    // Memory second source.
    let mayLoad = 1 in
    def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                      (ins RC:$src1, x86memop:$src2),
                      OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      []>,
             EVEX_4V;
  }
}
3786
// Scalar rcp14 / rsqrt14 instructions; the double-precision variants
// additionally carry VEX_W.
defm VRCP14SS   : avx512_fp14_s<0x4D, "vrcp14ss", FR32X, f32mem>,
                  EVEX_CD8<32, CD8VT1>;
defm VRCP14SD   : avx512_fp14_s<0x4D, "vrcp14sd", FR64X, f64mem>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;
defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", FR32X, f32mem>,
                  EVEX_CD8<32, CD8VT1>;
defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", FR64X, f64mem>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;

// Intrinsic patterns (all-zero pass-through, all-ones mask).  The VR128X
// operands are copied to the scalar register class for the instruction and
// the result is copied back to VR128X.
def : Pat<(v4f32 (int_x86_avx512_rcp14_ss (v4f32 VR128X:$src1),
                   (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)),
                   (i8 -1))),
          (COPY_TO_REGCLASS
            (VRCP14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
                        (COPY_TO_REGCLASS VR128X:$src2, FR32X)),
            VR128X)>;

def : Pat<(v2f64 (int_x86_avx512_rcp14_sd (v2f64 VR128X:$src1),
                   (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)),
                   (i8 -1))),
          (COPY_TO_REGCLASS
            (VRCP14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
                        (COPY_TO_REGCLASS VR128X:$src2, FR64X)),
            VR128X)>;

def : Pat<(v4f32 (int_x86_avx512_rsqrt14_ss (v4f32 VR128X:$src1),
                   (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)),
                   (i8 -1))),
          (COPY_TO_REGCLASS
            (VRSQRT14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
                          (COPY_TO_REGCLASS VR128X:$src2, FR32X)),
            VR128X)>;

def : Pat<(v2f64 (int_x86_avx512_rsqrt14_sd (v2f64 VR128X:$src1),
                   (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)),
                   (i8 -1))),
          (COPY_TO_REGCLASS
            (VRSQRT14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
                          (COPY_TO_REGCLASS VR128X:$src2, FR64X)),
            VR128X)>;
Elena Demikhovskya3a71402013-10-09 08:16:14 +00003815
/// avx512_fp14_p - packed rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd.
///
/// Produces a register form (r) and a memory form (m), each selected from
/// OpNode applied to a value of type OpVt.
///   opc       - opcode byte.
///   OpcodeStr - mnemonic, prepended to the operand string.
///   OpNode    - DAG node matched by both forms.
///   RC        - register class of $dst and the register source.
///   x86memop  - memory operand of the m form.
///   mem_frag  - load fragment applied to addr:$src in the m form.
///   OpVt      - result value type of the matched node.
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         RegisterClass RC, X86MemOperand x86memop,
                         PatFrag mem_frag, ValueType OpVt> {
  // Register source.
  def r : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                   OpcodeStr # " \t{$src, $dst|$dst, $src}",
                   [(set RC:$dst, (OpVt (OpNode RC:$src)))]>,
          EVEX;

  // Memory source, loaded through mem_frag.
  def m : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                   OpcodeStr # " \t{$src, $dst|$dst, $src}",
                   [(set RC:$dst, (OpVt (OpNode (mem_frag addr:$src))))]>,
          EVEX;
}
// 512-bit packed rsqrt14 / rcp14, selected from X86frsqrt / X86frcp.
defm VRSQRT14PSZ : avx512_fp14_p<0x4E, "vrsqrt14ps", X86frsqrt, VR512,
                                 f512mem, memopv16f32, v16f32>,
                   EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRSQRT14PDZ : avx512_fp14_p<0x4E, "vrsqrt14pd", X86frsqrt, VR512,
                                 f512mem, memopv8f64, v8f64>,
                   VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
defm VRCP14PSZ   : avx512_fp14_p<0x4C, "vrcp14ps", X86frcp, VR512,
                                 f512mem, memopv16f32, v16f32>,
                   EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRCP14PDZ   : avx512_fp14_p<0x4C, "vrcp14pd", X86frcp, VR512,
                                 f512mem, memopv8f64, v8f64>,
                   VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// Map the 512-bit intrinsics onto the register forms when the second operand
// is an all-zeros vector and the third is an all-ones integer (-1).
def : Pat<(v16f32 (int_x86_avx512_rsqrt14_ps_512
                     (v16f32 VR512:$src),
                     (bc_v16f32 (v16i32 immAllZerosV)),
                     (i16 -1))),
          (VRSQRT14PSZr VR512:$src)>;
def : Pat<(v8f64 (int_x86_avx512_rsqrt14_pd_512
                    (v8f64 VR512:$src),
                    (bc_v8f64 (v16i32 immAllZerosV)),
                    (i8 -1))),
          (VRSQRT14PDZr VR512:$src)>;

def : Pat<(v16f32 (int_x86_avx512_rcp14_ps_512
                     (v16f32 VR512:$src),
                     (bc_v16f32 (v16i32 immAllZerosV)),
                     (i16 -1))),
          (VRCP14PSZr VR512:$src)>;
def : Pat<(v8f64 (int_x86_avx512_rcp14_pd_512
                    (v8f64 VR512:$src),
                    (bc_v8f64 (v16i32 immAllZerosV)),
                    (i8 -1))),
          (VRCP14PDZr VR512:$src)>;
3852
/// avx512_fp28_s - scalar rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd.
///
/// Produces three pattern-less forms, all predicated on HasERI and flagged as
/// free of side effects:
///   rr  - register sources.
///   rrb - register sources with EVEX_B set; printed with "{sae}".
///   rm  - second source in memory (marked mayLoad).
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                         X86MemOperand x86memop> {
  let hasSideEffects = 0, Predicates = [HasERI] in {
    def rr  : AVX5128I<opc, MRMSrcReg,
                       (outs RC:$dst), (ins RC:$src1, RC:$src2),
                       OpcodeStr #
                         " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       []>, EVEX_4V;

    def rrb : AVX5128I<opc, MRMSrcReg,
                       (outs RC:$dst), (ins RC:$src1, RC:$src2),
                       OpcodeStr #
                         " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}",
                       []>, EVEX_4V, EVEX_B;

    let mayLoad = 1 in
    def rm  : AVX5128I<opc, MRMSrcMem,
                       (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                       OpcodeStr #
                         " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       []>, EVEX_4V;
  }
}
3874
// Scalar rcp28 / rsqrt28 instructions. The SD variants additionally set VEX_W.
defm VRCP28SS   : avx512_fp28_s<0xCB, "vrcp28ss", FR32X, f32mem>,
                  EVEX_CD8<32, CD8VT1>;
defm VRCP28SD   : avx512_fp28_s<0xCB, "vrcp28sd", FR64X, f64mem>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;
defm VRSQRT28SS : avx512_fp28_s<0xCD, "vrsqrt28ss", FR32X, f32mem>,
                  EVEX_CD8<32, CD8VT1>;
defm VRSQRT28SD : avx512_fp28_s<0xCD, "vrsqrt28sd", FR64X, f64mem>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;

// Select the rrb ({sae}) forms for the scalar intrinsics when the third
// operand is an all-zeros vector, the fourth is (i8 -1) and the last operand
// is FROUND_NO_EXC. The VR128X operands and the result are moved to and from
// the scalar register classes with COPY_TO_REGCLASS.
def : Pat<(v4f32 (int_x86_avx512_rcp28_ss (v4f32 VR128X:$src1),
                                          (v4f32 VR128X:$src2),
                                          (bc_v4f32 (v4i32 immAllZerosV)),
                                          (i8 -1),
                                          FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
             (VRCP28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
                          (COPY_TO_REGCLASS VR128X:$src2, FR32X)),
             VR128X)>;

def : Pat<(v2f64 (int_x86_avx512_rcp28_sd (v2f64 VR128X:$src1),
                                          (v2f64 VR128X:$src2),
                                          (bc_v2f64 (v4i32 immAllZerosV)),
                                          (i8 -1),
                                          FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
             (VRCP28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
                          (COPY_TO_REGCLASS VR128X:$src2, FR64X)),
             VR128X)>;

def : Pat<(v4f32 (int_x86_avx512_rsqrt28_ss (v4f32 VR128X:$src1),
                                            (v4f32 VR128X:$src2),
                                            (bc_v4f32 (v4i32 immAllZerosV)),
                                            (i8 -1),
                                            FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
             (VRSQRT28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
                            (COPY_TO_REGCLASS VR128X:$src2, FR32X)),
             VR128X)>;

def : Pat<(v2f64 (int_x86_avx512_rsqrt28_sd (v2f64 VR128X:$src1),
                                            (v2f64 VR128X:$src2),
                                            (bc_v2f64 (v4i32 immAllZerosV)),
                                            (i8 -1),
                                            FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
             (VRSQRT28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
                            (COPY_TO_REGCLASS VR128X:$src2, FR64X)),
             VR128X)>;
3907
/// avx512_fp28_p - packed rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd.
///
/// Produces three pattern-less forms, all predicated on HasERI and flagged as
/// free of side effects:
///   r  - register source.
///   rb - register source with EVEX_B set; printed with "{sae}".
///   m  - memory source.
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr,
                         RegisterClass RC, X86MemOperand x86memop> {
  let hasSideEffects = 0, Predicates = [HasERI] in {
  def r : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                        !strconcat(OpcodeStr,
                                   " \t{$src, $dst|$dst, $src}"),
                        []>, EVEX;
  def rb : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                        !strconcat(OpcodeStr,
                                   " \t{{sae}, $src, $dst|$dst, $src, {sae}}"),
                        []>, EVEX, EVEX_B;
  // The memory form has no pattern from which TableGen could infer that it
  // reads memory, and hasSideEffects is cleared above, so the load has to be
  // stated explicitly (as the scalar avx512_fp28_s multiclass already does).
  let mayLoad = 1 in
  def m : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                        !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                        []>, EVEX;
  }
}
// 512-bit packed rsqrt28 / rcp28.
defm VRSQRT28PSZ : avx512_fp28_p<0xCC, "vrsqrt28ps", VR512, f512mem>,
                   EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRSQRT28PDZ : avx512_fp28_p<0xCC, "vrsqrt28pd", VR512, f512mem>,
                   VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
defm VRCP28PSZ   : avx512_fp28_p<0xCA, "vrcp28ps", VR512, f512mem>,
                   EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRCP28PDZ   : avx512_fp28_p<0xCA, "vrcp28pd", VR512, f512mem>,
                   VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// Map the intrinsics onto the rb ({sae}) forms when the second operand is an
// all-zeros vector, the third is an all-ones integer (-1) and the last
// operand is FROUND_NO_EXC.
def : Pat<(v16f32 (int_x86_avx512_rsqrt28_ps
                     (v16f32 VR512:$src),
                     (bc_v16f32 (v16i32 immAllZerosV)),
                     (i16 -1), FROUND_NO_EXC)),
          (VRSQRT28PSZrb VR512:$src)>;
def : Pat<(v8f64 (int_x86_avx512_rsqrt28_pd
                    (v8f64 VR512:$src),
                    (bc_v8f64 (v16i32 immAllZerosV)),
                    (i8 -1), FROUND_NO_EXC)),
          (VRSQRT28PDZrb VR512:$src)>;

def : Pat<(v16f32 (int_x86_avx512_rcp28_ps
                     (v16f32 VR512:$src),
                     (bc_v16f32 (v16i32 immAllZerosV)),
                     (i16 -1), FROUND_NO_EXC)),
          (VRCP28PSZrb VR512:$src)>;
def : Pat<(v8f64 (int_x86_avx512_rcp28_pd
                    (v8f64 VR512:$src),
                    (bc_v8f64 (v16i32 immAllZerosV)),
                    (i8 -1), FROUND_NO_EXC)),
          (VRCP28PDZrb VR512:$src)>;
3947
/// avx512_sqrt_packed - 512-bit packed square root, single (PSZ*) and double
/// (PDZ*) precision, each with a register (rr) and a memory (rm) form.
///   opc       - opcode byte shared by all four forms.
///   OpcodeStr - mnemonic stem; "ps" / "pd" is appended here.
///   OpNode    - DAG node matched by the patterns.
///   itins_s   - itineraries for the single-precision forms.
///   itins_d   - itineraries for the double-precision forms.
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              OpndItins itins_s, OpndItins itins_d> {
  def PSZrr :AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
             !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
             [(set VR512:$dst, (v16f32 (OpNode VR512:$src)))], itins_s.rr>,
             EVEX, EVEX_V512;

  let mayLoad = 1 in
  def PSZrm : AVX512PSI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
              !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
              [(set VR512:$dst,
                (OpNode (v16f32 (bitconvert (memopv16f32 addr:$src)))))],
              itins_s.rm>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;

  // The double-precision forms are EVEX.W1 encodings, so VEX_W must be set,
  // exactly as the other PD/SD instructions in this file do.
  def PDZrr : AVX512PDI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
              !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
              [(set VR512:$dst, (v8f64 (OpNode VR512:$src)))], itins_d.rr>,
              EVEX, EVEX_V512, VEX_W;

  // NOTE(review): the load is matched as a v16f32 memop bitconverted to
  // v8f64 rather than as a v8f64 memop -- confirm this is intended.
  let mayLoad = 1 in
    def PDZrm : AVX512PDI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                [(set VR512:$dst, (OpNode
                  (v8f64 (bitconvert (memopv16f32 addr:$src)))))],
                itins_d.rm>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

}
3975
/// avx512_sqrt_scalar - scalar square root, single (SSZ*) and double (SDZ*)
/// precision.
///
/// For each precision this produces:
///   *Zr / *Zm         - pattern-less forms on FR32X / FR64X.
///   *Zr_Int / *Zm_Int - codegen-only forms on VR128X selected from the
///                       F32Int / F64Int intrinsics.
///   opc       - opcode byte shared by all forms.
///   OpcodeStr - mnemonic stem; "ss" / "sd" is appended here.
///   itins_s   - itineraries for the single-precision forms.
///   itins_d   - itineraries for the double-precision forms.
///
/// NOTE(review): the _Int forms derive from SIi8 although their operand lists
/// contain no immediate -- confirm that class choice is intended.
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,
                          Intrinsic F32Int, Intrinsic F64Int,
                          OpndItins itins_s, OpndItins itins_d> {
  def SSZr : SI<opc, MRMSrcReg, (outs FR32X:$dst),
               (ins FR32X:$src1, FR32X:$src2),
               !strconcat(OpcodeStr,
                          "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [], itins_s.rr>, XS, EVEX_4V;
  let isCodeGenOnly = 1 in
  def SSZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
               (ins VR128X:$src1, VR128X:$src2),
               !strconcat(OpcodeStr,
                "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set VR128X:$dst,
                 (F32Int VR128X:$src1, VR128X:$src2))],
               itins_s.rr>, XS, EVEX_4V;
  let mayLoad = 1 in {
  def SSZm : SI<opc, MRMSrcMem, (outs FR32X:$dst),
               (ins FR32X:$src1, f32mem:$src2),
               !strconcat(OpcodeStr,
                          "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [], itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
  let isCodeGenOnly = 1 in
  def SSZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
                   (ins VR128X:$src1, ssmem:$src2),
                   !strconcat(OpcodeStr,
                 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set VR128X:$dst,
                     (F32Int VR128X:$src1, sse_load_f32:$src2))],
                   itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
  }
  // Double-precision forms use the double-precision itineraries (itins_d),
  // mirroring how the single-precision forms above use itins_s.
  def SDZr : SI<opc, MRMSrcReg, (outs FR64X:$dst),
               (ins FR64X:$src1, FR64X:$src2),
               !strconcat(OpcodeStr,
                          "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [], itins_d.rr>, XD, EVEX_4V, VEX_W;
  let isCodeGenOnly = 1 in
  def SDZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
               (ins VR128X:$src1, VR128X:$src2),
               !strconcat(OpcodeStr,
                "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set VR128X:$dst,
                 (F64Int VR128X:$src1, VR128X:$src2))],
               itins_d.rr>, XD, EVEX_4V, VEX_W;
  let mayLoad = 1 in {
  def SDZm : SI<opc, MRMSrcMem, (outs FR64X:$dst),
               (ins FR64X:$src1, f64mem:$src2),
               !strconcat(OpcodeStr,
                          "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [], itins_d.rm>,
               XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
  let isCodeGenOnly = 1 in
  def SDZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
                  (ins VR128X:$src1, sdmem:$src2),
                   !strconcat(OpcodeStr,
                  "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR128X:$dst,
                    (F64Int VR128X:$src1, sse_load_f64:$src2))],
                  itins_d.rm>,
                  XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}
4036
4037
// Instantiate both sqrt multiclasses under the common VSQRT prefix: the scalar
// forms (VSQRTSSZ*, VSQRTSDZ*) and the packed forms (VSQRTPSZ*, VSQRTPDZ*).
defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
                                int_x86_avx512_sqrt_ss,
                                int_x86_avx512_sqrt_sd,
                                SSE_SQRTSS, SSE_SQRTSD>,
             avx512_sqrt_packed<0x51, "vsqrt", fsqrt,
                                SSE_SQRTPS, SSE_SQRTPD>;
4043
// Selection patterns for sqrt, rsqrt and rcp on AVX-512.
//
// NOTE(review): several patterns below combine the enclosing
// `let Predicates = [HasAVX512]` with a trailing Requires<[OptForSize]>;
// confirm which predicate list TableGen ends up applying to them.
let Predicates = [HasAVX512] in {
  // 512-bit sqrt intrinsics with an all-zeros second operand, an all-ones
  // third operand and FROUND_CURRENT map onto the plain register forms.
  def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1),
                    (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_CURRENT)),
                   (VSQRTPSZrr VR512:$src1)>;
  def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1),
                    (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)),
                   (VSQRTPDZrr VR512:$src1)>;

  // Scalar fsqrt. The first instruction source is not taken from the matched
  // node, so IMPLICIT_DEF is passed for it.
  def : Pat<(f32 (fsqrt FR32X:$src)),
            (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
  def : Pat<(f32 (fsqrt (load addr:$src))),
            (VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
            Requires<[OptForSize]>;
  def : Pat<(f64 (fsqrt FR64X:$src)),
            (VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>;
  def : Pat<(f64 (fsqrt (load addr:$src))),
            (VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>,
            Requires<[OptForSize]>;

  // Scalar reciprocal square root.
  def : Pat<(f32 (X86frsqrt FR32X:$src)),
            (VRSQRT14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
  def : Pat<(f32 (X86frsqrt (load addr:$src))),
            (VRSQRT14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
            Requires<[OptForSize]>;

  // Scalar reciprocal.
  def : Pat<(f32 (X86frcp FR32X:$src)),
            (VRCP14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
  def : Pat<(f32 (X86frcp (load addr:$src))),
            (VRCP14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
            Requires<[OptForSize]>;

  // Legacy SSE/SSE2 scalar sqrt intrinsics. The register forms go through the
  // FR32X/FR64X instructions, so the VR128X input is copied into the same
  // extended scalar class the instruction operand uses (FR32X/FR64X), as the
  // other intrinsic patterns in this file do.
  def : Pat<(int_x86_sse_sqrt_ss VR128X:$src),
            (COPY_TO_REGCLASS (VSQRTSSZr (f32 (IMPLICIT_DEF)),
                                        (COPY_TO_REGCLASS VR128X:$src, FR32X)),
                              VR128X)>;
  def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
            (VSQRTSSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;

  def : Pat<(int_x86_sse2_sqrt_sd VR128X:$src),
            (COPY_TO_REGCLASS (VSQRTSDZr (f64 (IMPLICIT_DEF)),
                                        (COPY_TO_REGCLASS VR128X:$src, FR64X)),
                              VR128X)>;
  def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
            (VSQRTSDZm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
}
4089
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004090
/// avx512_fp_unop_rm - packed unary operation taking an 8-bit immediate,
/// selected from vector intrinsics.
///
/// Produces PSr/PSm (single precision, opcode opcps, intrinsic V4F32Int) and
/// PDr/PDm (double precision, opcode opcpd, intrinsic V2F64Int).
///   OpcodeStr  - mnemonic stem; "ps" / "pd" is appended here.
///   x86memop   - memory operand of the memory forms.
///   RC         - register class of $dst and the register source.
///   mem_frag32 - load fragment used by PSm.
///   mem_frag64 - load fragment used by PDm.
///   VForm      - CD8 form passed to EVEX_CD8 on the memory forms.
multiclass avx512_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
                             X86MemOperand x86memop, RegisterClass RC,
                             PatFrag mem_frag32, PatFrag mem_frag64,
                             Intrinsic V4F32Int, Intrinsic V2F64Int,
                             CD8VForm VForm> {
  let ExeDomain = SSEPackedSingle in {
    // Single precision, register source.
    def PSr : AVX512AIi8<opcps, MRMSrcReg,
                         (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                         OpcodeStr #
                           "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>;

    // Single precision, memory source.
    def PSm : AVX512AIi8<opcps, MRMSrcMem,
                         (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                         OpcodeStr #
                           "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set RC:$dst,
                           (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
              EVEX_CD8<32, VForm>;
  } // ExeDomain = SSEPackedSingle

  let ExeDomain = SSEPackedDouble in {
    // Double precision, register source.
    def PDr : AVX512AIi8<opcpd, MRMSrcReg,
                         (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                         OpcodeStr #
                           "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>;

    // Double precision, memory source.
    def PDm : AVX512AIi8<opcpd, MRMSrcMem,
                         (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                         OpcodeStr #
                           "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set RC:$dst,
                           (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
              EVEX_CD8<64, VForm>;
  } // ExeDomain = SSEPackedDouble
}
4133
/// avx512_fp_binop_rm - scalar binary operation taking an 8-bit immediate.
///
/// For single precision (opcode opcss, intrinsic F32Int) and double precision
/// (opcode opcsd, intrinsic F64Int; VEX_W set) this produces:
///   *r     - pattern-less form on FR32X / FR64X, flagged side-effect free.
///   *r_Int - codegen-only form on VR128X selected from the intrinsic.
///   *m     - form on VR128X with a memory second source, selected from the
///            intrinsic applied to sse_load_f32 / sse_load_f64.
///   OpcodeStr - mnemonic stem; "ss" / "sd" is appended here.
multiclass avx512_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
                            string OpcodeStr,
                            Intrinsic F32Int,
                            Intrinsic F64Int> {
  let ExeDomain = GenericDomain in {
    // ---- Single precision ----

    // Plain operation, register sources.
    let hasSideEffects = 0 in
    def SSr : AVX512AIi8<opcss, MRMSrcReg,
                (outs FR32X:$dst),
                (ins FR32X:$src1, FR32X:$src2, i32i8imm:$src3),
                OpcodeStr #
                  "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                []>;

    // Intrinsic, register sources.
    let isCodeGenOnly = 1 in
    def SSr_Int : AVX512AIi8<opcss, MRMSrcReg,
                (outs VR128X:$dst),
                (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
                OpcodeStr #
                  "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                [(set VR128X:$dst,
                  (F32Int VR128X:$src1, VR128X:$src2, imm:$src3))]>;

    // Intrinsic, memory second source.
    def SSm : AVX512AIi8<opcss, MRMSrcMem,
                (outs VR128X:$dst),
                (ins VR128X:$src1, ssmem:$src2, i32i8imm:$src3),
                OpcodeStr #
                  "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                [(set VR128X:$dst,
                  (F32Int VR128X:$src1, sse_load_f32:$src2, imm:$src3))]>,
              EVEX_CD8<32, CD8VT1>;

    // ---- Double precision ----

    // Plain operation, register sources.
    let hasSideEffects = 0 in
    def SDr : AVX512AIi8<opcsd, MRMSrcReg,
                (outs FR64X:$dst),
                (ins FR64X:$src1, FR64X:$src2, i32i8imm:$src3),
                OpcodeStr #
                  "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                []>, VEX_W;

    // Intrinsic, register sources.
    let isCodeGenOnly = 1 in
    def SDr_Int : AVX512AIi8<opcsd, MRMSrcReg,
                (outs VR128X:$dst),
                (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
                OpcodeStr #
                  "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                [(set VR128X:$dst,
                  (F64Int VR128X:$src1, VR128X:$src2, imm:$src3))]>,
              VEX_W;

    // Intrinsic, memory second source.
    def SDm : AVX512AIi8<opcsd, MRMSrcMem,
                (outs VR128X:$dst),
                (ins VR128X:$src1, sdmem:$src2, i32i8imm:$src3),
                OpcodeStr #
                  "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                [(set VR128X:$dst,
                  (F64Int VR128X:$src1, sse_load_f64:$src2, imm:$src3))]>,
              VEX_W, EVEX_CD8<64, CD8VT1>;
  } // ExeDomain = GenericDomain
}
4191
/// avx512_rndscale - packed rndscale with an 8-bit immediate.
///
/// Produces a register form (r) and a memory form (m), both without selection
/// patterns; selection is done by separate Pat records.
///   opc       - opcode byte.
///   OpcodeStr - mnemonic, prepended to the operand string.
///   x86memop  - memory operand of the m form.
///   RC        - register class of $dst and the register source.
///   mem_frag  - load fragment (not referenced by the pattern-less defs).
///   d         - execution domain applied to both forms.
multiclass avx512_rndscale<bits<8> opc, string OpcodeStr,
                            X86MemOperand x86memop, RegisterClass RC,
                            PatFrag mem_frag, Domain d> {
let ExeDomain = d in {
  // Register source.
  def r : AVX512AIi8<opc, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr,
                    " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, EVEX;

  // Memory source. There is no pattern from which TableGen could infer the
  // memory access, so the load is declared explicitly.
  let mayLoad = 1 in
  def m : AVX512AIi8<opc, MRMSrcMem,
                    (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr,
                    " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, EVEX;
} // ExeDomain
}
4212
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00004213
// 512-bit packed rndscale, single precision.
defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512,
                                    memopv16f32, SSEPackedSingle>,
                    EVEX_V512, EVEX_CD8<32, CD8VF>;

// The masked intrinsic maps onto the register form when its third operand is
// the same register as the source, the fourth is (i16 -1) and the last is
// FROUND_CURRENT.
def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512
                     (v16f32 VR512:$src1), imm:$src2,
                     (v16f32 VR512:$src1), (i16 -1),
                     FROUND_CURRENT)),
          (VRNDSCALEPSZr VR512:$src1, imm:$src2)>;


// 512-bit packed rndscale, double precision (VEX_W set).
defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512,
                                    memopv8f64, SSEPackedDouble>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Same mapping as above for the double-precision intrinsic, with (i8 -1) as
// the fourth operand.
def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512
                    (v8f64 VR512:$src1), imm:$src2,
                    (v8f64 VR512:$src1), (i8 -1),
                    FROUND_CURRENT)),
          (VRNDSCALEPDZr VR512:$src1, imm:$src2)>;
4232
/// avx512_rndscale_scalar - scalar rndscale with an 8-bit immediate.
///
/// Produces a register form (r) and a memory form (m), both without selection
/// patterns; selection is done by separate Pat records.
///   opc       - opcode byte.
///   OpcodeStr - mnemonic, prepended to the operand string.
///   x86memop  - operand used for $src2 in the m form.
///   RC        - register class of $dst and the register sources.
///   d         - execution domain applied to both forms.
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                            Operand x86memop, RegisterClass RC, Domain d> {
let ExeDomain = d in {
  // All three inputs, including the $src3 immediate, must appear in the
  // assembly string; otherwise the immediate is neither printed nor parsed.
  def r : AVX512AIi8<opc, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, RC:$src2, i32i8imm:$src3),
                    !strconcat(OpcodeStr,
                    " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                    []>, EVEX_4V;

  // Memory second source. There is no pattern from which TableGen could infer
  // the memory access, so the load is declared explicitly.
  let mayLoad = 1 in
  def m : AVX512AIi8<opc, MRMSrcMem,
                    (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i32i8imm:$src3),
                    !strconcat(OpcodeStr,
                    " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                    []>, EVEX_4V;
} // ExeDomain
}
4249
// Scalar rndscale, single precision.
defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", ssmem, FR32X,
                                SSEPackedSingle>, EVEX_CD8<32, CD8VT1>;

// Scalar rndscale, double precision. This is an EVEX.W1 encoding, so VEX_W is
// required, as on the packed double-precision VRNDSCALEPDZ.
defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", sdmem, FR64X,
                                SSEPackedDouble>, VEX_W, EVEX_CD8<64, CD8VT1>;
4255
// Scalar rounding nodes selected to VRNDSCALESS/VRNDSCALESD. The immediate
// picks the operation: 0x1 ffloor, 0x2 fceil, 0x3 ftrunc, 0x4 frint,
// 0xC fnearbyint. The first instruction source is not taken from the matched
// node, so IMPLICIT_DEF is passed for it.
//
// These match plain f32/f64 nodes, which are also legal without AVX-512, so
// they must be guarded explicitly: a Pat record does not inherit the
// predicate of the instruction it emits.
let Predicates = [HasAVX512] in {
def : Pat<(f32 (ffloor FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>;
def : Pat<(f64 (ffloor FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x1))>;
def : Pat<(f32 (fnearbyint FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0xC))>;
def : Pat<(f64 (fnearbyint FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0xC))>;
def : Pat<(f32 (fceil FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x2))>;
def : Pat<(f64 (fceil FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x2))>;
def : Pat<(f32 (frint FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x4))>;
def : Pat<(f64 (frint FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x4))>;
def : Pat<(f32 (ftrunc FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x3))>;
def : Pat<(f64 (ftrunc FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>;
}
4276
// 512-bit vector rounding nodes selected to the packed rndscale register
// forms. The immediate picks the operation: 0x1 ffloor, 0x2 fceil,
// 0x3 ftrunc, 0x4 frint, 0xC fnearbyint.

// v16f32 -> VRNDSCALEPSZr
def : Pat<(v16f32 (ffloor VR512:$src)),
          (VRNDSCALEPSZr VR512:$src, (i32 0x1))>;
def : Pat<(v16f32 (fnearbyint VR512:$src)),
          (VRNDSCALEPSZr VR512:$src, (i32 0xC))>;
def : Pat<(v16f32 (fceil VR512:$src)),
          (VRNDSCALEPSZr VR512:$src, (i32 0x2))>;
def : Pat<(v16f32 (frint VR512:$src)),
          (VRNDSCALEPSZr VR512:$src, (i32 0x4))>;
def : Pat<(v16f32 (ftrunc VR512:$src)),
          (VRNDSCALEPSZr VR512:$src, (i32 0x3))>;

// v8f64 -> VRNDSCALEPDZr
def : Pat<(v8f64 (ffloor VR512:$src)),
          (VRNDSCALEPDZr VR512:$src, (i32 0x1))>;
def : Pat<(v8f64 (fnearbyint VR512:$src)),
          (VRNDSCALEPDZr VR512:$src, (i32 0xC))>;
def : Pat<(v8f64 (fceil VR512:$src)),
          (VRNDSCALEPDZr VR512:$src, (i32 0x2))>;
def : Pat<(v8f64 (frint VR512:$src)),
          (VRNDSCALEPDZr VR512:$src, (i32 0x4))>;
def : Pat<(v8f64 (ftrunc VR512:$src)),
          (VRNDSCALEPDZr VR512:$src, (i32 0x3))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004298
4299//-------------------------------------------------
4300// Integer truncate and extend operations
4301//-------------------------------------------------
4302
/// avx512_trunc_sat - integer truncating moves (vpmov*, vpmovs*, vpmovus*).
///
/// Produces pattern-less register-destination forms (rr, rrk, rrkz) and
/// memory-destination forms (mr, mrk). The k / kz suffixes denote the forms
/// that take a KRC:$mask operand and set EVEX_K / EVEX_KZ respectively.
///   opc       - opcode byte.
///   OpcodeStr - mnemonic, prepended to the operand string.
///   dstRC     - register class of the register destination.
///   srcRC     - register class of the source.
///   KRC       - mask register class.
///   x86memop  - memory operand of the store forms.
multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr,
                          RegisterClass dstRC, RegisterClass srcRC,
                          RegisterClass KRC, X86MemOperand x86memop> {
  def rr : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
               (ins srcRC:$src),
               !strconcat(OpcodeStr," \t{$src, $dst|$dst, $src}"),
               []>, EVEX;

  def rrk : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
               (ins KRC:$mask, srcRC:$src),
               !strconcat(OpcodeStr,
                 " \t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
               []>, EVEX, EVEX_K;

  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
               (ins KRC:$mask, srcRC:$src),
               !strconcat(OpcodeStr,
                 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
               []>, EVEX, EVEX_KZ;

  // The memory-destination forms have no pattern from which TableGen could
  // infer the memory write, so the store is declared explicitly.
  let mayStore = 1 in {
  def mr : AVX512XS8I<opc, MRMDestMem, (outs), (ins x86memop:$dst, srcRC:$src),
               !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
               []>, EVEX;

  def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, KRC:$mask, srcRC:$src),
               !strconcat(OpcodeStr, " \t{$src, $dst {${mask}}|${dst} {${mask}}, $src}"),
               []>, EVEX, EVEX_K;
  }

}
// Truncating moves from a 512-bit source. Each group of three shares operand
// classes and differs only in opcode and mnemonic.

// Source mask class VK8WM, 128-bit destination, CD8<8, CD8VO>.
defm VPMOVQB   : avx512_trunc_sat<0x32, "vpmovqb",   VR128X, VR512, VK8WM,
                                  i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVSQB  : avx512_trunc_sat<0x22, "vpmovsqb",  VR128X, VR512, VK8WM,
                                  i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM,
                                  i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;

// Source mask class VK8WM, 128-bit destination, CD8<16, CD8VQ>.
defm VPMOVQW   : avx512_trunc_sat<0x34, "vpmovqw",   VR128X, VR512, VK8WM,
                                  i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVSQW  : avx512_trunc_sat<0x24, "vpmovsqw",  VR128X, VR512, VK8WM,
                                  i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM,
                                  i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;

// Source mask class VK8WM, 256-bit destination, CD8<32, CD8VH>.
defm VPMOVQD   : avx512_trunc_sat<0x35, "vpmovqd",   VR256X, VR512, VK8WM,
                                  i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
defm VPMOVSQD  : avx512_trunc_sat<0x25, "vpmovsqd",  VR256X, VR512, VK8WM,
                                  i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM,
                                  i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;

// Source mask class VK16WM, 256-bit destination, CD8<16, CD8VH>.
defm VPMOVDW   : avx512_trunc_sat<0x33, "vpmovdw",   VR256X, VR512, VK16WM,
                                  i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVSDW  : avx512_trunc_sat<0x23, "vpmovsdw",  VR256X, VR512, VK16WM,
                                  i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM,
                                  i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;

// Source mask class VK16WM, 128-bit destination, CD8<8, CD8VQ>.
defm VPMOVDB   : avx512_trunc_sat<0x31, "vpmovdb",   VR128X, VR512, VK16WM,
                                  i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVSDB  : avx512_trunc_sat<0x21, "vpmovsdb",  VR128X, VR512, VK16WM,
                                  i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM,
                                  i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;

// X86vtrunc selects the plain register forms.
def : Pat<(v16i8  (X86vtrunc (v8i64  VR512:$src))), (VPMOVQBrr VR512:$src)>;
def : Pat<(v8i16  (X86vtrunc (v8i64  VR512:$src))), (VPMOVQWrr VR512:$src)>;
def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr VR512:$src)>;
def : Pat<(v16i8  (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>;
def : Pat<(v8i32  (X86vtrunc (v8i64  VR512:$src))), (VPMOVQDrr VR512:$src)>;

// X86vtruncm selects the rrkz register forms, passing the mask through.
def : Pat<(v16i8  (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
          (VPMOVDBrrkz VK16WM:$mask, VR512:$src)>;
def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
          (VPMOVDWrrkz VK16WM:$mask, VR512:$src)>;
def : Pat<(v8i16  (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
          (VPMOVQWrrkz VK8WM:$mask, VR512:$src)>;
def : Pat<(v8i32  (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
          (VPMOVQDrrkz VK8WM:$mask, VR512:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004378
4379
/// avx512_extend - integer extending moves.
///
/// Produces register-source forms (rr, rrk, rrkz) and memory-source forms
/// (rm, rmk, rmkz). Only rr and rm carry a selection pattern; the k / kz
/// forms take a KRC:$mask operand and set EVEX_K / EVEX_KZ respectively.
///   opc       - opcode byte.
///   OpcodeStr - mnemonic, prepended to the operand string.
///   KRC       - mask register class.
///   DstRC     - destination register class.
///   SrcRC     - source register class.
///   OpNode    - DAG node matched by rr and rm.
///   mem_frag  - load fragment used by the rm pattern.
///   x86memop  - memory operand of the memory forms.
///   OpVT      - result value type.
///   InVT      - source value type.
multiclass avx512_extend<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                      RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode,
                      PatFrag mem_frag, X86MemOperand x86memop,
                      ValueType OpVT, ValueType InVT> {

  def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
              (ins SrcRC:$src),
              !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (OpVT (OpNode (InVT SrcRC:$src))))]>, EVEX;

  // No trailing blank before the '|' variant separator, matching the other
  // masked forms.
  def rrk : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
              (ins KRC:$mask, SrcRC:$src),
              !strconcat(OpcodeStr, " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              []>, EVEX, EVEX_K;

  def rrkz : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
              (ins KRC:$mask, SrcRC:$src),
              !strconcat(OpcodeStr, " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
              []>, EVEX, EVEX_KZ;

  let mayLoad = 1 in {
    def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins x86memop:$src),
              !strconcat(OpcodeStr," \t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst,
                (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))]>,
              EVEX;

    // No trailing blank before the '|' variant separator (see rrk).
    def rmk : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins KRC:$mask, x86memop:$src),
              !strconcat(OpcodeStr," \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              []>,
              EVEX, EVEX_K;

    def rmkz : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins KRC:$mask, x86memop:$src),
              !strconcat(OpcodeStr," \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
              []>,
              EVEX, EVEX_KZ;
  }
}
4421
// 512-bit X86vzext instantiations of avx512_extend.  The arguments after the
// mnemonic are: mask RC, destination RC, source RC, node, load fragment,
// memory operand, result type, source type.
defm VPMOVZXBDZ : avx512_extend<0x31, "vpmovzxbd", VK16WM, VR512, VR128X,
                                X86vzext, memopv2i64, i128mem, v16i32, v16i8>,
                  EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVZXBQZ : avx512_extend<0x32, "vpmovzxbq", VK8WM, VR512, VR128X,
                                X86vzext, memopv2i64, i128mem, v8i64, v16i8>,
                  EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVZXWDZ : avx512_extend<0x33, "vpmovzxwd", VK16WM, VR512, VR256X,
                                X86vzext, memopv4i64, i256mem, v16i32, v16i16>,
                  EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVZXWQZ : avx512_extend<0x34, "vpmovzxwq", VK8WM, VR512, VR128X,
                                X86vzext, memopv2i64, i128mem, v8i64, v8i16>,
                  EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVZXDQZ : avx512_extend<0x35, "vpmovzxdq", VK8WM, VR512, VR256X,
                                X86vzext, memopv4i64, i256mem, v8i64, v8i32>,
                  EVEX_V512, EVEX_CD8<32, CD8VH>;

// The same five shapes built on X86vsext.
defm VPMOVSXBDZ : avx512_extend<0x21, "vpmovsxbd", VK16WM, VR512, VR128X,
                                X86vsext, memopv2i64, i128mem, v16i32, v16i8>,
                  EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVSXBQZ : avx512_extend<0x22, "vpmovsxbq", VK8WM, VR512, VR128X,
                                X86vsext, memopv2i64, i128mem, v8i64, v16i8>,
                  EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVSXWDZ : avx512_extend<0x23, "vpmovsxwd", VK16WM, VR512, VR256X,
                                X86vsext, memopv4i64, i256mem, v16i32, v16i16>,
                  EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVSXWQZ : avx512_extend<0x24, "vpmovsxwq", VK8WM, VR512, VR128X,
                                X86vsext, memopv2i64, i128mem, v8i64, v8i16>,
                  EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVSXDQZ : avx512_extend<0x25, "vpmovsxdq", VK8WM, VR512, VR256X,
                                X86vsext, memopv4i64, i256mem, v8i64, v8i32>,
                  EVEX_V512, EVEX_CD8<32, CD8VH>;
4453
4454//===----------------------------------------------------------------------===//
4455// GATHER - SCATTER Operations
4456
// Gather instruction: a single pattern-less `rm` definition.
//   opc       - opcode byte
//   OpcodeStr - mnemonic
//   KRC       - register class of the mask ($mask in, $mask_wb out)
//   RC        - register class of $dst / $src1
//   memop     - memory operand ($src2)
// The constraint string ties $src1 to $dst and $mask to the written-back
// $mask_wb, and marks $dst as early-clobber.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                         RegisterClass RC, X86MemOperand memop> {
  let mayLoad = 1,
      Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in
  def rm : AVX5128I<opc, MRMSrcMem,
                    (outs RC:$dst, KRC:$mask_wb),
                    (ins RC:$src1, KRC:$mask, memop:$src2),
                    OpcodeStr #
                      " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}",
                    []>,
           EVEX, EVEX_K;
}
Cameron McInally45325962014-03-26 13:50:50 +00004467
// Floating-point gathers, grouped by execution domain.
let ExeDomain = SSEPackedDouble in {
  defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd",
                                   VK8WM, VR512, vy64xmem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd",
                                   VK8WM, VR512, vz64mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
}

let ExeDomain = SSEPackedSingle in {
  defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps",
                                   VK16WM, VR512, vz32mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;
  defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps",
                                   VK8WM, VR256X, vz64mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;
}

// Integer gathers (no ExeDomain override).
defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq",
                                 VK8WM, VR512, vy64xmem>,
                   EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd",
                                 VK16WM, VR512, vz32mem>,
                   EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq",
                                 VK8WM, VR512, vz64mem>,
                   EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd",
                                 VK8WM, VR256X, vz64mem>,
                   EVEX_V512, EVEX_CD8<32, CD8VT1>;
4491
// Scatter instruction: a single pattern-less `mr` definition.
//   opc       - opcode byte
//   OpcodeStr - mnemonic
//   KRC       - register class of the mask ($mask in, $mask_wb out)
//   RC        - register class of the stored register ($src2)
//   memop     - memory operand ($dst)
// The only output is the written-back mask, tied to the incoming $mask.
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                          RegisterClass RC, X86MemOperand memop> {
  let mayStore = 1,
      Constraints = "$mask = $mask_wb" in
  def mr : AVX5128I<opc, MRMDestMem,
                    (outs KRC:$mask_wb),
                    (ins memop:$dst, KRC:$mask, RC:$src2),
                    OpcodeStr #
                      " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}",
                    []>,
           EVEX, EVEX_K;
}
4501
// Floating-point scatters, grouped by execution domain.
let ExeDomain = SSEPackedDouble in {
  defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd",
                                     VK8WM, VR512, vy64xmem>,
                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd",
                                     VK8WM, VR512, vz64mem>,
                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
}

let ExeDomain = SSEPackedSingle in {
  defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps",
                                     VK16WM, VR512, vz32mem>,
                      EVEX_V512, EVEX_CD8<32, CD8VT1>;
  defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps",
                                     VK8WM, VR256X, vz64mem>,
                      EVEX_V512, EVEX_CD8<32, CD8VT1>;
}

// Integer scatters (no ExeDomain override).
defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq",
                                   VK8WM, VR512, vy64xmem>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd",
                                   VK16WM, VR512, vz32mem>,
                    EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq",
                                   VK8WM, VR512, vz64mem>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd",
                                   VK8WM, VR256X, vz64mem>,
                    EVEX_V512, EVEX_CD8<32, CD8VT1>;
4525
// Gather/scatter prefetch: one pattern-less `m` definition with no outputs,
// taking a mask and a memory operand.  The definition is predicated on HasPFI
// and marked hasSideEffects.
//   opc       - opcode byte
//   F         - instruction format (the instantiations pass MRM1m/2m/5m/6m)
//   OpcodeStr - mnemonic
//   KRC       - register class of $mask
//   memop     - memory operand ($src)
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F,
                                          string OpcodeStr,
                                          RegisterClass KRC,
                                          X86MemOperand memop> {
  let Predicates = [HasPFI],
      hasSideEffects = 1 in
  def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
                   OpcodeStr # " \t{$src {${mask}}|{${mask}}, $src}",
                   []>,
          EVEX, EVEX_K;
}
4534
// vgatherpf0* : format MRM1m.
defm VGATHERPF0DPS : avx512_gather_scatter_prefetch<0xC6, MRM1m,
                       "vgatherpf0dps", VK16WM, vz32mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS : avx512_gather_scatter_prefetch<0xC7, MRM1m,
                       "vgatherpf0qps", VK8WM, vz64mem>,
                     EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD : avx512_gather_scatter_prefetch<0xC6, MRM1m,
                       "vgatherpf0dpd", VK8WM, vy32mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD : avx512_gather_scatter_prefetch<0xC7, MRM1m,
                       "vgatherpf0qpd", VK8WM, vz64mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// vgatherpf1* : format MRM2m.
defm VGATHERPF1DPS : avx512_gather_scatter_prefetch<0xC6, MRM2m,
                       "vgatherpf1dps", VK16WM, vz32mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS : avx512_gather_scatter_prefetch<0xC7, MRM2m,
                       "vgatherpf1qps", VK8WM, vz64mem>,
                     EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD : avx512_gather_scatter_prefetch<0xC6, MRM2m,
                       "vgatherpf1dpd", VK8WM, vy32mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD : avx512_gather_scatter_prefetch<0xC7, MRM2m,
                       "vgatherpf1qpd", VK8WM, vz64mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// vscatterpf0* : format MRM5m.
defm VSCATTERPF0DPS : avx512_gather_scatter_prefetch<0xC6, MRM5m,
                        "vscatterpf0dps", VK16WM, vz32mem>,
                      EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS : avx512_gather_scatter_prefetch<0xC7, MRM5m,
                        "vscatterpf0qps", VK8WM, vz64mem>,
                      EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD : avx512_gather_scatter_prefetch<0xC6, MRM5m,
                        "vscatterpf0dpd", VK8WM, vy32mem>,
                      EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD : avx512_gather_scatter_prefetch<0xC7, MRM5m,
                        "vscatterpf0qpd", VK8WM, vz64mem>,
                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// vscatterpf1* : format MRM6m.
defm VSCATTERPF1DPS : avx512_gather_scatter_prefetch<0xC6, MRM6m,
                        "vscatterpf1dps", VK16WM, vz32mem>,
                      EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS : avx512_gather_scatter_prefetch<0xC7, MRM6m,
                        "vscatterpf1qps", VK8WM, vz64mem>,
                      EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD : avx512_gather_scatter_prefetch<0xC6, MRM6m,
                        "vscatterpf1dpd", VK8WM, vy32mem>,
                      EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD : avx512_gather_scatter_prefetch<0xC7, MRM6m,
                        "vscatterpf1qpd", VK8WM, vz64mem>,
                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004582//===----------------------------------------------------------------------===//
4583// VSHUFPS - VSHUFPD Operations
4584
// Shuffle with an 8-bit immediate (opcode 0xC6), in a register-memory form
// (rmi) and a register-register form (rri).  Both are selected from X86Shufp.
//   RC        - register class of $dst, $src1 and (for rri) $src2
//   x86memop  - memory operand of rmi
//   vt        - value type of the X86Shufp result
//   OpcodeStr - mnemonic
//   mem_frag  - load fragment matched for the memory operand of rmi
//   d         - execution domain passed through to AVX512PIi8
multiclass avx512_shufp<RegisterClass RC, X86MemOperand x86memop,
                        ValueType vt, string OpcodeStr, PatFrag mem_frag,
                        Domain d> {
  def rmi : AVX512PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
                       (ins RC:$src1, x86memop:$src2, i8imm:$src3),
                       OpcodeStr #
                         " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                       [(set RC:$dst,
                         (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
                                       (i8 imm:$src3))))],
                       d, IIC_SSE_SHUFP>,
            EVEX_4V, Sched<[WriteShuffleLd, ReadAfterLd]>;

  def rri : AVX512PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
                       (ins RC:$src1, RC:$src2, i8imm:$src3),
                       OpcodeStr #
                         " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                       [(set RC:$dst,
                         (vt (X86Shufp RC:$src1, RC:$src2,
                                       (i8 imm:$src3))))],
                       d, IIC_SSE_SHUFP>,
            EVEX_4V, Sched<[WriteShuffle]>;
}
4603
// 512-bit packed-single and packed-double shuffles.
defm VSHUFPSZ : avx512_shufp<VR512, f512mem, v16f32, "vshufps", memopv16f32,
                             SSEPackedSingle>,
                PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VSHUFPDZ : avx512_shufp<VR512, f512mem, v8f64, "vshufpd", memopv8f64,
                             SSEPackedDouble>,
                PD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// The multiclass patterns only cover the floating-point types; also select the
// same instructions for X86Shufp on the integer vector types of equal element
// width.
def : Pat<(v16i32 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
          (VSHUFPSZrri VR512:$src1, VR512:$src2, imm:$imm)>;
def : Pat<(v16i32 (X86Shufp VR512:$src1, (memopv16i32 addr:$src2),
                            (i8 imm:$imm))),
          (VSHUFPSZrmi VR512:$src1, addr:$src2, imm:$imm)>;

def : Pat<(v8i64 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
          (VSHUFPDZrri VR512:$src1, VR512:$src2, imm:$imm)>;
def : Pat<(v8i64 (X86Shufp VR512:$src1, (memopv8i64 addr:$src2),
                           (i8 imm:$imm))),
          (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004620
// valign{d,q}: every type-dependent piece (register class, write-mask class,
// value types, memory operand, mnemonic suffix) is taken from the single
// X86VectorVTInfo argument `_`.
multiclass avx512_valign<X86VectorVTInfo _> {
  // Register form, generated through AVX512_masking.  Note that the X86VAlign
  // node takes the two vector operands in the opposite order from the
  // instruction: node ($src2, $src1) <-> instruction ($src1, $src2).
  defm rri : AVX512_masking<0x03, MRMSrcReg, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2, i8imm:$src3),
                            "valign"##_.Suffix,
                            "$src3, $src2, $src1", "$src1, $src2, $src3",
                            (_.VT (X86VAlign _.RC:$src2, _.RC:$src1,
                                             (i8 imm:$src3))),
                            _.VT, _.RC, _.KRCWM>,
             AVX512AIi8Base, EVEX_4V;

  // Also match valign of packed floats, with the same operand swap.
  def : Pat<(_.FloatVT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
            (!cast<Instruction>(NAME##rri) _.RC:$src2, _.RC:$src1,
                                           imm:$imm)>;

  // Memory form; no selection pattern, so mayLoad is set by hand.
  let mayLoad = 1 in
  def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs _.RC:$dst),
                       (ins _.RC:$src1, _.MemOp:$src2, i8imm:$src3),
                       !strconcat("valign"##_.Suffix,
                                  " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                       []>,
            EVEX_4V;
}
defm VALIGND : avx512_valign<v16i32_info>,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VALIGNQ : avx512_valign<v8i64_info>,
               VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004645
// Helper fragments to match sext vXi1 to vXiY: an X86vsrai of a 512-bit
// register by (element width - 1), i.e. 31 for i32 lanes and 63 for i64 lanes.
def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
def v8i1sextv8i64   : PatLeaf<(v8i64  (X86vsrai VR512:$src, (i8 63)))>;
4649
// Packed absolute-value style unary instructions.  Every definition is
// pattern-less (selection is done by separate Pat records) and exists in an
// unmasked form, a {mask} form (EVEX_K) and a {mask} {z} form (EVEX_KZ) for
// each of three source kinds: register (rr*), full-vector memory (rm*) and
// broadcast scalar memory (rmb*, EVEX_B).
//
//   opc           - opcode byte
//   OpcodeStr     - mnemonic
//   OpVT          - accepted for interface compatibility; not referenced by
//                   any definition in this multiclass
//   KRC           - register class of $mask
//   RC            - register class of $dst and of the register source
//   x86memop      - memory operand of the rm* forms
//   x86scalar_mop - memory operand of the broadcast rmb* forms
//   BrdcstStr     - broadcast decoration appended to ${src} in the rmb*
//                   assembly strings (e.g. "{1to16}")
//
// The destination of the memory forms uses RC, like the register forms, rather
// than a hard-coded VR512, so the multiclass honours its RC parameter for
// every definition.  Existing instantiations pass VR512 and are unaffected.
multiclass avx512_vpabs<bits<8> opc, string OpcodeStr, ValueType OpVT,
                        RegisterClass KRC, RegisterClass RC,
                        X86MemOperand x86memop, X86MemOperand x86scalar_mop,
                        string BrdcstStr> {
  // Register source.
  def rr    : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                       !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                       []>, EVEX;
  def rrk   : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                       (ins KRC:$mask, RC:$src),
                       !strconcat(OpcodeStr,
                         " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
                       []>, EVEX, EVEX_K;
  def rrkz  : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                       (ins KRC:$mask, RC:$src),
                       !strconcat(OpcodeStr,
                         " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
                       []>, EVEX, EVEX_KZ;

  // Memory sources; mayLoad must be explicit because there are no patterns.
  let mayLoad = 1 in {
    // Full-vector memory operand.
    def rm    : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                         (ins x86memop:$src),
                         !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                         []>, EVEX;
    def rmk   : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                         (ins KRC:$mask, x86memop:$src),
                         !strconcat(OpcodeStr,
                           " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
                         []>, EVEX, EVEX_K;
    def rmkz  : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                         (ins KRC:$mask, x86memop:$src),
                         !strconcat(OpcodeStr,
                           " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
                         []>, EVEX, EVEX_KZ;

    // Broadcast scalar memory operand.
    def rmb   : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                         (ins x86scalar_mop:$src),
                         !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                                    ", $dst|$dst, ${src}", BrdcstStr, "}"),
                         []>, EVEX, EVEX_B;
    def rmbk  : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                         (ins KRC:$mask, x86scalar_mop:$src),
                         !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                                    ", $dst {${mask}}|$dst {${mask}}, ${src}",
                                    BrdcstStr, "}"),
                         []>, EVEX, EVEX_B, EVEX_K;
    def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                         (ins KRC:$mask, x86scalar_mop:$src),
                         !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                                    ", $dst {${mask}} {z}|$dst {${mask}} {z}, ${src}",
                                    BrdcstStr, "}"),
                         []>, EVEX, EVEX_B, EVEX_KZ;
  }
}
4697
defm VPABSDZ : avx512_vpabs<0x1E, "vpabsd", v16i32, VK16WM, VR512,
                            i512mem, i32mem, "{1to16}">,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPABSQZ : avx512_vpabs<0x1F, "vpabsq", v8i64, VK8WM, VR512,
                            i512mem, i64mem, "{1to8}">,
               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Select the rr forms for the expanded idiom xor(s, add(x, s)), where s is the
// sign-fill fragment (v16i1sextv16i32 / v8i1sextv8i64).
def : Pat<(xor (bc_v16i32 (v16i1sextv16i32)),
               (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
          (VPABSDZrr VR512:$src)>;
def : Pat<(xor (bc_v8i64 (v8i1sextv8i64)),
               (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
          (VPABSQZrr VR512:$src)>;

// Masked intrinsics called with an all-zeros second operand and an all-ones
// (-1) mask also select the unmasked rr forms.
def : Pat<(v16i32 (int_x86_avx512_mask_pabs_d_512 (v16i32 VR512:$src),
                                                  (v16i32 immAllZerosV),
                                                  (i16 -1))),
          (VPABSDZrr VR512:$src)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pabs_q_512 (v8i64 VR512:$src),
                                                 (bc_v8i64 (v16i32 immAllZerosV)),
                                                 (i8 -1))),
          (VPABSQZrr VR512:$src)>;
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00004720
// Unary instruction family shared by vpconflict* and vplzcnt*.  All
// definitions are pattern-less.  Sources: register (rr*), full-vector memory
// (rm*) and broadcast scalar memory (rmb*, EVEX_B).  Masking: none, {mask} {z}
// (EVEX_KZ), or {mask} (EVEX_K); the EVEX_K forms take an extra $src1 operand
// tied to $dst.
//
//   opc           - opcode byte
//   OpcodeStr     - mnemonic
//   RC            - register class of $dst and the register operands
//   KRC           - register class of $mask
//   x86memop      - memory operand of the rm* forms
//   x86scalar_mop - memory operand of the rmb* forms
//   BrdcstStr     - broadcast decoration appended to the memory operand in
//                   the rmb* assembly strings
//
// NOTE(review): unlike avx512_vpabs and avx512_extend in this file, the memory
// forms here are not wrapped in `let mayLoad = 1` -- confirm whether that is
// intentional.
multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
                           RegisterClass RC, RegisterClass KRC,
                           X86MemOperand x86memop,
                           X86MemOperand x86scalar_mop, string BrdcstStr> {
  // Unmasked forms.
  def rr    : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                       OpcodeStr # " \t{$src, ${dst} |${dst}, $src}",
                       []>,
              EVEX;
  def rm    : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                       OpcodeStr # " \t{$src, ${dst}|${dst}, $src}",
                       []>,
              EVEX;
  def rmb   : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                       (ins x86scalar_mop:$src),
                       OpcodeStr # " \t{${src}" # BrdcstStr #
                         ", ${dst}|${dst}, ${src}" # BrdcstStr # "}",
                       []>,
              EVEX, EVEX_B;

  // {mask} {z} forms.
  def rrkz  : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                       (ins KRC:$mask, RC:$src),
                       OpcodeStr #
                         " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                       []>,
              EVEX, EVEX_KZ;
  def rmkz  : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                       (ins KRC:$mask, x86memop:$src),
                       OpcodeStr #
                         " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                       []>,
              EVEX, EVEX_KZ;
  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                       (ins KRC:$mask, x86scalar_mop:$src),
                       OpcodeStr # " \t{${src}" # BrdcstStr #
                         ", ${dst} {${mask}} {z}|${dst} {${mask}} {z}, ${src}" #
                         BrdcstStr # "}",
                       []>,
              EVEX, EVEX_KZ, EVEX_B;

  // {mask} forms: $src1 is tied to the destination.
  let Constraints = "$src1 = $dst" in {
    def rrk   : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                         (ins RC:$src1, KRC:$mask, RC:$src2),
                         OpcodeStr #
                           " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}",
                         []>,
                EVEX, EVEX_K;
    def rmk   : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                         (ins RC:$src1, KRC:$mask, x86memop:$src2),
                         OpcodeStr #
                           " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}",
                         []>,
                EVEX, EVEX_K;
    def rmbk  : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                         (ins RC:$src1, KRC:$mask, x86scalar_mop:$src2),
                         OpcodeStr # " \t{${src2}" # BrdcstStr #
                           ", ${dst} {${mask}}|${dst} {${mask}}, ${src2}" #
                           BrdcstStr # "}",
                         []>,
                EVEX, EVEX_K, EVEX_B;
  }
}
4773
// VPCONFLICTD/Q and the patterns that select them.  The intrinsic patterns are
// kept inside the HasCDI scope together with the instructions they emit, so a
// pattern can never select a VPCONFLICT* instruction for a subtarget that the
// instruction definitions themselves exclude.
let Predicates = [HasCDI] in {
defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM,
                                   i512mem, i32mem, "{1to16}">,
                   EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM,
                                   i512mem, i64mem, "{1to8}">,
                   EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Masked intrinsics -> rrk form.  The intrinsic's first operand becomes the
// instruction's $src2, its second operand becomes $src1 (the operand tied to
// $dst), and the GPR mask is copied into the write-mask register class.
def : Pat<(int_x86_avx512_mask_conflict_d_512 VR512:$src2, VR512:$src1,
                                              GR16:$mask),
          (VPCONFLICTDrrk VR512:$src1,
           (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;

def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1,
                                              GR8:$mask),
          (VPCONFLICTQrrk VR512:$src1,
           (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
}
Elena Demikhovskycf0b9ba2014-04-09 12:37:50 +00004795
// VPLZCNTD/Q (built from the avx512_conflict multiclass) and the patterns that
// select them.  All patterns live inside the HasCDI scope so that neither the
// intrinsics nor a generic ctlz node can select a VPLZCNT* instruction on a
// subtarget for which the instruction definitions are disabled.
let Predicates = [HasCDI] in {
defm VPLZCNTD : avx512_conflict<0x44, "vplzcntd", VR512, VK16WM,
                                i512mem, i32mem, "{1to16}">,
                EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPLZCNTQ : avx512_conflict<0x44, "vplzcntq", VR512, VK8WM,
                                i512mem, i64mem, "{1to8}">,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Masked intrinsics -> rrk form.  The intrinsic's first operand becomes the
// instruction's $src2, its second operand becomes $src1 (the operand tied to
// $dst), and the GPR mask is copied into the write-mask register class.
def : Pat<(int_x86_avx512_mask_lzcnt_d_512 VR512:$src2, VR512:$src1,
                                           GR16:$mask),
          (VPLZCNTDrrk VR512:$src1,
           (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;

def : Pat<(int_x86_avx512_mask_lzcnt_q_512 VR512:$src2, VR512:$src1,
                                           GR8:$mask),
          (VPLZCNTQrrk VR512:$src1,
           (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;

// Generic ctlz on 512-bit integer vectors, from memory or from a register.
def : Pat<(v16i32 (ctlz (memopv16i32 addr:$src))),
          (VPLZCNTDrm addr:$src)>;
def : Pat<(v16i32 (ctlz (v16i32 VR512:$src))),
          (VPLZCNTDrr VR512:$src)>;
def : Pat<(v8i64 (ctlz (memopv8i64 addr:$src))),
          (VPLZCNTQrm addr:$src)>;
def : Pat<(v8i64 (ctlz (v8i64 VR512:$src))),
          (VPLZCNTQrr VR512:$src)>;
}
4826
// Stores of constant i1 values become byte stores of an immediate: both -1 and
// 1 are written as 1, and 0 as 0.
def : Pat<(store (i1 -1), addr:$dst),
          (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 1), addr:$dst),
          (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 0), addr:$dst),
          (MOV8mi addr:$dst, (i8 0))>;

// Store of a VK1 mask register: copy it into VK16 and store with KMOVWmk.
// NOTE(review): KMOVWmk is presumably a 16-bit mask store, which would write
// more than the single byte used by the constant-i1 patterns above -- confirm
// that the wider store is acceptable here.
def : Pat<(store VK1:$src, addr:$dst),
          (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK16))>;

// A truncstore whose memory type is i1.
def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
                           (truncstore node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i1;
}]>;

// Truncating an 8-bit GPR to i1 in memory is a plain byte store.
def : Pat<(truncstorei1 GR8:$src, addr:$dst),
          (MOV8mr addr:$dst, GR8:$src)>;