blob: 047686fcdc64f872b3c6f4a418a95952959c7261 [file] [log] [blame]
// Bundles the template arguments that follow from a vector type
// (NumElts x EltVT): the write-mask register classes, memory operands, load
// fragments and so on.  Multiclasses can then take one of these records as a
// single template argument instead of a long list of individual arguments.
class X86VectorVTInfo<int NumElts, ValueType EltVT, RegisterClass rc,
                      string suffix = ""> {
  // Register class passed in for the vector itself.
  RegisterClass RC = rc;

  // Matching mask register class, looked up by name as VK<NumElts>.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Matching write-mask register class, looked up as VK<NumElts>WM.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // GPR register class that can hold the write mask; GR8 is used for fewer
  // than 8 elements.  TableGen has no !lt, so "NumElts < 8" is spelled as
  // "(NumElts >> 3) == 0".
  RegisterClass MRC =
    !cast<RegisterClass>("GR" #
                         !if (!eq (!srl(NumElts, 3), 0), 8, NumElts));

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // Name of the vector type, e.g. "v16i32".
  string VTName = "v" # NumElts # EltVT;

  // The vector VT record with that name.
  ValueType VT = !cast<ValueType>(VTName);

  // Name of the element type, e.g. "i32".
  string EltTypeName = !cast<string>(EltVT);

  // Element size in bits (e.g. 32 for v16i32), first as a string obtained by
  // stripping the "i"/"f" prefix, then as an integer.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types.
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of the vector type in bits (taken from VT), e.g. 512 for a type
  // that lives in VR512.
  int Size = VT.Size;

  // The corresponding vector memory operand, e.g. i512mem for VR512, and the
  // memory operand for a single element.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");

  // Load fragments.
  // Note: for 128/256-bit integer VTs loadv2i64/loadv4i64 are chosen because
  // of load promotion during legalization.
  PatFrag LdFrag = !cast<PatFrag>("load" #
                                  !if (!eq (TypeVariantName, "i"),
                                       !if (!eq (Size, 128), "v2i64",
                                       !if (!eq (Size, 256), "v4i64",
                                            VTName)), VTName));
  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);

  // The corresponding float type, e.g. v16f32 for v16i32.
  // Note: for EltSize < 32 the float type is illegal and TableGen would fail
  // to compile, so FloatVT falls back to VT in that case.
  ValueType FloatVT = !cast<ValueType>(
                        !if (!eq (!srl(EltSize,5),0),
                             VTName,
                             !if (!eq(TypeVariantName, "i"),
                                  "v" # NumElts # "f" # EltSize,
                                  VTName)));

  // The string that specifies embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // Sub-register index for 128-bit (sub_xmm) and 256-bit (sub_ymm) vectors;
  // left unset for any other size.
  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  // Execution domain derived from the element type.
  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                     SSEPackedInt));
}
75
// 512-bit integer vector types (RC = VR512).
def v64i8_info   : X86VectorVTInfo<64, i8,  VR512, "b">;
def v32i16_info  : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info  : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info   : X86VectorVTInfo<8,  i64, VR512, "q">;

// 256-bit integer vector types; the "x" in v32i8x_info means RC = VR256X.
def v32i8x_info  : X86VectorVTInfo<32, i8,  VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;

// 128-bit integer vector types (RC = VR128X).
def v16i8x_info  : X86VectorVTInfo<16, i8,  VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
91
// Groups the 512-, 256- and 128-bit X86VectorVTInfo records that share one
// element type, so that a single argument can describe all three widths.
class AVX512VLVectorVTInfo<X86VectorVTInfo i512,
                           X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;  // 512-bit variant
  X86VectorVTInfo info256 = i256;  // 256-bit variant
  X86VectorVTInfo info128 = i128;  // 128-bit variant
}
98
// One width-triple (512/256/128) per integer element type.
def avx512vl_i8_info  :
  AVX512VLVectorVTInfo<v64i8_info,  v32i8x_info,  v16i8x_info>;
def avx512vl_i16_info :
  AVX512VLVectorVTInfo<v32i16_info, v16i16x_info, v8i16x_info>;
def avx512vl_i32_info :
  AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,  v4i32x_info>;
def avx512vl_i64_info :
  AVX512VLVectorVTInfo<v8i64_info,  v4i64x_info,  v2i64x_info>;
107
108
// Common base of AVX512_masking and AVX512_masking_3src.  Emits three
// instructions from one description:
//   NAME    - the unmasked form, selecting RHS;
//   NAME#k  - the merge-masking form, selecting MaskingRHS;
//   NAME#kz - the zero-masking form, selecting RHS under the mask and an
//             all-zeros vector of type OpVT elsewhere.
// Ins / MaskingIns / ZeroMaskingIns are the input operand lists of the three
// forms; AttSrcAsm / IntelSrcAsm are the source-operand parts of the AT&T and
// Intel assembly strings.
multiclass AVX512_masking_common<bits<8> O, Format F, dag Outs, dag Ins,
                                 dag MaskingIns, dag ZeroMaskingIns,
                                 string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskingRHS, ValueType OpVT,
                                 RegisterClass RC, RegisterClass KRC,
                                 string MaskingConstraint = ""> {
  def NAME: AVX512<O, F, Outs, Ins,
                   !strconcat(OpcodeStr, " \t{", AttSrcAsm,
                              ", $dst|$dst, ", IntelSrcAsm, "}"),
                   [(set RC:$dst, RHS)]>;

  // Prefer over VMOV*rrk Pat<>
  let AddedComplexity = 20 in
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       !strconcat(OpcodeStr, " \t{", AttSrcAsm,
                                  ", $dst {${mask}}|$dst {${mask}}, ",
                                  IntelSrcAsm, "}"),
                       [(set RC:$dst, MaskingRHS)]>,
                EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      string Constraints = MaskingConstraint;
    }

  // Prefer over VMOV*rrkz Pat<>
  let AddedComplexity = 30 in
    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                        !strconcat(OpcodeStr, " \t{", AttSrcAsm,
                                   ", $dst {${mask}} {z}|$dst {${mask}} {z}, ",
                                   IntelSrcAsm, "}"),
                        [(set RC:$dst,
                              (vselect KRC:$mask, RHS,
                                       (OpVT (bitconvert
                                               (v16i32 immAllZerosV)))))]>,
                 EVEX_KZ;
}
142
// Generates the unconditional (non-masking), the masking and the zero-masking
// variant of an instruction.  In the masking case the preserved vector
// elements come from a new dummy input operand, $src0, which is tied to $dst
// through the "$src0 = $dst" constraint.
multiclass AVX512_masking<bits<8> O, Format F, dag Outs, dag Ins,
                          string OpcodeStr,
                          string AttSrcAsm, string IntelSrcAsm,
                          dag RHS, ValueType OpVT, RegisterClass RC,
                          RegisterClass KRC> :
  AVX512_masking_common<O, F, Outs,
                        // Unmasked inputs.
                        Ins,
                        // Masking inputs: pass-through value and mask first.
                        !con((ins RC:$src0, KRC:$mask), Ins),
                        // Zero-masking inputs: only the mask is prepended.
                        !con((ins KRC:$mask), Ins),
                        OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                        (vselect KRC:$mask, RHS, RC:$src0), OpVT, RC, KRC,
                        "$src0 = $dst">;
158
// Like AVX512_masking, except that one of the source operands ($src1) is
// already tied to $dst, so it doubles as the source of the preserved vector
// elements.  NOTE: NonTiedIns (the ins dag) must not contain $src1; it is
// prepended here, followed by the mask for the two masked forms.
multiclass AVX512_masking_3src<bits<8> O, Format F, dag Outs, dag NonTiedIns,
                               string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, ValueType OpVT,
                               RegisterClass RC, RegisterClass KRC> :
  AVX512_masking_common<O, F, Outs,
                        !con((ins RC:$src1), NonTiedIns),
                        !con((ins RC:$src1, KRC:$mask), NonTiedIns),
                        !con((ins RC:$src1, KRC:$mask), NonTiedIns),
                        OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                        (vselect KRC:$mask, RHS, RC:$src1), OpVT, RC, KRC>;
176
// Bitcasts between vector types of the same width are free: the value stays
// in the same register, so each pattern simply returns its source retyped.
// For every destination type there is one pattern per other type of that
// width (five per destination).
let Predicates = [HasAVX512] in {
  // Bitcasts between 512-bit vector types. Return the original type since
  // no instruction is needed for the conversion
  def : Pat<(v8f64  (bitconvert (v8i64  VR512:$src))), (v8f64  VR512:$src)>;
  def : Pat<(v8f64  (bitconvert (v16i32 VR512:$src))), (v8f64  VR512:$src)>;
  def : Pat<(v8f64  (bitconvert (v32i16 VR512:$src))), (v8f64  VR512:$src)>;
  def : Pat<(v8f64  (bitconvert (v64i8  VR512:$src))), (v8f64  VR512:$src)>;
  def : Pat<(v8f64  (bitconvert (v16f32 VR512:$src))), (v8f64  VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v8i64  VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v64i8  VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v8f64  VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v8i64  (bitconvert (v16i32 VR512:$src))), (v8i64  VR512:$src)>;
  def : Pat<(v8i64  (bitconvert (v32i16 VR512:$src))), (v8i64  VR512:$src)>;
  def : Pat<(v8i64  (bitconvert (v64i8  VR512:$src))), (v8i64  VR512:$src)>;
  def : Pat<(v8i64  (bitconvert (v8f64  VR512:$src))), (v8i64  VR512:$src)>;
  def : Pat<(v8i64  (bitconvert (v16f32 VR512:$src))), (v8i64  VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v8i64  VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v64i8  VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v8f64  VR512:$src))), (v16i32 VR512:$src)>;
  // The v32i16 <- v16f32 pattern used to be listed twice; the redundant copy
  // has been dropped.
  def : Pat<(v32i16 (bitconvert (v8i64  VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v64i8  VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v8f64  VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v64i8  (bitconvert (v8i64  VR512:$src))), (v64i8  VR512:$src)>;
  def : Pat<(v64i8  (bitconvert (v16i32 VR512:$src))), (v64i8  VR512:$src)>;
  def : Pat<(v64i8  (bitconvert (v32i16 VR512:$src))), (v64i8  VR512:$src)>;
  def : Pat<(v64i8  (bitconvert (v8f64  VR512:$src))), (v64i8  VR512:$src)>;
  def : Pat<(v64i8  (bitconvert (v16f32 VR512:$src))), (v64i8  VR512:$src)>;

  // Bitcasts between 128-bit vector types. Return the original type since
  // no instruction is needed for the conversion
  def : Pat<(v2i64 (bitconvert (v4i32 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v2f64 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v4f32 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v4f32 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v2i64 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v4i32 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v2i64 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 VR128X:$src))), (v2f64 VR128X:$src)>;

  // Bitcasts between 256-bit vector types. Return the original type since
  // no instruction is needed for the conversion
  def : Pat<(v4f64  (bitconvert (v8f32  VR256X:$src))), (v4f64  VR256X:$src)>;
  def : Pat<(v4f64  (bitconvert (v8i32  VR256X:$src))), (v4f64  VR256X:$src)>;
  def : Pat<(v4f64  (bitconvert (v4i64  VR256X:$src))), (v4f64  VR256X:$src)>;
  def : Pat<(v4f64  (bitconvert (v16i16 VR256X:$src))), (v4f64  VR256X:$src)>;
  def : Pat<(v4f64  (bitconvert (v32i8  VR256X:$src))), (v4f64  VR256X:$src)>;
  def : Pat<(v8f32  (bitconvert (v8i32  VR256X:$src))), (v8f32  VR256X:$src)>;
  def : Pat<(v8f32  (bitconvert (v4i64  VR256X:$src))), (v8f32  VR256X:$src)>;
  def : Pat<(v8f32  (bitconvert (v4f64  VR256X:$src))), (v8f32  VR256X:$src)>;
  def : Pat<(v8f32  (bitconvert (v32i8  VR256X:$src))), (v8f32  VR256X:$src)>;
  def : Pat<(v8f32  (bitconvert (v16i16 VR256X:$src))), (v8f32  VR256X:$src)>;
  def : Pat<(v4i64  (bitconvert (v8f32  VR256X:$src))), (v4i64  VR256X:$src)>;
  def : Pat<(v4i64  (bitconvert (v8i32  VR256X:$src))), (v4i64  VR256X:$src)>;
  def : Pat<(v4i64  (bitconvert (v4f64  VR256X:$src))), (v4i64  VR256X:$src)>;
  def : Pat<(v4i64  (bitconvert (v32i8  VR256X:$src))), (v4i64  VR256X:$src)>;
  def : Pat<(v4i64  (bitconvert (v16i16 VR256X:$src))), (v4i64  VR256X:$src)>;
  def : Pat<(v32i8  (bitconvert (v4f64  VR256X:$src))), (v32i8  VR256X:$src)>;
  def : Pat<(v32i8  (bitconvert (v4i64  VR256X:$src))), (v32i8  VR256X:$src)>;
  def : Pat<(v32i8  (bitconvert (v8f32  VR256X:$src))), (v32i8  VR256X:$src)>;
  def : Pat<(v32i8  (bitconvert (v8i32  VR256X:$src))), (v32i8  VR256X:$src)>;
  def : Pat<(v32i8  (bitconvert (v16i16 VR256X:$src))), (v32i8  VR256X:$src)>;
  def : Pat<(v8i32  (bitconvert (v32i8  VR256X:$src))), (v8i32  VR256X:$src)>;
  def : Pat<(v8i32  (bitconvert (v16i16 VR256X:$src))), (v8i32  VR256X:$src)>;
  def : Pat<(v8i32  (bitconvert (v8f32  VR256X:$src))), (v8i32  VR256X:$src)>;
  def : Pat<(v8i32  (bitconvert (v4i64  VR256X:$src))), (v8i32  VR256X:$src)>;
  def : Pat<(v8i32  (bitconvert (v4f64  VR256X:$src))), (v8i32  VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v8f32  VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v8i32  VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v4i64  VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v4f64  VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v32i8  VR256X:$src))), (v16i16 VR256X:$src)>;
}
276
//
// AVX-512: the VPXOR instruction writes zeros to the upper part of its
// destination, so it is safe to use it to build an all-zeros vector.
//

// Pseudo instruction producing an all-zeros 512-bit register.  It is marked
// rematerializable and as cheap as a move.
let isReMaterializable = 1,
    isAsCheapAsAMove = 1,
    canFoldAsLoad = 1,
    isPseudo = 1,
    Predicates = [HasAVX512] in
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                        [(set VR512:$dst, (v16f32 immAllZerosV))]>;
286
// Select the zeroing pseudo for all-zeros vectors of the other 512-bit types
// as well (v16f32 is covered by the pattern on the pseudo itself).
let Predicates = [HasAVX512] in {
  def : Pat<(v8i64  immAllZerosV), (AVX512_512_SET0)>;
  def : Pat<(v16i32 immAllZerosV), (AVX512_512_SET0)>;
  def : Pat<(v8f64  immAllZerosV), (AVX512_512_SET0)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000292
293//===----------------------------------------------------------------------===//
294// AVX-512 - VECTOR INSERT
295//
// -- 32x4 fp form --
// Register and memory forms of vinsertf32x4.  Neither carries a selection
// pattern here; they are matched by separate Pat<> definitions.
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
  def VINSERTF32x4rr :
    AVX512AIi8<0x18, MRMSrcReg, (outs VR512:$dst),
               (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
               "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
               []>,
    EVEX_4V, EVEX_V512;

  let mayLoad = 1 in {
    def VINSERTF32x4rm :
      AVX512AIi8<0x18, MRMSrcMem, (outs VR512:$dst),
                 (ins VR512:$src1, f128mem:$src2, i8imm:$src3),
                 "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                 []>,
      EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
  }
}
308
// -- 64x4 fp form --
// Register and memory forms of vinsertf64x4; selection is done by separate
// Pat<> definitions.
let hasSideEffects = 0, ExeDomain = SSEPackedDouble in {
  def VINSERTF64x4rr :
    AVX512AIi8<0x1a, MRMSrcReg, (outs VR512:$dst),
               (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
               "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
               []>,
    EVEX_4V, EVEX_V512, VEX_W;

  let mayLoad = 1 in {
    def VINSERTF64x4rm :
      AVX512AIi8<0x1a, MRMSrcMem, (outs VR512:$dst),
                 (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
                 "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                 []>,
      EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
  }
}
// -- 32x4 integer form --
// Register and memory forms of vinserti32x4; selection is done by separate
// Pat<> definitions.
let hasSideEffects = 0 in {
  def VINSERTI32x4rr :
    AVX512AIi8<0x38, MRMSrcReg, (outs VR512:$dst),
               (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
               "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
               []>,
    EVEX_4V, EVEX_V512;

  let mayLoad = 1 in {
    def VINSERTI32x4rm :
      AVX512AIi8<0x38, MRMSrcMem, (outs VR512:$dst),
                 (ins VR512:$src1, i128mem:$src2, i8imm:$src3),
                 "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                 []>,
      EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
  }
}
333
// -- 64x4 integer form --
// Register and memory forms of vinserti64x4; selection is done by separate
// Pat<> definitions.
let hasSideEffects = 0 in {
  def VINSERTI64x4rr :
    AVX512AIi8<0x3a, MRMSrcReg, (outs VR512:$dst),
               (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
               "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
               []>,
    EVEX_4V, EVEX_V512, VEX_W;

  let mayLoad = 1 in {
    def VINSERTI64x4rm :
      AVX512AIi8<0x3a, MRMSrcMem, (outs VR512:$dst),
                 (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
                 "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                 []>,
      EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
  }
}
346
// 128-bit subvector insert into a 512-bit vector, register source.  The
// 64-bit element types reuse the 32x4 instructions; the immediate is derived
// from the matched node through INSERT_get_vinsert128_imm.
def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1),
                                  (v4f32 VR128X:$src2), (iPTR imm)),
          (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1),
                                  (v2f64 VR128X:$src2), (iPTR imm)),
          (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1),
                                  (v2i64 VR128X:$src2), (iPTR imm)),
          (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1),
                                  (v4i32 VR128X:$src2), (iPTR imm)),
          (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;

// 128-bit subvector insert into a 512-bit vector, memory source.  The v4i32
// load is matched as a bitcast of loadv2i64.
def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1),
                                  (loadv4f32 addr:$src2), (iPTR imm)),
          (VINSERTF32x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1),
                                  (bc_v4i32 (loadv2i64 addr:$src2)),
                                  (iPTR imm)),
          (VINSERTI32x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1),
                                  (loadv2f64 addr:$src2), (iPTR imm)),
          (VINSERTF32x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1),
                                  (loadv2i64 addr:$src2), (iPTR imm)),
          (VINSERTI32x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
373
// 256-bit subvector insert into a 512-bit vector, register source.  The
// 32-bit element types reuse the 64x4 instructions; the immediate is derived
// from the matched node through INSERT_get_vinsert256_imm.
//
// All four patterns match through vinsert256_insert.  The two integer
// patterns previously used vinsert128_insert even though they insert a
// 256-bit subvector and compute a 256-bit immediate; they now use the same
// fragment as the fp patterns and the memory forms.
def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1),
                                  (v8f32 VR256X:$src2), (iPTR imm)),
          (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1),
                                  (v4f64 VR256X:$src2), (iPTR imm)),
          (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1),
                                  (v4i64 VR256X:$src2), (iPTR imm)),
          (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1),
                                  (v8i32 VR256X:$src2), (iPTR imm)),
          (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
386
// 256-bit subvector insert into a 512-bit vector, memory source.  The v8i32
// load is matched as a bitcast of loadv4i64.
def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1),
                                  (loadv8f32 addr:$src2), (iPTR imm)),
          (VINSERTF64x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1),
                                  (loadv4f64 addr:$src2), (iPTR imm)),
          (VINSERTF64x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1),
                                  (loadv4i64 addr:$src2), (iPTR imm)),
          (VINSERTI64x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1),
                                  (bc_v8i32 (loadv4i64 addr:$src2)),
                                  (iPTR imm)),
          (VINSERTI64x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
400
// vinsertps - insert f32 to XMM
// Register form: selected for X86insertps with two register operands.
def VINSERTPSzrr :
  AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
             (ins VR128X:$src1, VR128X:$src2, i8imm:$src3),
             "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
             [(set VR128X:$dst,
                   (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
  EVEX_4V;

// Memory form: the second operand is a scalar f32 load promoted to a vector.
def VINSERTPSzrm:
  AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
             (ins VR128X:$src1, f32mem:$src2, i8imm:$src3),
             "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
             [(set VR128X:$dst,
                   (X86insertps VR128X:$src1,
                                (v4f32 (scalar_to_vector
                                         (loadf32 addr:$src2))),
                                imm:$src3))]>,
  EVEX_4V, EVEX_CD8<32, CD8VT1>;
413
414//===----------------------------------------------------------------------===//
415// AVX-512 VECTOR EXTRACT
416//---
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000417
// Defines the register ("rr") and store ("rm") forms of one vextract*x4
// instruction together with its selection patterns.
//   From / To       - source and result vector info for the primary types.
//   AltFrom / AltTo - a second source/result pair that is selected to the
//                     same instruction (e.g. v8i64 -> v2i64 for vextracti32x4).
//   vextract_extract, EXTRACT_get_vextract_imm - the pattern fragment that
//                     matches the extract and the transform that turns the
//                     matched node into the instruction immediate.
multiclass vextract_for_size<int Opcode,
                             X86VectorVTInfo From, X86VectorVTInfo To,
                             X86VectorVTInfo AltFrom, X86VectorVTInfo AltTo,
                             PatFrag vextract_extract,
                             SDNodeXForm EXTRACT_get_vextract_imm> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    def rr : AVX512AIi8<Opcode, MRMDestReg, (outs To.RC:$dst),
                        (ins VR512:$src1, i8imm:$src2),
                        !strconcat("vextract", To.EltTypeName,
                                   "x4\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                        []>,
             EVEX, EVEX_V512;

    let mayStore = 1 in {
      def rm : AVX512AIi8<Opcode, MRMDestMem, (outs),
                          (ins To.MemOp:$dst, VR512:$src1, i8imm:$src2),
                          !strconcat("vextract", To.EltTypeName,
                                     "x4\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                          []>,
               EVEX, EVEX_V512, EVEX_CD8<To.EltSize, CD8VT4>;
    }
  }

  // Codegen pattern, e.g. v16i32 -> v4i32 for vextracti32x4
  def : Pat<(vextract_extract:$ext (From.VT VR512:$src1), (iPTR imm)),
            (To.VT (!cast<Instruction>(NAME # To.EltSize # "x4rr") VR512:$src1,
                    (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  // Codegen pattern with the alternative types, e.g. v8i64 -> v2i64 for
  // vextracti32x4
  def : Pat<(vextract_extract:$ext (AltFrom.VT VR512:$src1), (iPTR imm)),
            (AltTo.VT (!cast<Instruction>(NAME # To.EltSize # "x4rr")
                       VR512:$src1,
                       (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  // Extracting a 128/256-bit subvector from position 0 of a 512-bit vector
  // is a plain subregister copy and needs no instruction.
  def : Pat<(To.VT (extract_subvector (From.VT VR512:$src), (iPTR 0))),
            (To.VT
               (EXTRACT_SUBREG (From.VT VR512:$src), To.SubRegIdx))>;

  // The same subregister copy for the alternative types.
  def : Pat<(AltTo.VT (extract_subvector (AltFrom.VT VR512:$src), (iPTR 0))),
            (AltTo.VT
               (EXTRACT_SUBREG (AltFrom.VT VR512:$src), AltTo.SubRegIdx))>;
}
460
// Instantiates vextract_for_size for both subvector widths of one element
// family (fp or integer):
//   NAME#"32x4" - 128-bit extract; primary types use EltVT32, alternative
//                 types use EltVT64.
//   NAME#"64x4" - 256-bit extract (VEX_W); primary types use EltVT64,
//                 alternative types use EltVT32.
// Every 256-bit info record uses VR256X, matching the other 256-bit info
// records in this file (the alternative 256-bit record used to name VR256).
multiclass vextract_for_type<ValueType EltVT32, int Opcode32,
                             ValueType EltVT64, int Opcode64> {
  defm NAME # "32x4" : vextract_for_size<Opcode32,
                                         X86VectorVTInfo<16, EltVT32, VR512>,
                                         X86VectorVTInfo< 4, EltVT32, VR128X>,
                                         X86VectorVTInfo< 8, EltVT64, VR512>,
                                         X86VectorVTInfo< 2, EltVT64, VR128X>,
                                         vextract128_extract,
                                         EXTRACT_get_vextract128_imm>;
  defm NAME # "64x4" : vextract_for_size<Opcode64,
                                         X86VectorVTInfo< 8, EltVT64, VR512>,
                                         X86VectorVTInfo< 4, EltVT64, VR256X>,
                                         X86VectorVTInfo<16, EltVT32, VR512>,
                                         X86VectorVTInfo< 8, EltVT32, VR256X>,
                                         vextract256_extract,
                                         EXTRACT_get_vextract256_imm>, VEX_W;
}
478
// Floating-point and integer subvector extracts: 32x4 opcode, then 64x4
// opcode, for each family.
defm VEXTRACTF : vextract_for_type<f32, 0x19,
                                   f64, 0x1b>;
defm VEXTRACTI : vextract_for_type<i32, 0x39,
                                   i64, 0x3b>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000481
// Inserting a 128-bit subvector at position 0 of an undefined 512-bit vector
// is a subregister copy and needs no instruction.  It is expressed as two
// nested INSERT_SUBREGs: xmm into an undefined ymm, then that ymm into an
// undefined zmm.
def : Pat<(insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG
             (v8i64 (IMPLICIT_DEF)),
             (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             sub_ymm)>;
def : Pat<(insert_subvector undef, (v2f64 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG
             (v8f64 (IMPLICIT_DEF)),
             (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             sub_ymm)>;
def : Pat<(insert_subvector undef, (v4i32 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG
             (v16i32 (IMPLICIT_DEF)),
             (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             sub_ymm)>;
def : Pat<(insert_subvector undef, (v4f32 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG
             (v16f32 (IMPLICIT_DEF)),
             (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             sub_ymm)>;
500
// Inserting a 256-bit subvector at position 0 of an undefined 512-bit vector
// is likewise a single subregister copy (ymm into an undefined zmm).
def : Pat<(insert_subvector undef, (v4i64 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8i64  (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v4f64 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8f64  (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
509
// vextractps - extract 32 bits from XMM
// Register form: the selected element, viewed as i32, goes to a GR32.
def VEXTRACTPSzrr :
  AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
             (ins VR128X:$src1, i32i8imm:$src2),
             "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
             [(set GR32:$dst,
                   (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
  EVEX;

// Store form: the selected element is written to memory.
def VEXTRACTPSzmr :
  AVX512AIi8<0x17, MRMDestMem, (outs),
             (ins f32mem:$dst, VR128X:$src1, i32i8imm:$src2),
             "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
             [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                     addr:$dst)]>,
  EVEX, EVEX_CD8<32, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000522
523//===---------------------------------------------------------------------===//
524// AVX-512 BROADCAST
525//---
// Register-source ("rr") and memory-source ("rm") forms of a floating-point
// broadcast.  Neither form carries a selection pattern here.
multiclass avx512_fp_broadcast<bits<8> opc, string OpcodeStr,
                               RegisterClass DestRC,
                               RegisterClass SrcRC, X86MemOperand x86memop> {
  def rr : AVX5128I<opc, MRMSrcReg, (outs DestRC:$dst), (ins SrcRC:$src),
                    OpcodeStr # " \t{$src, $dst|$dst, $src}",
                    []>,
           EVEX;
  def rm : AVX5128I<opc, MRMSrcMem, (outs DestRC:$dst), (ins x86memop:$src),
                    OpcodeStr # " \t{$src, $dst|$dst, $src}",
                    []>,
           EVEX;
}
// 512-bit single-precision broadcast.
let ExeDomain = SSEPackedSingle in
defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss", VR512,
                                         VR128X, f32mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;

// 512-bit double-precision broadcast.
let ExeDomain = SSEPackedDouble in
defm VBROADCASTSDZ : avx512_fp_broadcast<0x19, "vbroadcastsd", VR512,
                                         VR128X, f64mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
546
// Broadcast of a scalar load selects the memory form directly.
def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))),
          (VBROADCASTSSZrm addr:$src)>;
def : Pat<(v8f64  (X86VBroadcast (loadf64 addr:$src))),
          (VBROADCASTSDZrm addr:$src)>;

// The broadcast intrinsics take an address and select the same instructions.
def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
          (VBROADCASTSSZrm addr:$src)>;
def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
          (VBROADCASTSDZrm addr:$src)>;
556
// Broadcast of a GPR into a 512-bit vector: the plain form (Zrr) and the
// zero-masking form (Zkrr), which takes the mask as its first input.
// Neither form carries a selection pattern here.
multiclass avx512_int_broadcast_reg<bits<8> opc, string OpcodeStr,
                                    RegisterClass SrcRC, RegisterClass KRC> {
  def Zrr  : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins SrcRC:$src),
                      OpcodeStr # " \t{$src, $dst|$dst, $src}",
                      []>,
             EVEX, EVEX_V512;
  def Zkrr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst),
                      (ins KRC:$mask, SrcRC:$src),
                      OpcodeStr #
                        " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                      []>,
             EVEX, EVEX_V512, EVEX_KZ;
}
568
// GPR-source broadcasts: dword from GR32 (16 lanes), qword from GR64 (8 lanes).
defm VPBROADCASTDr : avx512_int_broadcast_reg<0x7C, "vpbroadcastd", GR32,
                                              VK16WM>;
defm VPBROADCASTQr : avx512_int_broadcast_reg<0x7C, "vpbroadcastq", GR64,
                                              VK8WM>,
                     VEX_W;
572
// Zero-extend a mask into a vector by broadcasting the constant 1 through the
// masked (Zkrr) broadcast form.
def : Pat<(v16i32 (X86vzext VK16WM:$mask)),
          (VPBROADCASTDrZkrr VK16WM:$mask, (i32 (MOV32ri 0x1)))>;

def : Pat<(v8i64 (X86vzext VK8WM:$mask)),
          (VPBROADCASTQrZkrr VK8WM:$mask, (i64 (MOV64ri 0x1)))>;

// Broadcast of a GPR value, unmasked and masked, for dword and qword lanes.
def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))),
          (VPBROADCASTDrZrr GR32:$src)>;
def : Pat<(v16i32 (X86VBroadcastm VK16WM:$mask, (i32 GR32:$src))),
          (VPBROADCASTDrZkrr VK16WM:$mask, GR32:$src)>;
def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))),
          (VPBROADCASTQrZrr GR64:$src)>;
def : Pat<(v8i64 (X86VBroadcastm VK8WM:$mask, (i64 GR64:$src))),
          (VPBROADCASTQrZkrr VK8WM:$mask, GR64:$src)>;

// Unmasked GPR broadcast intrinsics.
def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_i32_512 (i32 GR32:$src))),
          (VPBROADCASTDrZrr GR32:$src)>;
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_i64_512 (i64 GR64:$src))),
          (VPBROADCASTQrZrr GR64:$src)>;

// Masked GPR broadcast intrinsics with an all-zeros pass-through operand.
// The GPR mask is moved into the write-mask register class first.
def : Pat<(v16i32 (int_x86_avx512_mask_pbroadcast_d_gpr_512 (i32 GR32:$src),
                   (v16i32 immAllZerosV), (i16 GR16:$mask))),
          (VPBROADCASTDrZkrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
                             GR32:$src)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pbroadcast_q_gpr_512 (i64 GR64:$src),
                  (bc_v8i64 (v16i32 immAllZerosV)), (i8 GR8:$mask))),
          (VPBROADCASTQrZkrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
                             GR64:$src)>;
599
// Integer broadcast whose source is an XMM register or a scalar memory load.
//   x86memop / ld_frag - memory operand and load fragment for the rm forms.
//   DstRC / OpVT       - destination register class and result vector type.
//   SrcVT              - vector type of the XMM source in the rr forms.
//   KRC                - mask register class for the zero-masking forms.
// Produces rr/rm (unmasked) and krr/krm (zero-masking) defs, each with a
// selection pattern on X86VBroadcast / X86VBroadcastm.
multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
                                   X86MemOperand x86memop, PatFrag ld_frag,
                                   RegisterClass DstRC, ValueType OpVT,
                                   ValueType SrcVT, RegisterClass KRC> {
  def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins VR128X:$src),
                    OpcodeStr # " \t{$src, $dst|$dst, $src}",
                    [(set DstRC:$dst,
                      (OpVT (X86VBroadcast (SrcVT VR128X:$src))))]>,
           EVEX;

  def krr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
                     (ins KRC:$mask, VR128X:$src),
                     OpcodeStr #
                     " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                     [(set DstRC:$dst,
                       (OpVT (X86VBroadcastm KRC:$mask,
                                             (SrcVT VR128X:$src))))]>,
            EVEX, EVEX_KZ;

  let mayLoad = 1 in {
    def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
                      OpcodeStr # " \t{$src, $dst|$dst, $src}",
                      [(set DstRC:$dst,
                        (OpVT (X86VBroadcast (ld_frag addr:$src))))]>,
             EVEX;

    def krm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
                       (ins KRC:$mask, x86memop:$src),
                       OpcodeStr #
                       " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                       [(set DstRC:$dst,
                         (OpVT (X86VBroadcastm KRC:$mask,
                                               (ld_frag addr:$src))))]>,
              EVEX, EVEX_KZ;
  }
}
628
// 512-bit dword/qword broadcasts from XMM or scalar memory.
defm VPBROADCASTDZ : avx512_int_broadcast_rm<0x58, "vpbroadcastd", i32mem,
                                             loadi32, VR512, v16i32, v4i32,
                                             VK16WM>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem,
                                             loadi64, VR512, v8i64, v2i64,
                                             VK8WM>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
635
// Memory-only broadcast of an integer sub-vector into a ZMM register.
//   x86memop - memory operand of the sub-vector.
//   ld_frag  - load fragment; not referenced by any def in this multiclass.
//   KRC      - mask register class for the zero-masking form.
// Produces rm (unmasked) and krm (zero-masking); neither has a pattern.
multiclass avx512_int_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                          X86MemOperand x86memop,
                                          PatFrag ld_frag,
                                          RegisterClass KRC> {
  let mayLoad = 1 in {
    def rm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins x86memop:$src),
                      OpcodeStr # " \t{$src, $dst|$dst, $src}",
                      []>,
             EVEX;

    def krm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
                       (ins KRC:$mask, x86memop:$src),
                       OpcodeStr #
                       " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                       []>,
              EVEX, EVEX_KZ;
  }
}
650
// Sub-vector integer broadcasts (memory source only).
// The mask class follows the element granularity of the instruction:
// 32-bit elements -> 16 lanes in 512 bits -> VK16WM,
// 64-bit elements ->  8 lanes in 512 bits -> VK8WM.
defm VBROADCASTI32X4 : avx512_int_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                       i128mem, loadv2i64, VK16WM>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_int_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                       i256mem, loadv4i64, VK8WM>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
657
// Integer broadcast intrinsics taking an XMM source.
def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))),
          (VPBROADCASTDZrr VR128X:$src)>;
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
          (VPBROADCASTQZrr VR128X:$src)>;

// FP broadcast of the low element of an XMM register: DAG node form...
def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))),
          (VBROADCASTSSZrr VR128X:$src)>;
def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))),
          (VBROADCASTSDZrr VR128X:$src)>;

// ...and intrinsic form.
def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))),
          (VBROADCASTSSZrr VR128X:$src)>;
def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
          (VBROADCASTSDZrr VR128X:$src)>;

// Fallback for a scalar FP register source: when the load feeding a broadcast
// has other users, the load-folding patterns cannot be selected, so the
// scalar is copied into VR128X and the register form is used instead.
def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
          (VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
          (VBROADCASTSDZrr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
679
680
// 256-bit masked dword broadcast from memory, implemented with the 512-bit
// zero-masking instruction: the 8-bit mask is re-classed to VK16WM and the
// low YMM half of the result is extracted.
let Predicates = [HasAVX512] in
def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))),
          (EXTRACT_SUBREG
            (v16i32 (VPBROADCASTDZkrm
                      (COPY_TO_REGCLASS VK8WM:$mask, VK16WM), addr:$src)),
            sub_ymm)>;
687//===----------------------------------------------------------------------===//
688// AVX-512 BROADCAST MASK TO VECTOR REGISTER
689//---
690
// Broadcast of a mask register into every element of a vector register
// (VPBROADCASTMW2D / VPBROADCASTMB2Q).
//   DstRC - destination vector register class.
//   KRC   - source mask register class.
//   OpVT, SrcVT - result / source value types; currently unused because the
//                 def carries no selection pattern.
// The instruction encodes the destination vector in ModRM.reg and the source
// mask in ModRM.r/m, so the form must be MRMSrcReg (MRMDestReg would swap the
// two fields and produce a wrong encoding).
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                       RegisterClass DstRC, RegisterClass KRC,
                       ValueType OpVT, ValueType SrcVT> {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs DstRC:$dst), (ins KRC:$src),
                  !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                  []>, EVEX;
}
698
// Mask-to-vector broadcasts; only available with the HasCDI predicate.
let Predicates = [HasCDI] in {
  defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", VR512,
                                               VK16, v16i32, v16i1>,
                         EVEX_V512;
  defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", VR512,
                                               VK8, v8i64, v8i1>,
                         EVEX_V512, VEX_W;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000705
706//===----------------------------------------------------------------------===//
707// AVX-512 - VPERM
708//
709// -- immediate form --
// Permute controlled by an 8-bit immediate.
//   RC / OpVT           - vector register class and value type.
//   OpNode              - DAG node taking (vector, i8 immediate).
//   mem_frag / x86memop - load fragment and memory operand for the mi form.
// Produces ri (register source) and mi (memory source).
multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                           SDNode OpNode, PatFrag mem_frag,
                           X86MemOperand x86memop, ValueType OpVT> {
  def ri : AVX512AIi8<opc, MRMSrcReg, (outs RC:$dst),
                      (ins RC:$src1, i8imm:$src2),
                      OpcodeStr #
                      " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set RC:$dst,
                        (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
           EVEX;

  def mi : AVX512AIi8<opc, MRMSrcMem, (outs RC:$dst),
                      (ins x86memop:$src1, i8imm:$src2),
                      OpcodeStr #
                      " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set RC:$dst,
                        (OpVT (OpNode (mem_frag addr:$src1),
                                      (i8 imm:$src2))))]>,
           EVEX;
}
728
// Immediate-controlled qword / double-precision permutes (512-bit).
defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", VR512, X86VPermi, memopv8i64,
                               i512mem, v8i64>,
               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

let ExeDomain = SSEPackedDouble in
defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", VR512, X86VPermi, memopv8f64,
                                f512mem, v8f64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
734
735// -- VPERM - register form --
// Two-operand permute matched from X86VPermv.
//   RC / OpVT           - vector register class and value type.
//   mem_frag / x86memop - load fragment and memory operand for the rm form.
// Produces rr (both operands in registers) and rm ($src2 loaded from memory).
multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                       PatFrag mem_frag, X86MemOperand x86memop,
                       ValueType OpVT> {
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                    (ins RC:$src1, RC:$src2),
                    OpcodeStr #
                    " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set RC:$dst,
                      (OpVT (X86VPermv RC:$src1, RC:$src2)))]>,
           EVEX_4V;

  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                    (ins RC:$src1, x86memop:$src2),
                    OpcodeStr #
                    " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set RC:$dst,
                      (OpVT (X86VPermv RC:$src1, (mem_frag addr:$src2))))]>,
           EVEX_4V;
}
754
// Register-controlled permutes (512-bit): integer forms...
defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, memopv16i32, i512mem,
                           v16i32>,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, memopv8i64, i512mem,
                           v8i64>,
               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// ...and floating-point forms, tagged with their execution domain.
let ExeDomain = SSEPackedSingle in
defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, memopv16f32, f512mem,
                            v16f32>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;

let ExeDomain = SSEPackedDouble in
defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem,
                            v8f64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
765
// -- VPERMI2* / VPERMT2* - 3 source operands form --
// Three-source permute in which $src1 is tied to $dst.
//   RC / OpVT           - vector register class and value type.
//   mem_frag / x86memop - load fragment and memory operand for the rm* forms.
//   OpNode              - DAG node taking ($src1, $src2, $src3).
//   KRC                 - write-mask register class for the masked forms.
// Produces:
//   rr,   rm    - unmasked.
//   rrk,  rmk   - merge-masking: unselected lanes keep $src1.
//   rrkz, rmkz  - zero-masking: unselected lanes are zero.
// $src1 is not printed in the asm strings because it is tied to $dst.
multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC,
                          PatFrag mem_frag, X86MemOperand x86memop,
                          SDNode OpNode, ValueType OpVT, RegisterClass KRC> {
let Constraints = "$src1 = $dst" in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2, RC:$src3),
                   !strconcat(OpcodeStr,
                       " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>,
                    EVEX_4V;

  def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
                   !strconcat(OpcodeStr,
                       " \t{$src3, $src2, $dst {${mask}}|"
                       "$dst {${mask}}, $src2, $src3}"),
                   [(set RC:$dst, (OpVT (vselect KRC:$mask,
                                           (OpNode RC:$src1, RC:$src2,
                                              RC:$src3),
                                           RC:$src1)))]>,
                    EVEX_4V, EVEX_K;

  let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
    def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
                   // No space before '|': it would be printed as trailing
                   // whitespace in AT&T syntax (rmkz below has none either).
                   !strconcat(OpcodeStr,
                       " \t{$src3, $src2, $dst {${mask}} {z}|"
                       "$dst {${mask}} {z}, $src2, $src3}"),
                   [(set RC:$dst, (OpVT (vselect KRC:$mask,
                                           (OpNode RC:$src1, RC:$src2,
                                              RC:$src3),
                                           (OpVT (bitconvert
                                              (v16i32 immAllZerosV))))))]>,
                    EVEX_4V, EVEX_KZ;

  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2, x86memop:$src3),
                   !strconcat(OpcodeStr,
                    " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (OpNode RC:$src1, RC:$src2,
                      (mem_frag addr:$src3))))]>, EVEX_4V;

  def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
                   !strconcat(OpcodeStr,
                    " \t{$src3, $src2, $dst {${mask}}|"
                    "$dst {${mask}}, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (vselect KRC:$mask,
                                    (OpNode RC:$src1, RC:$src2,
                                       (mem_frag addr:$src3)),
                                    RC:$src1)))]>,
                    EVEX_4V, EVEX_K;

  let AddedComplexity = 10 in // Prefer over the rrkz variant
    def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
                   !strconcat(OpcodeStr,
                    " \t{$src3, $src2, $dst {${mask}} {z}|"
                    "$dst {${mask}} {z}, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (vselect KRC:$mask,
                                    (OpNode RC:$src1, RC:$src2,
                                            (mem_frag addr:$src3)),
                                    (OpVT (bitconvert
                                       (v16i32 immAllZerosV))))))]>,
                    EVEX_4V, EVEX_KZ;
  }
}
// VPERMI2*: three-source permutes selected from X86VPermiv3.
// Integer element forms.
defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32,
                                 i512mem, X86VPermiv3, v16i32, VK16WM>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64,
                                 i512mem, X86VPermiv3, v8i64, VK8WM>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Floating-point element forms.
// NOTE(review): these pass i512mem although the element type is FP, whereas
// VPERMPSZ/VPERMPDZ use f512mem - confirm whether that is intentional.
defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32,
                                  i512mem, X86VPermiv3, v16f32, VK16WM>,
                 EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64,
                                  i512mem, X86VPermiv3, v8f64, VK8WM>,
                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000850
// VPERMT2* wrapper around avx512_perm_3src: builds the mnemonic as
// "vpermt2" + Suffix and adds patterns for the matching
// int_x86_avx512_mask_vpermt_<Suffix>_512 intrinsic.
//   MaskVT - value type of the mask once it is in KRC.
//   MRC    - GPR register class in which the intrinsic receives the mask.
// The intrinsic takes (idx, src1, src2, mask) while the instruction takes
// (src1, [mask,] idx, src2), hence the operand reordering below.
multiclass avx512_perm_table_3src<bits<8> opc, string Suffix, RegisterClass RC,
                                  PatFrag mem_frag, X86MemOperand x86memop,
                                  SDNode OpNode, ValueType OpVT,
                                  RegisterClass KRC, ValueType MaskVT,
                                  RegisterClass MRC>
    : avx512_perm_3src<opc, "vpermt2"##Suffix, RC, mem_frag, x86memop, OpNode,
                       OpVT, KRC> {
  // Mask operand of -1: select the unmasked rr form.
  def : Pat<(OpVT (!cast<Intrinsic>(
                     "int_x86_avx512_mask_vpermt_"##Suffix##"_512")
                   VR512:$idx, VR512:$src1, VR512:$src2, -1)),
            (!cast<Instruction>(NAME#rr) VR512:$src1, VR512:$idx,
                                         VR512:$src2)>;

  // Any other mask: copy it from the GPR class into KRC and use rrk.
  def : Pat<(OpVT (!cast<Intrinsic>(
                     "int_x86_avx512_mask_vpermt_"##Suffix##"_512")
                   VR512:$idx, VR512:$src1, VR512:$src2, MRC:$mask)),
            (!cast<Instruction>(NAME#rrk)
               VR512:$src1,
               (MaskVT (COPY_TO_REGCLASS MRC:$mask, KRC)),
               VR512:$idx, VR512:$src2)>;
}
866
// VPERMT2*: three-source permutes selected from X86VPermv3, plus intrinsic
// patterns. The last two arguments give the mask value type and the GPR class
// that carries the mask into the intrinsic.
defm VPERMT2D  : avx512_perm_table_3src<0x7E, "d", VR512, memopv16i32,
                                        i512mem, X86VPermv3, v16i32, VK16WM,
                                        v16i1, GR16>,
                 EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q  : avx512_perm_table_3src<0x7E, "q", VR512, memopv8i64,
                                        i512mem, X86VPermv3, v8i64, VK8WM,
                                        v8i1, GR8>,
                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2PS : avx512_perm_table_3src<0x7F, "ps", VR512, memopv16f32,
                                        i512mem, X86VPermv3, v16f32, VK16WM,
                                        v16i1, GR16>,
                 EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_table_3src<0x7F, "pd", VR512, memopv8f64,
                                        i512mem, X86VPermv3, v8f64, VK8WM,
                                        v8i1, GR8>,
                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky299cf5112014-04-29 09:09:15 +0000879
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000880//===----------------------------------------------------------------------===//
881// AVX-512 - BLEND using mask
882//
// Mask-controlled blend of two vectors.
//   KRC                 - write-mask register class.
//   RC / vt             - vector register class and value type.
//   x86memop / mem_frag - memory operand and load fragment for the rm form
//                         (mem_frag is not referenced: rm has no pattern).
//   OpNode              - select-style DAG node: (mask, $src2, $src1).
// Produces rr (with pattern) and rm (pattern-less, marked mayLoad).
multiclass avx512_blendmask<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, RegisterClass RC,
                            X86MemOperand x86memop, PatFrag mem_frag,
                            SDNode OpNode, ValueType vt> {
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                    (ins KRC:$mask, RC:$src1, RC:$src2),
                    OpcodeStr #
                    " \t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}",
                    [(set RC:$dst,
                      (OpNode KRC:$mask, (vt RC:$src2), (vt RC:$src1)))]>,
           EVEX_4V, EVEX_K;

  let mayLoad = 1 in
  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                    (ins KRC:$mask, RC:$src1, x86memop:$src2),
                    OpcodeStr #
                    " \t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}",
                    []>,
           EVEX_4V, EVEX_K;
}
900
// Floating-point mask blends (512-bit), selected from vselect.
let ExeDomain = SSEPackedSingle in
defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps", VK16WM, VR512, f512mem,
                                   memopv16f32, vselect, v16f32>,
                  EVEX_CD8<32, CD8VF>, EVEX_V512;

let ExeDomain = SSEPackedDouble in
defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd", VK8WM, VR512, f512mem,
                                   memopv8f64, vselect, v8f64>,
                  VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
911
// FP blend intrinsics: the GPR mask is copied into the write-mask register
// class before being handed to the register-register blend.
def : Pat<(v16f32 (int_x86_avx512_mask_blend_ps_512 (v16f32 VR512:$src1),
                                                    (v16f32 VR512:$src2),
                                                    (i16 GR16:$mask))),
          (VBLENDMPSZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
                        VR512:$src1, VR512:$src2)>;

def : Pat<(v8f64 (int_x86_avx512_mask_blend_pd_512 (v8f64 VR512:$src1),
                                                   (v8f64 VR512:$src2),
                                                   (i8 GR8:$mask))),
          (VBLENDMPDZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
                        VR512:$src1, VR512:$src2)>;
921
// Integer mask blends (512-bit), selected from vselect.
// NOTE(review): these integer instructions pass f512mem as the memory operand;
// other integer definitions in this file use i512mem - confirm intent.
defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd", VK16WM, VR512, f512mem,
                                   memopv16i32, vselect, v16i32>,
                  EVEX_CD8<32, CD8VF>, EVEX_V512;

defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq", VK8WM, VR512, f512mem,
                                   memopv8i64, vselect, v8i64>,
                  VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000931
// Integer blend intrinsics: the GPR mask is copied into the *write-mask*
// register class (VK16WM / VK8WM). That is the class the $mask operand of
// VPBLENDMDZrr / VPBLENDMQZrr is declared with, and the one the FP blend
// patterns use; the plain VK16 / VK8 classes are a different register class
// and should not be used for a write-mask operand.
def : Pat<(v16i32 (int_x86_avx512_mask_blend_d_512 (v16i32 VR512:$src1),
                 (v16i32 VR512:$src2), (i16 GR16:$mask))),
        (VPBLENDMDZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
         VR512:$src1, VR512:$src2)>;

def : Pat<(v8i64 (int_x86_avx512_mask_blend_q_512 (v8i64 VR512:$src1),
                 (v8i64 VR512:$src2), (i8 GR8:$mask))),
        (VPBLENDMQZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
         VR512:$src1, VR512:$src2)>;
941
// 256-bit vselect lowered through the 512-bit blend: both YMM inputs are
// placed in ZMM registers, the 8-bit mask is re-classed to VK16WM, and the
// low YMM half of the blend result is extracted. The blend receives $src2
// before $src1, the reverse of their order in the vselect.
let Predicates = [HasAVX512] in {
  def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
                            (v8f32 VR256X:$src2))),
            (EXTRACT_SUBREG
              (v16f32 (VBLENDMPSZrr
                        (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
                        (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
                        (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
                            (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG
              (v16i32 (VPBLENDMDZrr
                        (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
                        (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
                        (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))),
              sub_ymm)>;
}
Elena Demikhovsky47fc44e2013-12-16 13:52:35 +0000957//===----------------------------------------------------------------------===//
958// Compare Instructions
959//===----------------------------------------------------------------------===//
960
961// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
// Scalar FP compare writing a one-bit mask (VK1).
//   RC / VT            - scalar FP register class and value type.
//   x86memop / ld_frag - memory operand and load fragment for the rm form.
//   CC                 - operand class for the condition code in rr/rm.
//   OpNode             - DAG node taking (lhs, rhs, imm cc).
//   asm                - asm string with the condition folded into the
//                        mnemonic (used by rr/rm).
//   asm_alt            - asm string taking the condition as an explicit
//                        immediate (used by the parser-only *_alt defs).
// All four defs are scalar operations, so the memory forms use the scalar
// itinerary IIC_SSE_ALU_F32S_RM to pair with the register forms'
// IIC_SSE_ALU_F32S_RR (rather than the packed F32P_RM class).
multiclass avx512_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
                            Operand CC, SDNode OpNode, ValueType VT,
                            PatFrag ld_frag, string asm, string asm_alt> {
  def rr : AVX512Ii8<0xC2, MRMSrcReg,
                (outs VK1:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
                [(set VK1:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
                IIC_SSE_ALU_F32S_RR>, EVEX_4V;
  def rm : AVX512Ii8<0xC2, MRMSrcMem,
                (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
                [(set VK1:$dst, (OpNode (VT RC:$src1),
                (ld_frag addr:$src2), imm:$cc))], IIC_SSE_ALU_F32S_RM>, EVEX_4V;
  // Explicit-immediate spellings; accepted by the assembler only.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512Ii8<0xC2, MRMSrcReg,
               (outs VK1:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
               asm_alt, [], IIC_SSE_ALU_F32S_RR>, EVEX_4V;
    def rmi_alt : AVX512Ii8<0xC2, MRMSrcMem,
               (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
               asm_alt, [], IIC_SSE_ALU_F32S_RM>, EVEX_4V;
  }
}
982
// AVX-512 scalar compares into a mask register.
// EVEX_CD8 is required so the memory forms encode an 8-bit displacement with
// the compressed disp8*N scaling that EVEX uses (N = one scalar element:
// 4 bytes for ss, 8 bytes for sd). Without it a disp8 would be emitted
// unscaled and address the wrong location.
let Predicates = [HasAVX512] in {
defm VCMPSSZ : avx512_cmp_scalar<FR32X, f32mem, AVXCC, X86cmpms, f32, loadf32,
                 "vcmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 "vcmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
                 XS, EVEX_CD8<32, CD8VT1>;
defm VCMPSDZ : avx512_cmp_scalar<FR64X, f64mem, AVXCC, X86cmpms, f64, loadf64,
                 "vcmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 "vcmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
                 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000993
// Packed integer compare producing a mask register.
//   OpNode - compare DAG node taking two vectors.
//   _      - X86VectorVTInfo bundle supplying RC, KRC, KRCWM, VT, MemOp and
//            LdFrag for the vector type.
// Produces:
//   rr,  rm   - unmasked register / memory forms.
//   rrk, rmk  - masked forms; the pattern ANDs the compare with $mask.
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86VectorVTInfo _> {
  def rr : AVX512BI<opc, MRMSrcReg,
                    (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
                    OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                      (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))],
                    IIC_SSE_ALU_F32P_RR>,
           EVEX_4V;

  let mayLoad = 1 in
  def rm : AVX512BI<opc, MRMSrcMem,
                    (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
                    OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT (bitconvert (_.LdFrag addr:$src2)))))],
                    IIC_SSE_ALU_F32P_RM>,
           EVEX_4V;

  def rrk : AVX512BI<opc, MRMSrcReg,
                     (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                     OpcodeStr # "\t{$src2, $src1, $dst {${mask}}|"
                                 "$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,
                       (and _.KRCWM:$mask,
                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))],
                     IIC_SSE_ALU_F32P_RR>,
            EVEX_4V, EVEX_K;

  let mayLoad = 1 in
  def rmk : AVX512BI<opc, MRMSrcMem,
                     (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
                     OpcodeStr # "\t{$src2, $src1, $dst {${mask}}|"
                                 "$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,
                       (and _.KRCWM:$mask,
                            (OpNode (_.VT _.RC:$src1),
                                    (_.VT (bitconvert
                                           (_.LdFrag addr:$src2))))))],
                     IIC_SSE_ALU_F32P_RM>,
            EVEX_4V, EVEX_K;
}
1026
// Extends avx512_icmp_packed with embedded-broadcast memory forms: $src2 is a
// scalar memory operand that is broadcast (EVEX_B) before the compare.
//   rmb  - unmasked broadcast form.
//   rmbk - masked broadcast form; the pattern ANDs the compare with $mask.
// _.BroadcastStr is appended to the memory operand in the asm string.
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86VectorVTInfo _>
    : avx512_icmp_packed<opc, OpcodeStr, OpNode, _> {
  let mayLoad = 1 in {
    def rmb : AVX512BI<opc, MRMSrcMem,
                       (outs _.KRC:$dst),
                       (ins _.RC:$src1, _.ScalarMemOp:$src2),
                       !strconcat(OpcodeStr,
                                  "\t{${src2}", _.BroadcastStr,
                                  ", $src1, $dst",
                                  "|$dst, $src1, ${src2}", _.BroadcastStr,
                                  "}"),
                       [(set _.KRC:$dst,
                         (OpNode (_.VT _.RC:$src1),
                                 (X86VBroadcast
                                   (_.ScalarLdFrag addr:$src2))))],
                       IIC_SSE_ALU_F32P_RM>,
              EVEX_4V, EVEX_B;

    def rmbk : AVX512BI<opc, MRMSrcMem,
                        (outs _.KRC:$dst),
                        (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
                        !strconcat(OpcodeStr,
                                   "\t{${src2}", _.BroadcastStr,
                                   ", $src1, $dst {${mask}}|",
                                   "$dst {${mask}}, $src1, ${src2}",
                                   _.BroadcastStr, "}"),
                        [(set _.KRC:$dst,
                          (and _.KRCWM:$mask,
                               (OpNode (_.VT _.RC:$src1),
                                       (X86VBroadcast
                                         (_.ScalarLdFrag addr:$src2)))))],
                        IIC_SSE_ALU_F32P_RM>,
               EVEX_4V, EVEX_K, EVEX_B;
  }
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001051
// Instantiates avx512_icmp_packed for all three vector lengths.
//   VTInfo - bundle providing info512 / info256 / info128.
//   prd    - feature predicate guarding the instruction.
// Z is guarded by prd alone; Z256 and Z128 additionally require HasVLX.
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info128>,
                EVEX_V128;
  }
}
1065
// Same as avx512_icmp_packed_vl, but built on avx512_icmp_packed_rmb so every
// vector length also gets the embedded-broadcast forms.
// Z is guarded by prd alone; Z256 and Z128 additionally require HasVLX.
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     SDNode OpNode,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode,
                                       VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode,
                                       VTInfo.info128>,
                EVEX_V128;
  }
}
1080
// Packed integer equality compares. Byte/word forms are guarded by HasBWI and
// have no broadcast variants; dword/qword forms are guarded by HasAVX512 and
// include the embedded-broadcast variants.
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
                                      avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>;

defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm,
                                      avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>;

defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm,
                                          avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm,
                                          avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Packed integer greater-than compares, with the same predicate split.
defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
                                      avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>;

defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
                                      avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>;

defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
                                          avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
                                          avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001112
// 256-bit dword compares implemented with the 512-bit instruction: both YMM
// operands are placed in ZMM registers and the resulting mask is re-classed
// to VK8.
def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
          (COPY_TO_REGCLASS
            (VPCMPGTDZrr
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))),
            VK8)>;

def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
          (COPY_TO_REGCLASS
            (VPCMPEQDZrr
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))),
            VK8)>;
1122
// Packed integer compare with a condition-code operand, producing a mask.
//   Suffix - appended to "vpcmp${cc}" / "vpcmp" to form the mnemonic.
//   OpNode - compare DAG node taking (lhs, rhs, imm cc).
//   _      - X86VectorVTInfo bundle for the vector type.
// Produces:
//   rri,  rmi   - unmasked register / memory forms.
//   rrik, rmik  - masked forms; the pattern ANDs the compare with $mask.
//   *_alt       - assembler-only spellings taking the condition as an
//                 explicit i8 immediate; they carry no patterns.
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
                          X86VectorVTInfo _> {
  def rri : AVX512AIi8<opc, MRMSrcReg,
                       (outs _.KRC:$dst),
                       (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                       "vpcmp${cc}" # Suffix #
                       "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set _.KRC:$dst,
                         (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                 imm:$cc))],
                       IIC_SSE_ALU_F32P_RR>,
            EVEX_4V;

  let mayLoad = 1 in
  def rmi : AVX512AIi8<opc, MRMSrcMem,
                       (outs _.KRC:$dst),
                       (ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
                       "vpcmp${cc}" # Suffix #
                       "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set _.KRC:$dst,
                         (OpNode (_.VT _.RC:$src1),
                                 (_.VT (bitconvert (_.LdFrag addr:$src2))),
                                 imm:$cc))],
                       IIC_SSE_ALU_F32P_RM>,
            EVEX_4V;

  def rrik : AVX512AIi8<opc, MRMSrcReg,
                        (outs _.KRC:$dst),
                        (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                        "vpcmp${cc}" # Suffix #
                        "\t{$src2, $src1, $dst {${mask}}|"
                        "$dst {${mask}}, $src1, $src2}",
                        [(set _.KRC:$dst,
                          (and _.KRCWM:$mask,
                               (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                       imm:$cc)))],
                        IIC_SSE_ALU_F32P_RR>,
             EVEX_4V, EVEX_K;

  let mayLoad = 1 in
  def rmik : AVX512AIi8<opc, MRMSrcMem,
                        (outs _.KRC:$dst),
                        (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                             AVXCC:$cc),
                        "vpcmp${cc}" # Suffix #
                        "\t{$src2, $src1, $dst {${mask}}|"
                        "$dst {${mask}}, $src1, $src2}",
                        [(set _.KRC:$dst,
                          (and _.KRCWM:$mask,
                               (OpNode (_.VT _.RC:$src1),
                                       (_.VT (bitconvert
                                              (_.LdFrag addr:$src2))),
                                       imm:$cc)))],
                        IIC_SSE_ALU_F32P_RM>,
             EVEX_4V, EVEX_K;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512AIi8<opc, MRMSrcReg,
                             (outs _.KRC:$dst),
                             (ins _.RC:$src1, _.RC:$src2, i8imm:$cc),
                             "vpcmp" # Suffix #
                             "\t{$cc, $src2, $src1, $dst|"
                             "$dst, $src1, $src2, $cc}",
                             [], IIC_SSE_ALU_F32P_RR>,
                  EVEX_4V;

    def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
                             (outs _.KRC:$dst),
                             (ins _.RC:$src1, _.MemOp:$src2, i8imm:$cc),
                             "vpcmp" # Suffix #
                             "\t{$cc, $src2, $src1, $dst|"
                             "$dst, $src1, $src2, $cc}",
                             [], IIC_SSE_ALU_F32P_RM>,
                  EVEX_4V;

    def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
                              (outs _.KRC:$dst),
                              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                   i8imm:$cc),
                              "vpcmp" # Suffix #
                              "\t{$cc, $src2, $src1, $dst {${mask}}|"
                              "$dst {${mask}}, $src1, $src2, $cc}",
                              [], IIC_SSE_ALU_F32P_RR>,
                   EVEX_4V, EVEX_K;

    def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
                              (outs _.KRC:$dst),
                              (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                   i8imm:$cc),
                              "vpcmp" # Suffix #
                              "\t{$cc, $src2, $src1, $dst {${mask}}|"
                              "$dst {${mask}}, $src1, $src2, $cc}",
                              [], IIC_SSE_ALU_F32P_RM>,
                   EVEX_4V, EVEX_K;
  }
}
1192
// Extends avx512_icmp_cc with compare forms whose second source is a scalar
// memory operand wrapped in X86VBroadcast (all of them carry EVEX_B):
//   rmib     / rmibk      - selectable forms, without / with a write-mask.
//   rmib_alt / rmibk_alt  - assembler-only forms that take the comparison
//                           predicate as an explicit i8 immediate.
multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
                              X86VectorVTInfo _> :
           avx512_icmp_cc<opc, Suffix, OpNode, _> {
  // Unmasked compare against a broadcast scalar load.
  let mayLoad = 1 in
  def rmib : AVX512AIi8<opc, MRMSrcMem,
                        (outs _.KRC:$dst),
                        (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
                        !strconcat("vpcmp${cc}", Suffix,
                                   "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
                                   "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
                        [(set _.KRC:$dst,
                              (OpNode (_.VT _.RC:$src1),
                                      (X86VBroadcast
                                        (_.ScalarLdFrag addr:$src2)),
                                      imm:$cc))],
                        IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;

  // Same compare, with the result AND-ed with $mask.
  let mayLoad = 1 in
  def rmibk : AVX512AIi8<opc, MRMSrcMem,
                         (outs _.KRC:$dst),
                         (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2,
                              AVXCC:$cc),
                         !strconcat("vpcmp${cc}", Suffix,
                                    "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                                    "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
                         [(set _.KRC:$dst,
                               (and _.KRCWM:$mask,
                                    (OpNode (_.VT _.RC:$src1),
                                            (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src2)),
                                            imm:$cc)))],
                         IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;

  // Assembler-only variants: the predicate is written as a plain immediate
  // instead of being folded into the mnemonic. They have no selection pattern.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
                              (outs _.KRC:$dst),
                              (ins _.RC:$src1, _.ScalarMemOp:$src2, i8imm:$cc),
                              !strconcat("vpcmp", Suffix,
                                         "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
                                         "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
                              [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;

    def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
                               (outs _.KRC:$dst),
                               (ins _.KRCWM:$mask, _.RC:$src1,
                                    _.ScalarMemOp:$src2, i8imm:$cc),
                               !strconcat("vpcmp", Suffix,
                                          "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
                               [], IIC_SSE_ALU_F32P_RM>,
                    EVEX_4V, EVEX_K, EVEX_B;
  }
}
1238
// Instantiates avx512_icmp_cc once per vector length described by VTInfo.
// The Z (info512) variant is guarded by prd alone; Z256 and Z128 additionally
// require HasVLX.
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,
                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info512>, EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info128>,
                EVEX_V128;
  }
}
1249
// Same per-vector-length expansion as avx512_icmp_cc_vl, but built on
// avx512_icmp_cc_rmb so that each length also gets the broadcast-memory forms.
multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, SDNode OpNode,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info128>,
                EVEX_V128;
  }
}
1263
// Byte and word integer compares: guarded by HasBWI, no broadcast forms.
// The signed variants use X86cmpm, the unsigned ones X86cmpmu.
defm VPCMPB  : avx512_icmp_cc_vl<0x3F, "b",  X86cmpm,  avx512vl_i8_info,
                                 HasBWI>,
               EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, avx512vl_i8_info,
                                 HasBWI>,
               EVEX_CD8<8, CD8VF>;

defm VPCMPW  : avx512_icmp_cc_vl<0x3F, "w",  X86cmpm,  avx512vl_i16_info,
                                 HasBWI>,
               VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, avx512vl_i16_info,
                                 HasBWI>,
               VEX_W, EVEX_CD8<16, CD8VF>;

// Dword and qword integer compares: guarded by HasAVX512, with broadcast
// forms. The qword variants set VEX_W.
defm VPCMPD  : avx512_icmp_cc_rmb_vl<0x1F, "d",  X86cmpm,  avx512vl_i32_info,
                                     HasAVX512>,
               EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, avx512vl_i32_info,
                                     HasAVX512>,
               EVEX_CD8<32, CD8VF>;

defm VPCMPQ  : avx512_icmp_cc_rmb_vl<0x1F, "q",  X86cmpm,  avx512vl_i64_info,
                                     HasAVX512>,
               VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info,
                                     HasAVX512>,
               VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001283
// avx512_cmp_packed - packed floating-point compares (opcode 0xC2) that write
// their result to a mask register.
//
//   KRC      - mask register class receiving the result.
//   RC       - vector register class of the sources.
//   x86memop - memory operand used by the load forms.
//   vt       - vector value type matched by the selection patterns.
//   suffix   - mnemonic suffix appended to "vcmp${cc}" / "vcmp".
//   d        - execution domain passed to AVX512PIi8.
//
// Defs:
//   rri / rmi         - register / memory forms selected from X86cmpm; the
//                       predicate is printed as part of the mnemonic.
//   rrib              - register form marked EVEX_B and printed with {sae};
//                       it has no selection pattern here.
//   rri_alt / rmi_alt - assembler-only forms with an explicit i8 immediate.
multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
                           X86MemOperand x86memop, ValueType vt,
                           string suffix, Domain d> {
  def rri : AVX512PIi8<0xC2, MRMSrcReg,
             (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
             !strconcat("vcmp${cc}", suffix,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set KRC:$dst, (X86cmpm (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>;
  def rrib: AVX512PIi8<0xC2, MRMSrcReg,
             (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
     !strconcat("vcmp${cc}", suffix,
                " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
                [], d>, EVEX_B;
  // The predicate is already encoded in the "vcmp${cc}" mnemonic, so the
  // Intel-syntax operand list must not repeat $cc (it previously did, unlike
  // rri, which made the two syntaxes of this form disagree).
  def rmi : AVX512PIi8<0xC2, MRMSrcMem,
             (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc),
              !strconcat("vcmp${cc}", suffix,
                         " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set KRC:$dst,
              (X86cmpm (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512PIi8<0xC2, MRMSrcReg,
               (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
              !strconcat("vcmp", suffix,
                        " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
    def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem,
               (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
              !strconcat("vcmp", suffix,
                        " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
  }
}
1317
// 512-bit packed FP compares: single precision into VK16, double precision
// into VK8.
defm VCMPPSZ : avx512_cmp_packed<VK16, VR512, f512mem, v16f32, "ps",
                                 SSEPackedSingle>,
               PS, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VCMPPDZ : avx512_cmp_packed<VK8, VR512, f512mem, v8f64, "pd",
                                 SSEPackedDouble>,
               PD, EVEX_4V, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// 256-bit compares producing v8i1: place each VR256X source into a 512-bit
// register via SUBREG_TO_REG, use the 512-bit instruction, and copy the result
// to VK8.
def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2),
                         imm:$cc)),
          (COPY_TO_REGCLASS
            (VCMPPSZrri
              (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
              imm:$cc),
            VK8)>;
def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2),
                         imm:$cc)),
          (COPY_TO_REGCLASS
            (VPCMPDZrri
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
              imm:$cc),
            VK8)>;
def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2),
                          imm:$cc)),
          (COPY_TO_REGCLASS
            (VPCMPUDZrri
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
              imm:$cc),
            VK8)>;

// Masked-compare intrinsics called with an all-ones mask.
// FROUND_NO_EXC selects the rrib ({sae}) instruction form.
def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
                                               (v16f32 VR512:$src2),
                                               imm:$cc, (i16 -1),
                                               FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
            (VCMPPSZrrib VR512:$src1, VR512:$src2, (I8Imm imm:$cc)),
            GR16)>;

def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
                                              (v8f64 VR512:$src2),
                                              imm:$cc, (i8 -1),
                                              FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
            (VCMPPDZrrib VR512:$src1, VR512:$src2, (I8Imm imm:$cc)),
            GR8)>;

// FROUND_CURRENT selects the plain rri instruction form.
def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
                                               (v16f32 VR512:$src2),
                                               imm:$cc, (i16 -1),
                                               FROUND_CURRENT)),
          (COPY_TO_REGCLASS
            (VCMPPSZrri VR512:$src1, VR512:$src2, (I8Imm imm:$cc)),
            GR16)>;

def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
                                              (v8f64 VR512:$src2),
                                              imm:$cc, (i8 -1),
                                              FROUND_CURRENT)),
          (COPY_TO_REGCLASS
            (VCMPPDZrri VR512:$src1, VR512:$src2, (I8Imm imm:$cc)),
            GR8)>;
1364
// Mask register moves. Two multiclasses cover:
//  - mask register to mask register copies and mask loads/stores
//    (avx512_mask_mov)
//  - copies between a GPR and a mask register (avx512_mask_mov_gpr)
//
// avx512_mask_mov defines:
//   kk - mask <- mask            (opc_kk, no pattern)
//   km - mask <- memory          (opc_km, selected from a load of ivt
//                                 bitconverted to vvt)
//   mk - memory <- mask          (opc_mk, no pattern)
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                           string OpcodeStr, RegisterClass KRC,
                           ValueType vvt, ValueType ivt,
                           X86MemOperand x86memop> {
  let hasSideEffects = 0 in
  def kk : I<opc_kk, MRMSrcReg,
             (outs KRC:$dst), (ins KRC:$src),
             !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
             []>;

  let hasSideEffects = 0, mayLoad = 1 in
  def km : I<opc_km, MRMSrcMem,
             (outs KRC:$dst), (ins x86memop:$src),
             !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
             [(set KRC:$dst, (vvt (bitconvert (ivt (load addr:$src)))))]>;

  let hasSideEffects = 0, mayStore = 1 in
  def mk : I<opc_mk, MRMDestMem,
             (outs), (ins x86memop:$dst, KRC:$src),
             !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
             []>;
}
1385
// Moves between a general-purpose register class (GRC) and a mask register
// class (KRC). Neither def carries a selection pattern.
//   kr - mask <- GPR  (opc_kr)
//   rk - GPR  <- mask (opc_rk)
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                               string OpcodeStr,
                               RegisterClass KRC, RegisterClass GRC> {
  let hasSideEffects = 0 in
  def kr : I<opc_kr, MRMSrcReg,
             (outs KRC:$dst), (ins GRC:$src),
             !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
             []>;

  let hasSideEffects = 0 in
  def rk : I<opc_rk, MRMSrcReg,
             (outs GRC:$dst), (ins KRC:$src),
             !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
             []>;
}
1396
// KMOVB: 8-bit masks, guarded by HasDQI. The GPR side uses GR32.
let Predicates = [HasDQI] in {
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8,
                               i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;
}

// KMOVW: 16-bit masks, guarded by HasAVX512. The GPR side uses GR32.
let Predicates = [HasAVX512] in {
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16,
                               i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;
}

// KMOVD / KMOVQ: 32- and 64-bit masks, guarded by HasBWI. The mask/memory
// forms and the GPR forms are separate defms because they take different
// prefix classes.
let Predicates = [HasBWI] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32,
                               i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;

  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64,
                               i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, XD, VEX_W;
}
1422
// Bitconverts between a scalar integer in a GPR and a mask vector.
// The 8- and 16-bit cases go through the 32-bit GPR operand of KMOVB/KMOVW
// (SUBREG_TO_REG on the way in, EXTRACT_SUBREG on the way out).
let Predicates = [HasDQI] in {
  def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
            (KMOVBkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit))>;
  def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
            (EXTRACT_SUBREG (KMOVBrk VK8:$src), sub_8bit)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
            (KMOVWkr (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit))>;
  def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
            (EXTRACT_SUBREG (KMOVWrk VK16:$src), sub_16bit)>;
}

// The 32- and 64-bit cases map directly onto KMOVD/KMOVQ.
let Predicates = [HasBWI] in {
  def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
            (KMOVDkr GR32:$src)>;
  def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
            (KMOVDrk VK32:$src)>;

  def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
            (KMOVQkr GR64:$src)>;
  def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
            (KMOVQrk VK64:$src)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001444
// Load/store kreg
let Predicates = [HasDQI] in {
  def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
            (KMOVBmk addr:$dst, VK8:$src)>;
}
let Predicates = [HasAVX512] in {
  def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
            (KMOVWmk addr:$dst, VK16:$src)>;

  // The memory forms of KMOVW always access 16 bits. Using them for 8-bit
  // (or i1) accesses would write or read one byte past the requested
  // location, so these cases go through a GPR with byte-sized memory
  // instructions instead.
  def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
            (MOV8mr addr:$dst,
             (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
              sub_8bit))>;
  def : Pat<(i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVWkr (MOVZX32rm8 addr:$src)), VK1)>;
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (KMOVWkr (MOVZX32rm8 addr:$src)), VK8)>;
}
let Predicates = [HasBWI] in {
  def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst),
            (KMOVDmk addr:$dst, VK32:$src)>;
}
let Predicates = [HasBWI] in {
  def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst),
            (KMOVQmk addr:$dst, VK64:$src)>;
}
Elena Demikhovskyc5f67262013-12-17 08:33:15 +00001468
let Predicates = [HasAVX512] in {
  // Truncation of a scalar integer to i1: keep bit 0 with AND32ri, move the
  // result into a mask register with KMOVW, and treat it as VK1.
  def : Pat<(i1 (trunc (i64 GR64:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr (AND32ri (EXTRACT_SUBREG $src, sub_32bit), (i32 1))),
              VK1)>;

  def : Pat<(i1 (trunc (i32 GR32:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr (AND32ri $src, (i32 1))),
              VK1)>;

  def : Pat<(i1 (trunc (i8 GR8:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr
                (AND32ri (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit),
                         (i32 1))),
              VK1)>;
  def : Pat<(i1 (trunc (i16 GR16:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr
                (AND32ri (SUBREG_TO_REG (i32 0), $src, sub_16bit),
                         (i32 1))),
              VK1)>;

  // Zero-extension of an i1 mask to a scalar integer: move the mask to a
  // 32-bit GPR with KMOVW, AND with 1, then take or widen to the result size.
  def : Pat<(i32 (zext VK1:$src)),
            (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
  def : Pat<(i8 (zext VK1:$src)),
            (EXTRACT_SUBREG
              (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
              sub_8bit)>;
  def : Pat<(i64 (zext VK1:$src)),
            (AND64ri8
              (SUBREG_TO_REG (i64 0),
                             (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)),
                             sub_32bit),
              (i64 1))>;
  def : Pat<(i16 (zext VK1:$src)),
            (EXTRACT_SUBREG
              (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
              sub_16bit)>;

  // scalar_to_vector from i1 is a plain register-class copy.
  def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
            (COPY_TO_REGCLASS VK1:$src, VK16)>;
  def : Pat<(v8i1 (scalar_to_vector VK1:$src)),
            (COPY_TO_REGCLASS VK1:$src, VK8)>;
}

let Predicates = [HasBWI] in {
  def : Pat<(v32i1 (scalar_to_vector VK1:$src)),
            (COPY_TO_REGCLASS VK1:$src, VK32)>;
  def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
            (COPY_TO_REGCLASS VK1:$src, VK64)>;
}
1510
1511
// Under plain HasAVX512 an 8-bit mask is handled as a 16-bit mask: the
// bitconverts below use KMOVW and adjust the register class around it.
let Predicates = [HasAVX512] in {
  // i8 in a GPR -> v8i1
  def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
              VK8)>;
  // v8i1 -> i8 in a GPR
  def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
            (EXTRACT_SUBREG
              (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
              sub_8bit)>;

  // Extracting element 0 of a mask vector is a register-class copy to VK1.
  def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK16:$src, VK1)>;
  def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK8:$src, VK1)>;
}

let Predicates = [HasBWI] in {
  def : Pat<(i1 (X86Vextract VK32:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK32:$src, VK1)>;
  def : Pat<(i1 (X86Vextract VK64:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK64:$src, VK1)>;
}
1535
// Mask unary operation
// - KNOT
//
// Defines a single register-to-register instruction "rr" on mask class KRC,
// selected from (OpNode KRC:$src) and guarded by predicate prd.
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            Predicate prd> {
  let Predicates = [prd] in {
    def rr : I<opc, MRMSrcReg,
               (outs KRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
               [(set KRC:$dst, (OpNode KRC:$src))]>;
  }
}
1546
// Expands avx512_mask_unop for every mask width, appending b/w/d/q to the
// mnemonic: B (VK8, HasDQI), W (VK16, HasAVX512), D (VK32, HasBWI) and
// Q (VK64, HasBWI).
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode> {
  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                            HasDQI>,
           VEX, PD;
  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                            HasAVX512>,
           VEX, PS;
  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                            HasBWI>,
           VEX, PD, VEX_W;
  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                            HasBWI>,
           VEX, PS, VEX_W;
}

defm KNOT : avx512_mask_unop_all<0x44, "knot", not>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001560
// Maps the 16-bit unary mask intrinsic int_x86_avx512_<IntName>_w onto the
// <InstName>Wrr instruction: the i16 GPR operand is copied into VK16, the
// instruction is applied, and the result is copied back to GR16.
multiclass avx512_mask_unop_int<string IntName, string InstName> {
  let Predicates = [HasAVX512] in {
    def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
                (i16 GR16:$src)),
              (COPY_TO_REGCLASS
                (!cast<Instruction>(InstName##"Wrr")
                  (v16i1 (COPY_TO_REGCLASS GR16:$src, VK16))),
                GR16)>;
  }
}
defm : avx512_mask_unop_int<"knot", "KNOT">;
1569
// xor with an all-ones mask vector is selected as the KNOT of matching width.
let Predicates = [HasDQI] in {
  def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
            (KNOTBrr VK8:$src1)>;
}
let Predicates = [HasAVX512] in {
  def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)),
            (KNOTWrr VK16:$src1)>;
}
let Predicates = [HasBWI] in {
  def : Pat<(xor VK32:$src1, (v32i1 immAllOnesV)),
            (KNOTDrr VK32:$src1)>;
  def : Pat<(xor VK64:$src1, (v64i1 immAllOnesV)),
            (KNOTQrr VK64:$src1)>;
}

// Plain HasAVX512 fallback for 8-bit masks: widen VK8 to VK16, apply KNOTW,
// and narrow the result back to VK8.
let Predicates = [HasAVX512] in {
  def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
            (COPY_TO_REGCLASS
              (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)),
              VK8)>;

  def : Pat<(not VK8:$src),
            (COPY_TO_REGCLASS
              (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)),
              VK8)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001588
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
//
// Defines a single three-operand instruction "rr" on mask class KRC, selected
// from (OpNode KRC:$src1, KRC:$src2) and guarded by predicate prd.
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC, SDPatternOperator OpNode,
                             Predicate prd> {
  let Predicates = [prd] in {
    def rr : I<opc, MRMSrcReg,
               (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
               !strconcat(OpcodeStr,
                          " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>;
  }
}
1600
// Expands avx512_mask_binop for every mask width, appending b/w/d/q to the
// mnemonic: B (VK8, HasDQI), W (VK16, HasAVX512), D (VK32, HasBWI) and
// Q (VK64, HasBWI).
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode> {
  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                             HasDQI>,
           VEX_4V, VEX_L, PD;
  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                             HasAVX512>,
           VEX_4V, VEX_L, PS;
  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                             HasBWI>,
           VEX_4V, VEX_L, VEX_W, PD;
  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                             HasBWI>,
           VEX_4V, VEX_L, VEX_W, PS;
}
1612
// Pattern fragments for the two composite mask operations:
//   andn(a, b) = (not a) and b
//   xnor(a, b) = not (a xor b)
def andn : PatFrag<(ops node:$i0, node:$i1),
                   (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1),
                   (not (xor node:$i0, node:$i1))>;

// KAND, KOR, KXNOR and KXOR are marked commutable; KANDN is not.
let isCommutable = 1 in {
  defm KAND  : avx512_mask_binop_all<0x41, "kand",  and>;
  defm KOR   : avx512_mask_binop_all<0x45, "kor",   or>;
  defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor>;
  defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor>;
}
let isCommutable = 0 in {
  defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001624
// Logic on single-bit masks (VK1): copy both operands to VK16, use the 16-bit
// instruction, and copy the result back to VK1.
def : Pat<(xor VK1:$src1, VK1:$src2),
          (COPY_TO_REGCLASS
            (KXORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
                     (COPY_TO_REGCLASS VK1:$src2, VK16)),
            VK1)>;

def : Pat<(or VK1:$src1, VK1:$src2),
          (COPY_TO_REGCLASS
            (KORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
                    (COPY_TO_REGCLASS VK1:$src2, VK16)),
            VK1)>;

def : Pat<(and VK1:$src1, VK1:$src2),
          (COPY_TO_REGCLASS
            (KANDWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
                     (COPY_TO_REGCLASS VK1:$src2, VK16)),
            VK1)>;
1636
// Maps the 16-bit binary mask intrinsic int_x86_avx512_<IntName>_w onto the
// <InstName>Wrr instruction: both i16 GPR operands are copied into VK16, the
// instruction is applied, and the result is copied back to GR16.
multiclass avx512_mask_binop_int<string IntName, string InstName> {
  let Predicates = [HasAVX512] in {
    def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
                (i16 GR16:$src1), (i16 GR16:$src2)),
              (COPY_TO_REGCLASS
                (!cast<Instruction>(InstName##"Wrr")
                  (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
                  (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))),
                GR16)>;
  }
}

defm : avx512_mask_binop_int<"kand",  "KAND">;
defm : avx512_mask_binop_int<"kandn", "KANDN">;
defm : avx512_mask_binop_int<"kor",   "KOR">;
defm : avx512_mask_binop_int<"kxnor", "KXNOR">;
defm : avx512_mask_binop_int<"kxor",  "KXOR">;
Elena Demikhovskye382c3f2013-12-10 13:53:10 +00001651
// With AVX-512, 8-bit mask is promoted to 16-bit mask.
// Selects OpNode on two VK8 operands by copying both to VK16, applying the
// given 16-bit instruction Inst, and copying the result back to VK8.
multiclass avx512_binop_pat<SDPatternOperator OpNode, Instruction Inst> {
  let Predicates = [HasAVX512] in {
    def : Pat<(OpNode VK8:$src1, VK8:$src2),
              (COPY_TO_REGCLASS
                (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                      (COPY_TO_REGCLASS VK8:$src2, VK16)),
                VK8)>;
  }
}

defm : avx512_binop_pat<and,  KANDWrr>;
defm : avx512_binop_pat<andn, KANDNWrr>;
defm : avx512_binop_pat<or,   KORWrr>;
defm : avx512_binop_pat<xnor, KXNORWrr>;
defm : avx512_binop_pat<xor,  KXORWrr>;
1666
// Mask unpacking
//
// avx512_mask_unpck defines a pattern-less three-operand instruction "rr" on
// mask class KRC, guarded by HasAVX512.
multiclass avx512_mask_unpck<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC> {
  let Predicates = [HasAVX512] in {
    def rr : I<opc, MRMSrcReg,
               (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
               !strconcat(OpcodeStr,
                          " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               []>;
  }
}

// Byte-to-word flavour: appends "bw" to the mnemonic and operates on VK16.
multiclass avx512_mask_unpck_bw<bits<8> opc, string OpcodeStr> {
  defm BW : avx512_mask_unpck<opc, !strconcat(OpcodeStr, "bw"), VK16>,
            VEX_4V, VEX_L, PD;
}

defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">;

// Concatenating two v8i1 halves into a v16i1. Note that the operands are
// passed to KUNPCKBW in swapped order ($src2 first, then $src1).
def : Pat<(v16i1 (concat_vectors (v8i1 VK8:$src1), (v8i1 VK8:$src2))),
          (KUNPCKBWrr (COPY_TO_REGCLASS VK8:$src2, VK16),
                      (COPY_TO_REGCLASS VK8:$src1, VK16))>;
1685
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001686
// Maps the intrinsic int_x86_avx512_<IntName>_bw onto the <InstName>BWrr
// instruction: both i16 GPR operands are copied into VK16 (in the same order
// as the intrinsic arguments), and the result is copied back to GR16.
multiclass avx512_mask_unpck_int<string IntName, string InstName> {
  let Predicates = [HasAVX512] in {
    def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_bw")
                (i16 GR16:$src1), (i16 GR16:$src2)),
              (COPY_TO_REGCLASS
                (!cast<Instruction>(InstName##"BWrr")
                  (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
                  (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))),
                GR16)>;
  }
}
defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001696
// Mask bit testing
//
// avx512_mask_testop defines an instruction "rr" with no register output: it
// takes two KRC operands and sets EFLAGS from (OpNode $src1, $src2).
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode> {
  let Predicates = [HasAVX512] in
  let Defs = [EFLAGS] in
  def rr : I<opc, MRMSrcReg,
             (outs), (ins KRC:$src1, KRC:$src2),
             !strconcat(OpcodeStr, " \t{$src2, $src1|$src1, $src2}"),
             [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>;
}

// 16-bit flavour: appends "w" to the mnemonic and operates on VK16.
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  defm W : avx512_mask_testop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
           VEX, PS;
}

defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;

// Comparing an i1 mask against zero: KORTESTW of the value with itself, after
// copying it from VK1 to VK16.
def : Pat<(X86cmp VK1:$src1, (i1 0)),
          (KORTESTWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
                      (COPY_TO_REGCLASS VK1:$src1, VK16))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001716
// Mask shift
//
// avx512_mask_shiftop defines an instruction "ri" that shifts mask class KRC
// by an 8-bit immediate, selected from (OpNode KRC:$src, (i8 imm:$imm)).
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode> {
  let Predicates = [HasAVX512] in {
    def ri : Ii8<opc, MRMSrcReg,
                 (outs KRC:$dst), (ins KRC:$src, i8imm:$imm),
                 !strconcat(OpcodeStr,
                            " \t{$imm, $src, $dst|$dst, $src, $imm}"),
                 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>;
  }
}

// 16-bit flavour: appends "w" to the mnemonic and operates on VK16 using opc1.
// opc2 is accepted but not referenced by any def in this multiclass.
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode> {
  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
           VEX, TAPD, VEX_W;
}

defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86vsrli>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001735
// Mask setting all 0s or 1s
//
// Defines a pseudo instruction (named after the enclosing defm) that
// produces the constant `Val` of type VT in a KRC register. The pseudo takes
// no inputs, has an empty asm string, and is marked rematerializable and as
// cheap as a move.
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
  let Predicates = [HasAVX512],
      isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1 in {
    def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                   [(set KRC:$dst, (VT Val))]>;
  }
}
1743
// Instantiates the mask-constant pseudo for both the byte (VK8 / v8i1) and
// word (VK16 / v16i1) mask widths.
multiclass avx512_mask_setop_w<PatFrag Val> {
  defm B : avx512_mask_setop<VK8,  v8i1,  Val>;
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
}
1748
// All-zeros and all-ones mask constants (B and W variants of each).
defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
1751
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// Constant v8i1 and i1 values are therefore materialized with the 16-bit
// KSET0W / KSET1W pseudos and copied into the narrower register class.
// Both i1 1 and i1 -1 map to KSET1W.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;

  def : Pat<(i1 0),  (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(i1 1),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
  def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSET1W), VK1)>;
}
// Low half of a v16i1: a plain register-class copy.
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0))),
          (v8i1 (COPY_TO_REGCLASS VK16:$src, VK8))>;

// v8i1 inserted at index 0 of an undef v16i1: a plain register-class copy.
def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))),
          (v16i1 (COPY_TO_REGCLASS VK8:$src, VK16))>;

// High half of a v16i1: shift right by 8 with KSHIFTRW, then copy to VK8.
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
          (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;

// v8i1 shifts by an immediate are performed with the 16-bit shift
// instructions: copy to VK16, shift, copy the result back to VK8.
def : Pat<(v8i1 (X86vshli VK8:$src, (i8 imm:$imm))),
          (v8i1 (COPY_TO_REGCLASS
                  (KSHIFTLWri (COPY_TO_REGCLASS VK8:$src, VK16),
                              (I8Imm $imm)),
                  VK8))>;

def : Pat<(v8i1 (X86vsrli VK8:$src, (i8 imm:$imm))),
          (v8i1 (COPY_TO_REGCLASS
                  (KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16),
                              (I8Imm $imm)),
                  VK8))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001774//===----------------------------------------------------------------------===//
1775// AVX-512 - Aligned and unaligned load and store
1776//
1777
// Load-direction forms of an AVX-512 vector move for one register class.
//
//   rr    - register to register, no selection pattern.
//   rrkz  - zero-masked register to register, no selection pattern.
//   rm    - plain load through ld_frag, bitconverted to vt.
//   rrk   - merge-masked register move; $src0 is tied to $dst and is the
//           third operand of the vselect.
//   rmk   - merge-masked load; same tying as rrk.
//   rmkz  - zero-masked load; the third vselect operand is an all-zeros
//           vector of type zvt bitconverted to vt.
//
// IsReMaterializable is forwarded to isReMaterializable on the rm form only.
// The three pattern-carrying masked forms get AddedComplexity = 20.
multiclass avx512_load<bits<8> opc, string OpcodeStr, PatFrag ld_frag,
                       RegisterClass KRC, RegisterClass RC,
                       ValueType vt, ValueType zvt, X86MemOperand memop,
                       Domain d, bit IsReMaterializable = 1> {
  let hasSideEffects = 0 in {
    def rr : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}", [], d>,
             EVEX;
    def rrkz : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
                        (ins KRC:$mask, RC:$src),
                        OpcodeStr #
                        "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                        [], d>,
               EVEX, EVEX_KZ;
  }

  let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable,
      SchedRW = [WriteLoad] in {
    def rm : AVX512PI<opc, MRMSrcMem, (outs RC:$dst), (ins memop:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      [(set RC:$dst, (vt (bitconvert (ld_frag addr:$src))))],
                      d>,
             EVEX;
  }

  let AddedComplexity = 20 in {
    // Merge-masking forms: the pass-through value shares a register with $dst.
    let Constraints = "$src0 = $dst", hasSideEffects = 0 in {
      def rrk : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
                         (ins RC:$src0, KRC:$mask, RC:$src1),
                         OpcodeStr #
                         "\t{$src1, ${dst} {${mask}}|${dst} {${mask}}, $src1}",
                         [(set RC:$dst,
                               (vt (vselect KRC:$mask,
                                            (vt RC:$src1),
                                            (vt RC:$src0))))],
                         d>,
                EVEX, EVEX_K;

      let mayLoad = 1, SchedRW = [WriteLoad] in {
        def rmk : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
                           (ins RC:$src0, KRC:$mask, memop:$src1),
                           OpcodeStr #
                           "\t{$src1, ${dst} {${mask}}|${dst} {${mask}}, $src1}",
                           [(set RC:$dst,
                                 (vt (vselect KRC:$mask,
                                              (vt (bitconvert
                                                    (ld_frag addr:$src1))),
                                              (vt RC:$src0))))],
                           d>,
                  EVEX, EVEX_K;
      }
    }

    // Zero-masking load.
    let mayLoad = 1, SchedRW = [WriteLoad] in {
      def rmkz : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
                          (ins KRC:$mask, memop:$src),
                          OpcodeStr #
                          "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                          [(set RC:$dst,
                                (vt (vselect KRC:$mask,
                                             (vt (bitconvert
                                                   (ld_frag addr:$src))),
                                             (vt (bitconvert
                                                   (zvt immAllZerosV))))))],
                          d>,
                 EVEX, EVEX_KZ;
    }
  }
}
1831
// Instantiates avx512_load for the 512-, 256- and 128-bit vector lengths.
//
// Record names are assembled from strings:
//   ld_pat              - load fragment prefix (e.g. "load", "alignedload")
//   elty / elsz         - element kind ("f" or "i") and element size in bits
//   vsz512/vsz256/vsz128 - element count at each vector length
//
// The Z form requires `prd`; Z256 and Z128 additionally require HasVLX.
// For the 256/128-bit forms, when elty is not "f" the load fragment is the
// fixed v4i64 / v2i64 one rather than one derived from the element type.
// The zero-vector types passed to avx512_load are v16i32, v8i32 and v4i32.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, string ld_pat,
                          string elty, string elsz, string vsz512,
                          string vsz256, string vsz128, Domain d,
                          Predicate prd, bit IsReMaterializable = 1> {
  let Predicates = [prd] in {
    defm Z : avx512_load<opc, OpcodeStr,
                         !cast<PatFrag>(ld_pat##"v"##vsz512##elty##elsz),
                         !cast<RegisterClass>("VK"##vsz512##"WM"),
                         VR512,
                         !cast<ValueType>("v"##vsz512##elty##elsz),
                         v16i32,
                         !cast<X86MemOperand>(elty##"512mem"),
                         d, IsReMaterializable>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_load<opc, OpcodeStr,
                            !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
                                           "v"##vsz256##elty##elsz, "v4i64")),
                            !cast<RegisterClass>("VK"##vsz256##"WM"),
                            VR256X,
                            !cast<ValueType>("v"##vsz256##elty##elsz),
                            v8i32,
                            !cast<X86MemOperand>(elty##"256mem"),
                            d, IsReMaterializable>,
                EVEX_V256;

    defm Z128 : avx512_load<opc, OpcodeStr,
                            !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
                                           "v"##vsz128##elty##elsz, "v2i64")),
                            !cast<RegisterClass>("VK"##vsz128##"WM"),
                            VR128X,
                            !cast<ValueType>("v"##vsz128##elty##elsz),
                            v4i32,
                            !cast<X86MemOperand>(elty##"128mem"),
                            d, IsReMaterializable>,
                EVEX_V128;
  }
}
1862
1863
// Store-direction forms of an AVX-512 vector move for one register class.
//
//   rr_alt / rrk_alt / rrkz_alt
//        - register-to-register forms using the MRMDestReg encoding. They
//          carry no pattern and are marked isAsmParserOnly.
//          rrk_alt ties $src1 to $dst.
//   mr   - plain store of an OpVT value through st_frag.
//   mrk  - masked store; no selection pattern.
multiclass avx512_store<bits<8> opc, string OpcodeStr, PatFrag st_frag,
                        ValueType OpVT, RegisterClass KRC, RegisterClass RC,
                        X86MemOperand memop, Domain d> {
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rr_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst), (ins RC:$src),
                          OpcodeStr # "\t{$src, $dst|$dst, $src}", [], d>,
                 EVEX;

    let Constraints = "$src1 = $dst" in {
      def rrk_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
                             (ins RC:$src1, KRC:$mask, RC:$src2),
                             OpcodeStr #
                             "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}",
                             [], d>,
                    EVEX, EVEX_K;
    }

    def rrkz_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
                            (ins KRC:$mask, RC:$src),
                            OpcodeStr #
                            "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                            [], d>,
                   EVEX, EVEX_KZ;
  }

  let mayStore = 1 in {
    def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      [(st_frag (OpVT RC:$src), addr:$dst)], d>,
             EVEX;

    def mrk : AVX512PI<opc, MRMDestMem, (outs),
                       (ins memop:$dst, KRC:$mask, RC:$src),
                       OpcodeStr #
                       "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                       [], d>,
              EVEX, EVEX_K;
  }
}
1894
Robert Khasanov7ca7df02014-08-04 14:35:15 +00001895
// Instantiates avx512_store for the 512-, 256- and 128-bit vector lengths.
//
// The store fragment for each length is looked up by name as
// st_pat ## st_suff_<len>; value types, write-mask classes and memory
// operands are assembled from elty / elsz / vsz<len> in the same way as in
// avx512_load_vl. Z requires `prd`; Z256 and Z128 additionally require HasVLX.
multiclass avx512_store_vl<bits<8> opc, string OpcodeStr, string st_pat,
                           string st_suff_512, string st_suff_256,
                           string st_suff_128, string elty, string elsz,
                           string vsz512, string vsz256, string vsz128,
                           Domain d, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_store<opc, OpcodeStr,
                          !cast<PatFrag>(st_pat##st_suff_512),
                          !cast<ValueType>("v"##vsz512##elty##elsz),
                          !cast<RegisterClass>("VK"##vsz512##"WM"),
                          VR512,
                          !cast<X86MemOperand>(elty##"512mem"), d>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr,
                             !cast<PatFrag>(st_pat##st_suff_256),
                             !cast<ValueType>("v"##vsz256##elty##elsz),
                             !cast<RegisterClass>("VK"##vsz256##"WM"),
                             VR256X,
                             !cast<X86MemOperand>(elty##"256mem"), d>,
                EVEX_V256;

    defm Z128 : avx512_store<opc, OpcodeStr,
                             !cast<PatFrag>(st_pat##st_suff_128),
                             !cast<ValueType>("v"##vsz128##elty##elsz),
                             !cast<RegisterClass>("VK"##vsz128##"WM"),
                             VR128X,
                             !cast<X86MemOperand>(elty##"128mem"), d>,
                EVEX_V128;
  }
}
1919
// Packed floating-point moves. Each defm combines the load-direction and
// store-direction multiclasses under one name.
//
// Aligned forms use the "alignedload" fragment and the "alignedstore512" /
// "alignedstore256" / "alignedstore" fragments; unaligned forms use the plain
// "load" / "store" fragments for every length.

defm VMOVAPS :
  avx512_load_vl<0x28, "vmovaps", "alignedload", "f", "32",
                 "16", "8", "4", SSEPackedSingle, HasAVX512>,
  avx512_store_vl<0x29, "vmovaps", "alignedstore",
                  "512", "256", "", "f", "32", "16", "8", "4",
                  SSEPackedSingle, HasAVX512>,
  PS, EVEX_CD8<32, CD8VF>;

defm VMOVAPD :
  avx512_load_vl<0x28, "vmovapd", "alignedload", "f", "64",
                 "8", "4", "2", SSEPackedDouble, HasAVX512>,
  avx512_store_vl<0x29, "vmovapd", "alignedstore",
                  "512", "256", "", "f", "64", "8", "4", "2",
                  SSEPackedDouble, HasAVX512>,
  PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVUPS :
  avx512_load_vl<0x10, "vmovups", "load", "f", "32",
                 "16", "8", "4", SSEPackedSingle, HasAVX512>,
  avx512_store_vl<0x11, "vmovups", "store", "", "", "", "f", "32",
                  "16", "8", "4", SSEPackedSingle, HasAVX512>,
  PS, EVEX_CD8<32, CD8VF>;

// Note: VMOVUPD passes IsReMaterializable = 0 to the load multiclass.
defm VMOVUPD :
  avx512_load_vl<0x10, "vmovupd", "load", "f", "64",
                 "8", "4", "2", SSEPackedDouble, HasAVX512, 0>,
  avx512_store_vl<0x11, "vmovupd", "store", "", "", "", "f", "64",
                  "8", "4", "2", SSEPackedDouble, HasAVX512>,
  PD, VEX_W, EVEX_CD8<64, CD8VF>;
1945
// Masked unaligned FP load intrinsics whose pass-through operand is all
// zeros select the zero-masking load (rmkz). The GPR mask is copied into the
// matching write-mask register class.
def : Pat<(v8f64 (int_x86_avx512_mask_loadu_pd_512
                   addr:$ptr, (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
          (VMOVUPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                        addr:$ptr)>;

def : Pat<(v16f32 (int_x86_avx512_mask_loadu_ps_512
                    addr:$ptr, (bc_v16f32 (v16i32 immAllZerosV)), GR16:$mask)),
          (VMOVUPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                        addr:$ptr)>;

// Masked unaligned FP store intrinsics select the masked store (mrk).
def : Pat<(int_x86_avx512_mask_storeu_ps_512
            addr:$ptr, (v16f32 VR512:$src), GR16:$mask),
          (VMOVUPSZmrk addr:$ptr,
                       (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                       VR512:$src)>;

def : Pat<(int_x86_avx512_mask_storeu_pd_512
            addr:$ptr, (v8f64 VR512:$src), GR8:$mask),
          (VMOVUPDZmrk addr:$ptr,
                       (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                       VR512:$src)>;
Elena Demikhovsky1f3ed412013-10-22 09:19:28 +00001962
// Packed integer moves (load opcode 0x6F, store opcode 0x7F). Each defm
// combines the load-direction and store-direction multiclasses.
// The 8- and 16-bit element forms are predicated on HasBWI; the 32- and
// 64-bit element forms on HasAVX512.

defm VMOVDQA32 :
  avx512_load_vl<0x6F, "vmovdqa32", "alignedload", "i", "32",
                 "16", "8", "4", SSEPackedInt, HasAVX512>,
  avx512_store_vl<0x7F, "vmovdqa32", "alignedstore",
                  "512", "256", "", "i", "32", "16", "8", "4",
                  SSEPackedInt, HasAVX512>,
  PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 :
  avx512_load_vl<0x6F, "vmovdqa64", "alignedload", "i", "64",
                 "8", "4", "2", SSEPackedInt, HasAVX512>,
  avx512_store_vl<0x7F, "vmovdqa64", "alignedstore",
                  "512", "256", "", "i", "64", "8", "4", "2",
                  SSEPackedInt, HasAVX512>,
  PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVDQU8 :
  avx512_load_vl<0x6F, "vmovdqu8", "load", "i", "8",
                 "64", "32", "16", SSEPackedInt, HasBWI>,
  avx512_store_vl<0x7F, "vmovdqu8", "store", "", "", "",
                  "i", "8", "64", "32", "16", SSEPackedInt, HasBWI>,
  XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 :
  avx512_load_vl<0x6F, "vmovdqu16", "load", "i", "16",
                 "32", "16", "8", SSEPackedInt, HasBWI>,
  avx512_store_vl<0x7F, "vmovdqu16", "store", "", "", "",
                  "i", "16", "32", "16", "8", SSEPackedInt, HasBWI>,
  XD, VEX_W, EVEX_CD8<16, CD8VF>;

defm VMOVDQU32 :
  avx512_load_vl<0x6F, "vmovdqu32", "load", "i", "32",
                 "16", "8", "4", SSEPackedInt, HasAVX512>,
  avx512_store_vl<0x7F, "vmovdqu32", "store", "", "", "",
                  "i", "32", "16", "8", "4", SSEPackedInt, HasAVX512>,
  XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 :
  avx512_load_vl<0x6F, "vmovdqu64", "load", "i", "64",
                 "8", "4", "2", SSEPackedInt, HasAVX512>,
  avx512_store_vl<0x7F, "vmovdqu64", "store", "", "", "",
                  "i", "64", "8", "4", "2", SSEPackedInt, HasAVX512>,
  XS, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky1f3ed412013-10-22 09:19:28 +00002000
// Masked unaligned integer load intrinsics whose pass-through operand is all
// zeros select the zero-masking load (rmkz). The GPR mask is copied into the
// matching write-mask register class.
def : Pat<(v16i32 (int_x86_avx512_mask_loadu_d_512
                    addr:$ptr, (v16i32 immAllZerosV), GR16:$mask)),
          (VMOVDQU32Zrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                          addr:$ptr)>;

def : Pat<(v8i64 (int_x86_avx512_mask_loadu_q_512
                   addr:$ptr, (bc_v8i64 (v16i32 immAllZerosV)), GR8:$mask)),
          (VMOVDQU64Zrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                          addr:$ptr)>;

// Masked unaligned integer store intrinsics select the masked store (mrk).
def : Pat<(int_x86_avx512_mask_storeu_d_512
            addr:$ptr, (v16i32 VR512:$src), GR16:$mask),
          (VMOVDQU32Zmrk addr:$ptr,
                         (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                         VR512:$src)>;

def : Pat<(int_x86_avx512_mask_storeu_q_512
            addr:$ptr, (v8i64 VR512:$src), GR8:$mask),
          (VMOVDQU64Zmrk addr:$ptr,
                         (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                         VR512:$src)>;
2017
// A vselect between a 512-bit integer register and an all-zeros vector is
// selected as a zero-masking register move. When the zeros are the second
// vselect operand the mask is used directly; when they are the first, the
// mask is inverted with KNOTW beforehand.
let AddedComplexity = 20 in {
  def : Pat<(v8i64 (vselect VK8WM:$mask,
                            (v8i64 VR512:$src),
                            (bc_v8i64 (v16i32 immAllZerosV)))),
            (VMOVDQU64Zrrkz VK8WM:$mask, VR512:$src)>;

  // The 8-bit mask is widened to VK16 for KNOTW and narrowed again after.
  // NOTE(review): the result names the mask as VK8:$mask and copies the
  // inverted mask to VK8, while the source operand is VK8WM - confirm this
  // is intended.
  def : Pat<(v8i64 (vselect VK8WM:$mask,
                            (bc_v8i64 (v16i32 immAllZerosV)),
                            (v8i64 VR512:$src))),
            (VMOVDQU64Zrrkz
              (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
                                VK8),
              VR512:$src)>;

  def : Pat<(v16i32 (vselect VK16WM:$mask,
                             (v16i32 VR512:$src),
                             (v16i32 immAllZerosV))),
            (VMOVDQU32Zrrkz VK16WM:$mask, VR512:$src)>;

  def : Pat<(v16i32 (vselect VK16WM:$mask,
                             (v16i32 immAllZerosV),
                             (v16i32 VR512:$src))),
            (VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
}
Robert Khasanov7ca7df02014-08-04 14:35:15 +00002036
// Move Int Doubleword to Packed Double Int
//
// vmovd / vmovq from a general-purpose register or memory into the low
// element of an XMM register (scalar_to_vector).
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg,
                             (outs VR128X:$dst), (ins GR32:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set VR128X:$dst,
                                   (v4i32 (scalar_to_vector GR32:$src)))],
                             IIC_SSE_MOVDQ>,
                    EVEX, VEX_LIG;

def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem,
                             (outs VR128X:$dst), (ins i32mem:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set VR128X:$dst,
                                   (v4i32 (scalar_to_vector
                                            (loadi32 addr:$src))))],
                             IIC_SSE_MOVDQ>,
                    EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;

def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg,
                              (outs VR128X:$dst), (ins GR64:$src),
                              "vmovq\t{$src, $dst|$dst, $src}",
                              [(set VR128X:$dst,
                                    (v2i64 (scalar_to_vector GR64:$src)))],
                              IIC_SSE_MOVDQ>,
                     EVEX, VEX_W, VEX_LIG;

// Bitcasts between GR64 and FR64; code generation only.
let isCodeGenOnly = 1 in {
  def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg,
                               (outs FR64:$dst), (ins GR64:$src),
                               "vmovq\t{$src, $dst|$dst, $src}",
                               [(set FR64:$dst, (bitconvert GR64:$src))],
                               IIC_SSE_MOVDQ>,
                      EVEX, VEX_W, Sched<[WriteMove]>;

  def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg,
                               (outs GR64:$dst), (ins FR64:$src),
                               "vmovq\t{$src, $dst|$dst, $src}",
                               [(set GR64:$dst, (bitconvert FR64:$src))],
                               IIC_SSE_MOVDQ>,
                      EVEX, VEX_W, Sched<[WriteMove]>;
}

// Store an FR64 value to memory as an i64.
def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem,
                             (outs), (ins i64mem:$dst, FR64:$src),
                             "vmovq\t{$src, $dst|$dst, $src}",
                             [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
                             IIC_SSE_MOVDQ>,
                    EVEX, VEX_W, Sched<[WriteStore]>, EVEX_CD8<64, CD8VT1>;
2069
// Move Int Doubleword to Single Scalar
//
// Bitcast of a 32-bit integer (register or loaded from memory) into FR32X.
// Code generation only.
let isCodeGenOnly = 1 in {
  def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg,
                              (outs FR32X:$dst), (ins GR32:$src),
                              "vmovd\t{$src, $dst|$dst, $src}",
                              [(set FR32X:$dst, (bitconvert GR32:$src))],
                              IIC_SSE_MOVDQ>,
                     EVEX, VEX_LIG;

  def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem,
                              (outs FR32X:$dst), (ins i32mem:$src),
                              "vmovd\t{$src, $dst|$dst, $src}",
                              [(set FR32X:$dst,
                                    (bitconvert (loadi32 addr:$src)))],
                              IIC_SSE_MOVDQ>,
                     EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002083
// Move doubleword from xmm register to r/m32
//
// Extracts element 0 of a v4i32 into a GR32 or stores it to memory.
def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg,
                             (outs GR32:$dst), (ins VR128X:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set GR32:$dst,
                                   (vector_extract (v4i32 VR128X:$src),
                                                   (iPTR 0)))],
                             IIC_SSE_MOVD_ToGP>,
                    EVEX, VEX_LIG;

def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem,
                             (outs), (ins i32mem:$dst, VR128X:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(store (i32 (vector_extract (v4i32 VR128X:$src),
                                                          (iPTR 0))),
                                     addr:$dst)],
                             IIC_SSE_MOVDQ>,
                    EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
2097
// Move quadword from xmm1 register to r/m64
//
// Extracts element 0 of a v2i64 into a GR64 or stores it to memory. Both
// definitions require HasAVX512 and In64BitMode.
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg,
                       (outs GR64:$dst), (ins VR128X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set GR64:$dst,
                             (extractelt (v2i64 VR128X:$src), (iPTR 0)))],
                       IIC_SSE_MOVD_ToGP>,
                     PD, EVEX, VEX_LIG, VEX_W,
                     Requires<[HasAVX512, In64BitMode]>;

def VMOVPQIto64Zmr : I<0xD6, MRMDestMem,
                       (outs), (ins i64mem:$dst, VR128X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
                               addr:$dst)],
                       IIC_SSE_MOVDQ>,
                     EVEX, PD, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>,
                     Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
2114
// Move Scalar Single to Double Int
//
// Bitcast of an FR32X value into a GR32, or a store of it to memory as an
// i32. Code generation only.
let isCodeGenOnly = 1 in {
  def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg,
                              (outs GR32:$dst), (ins FR32X:$src),
                              "vmovd\t{$src, $dst|$dst, $src}",
                              [(set GR32:$dst, (bitconvert FR32X:$src))],
                              IIC_SSE_MOVD_ToGP>,
                     EVEX, VEX_LIG;

  def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem,
                              (outs), (ins i32mem:$dst, FR32X:$src),
                              "vmovd\t{$src, $dst|$dst, $src}",
                              [(store (i32 (bitconvert FR32X:$src)),
                                      addr:$dst)],
                              IIC_SSE_MOVDQ>,
                     EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002129
// Move Quadword Int to Packed Quadword Int
//
// Loads an i64 from memory into the low element of an XMM register
// (scalar_to_vector). No itinerary is given for this definition.
def VMOVQI2PQIZrm : AVX512BI<0x6E, MRMSrcMem,
                             (outs VR128X:$dst), (ins i64mem:$src),
                             "vmovq\t{$src, $dst|$dst, $src}",
                             [(set VR128X:$dst,
                                   (v2i64 (scalar_to_vector
                                            (loadi64 addr:$src))))]>,
                    EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
2138
2139//===----------------------------------------------------------------------===//
2140// AVX-512 MOVSS, MOVSD
2141//===----------------------------------------------------------------------===//
2142
// Scalar move forms (opcode 0x10 for the register/load forms, 0x11 for the
// stores).
//
//   rr   - OpNode of $src1 with scalar_to_vector of $src2, result in VR128X.
//   rrk  - masked register form with $src1 tied to $dst; no pattern.
//   rm   - scalar load through mem_pat into RC.
//   mr   - scalar store of RC.
//   mrk  - masked scalar store; no pattern.
//
// Every definition is created with hasSideEffects = 0; the two stores also
// have mayStore = 1.
multiclass avx512_move_scalar <string asm, RegisterClass RC,
                              SDNode OpNode, ValueType vt,
                              X86MemOperand x86memop, PatFrag mem_pat> {
  let hasSideEffects = 0 in {
    def rr : SI<0x10, MRMSrcReg,
                (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2),
                asm # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(set VR128X:$dst,
                      (vt (OpNode VR128X:$src1,
                                  (scalar_to_vector RC:$src2))))],
                IIC_SSE_MOV_S_RR>,
             EVEX_4V, VEX_LIG;

    let Constraints = "$src1 = $dst" in {
      def rrk : SI<0x10, MRMSrcReg,
                   (outs VR128X:$dst),
                   (ins VR128X:$src1, VK1WM:$mask, RC:$src2, RC:$src3),
                   asm #
                   " \t{$src3, $src2, $dst {${mask}}|$dst {${mask}}, $src2, $src3}",
                   [], IIC_SSE_MOV_S_RR>,
                EVEX_4V, VEX_LIG, EVEX_K;
    }

    def rm : SI<0x10, MRMSrcMem,
                (outs RC:$dst), (ins x86memop:$src),
                asm # " \t{$src, $dst|$dst, $src}",
                [(set RC:$dst, (mem_pat addr:$src))],
                IIC_SSE_MOV_S_RM>,
             EVEX, VEX_LIG;

    let mayStore = 1 in {
      def mr : SI<0x11, MRMDestMem,
                  (outs), (ins x86memop:$dst, RC:$src),
                  asm # " \t{$src, $dst|$dst, $src}",
                  [(store RC:$src, addr:$dst)],
                  IIC_SSE_MOV_S_MR>,
               EVEX, VEX_LIG;

      def mrk : SI<0x11, MRMDestMem,
                   (outs), (ins x86memop:$dst, VK1WM:$mask, RC:$src),
                   asm # " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                   [], IIC_SSE_MOV_S_MR>,
                EVEX, VEX_LIG, EVEX_K;
    } // mayStore = 1
  } // hasSideEffects = 0
}
2174
// Single-precision scalar move, in the packed-single execution domain.
let ExeDomain = SSEPackedSingle in {
  defm VMOVSSZ : avx512_move_scalar<"movss", FR32X, X86Movss, v4f32, f32mem,
                                    loadf32>,
                 XS, EVEX_CD8<32, CD8VT1>;
}

// Double-precision scalar move, in the packed-double execution domain.
let ExeDomain = SSEPackedDouble in {
  defm VMOVSDZ : avx512_move_scalar<"movsd", FR64X, X86Movsd, v2f64, f64mem,
                                    loadf64>,
                 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
}
2182
// Scalar X86select is selected as the masked scalar move (rrk). $src2 is
// copied to VR128X and passed as the tied first operand, the second source
// operand is IMPLICIT_DEF, and $src1 is the last operand. The VR128X result
// is copied back to the scalar register class.
def : Pat<(f32 (X86select VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS
            (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
                        VK1WM:$mask, (f32 (IMPLICIT_DEF)), FR32X:$src1),
            FR32X)>;

def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS
            (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
                        VK1WM:$mask, (f64 (IMPLICIT_DEF)), FR64X:$src1),
            FR64X)>;

// The masked scalar store intrinsic is selected as the masked store form;
// the GR8 mask is copied to VK1WM and the vector source to FR32X.
def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
          (VMOVSSZmrk addr:$dst,
                      (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)),
                      (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
2194
// For the disassembler
//
// Register-to-register scalar moves using the 0x11 / MRMDestReg encoding.
// They carry no selection pattern and are marked isCodeGenOnly with
// ForceDisassemble set.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
  def VMOVSSZrr_REV : SI<0x11, MRMDestReg,
                         (outs VR128X:$dst), (ins VR128X:$src1, FR32X:$src2),
                         "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [], IIC_SSE_MOV_S_RR>,
                      XS, EVEX_4V, VEX_LIG;

  def VMOVSDZrr_REV : SI<0x11, MRMDestReg,
                         (outs VR128X:$dst), (ins VR128X:$src1, FR64X:$src2),
                         "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [], IIC_SSE_MOV_S_RR>,
                      XD, EVEX_4V, VEX_LIG, VEX_W;
}
2208
2209let Predicates = [HasAVX512] in {
2210 let AddedComplexity = 15 in {
2211 // Move scalar to XMM zero-extended, zeroing a VR128X then do a
2212 // MOVS{S,D} to the lower bits.
2213 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))),
2214 (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)>;
2215 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
2216 (VMOVSSZrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
2217 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
2218 (VMOVSSZrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
2219 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
2220 (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)>;
2221
2222 // Move low f32 and clear high bits.
2223 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
2224 (SUBREG_TO_REG (i32 0),
2225 (VMOVSSZrr (v4f32 (V_SET0)),
2226 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
2227 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
2228 (SUBREG_TO_REG (i32 0),
2229 (VMOVSSZrr (v4i32 (V_SET0)),
2230 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
2231 }
2232
2233 let AddedComplexity = 20 in {
2234 // MOVSSrm zeros the high parts of the register; represent this
2235 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
2236 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
2237 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
2238 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
2239 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
2240 def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
2241 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
2242
2243 // MOVSDrm zeros the high parts of the register; represent this
2244 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
2245 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
2246 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2247 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
2248 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2249 def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
2250 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2251 def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
2252 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2253 def : Pat<(v2f64 (X86vzload addr:$src)),
2254 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2255
2256 // Represent the same patterns above but in the form they appear for
2257 // 256-bit types
2258 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
2259 (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
Elena Demikhovsky34586e72013-10-02 12:20:42 +00002260 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002261 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
2262 (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
2263 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
2264 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
2265 (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
2266 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
2267 }
2268 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
2269 (v4f32 (scalar_to_vector FR32X:$src)), (iPTR 0)))),
2270 (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSZrr (v4f32 (V_SET0)),
2271 FR32X:$src)), sub_xmm)>;
2272 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
2273 (v2f64 (scalar_to_vector FR64X:$src)), (iPTR 0)))),
2274 (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDZrr (v2f64 (V_SET0)),
2275 FR64X:$src)), sub_xmm)>;
2276 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
2277 (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
Elena Demikhovsky34586e72013-10-02 12:20:42 +00002278 (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002279
2280 // Move low f64 and clear high bits.
2281 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
2282 (SUBREG_TO_REG (i32 0),
2283 (VMOVSDZrr (v2f64 (V_SET0)),
2284 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
2285
2286 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
2287 (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (V_SET0)),
2288 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
2289
2290 // Extract and store.
2291 def : Pat<(store (f32 (vector_extract (v4f32 VR128X:$src), (iPTR 0))),
2292 addr:$dst),
2293 (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
2294 def : Pat<(store (f64 (vector_extract (v2f64 VR128X:$src), (iPTR 0))),
2295 addr:$dst),
2296 (VMOVSDZmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X))>;
2297
2298 // Shuffle with VMOVSS
2299 def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
2300 (VMOVSSZrr (v4i32 VR128X:$src1),
2301 (COPY_TO_REGCLASS (v4i32 VR128X:$src2), FR32X))>;
2302 def : Pat<(v4f32 (X86Movss VR128X:$src1, VR128X:$src2)),
2303 (VMOVSSZrr (v4f32 VR128X:$src1),
2304 (COPY_TO_REGCLASS (v4f32 VR128X:$src2), FR32X))>;
2305
2306 // 256-bit variants
2307 def : Pat<(v8i32 (X86Movss VR256X:$src1, VR256X:$src2)),
2308 (SUBREG_TO_REG (i32 0),
2309 (VMOVSSZrr (EXTRACT_SUBREG (v8i32 VR256X:$src1), sub_xmm),
2310 (EXTRACT_SUBREG (v8i32 VR256X:$src2), sub_xmm)),
2311 sub_xmm)>;
2312 def : Pat<(v8f32 (X86Movss VR256X:$src1, VR256X:$src2)),
2313 (SUBREG_TO_REG (i32 0),
2314 (VMOVSSZrr (EXTRACT_SUBREG (v8f32 VR256X:$src1), sub_xmm),
2315 (EXTRACT_SUBREG (v8f32 VR256X:$src2), sub_xmm)),
2316 sub_xmm)>;
2317
2318 // Shuffle with VMOVSD
2319 def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
2320 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2321 def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)),
2322 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2323 def : Pat<(v4f32 (X86Movsd VR128X:$src1, VR128X:$src2)),
2324 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2325 def : Pat<(v4i32 (X86Movsd VR128X:$src1, VR128X:$src2)),
2326 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2327
2328 // 256-bit variants
2329 def : Pat<(v4i64 (X86Movsd VR256X:$src1, VR256X:$src2)),
2330 (SUBREG_TO_REG (i32 0),
2331 (VMOVSDZrr (EXTRACT_SUBREG (v4i64 VR256X:$src1), sub_xmm),
2332 (EXTRACT_SUBREG (v4i64 VR256X:$src2), sub_xmm)),
2333 sub_xmm)>;
2334 def : Pat<(v4f64 (X86Movsd VR256X:$src1, VR256X:$src2)),
2335 (SUBREG_TO_REG (i32 0),
2336 (VMOVSDZrr (EXTRACT_SUBREG (v4f64 VR256X:$src1), sub_xmm),
2337 (EXTRACT_SUBREG (v4f64 VR256X:$src2), sub_xmm)),
2338 sub_xmm)>;
2339
2340 def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
2341 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2342 def : Pat<(v2i64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
2343 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2344 def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
2345 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2346 def : Pat<(v4i32 (X86Movlps VR128X:$src1, VR128X:$src2)),
2347 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2348}
2349
// Register-to-register vmovq: matches X86vzmovl on a v2i64 source register.
let AddedComplexity = 15 in {
  def VMOVZPQILo2PQIZrr
      : AVX512XSI<0x7E, MRMSrcReg,
                  (outs VR128X:$dst), (ins VR128X:$src),
                  "vmovq\t{$src, $dst|$dst, $src}",
                  [(set VR128X:$dst,
                        (v2i64 (X86vzmovl (v2i64 VR128X:$src))))],
                  IIC_SSE_MOVQ_RR>,
        EVEX, VEX_W;
}
2357
// Memory form of the vmovq above: matches X86vzmovl applied to a v2i64 load.
let AddedComplexity = 20 in {
  def VMOVZPQILo2PQIZrm
      : AVX512XSI<0x7E, MRMSrcMem,
                  (outs VR128X:$dst), (ins i128mem:$src),
                  "vmovq\t{$src, $dst|$dst, $src}",
                  [(set VR128X:$dst,
                        (v2i64 (X86vzmovl (loadv2i64 addr:$src))))],
                  IIC_SSE_MOVDQ>,
        EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
}
2366
// Selection patterns that map zero-extending low-element moves (X86vzmovl /
// X86vzload) of integer vectors onto the EVEX movd/movq instructions.
let Predicates = [HasAVX512] in {
  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
  let AddedComplexity = 20 in {
    // Scalar load or GPR moved into element 0.
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
              (VMOV64toPQIZrr GR64:$src)>;
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
              (VMOVDI2PDIZrr GR32:$src)>;

    // Full-vector loads of which only the low element is kept.
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
              (VMOVZPQILo2PQIZrm addr:$src)>;
    def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
              (VMOVZPQILo2PQIZrr VR128X:$src)>;
    def : Pat<(v2i64 (X86vzload addr:$src)),
              (VMOVZPQILo2PQIZrm addr:$src)>;
  }

  // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                               (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                               (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
}
2397
// Insertion of a GPR into element 0 of a 512-bit integer vector whose other
// input is either all-zeros or undef: select the 128-bit GPR->XMM move and
// place its result in the sub_xmm subregister via SUBREG_TO_REG.
def : Pat<(v16i32 (X86Vinsert (v16i32 immAllZerosV), GR32:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;

def : Pat<(v8i64 (X86Vinsert (bc_v8i64 (v16i32 immAllZerosV)), GR64:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;

def : Pat<(v16i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;

def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
2409
2410//===----------------------------------------------------------------------===//
Adam Nemet7f62b232014-06-10 16:39:53 +00002411// AVX-512 - Non-temporals
2412//===----------------------------------------------------------------------===//
// Non-temporal aligned loads (vmovntdqa) for 512-, 256- and 128-bit vectors.
let SchedRW = [WriteLoad] in {
  // The 512-bit form is selected from the int_x86_avx512_movntdqa intrinsic.
  def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
                        (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
                        [(set VR512:$dst, (int_x86_avx512_movntdqa addr:$src))],
                        SSEPackedInt>, EVEX, T8PD, EVEX_V512,
                        EVEX_CD8<64, CD8VF>;

  // The VL forms carry no selection pattern, so TableGen has nothing to infer
  // memory behaviour from; state explicitly that they read memory.
  let Predicates = [HasAVX512, HasVLX], mayLoad = 1 in {
    def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
                             (ins i256mem:$src),
                             "vmovntdqa\t{$src, $dst|$dst, $src}", [],
                             SSEPackedInt>, EVEX, T8PD, EVEX_V256,
                             EVEX_CD8<64, CD8VF>;

    def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
                             (ins i128mem:$src),
                             "vmovntdqa\t{$src, $dst|$dst, $src}", [],
                             SSEPackedInt>, EVEX, T8PD, EVEX_V128,
                             EVEX_CD8<64, CD8VF>;
  }
}
2434
// One non-temporal store instruction ("mr" form): stores register RC,
// viewed as OpVT, to memop through the st_frag store fragment.
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, PatFrag st_frag,
                        ValueType OpVT, RegisterClass RC, X86MemOperand memop,
                        Domain d, InstrItinClass itin = IIC_SSE_MOVNT> {
  let AddedComplexity = 400, mayStore = 1, SchedRW = [WriteStore] in {
    def mr : AVX512PI<opc, MRMDestMem,
                      (outs), (ins memop:$dst, RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(st_frag (OpVT RC:$src), addr:$dst)],
                      d, itin>,
             EVEX;
  }
}
2444
// Instantiates avx512_movnt for the three vector lengths.
//   elty / elsz            - element kind ("i"/"f") and element size in bits,
//                            pasted into the value type name and (elty only)
//                            into the memory operand name.
//   vsz512/vsz256/vsz128   - element counts for each vector length.
//   prd                    - predicate required by every form; the 256- and
//                            128-bit forms additionally require HasVLX.
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr, PatFrag st_frag,
                           string elty, string elsz, string vsz512,
                           string vsz256, string vsz128, Domain d,
                           Predicate prd, InstrItinClass itin = IIC_SSE_MOVNT> {
  // 512-bit form.
  let Predicates = [prd] in {
    defm Z : avx512_movnt<opc, OpcodeStr, st_frag,
                          !cast<ValueType>("v"##vsz512##elty##elsz), VR512,
                          !cast<X86MemOperand>(elty##"512mem"), d, itin>,
             EVEX_V512;
  }

  // 256-bit and 128-bit forms.
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, st_frag,
                             !cast<ValueType>("v"##vsz256##elty##elsz), VR256X,
                             !cast<X86MemOperand>(elty##"256mem"), d, itin>,
                EVEX_V256;

    defm Z128 : avx512_movnt<opc, OpcodeStr, st_frag,
                             !cast<ValueType>("v"##vsz128##elty##elsz), VR128X,
                             !cast<X86MemOperand>(elty##"128mem"), d, itin>,
                EVEX_V128;
  }
}
2467
// Non-temporal stores. Each defm expands to Z (512-bit), Z256 and Z128
// "mr" instructions, all matched from alignednontemporalstore.

// Integer data, typed as vectors of i64 (v8i64 / v4i64 / v2i64).
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", alignednontemporalstore,
                                "i", "64", "8", "4", "2", SSEPackedInt,
                                HasAVX512>, PD, EVEX_CD8<64, CD8VF>;

// Packed double (v8f64 / v4f64 / v2f64).
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", alignednontemporalstore,
                                "f", "64", "8", "4", "2", SSEPackedDouble,
                                HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Packed single (v16f32 / v8f32 / v4f32).
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", alignednontemporalstore,
                                "f", "32", "16", "8", "4", SSEPackedSingle,
                                HasAVX512>, PS, EVEX_CD8<32, CD8VF>;
2479
Adam Nemet7f62b232014-06-10 16:39:53 +00002480//===----------------------------------------------------------------------===//
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002481// AVX-512 - Integer arithmetic
2482//
// Integer binary operation with selection patterns for every form.
//
// Defines nine instructions:
//   rr / rrk / rrkz     - register sources: plain, masked with $src0 tied to
//                         $dst, and masked against an all-zeros vector.
//   rm / rmk / rmkz     - $src2 loaded through memop_frag.
//   rmb / rmbk / rmbkz  - $src2 is a scalar loaded through scalar_mfrag and
//                         broadcast with X86VBroadcast; BrdcstStr is appended
//                         to the memory operand in the assembly string.
//
// Masked forms are matched from (vselect KRC:$mask, (OpNode ...), passthru).
// IsCommutable is applied to the plain rr form only.
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           ValueType OpVT, RegisterClass KRC,
                           RegisterClass RC, PatFrag memop_frag,
                           X86MemOperand x86memop, PatFrag scalar_mfrag,
                           X86MemOperand x86scalar_mop, string BrdcstStr,
                           OpndItins itins, bit IsCommutable = 0> {
  // The brace-less 'let' covers only the next def (rr).
  let isCommutable = IsCommutable in
    def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
              (ins RC:$src1, RC:$src2),
              !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
              itins.rr>, EVEX_4V;
  let AddedComplexity = 30 in {
    // Merge-masking: $src0 supplies the vselect's false operand and is tied
    // to $dst.
    let Constraints = "$src0 = $dst" in
      def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                 (ins RC:$src0, KRC:$mask, RC:$src1, RC:$src2),
                 !strconcat(OpcodeStr,
                    " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
                 [(set RC:$dst, (OpVT (vselect KRC:$mask,
                                       (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
                                       RC:$src0)))],
                 itins.rr>, EVEX_4V, EVEX_K;
    // Zero-masking: the vselect's false operand is an all-zeros vector.
    def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                (ins KRC:$mask, RC:$src1, RC:$src2),
                !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
                    "|$dst {${mask}} {z}, $src1, $src2}"),
                [(set RC:$dst, (OpVT (vselect KRC:$mask,
                                      (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
                                      (OpVT immAllZerosV))))],
                itins.rr>, EVEX_4V, EVEX_KZ;
  }

  let mayLoad = 1 in {
    // Full-vector memory operand.
    def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
              (ins RC:$src1, x86memop:$src2),
              !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (memop_frag addr:$src2))))],
              itins.rm>, EVEX_4V;
    let AddedComplexity = 30 in {
    let Constraints = "$src0 = $dst" in
      def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                 (ins RC:$src0, KRC:$mask, RC:$src1, x86memop:$src2),
                 !strconcat(OpcodeStr,
                     " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
                 [(set RC:$dst, (OpVT (vselect KRC:$mask,
                                       (OpNode (OpVT RC:$src1), (memop_frag addr:$src2)),
                                       RC:$src0)))],
                 itins.rm>, EVEX_4V, EVEX_K;
    def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                (ins KRC:$mask, RC:$src1, x86memop:$src2),
                !strconcat(OpcodeStr,
                    " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
                [(set RC:$dst, (OpVT (vselect KRC:$mask,
                                      (OpNode (OpVT RC:$src1), (memop_frag addr:$src2)),
                                      (OpVT immAllZerosV))))],
                itins.rm>, EVEX_4V, EVEX_KZ;
    }
    // Scalar memory operand broadcast to the full vector.
    def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86scalar_mop:$src2),
               !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                          ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
               [(set RC:$dst, (OpNode RC:$src1,
                               (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))))],
               itins.rm>, EVEX_4V, EVEX_B;
    let AddedComplexity = 30 in {
    let Constraints = "$src0 = $dst" in
      def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                  (ins RC:$src0, KRC:$mask, RC:$src1, x86scalar_mop:$src2),
                  !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                             ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
                             BrdcstStr, "}"),
                  [(set RC:$dst, (OpVT (vselect KRC:$mask,
                                        (OpNode (OpVT RC:$src1),
                                         (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))),
                                        RC:$src0)))],
                  itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
    def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
                 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                            ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
                            BrdcstStr, "}"),
                 [(set RC:$dst, (OpVT (vselect KRC:$mask,
                                       (OpNode (OpVT RC:$src1),
                                        (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))),
                                       (OpVT immAllZerosV))))],
                 itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
    }
  }
}
Elena Demikhovskybb2f6b72014-03-27 09:45:08 +00002572
// Integer binary operation whose result type (DstVT) differs from its source
// type (SrcVT). Defines the same nine forms as avx512_binop_rm (rr/rrk/rrkz,
// rm/rmk/rmkz, rmb/rmbk/rmbkz) but with empty pattern lists; selection is
// done by separate Pat records. DstVT, SrcVT, memop_frag and scalar_mfrag are
// accepted for signature parity and are not referenced in the body.
//
// NOTE(review): rrk/rmk/rmbk are EVEX_K (merge-masking) forms but, unlike
// avx512_binop_rm, take no tied $src0 pass-through operand. Left as-is
// because adding one changes the operand list - confirm intent.
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, ValueType DstVT,
                            ValueType SrcVT, RegisterClass KRC, RegisterClass RC,
                            PatFrag memop_frag, X86MemOperand x86memop,
                            PatFrag scalar_mfrag, X86MemOperand x86scalar_mop,
                            string BrdcstStr, OpndItins itins, bit IsCommutable = 0> {
  // Only the unmasked rr form may be flagged commutable: the generic commute
  // swaps the first two input operands, which for rrk/rrkz are $mask and
  // $src1, not the two vector sources.
  let isCommutable = IsCommutable in
  def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
            (ins RC:$src1, RC:$src2),
            !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
            [], itins.rr>, EVEX_4V;
  def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
             (ins KRC:$mask, RC:$src1, RC:$src2),
             !strconcat(OpcodeStr,
                " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
             [], itins.rr>, EVEX_4V, EVEX_K;
  def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
              (ins KRC:$mask, RC:$src1, RC:$src2),
              !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
                  "|$dst {${mask}} {z}, $src1, $src2}"),
              [], itins.rr>, EVEX_4V, EVEX_KZ;

  // No patterns to infer from, so the memory forms are marked explicitly.
  let mayLoad = 1 in {
    def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
              (ins RC:$src1, x86memop:$src2),
              !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [], itins.rm>, EVEX_4V;
    def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
               (ins KRC:$mask, RC:$src1, x86memop:$src2),
               !strconcat(OpcodeStr,
                   " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
               [], itins.rm>, EVEX_4V, EVEX_K;
    def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                (ins KRC:$mask, RC:$src1, x86memop:$src2),
                !strconcat(OpcodeStr,
                    " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
                [], itins.rm>, EVEX_4V, EVEX_KZ;
    def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86scalar_mop:$src2),
               !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                          ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
               [], itins.rm>, EVEX_4V, EVEX_B;
    def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
                !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                           ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
                           BrdcstStr, "}"),
                [], itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
    def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
                 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                            ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
                            BrdcstStr, "}"),
                 [], itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
  }
}
2629
// 512-bit integer add / subtract / multiply.
// The D forms operate on v16i32 with a VK16WM mask, the Q forms on v8i64
// with a VK8WM mask. add and mul are instantiated commutable, sub is not.
defm VPADDDZ : avx512_binop_rm<0xFE, "vpaddd", add, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPADDQZ : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTALU_ITINS_P, 1>, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W;

defm VPSUBQZ : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTALU_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// vpmuldq / vpmuludq take v16i32 sources and produce v8i64, so they use the
// pattern-less avx512_binop_rm2 and are selected by the Pat records below.
defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512,
                   EVEX_CD8<64, CD8VF>, VEX_W;

defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTMUL_ITINS_P, 1>, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;

// X86pmuludq node -> vpmuludq.
def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
          (VPMULUDQZrr VR512:$src1, VR512:$src2)>;

// Masked multiply intrinsics called with an all-zeros pass-through and an
// all-ones (i8 -1) mask select the plain rr forms.
def : Pat<(v8i64 (int_x86_avx512_mask_pmulu_dq_512 (v16i32 VR512:$src1),
           (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512 (v16i32 VR512:$src1),
           (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMULDQZrr VR512:$src1, VR512:$src2)>;
2668
// 512-bit integer min / max, signed and unsigned, on 32-bit (D) and 64-bit
// (Q) elements. Min and max are commutative for every element width, so all
// eight instantiations pass IsCommutable = 1 (avx512_binop_rm applies it to
// the unmasked rr form only).
defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky199c8232013-10-27 08:18:37 +00002704
// Masked min/max intrinsics called with an all-zeros pass-through and an
// all-ones mask (i16 -1 for 16 elements, i8 -1 for 8 elements) select the
// corresponding unmasked rr instruction.
def : Pat <(v16i32 (int_x86_avx512_mask_pmaxs_d_512 (v16i32 VR512:$src1),
                    (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
           (VPMAXSDZrr VR512:$src1, VR512:$src2)>;
def : Pat <(v16i32 (int_x86_avx512_mask_pmaxu_d_512 (v16i32 VR512:$src1),
                    (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
           (VPMAXUDZrr VR512:$src1, VR512:$src2)>;
def : Pat <(v8i64 (int_x86_avx512_mask_pmaxs_q_512 (v8i64 VR512:$src1),
                   (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
           (VPMAXSQZrr VR512:$src1, VR512:$src2)>;
def : Pat <(v8i64 (int_x86_avx512_mask_pmaxu_q_512 (v8i64 VR512:$src1),
                   (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
           (VPMAXUQZrr VR512:$src1, VR512:$src2)>;
def : Pat <(v16i32 (int_x86_avx512_mask_pmins_d_512 (v16i32 VR512:$src1),
                    (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
           (VPMINSDZrr VR512:$src1, VR512:$src2)>;
def : Pat <(v16i32 (int_x86_avx512_mask_pminu_d_512 (v16i32 VR512:$src1),
                    (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
           (VPMINUDZrr VR512:$src1, VR512:$src2)>;
def : Pat <(v8i64 (int_x86_avx512_mask_pmins_q_512 (v8i64 VR512:$src1),
                   (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
           (VPMINSQZrr VR512:$src1, VR512:$src2)>;
def : Pat <(v8i64 (int_x86_avx512_mask_pminu_q_512 (v8i64 VR512:$src1),
                   (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
           (VPMINUQZrr VR512:$src1, VR512:$src2)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002729//===----------------------------------------------------------------------===//
2730// AVX-512 - Unpack Instructions
2731//===----------------------------------------------------------------------===//
2732
// Floating-point unpack: register/register (rr) and register/memory (rm)
// forms producing type vt. In the rm form the loaded value is passed through
// bitconvert, so mem_frag does not have to load type vt itself.
// 'asm' is the complete assembly string, mnemonic and operands included.
multiclass avx512_unpack_fp<bits<8> opc, SDNode OpNode, ValueType vt,
                            PatFrag mem_frag, RegisterClass RC,
                            X86MemOperand x86memop, string asm,
                            Domain d> {
    def rr : AVX512PI<opc, MRMSrcReg,
                (outs RC:$dst), (ins RC:$src1, RC:$src2),
                asm, [(set RC:$dst,
                           (vt (OpNode RC:$src1, RC:$src2)))],
                           d>, EVEX_4V;
    def rm : AVX512PI<opc, MRMSrcMem,
                (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                asm, [(set RC:$dst,
                       (vt (OpNode RC:$src1,
                            (bitconvert (mem_frag addr:$src2)))))],
                        d>, EVEX_4V;
}
2749
// 512-bit FP unpack high/low. All four pass memopv8f64 as the load fragment;
// avx512_unpack_fp bitconverts the loaded value to the result type, which is
// how the PS (v16f32) variants reuse it.
defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, memopv8f64,
      VR512, f512mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, memopv8f64,
      VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, memopv8f64,
      VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, memopv8f64,
      VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002762
// Integer unpack: rr and rm forms over type OpVT. The rm form bitconverts
// the value loaded through memop_frag before applying OpNode.
multiclass avx512_unpack_int<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                             X86MemOperand x86memop> {
  // Register source.
  def rr : AVX512BI<opc, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, RC:$src2),
                    OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set RC:$dst,
                          (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
                    IIC_SSE_UNPCK>,
           EVEX_4V;

  // Memory source.
  def rm : AVX512BI<opc, MRMSrcMem,
                    (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                    OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set RC:$dst,
                          (OpVT (OpNode (OpVT RC:$src1),
                                        (bitconvert (memop_frag addr:$src2)))))],
                    IIC_SSE_UNPCK>,
           EVEX_4V;
}
// 512-bit integer unpack low/high for 32-bit (DQ, v16i32) and 64-bit
// (QDQ, v8i64) elements.
defm VPUNPCKLDQZ  : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32,
                                VR512, memopv16i32, i512mem>, EVEX_V512,
                                EVEX_CD8<32, CD8VF>;
defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64,
                                VR512, memopv8i64, i512mem>, EVEX_V512,
                                VEX_W, EVEX_CD8<64, CD8VF>;
defm VPUNPCKHDQZ  : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32,
                                VR512, memopv16i32, i512mem>, EVEX_V512,
                                EVEX_CD8<32, CD8VF>;
defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64,
                                VR512, memopv8i64, i512mem>, EVEX_V512,
                                VEX_W, EVEX_CD8<64, CD8VF>;
2790//===----------------------------------------------------------------------===//
2791// AVX-512 - PSHUFD
2792//
2793
// Shuffle controlled by an 8-bit immediate: "ri" takes the data from a
// register, "mi" loads it through mem_frag. Both produce type OpVT.
multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                            SDNode OpNode, PatFrag mem_frag,
                            X86MemOperand x86memop, ValueType OpVT> {
  // Register source.
  def ri : AVX512Ii8<opc, MRMSrcReg,
                     (outs RC:$dst), (ins RC:$src1, i8imm:$src2),
                     OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set RC:$dst,
                           (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
           EVEX;

  // Memory source.
  def mi : AVX512Ii8<opc, MRMSrcMem,
                     (outs RC:$dst), (ins x86memop:$src1, i8imm:$src2),
                     OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set RC:$dst,
                           (OpVT (OpNode (mem_frag addr:$src1),
                                         (i8 imm:$src2))))]>,
           EVEX;
}
2812
// 512-bit immediate-controlled shuffles.
defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32,
                      i512mem, v16i32>, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;

let ExeDomain = SSEPackedSingle in
defm VPERMILPSZ  : avx512_pshuf_imm<0x04, "vpermilps", VR512, X86VPermilpi,
                      memopv16f32, i512mem, v16f32>, TAPD, EVEX_V512,
                      EVEX_CD8<32, CD8VF>;
// vpermilpd works on 64-bit elements (VEX_W), so its compressed-displacement
// element size is 64, matching the other 64-bit instructions in this file.
let ExeDomain = SSEPackedDouble in
defm VPERMILPDZ  : avx512_pshuf_imm<0x05, "vpermilpd", VR512, X86VPermilpi,
                      memopv8f64, i512mem, v8f64>, TAPD, EVEX_V512,
                      VEX_W, EVEX_CD8<64, CD8VF>;

// Integer-typed X86VPermilpi nodes reuse the FP instructions.
def : Pat<(v16i32 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
          (VPERMILPSZri VR512:$src1, imm:$imm)>;
def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
          (VPERMILPDZri VR512:$src1, imm:$imm)>;
2829
2830//===----------------------------------------------------------------------===//
2831// AVX-512 Logical Instructions
2832//===----------------------------------------------------------------------===//
2833
// 512-bit bitwise logic. The D and Q variants of an operation share an
// opcode; the Q variants add VEX_W and use v8i64 with a VK8WM mask.
// and / or / xor are instantiated commutable; andn (X86andnp) is not.
defm VPANDDZ : avx512_binop_rm<0xDB, "vpandd", and, v16i32, VK16WM, VR512, memopv16i32,
                      i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
                      EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPANDQZ : avx512_binop_rm<0xDB, "vpandq", and, v8i64, VK8WM, VR512, memopv8i64,
                      i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPORDZ  : avx512_binop_rm<0xEB, "vpord", or, v16i32, VK16WM, VR512, memopv16i32,
                      i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
                      EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPORQZ  : avx512_binop_rm<0xEB, "vporq", or, v8i64, VK8WM, VR512, memopv8i64,
                      i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPXORDZ : avx512_binop_rm<0xEF, "vpxord", xor, v16i32, VK16WM, VR512, memopv16i32,
                      i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
                      EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPXORQZ : avx512_binop_rm<0xEF, "vpxorq", xor, v8i64, VK8WM, VR512, memopv8i64,
                      i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPANDNDZ : avx512_binop_rm<0xDF, "vpandnd", X86andnp, v16i32, VK16WM, VR512,
                      memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                      SSE_BIT_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VK8WM, VR512,
                      memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                      SSE_BIT_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002858
2859//===----------------------------------------------------------------------===//
2860// AVX-512 FP arithmetic
2861//===----------------------------------------------------------------------===//
2862
// Scalar FP binary operation. Instantiates sse12_fp_scalar (defined outside
// this chunk) twice:
//   SSZ - OpcodeStr with an ss suffix, FR32X / f32mem, itins.s, XS prefix,
//         EVEX_CD8<32, CD8VT1>.
//   SDZ - OpcodeStr with an sd suffix, FR64X / f64mem, itins.d, XD prefix,
//         VEX_W, EVEX_CD8<64, CD8VT1>.
// Both are EVEX_4V and VEX_LIG. isCommutable is not set here; the
// instantiation sites wrap the defm in a let isCommutable = ... block.
2863multiclass avx512_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
2864 SizeItins itins> {
Elena Demikhovskycf088092013-12-11 14:31:04 +00002865 defm SSZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), OpNode, FR32X,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002866 f32mem, itins.s, 0>, XS, EVEX_4V, VEX_LIG,
2867 EVEX_CD8<32, CD8VT1>;
Elena Demikhovskycf088092013-12-11 14:31:04 +00002868 defm SDZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), OpNode, FR64X,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002869 f64mem, itins.d, 0>, XD, VEX_W, EVEX_4V, VEX_LIG,
2870 EVEX_CD8<64, CD8VT1>;
2871}
2872
// Scalar AVX-512 FP arithmetic (VADDSSZ/VADDSDZ, VMULSSZ/..., etc.), built
// from avx512_binop_s.
//
// Only add and mul may have their source operands swapped.
let isCommutable = 1 in {
defm VADD : avx512_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>;
defm VMUL : avx512_binop_s<0x59, "mul", fmul, SSE_ALU_ITINS_S>;
}
// sub and div are order-dependent. min and max are grouped here as well:
// X86fmin/X86fmax model the x86 MIN/MAX instructions, which return the second
// source operand when either input is NaN or when both inputs are zeros of
// opposite sign, so commuting the operands can change the result.
let isCommutable = 0 in {
defm VMIN : avx512_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>;
defm VMAX : avx512_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>;
defm VSUB : avx512_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>;
defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>;
}
2883
// Packed FP binary operation with EVEX masking and embedded broadcast.
//
// Template arguments:
//   opc / OpcodeStr  - opcode byte and mnemonic.
//   OpNode           - DAG node matched by the unmasked forms.
//   KRC              - mask register class used for the $mask operand.
//   RC / vt          - vector register class and value type.
//   x86memop / mem_frag          - full-width memory operand and load fragment.
//   x86scalar_mop / scalar_mfrag - scalar memory operand and load fragment
//                                  used by the broadcast forms.
//   BrdcstStr        - text appended to the memory operand in the asm string
//                      of the broadcast forms.
//   d / itins        - execution domain and itineraries (itins.rr, itins.rm).
//   commutable       - value assigned to isCommutable for the register forms.
//
// Defined records: rr, rrk, rrkz, rm, rmb, rmk, rmkz, rmbk, rmbkz. Only rr,
// rm and rmb carry selection patterns; every masked form has an empty
// pattern list.
//
// NOTE(review): the asm strings are not uniform. rmk and rmkz start with a
// tab only while the other forms start with a space followed by a tab, and
// rrk has a space before the | separator in the AT&T half. Confirm whether
// that is intentional before normalising, since it changes printed output.
2884multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
Elena Demikhovskyf7c1b162014-03-06 08:45:30 +00002885 RegisterClass KRC,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002886 RegisterClass RC, ValueType vt,
2887 X86MemOperand x86memop, PatFrag mem_frag,
2888 X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
2889 string BrdcstStr,
2890 Domain d, OpndItins itins, bit commutable> {
// Register-register forms: unmasked (rr), merge-masked (rrk, EVEX_K) and
// zero-masked (rrkz, EVEX_KZ).
Elena Demikhovskyf7c1b162014-03-06 08:45:30 +00002891 let isCommutable = commutable in {
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002892 def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00002893 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002894 [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
Craig Topperda7160d2014-02-01 08:17:56 +00002895 EVEX_4V;
Elena Demikhovskyf7c1b162014-03-06 08:45:30 +00002896
2897 def rrk: PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
2898 !strconcat(OpcodeStr,
2899 " \t{$src2, $src1, $dst {${mask}} |$dst {${mask}}, $src1, $src2}"),
2900 [], itins.rr, d>, EVEX_4V, EVEX_K;
2901
2902 def rrkz: PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
2903 !strconcat(OpcodeStr,
2904 " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
2905 [], itins.rr, d>, EVEX_4V, EVEX_KZ;
2906 }
2907
// Memory forms. mayLoad is set explicitly for the whole group, which covers
// the masked variants that have no pattern to infer it from.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002908 let mayLoad = 1 in {
2909 def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00002910 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002911 [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
Craig Topperda7160d2014-02-01 08:17:56 +00002912 itins.rm, d>, EVEX_4V;
Elena Demikhovskyf7c1b162014-03-06 08:45:30 +00002913
// Broadcast form (EVEX_B): the second source is a scalar load wrapped in
// X86VBroadcast.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002914 def rmb : PI<opc, MRMSrcMem, (outs RC:$dst),
2915 (ins RC:$src1, x86scalar_mop:$src2),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00002916 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
Elena Demikhovskyf7c1b162014-03-06 08:45:30 +00002917 ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002918 [(set RC:$dst, (OpNode RC:$src1,
2919 (vt (X86VBroadcast (scalar_mfrag addr:$src2)))))],
Craig Topperda7160d2014-02-01 08:17:56 +00002920 itins.rm, d>, EVEX_4V, EVEX_B;
Elena Demikhovskyf7c1b162014-03-06 08:45:30 +00002921
2922 def rmk : PI<opc, MRMSrcMem, (outs RC:$dst),
2923 (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
2924 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2925 [], itins.rm, d>, EVEX_4V, EVEX_K;
2926
2927 def rmkz : PI<opc, MRMSrcMem, (outs RC:$dst),
2928 (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
2929 "\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
2930 [], itins.rm, d>, EVEX_4V, EVEX_KZ;
2931
2932 def rmbk : PI<opc, MRMSrcMem, (outs RC:$dst),
2933 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
2934 " \t{${src2}", BrdcstStr,
2935 ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}", BrdcstStr, "}"),
2936 [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_K;
2937
2938 def rmbkz : PI<opc, MRMSrcMem, (outs RC:$dst),
2939 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
2940 " \t{${src2}", BrdcstStr,
2941 ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
2942 BrdcstStr, "}"),
2943 [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_KZ;
2944 }
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002945}
2946
// 512-bit packed FP arithmetic, instantiated from avx512_fp_packed.
// PS flavours use v16f32 / VK16WM / {1to16} / EVEX_CD8<32, CD8VF>;
// PD flavours use v8f64 / VK8WM / {1to8} / VEX_W / EVEX_CD8<64, CD8VF>.
// The final template argument is the commutable flag: 1 for add and mul only.
defm VADDPSZ : avx512_fp_packed<0x58, "addps", fadd, VK16WM, VR512, v16f32, f512mem,
                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
                   SSE_ALU_ITINS_P.s, 1>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

defm VADDPDZ : avx512_fp_packed<0x58, "addpd", fadd, VK8WM, VR512, v8f64, f512mem,
                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
                   SSE_ALU_ITINS_P.d, 1>,
                   EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VK16WM, VR512, v16f32, f512mem,
                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
                   SSE_ALU_ITINS_P.s, 1>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm VMULPDZ : avx512_fp_packed<0x59, "mulpd", fmul, VK8WM, VR512, v8f64, f512mem,
                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
                   SSE_ALU_ITINS_P.d, 1>,
                   EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;

// min and max are NOT commutable: X86fmin/X86fmax model the x86 MIN/MAX
// instructions, which return the second source operand when either input is
// NaN or when both inputs are zeros of opposite sign. Swapping the operands
// (for example to fold a load) could therefore change the result.
defm VMINPSZ : avx512_fp_packed<0x5D, "minps", X86fmin, VK16WM, VR512, v16f32, f512mem,
                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
                   SSE_ALU_ITINS_P.s, 0>,
                   EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm VMAXPSZ : avx512_fp_packed<0x5F, "maxps", X86fmax, VK16WM, VR512, v16f32, f512mem,
                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
                   SSE_ALU_ITINS_P.s, 0>,
                   EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

defm VMINPDZ : avx512_fp_packed<0x5D, "minpd", X86fmin, VK8WM, VR512, v8f64, f512mem,
                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
                   SSE_ALU_ITINS_P.d, 0>,
                   EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VMAXPDZ : avx512_fp_packed<0x5F, "maxpd", X86fmax, VK8WM, VR512, v8f64, f512mem,
                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
                   SSE_ALU_ITINS_P.d, 0>,
                   EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VSUBPSZ : avx512_fp_packed<0x5C, "subps", fsub, VK16WM, VR512, v16f32, f512mem,
                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
                   SSE_ALU_ITINS_P.s, 0>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm VDIVPSZ : avx512_fp_packed<0x5E, "divps", fdiv, VK16WM, VR512, v16f32, f512mem,
                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
                   SSE_ALU_ITINS_P.s, 0>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

defm VSUBPDZ : avx512_fp_packed<0x5C, "subpd", fsub, VK8WM, VR512, v8f64, f512mem,
                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
                   SSE_ALU_ITINS_P.d, 0>,
                   EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VK8WM, VR512, v8f64, f512mem,
                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
                   SSE_ALU_ITINS_P.d, 0>,
                   EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002997
// Select the masked 512-bit max/min intrinsics to the plain register-register
// instructions. Each pattern only matches the degenerate case where the third
// operand is an all-zeros vector, the mask is all ones (-1, as i16 for PS and
// i8 for PD) and the rounding operand is FROUND_CURRENT.
Elena Demikhovsky52e4a0e2014-01-05 10:46:09 +00002998def : Pat<(v16f32 (int_x86_avx512_mask_max_ps_512 (v16f32 VR512:$src1),
2999 (v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)),
3000 (i16 -1), FROUND_CURRENT)),
3001 (VMAXPSZrr VR512:$src1, VR512:$src2)>;
3002
3003def : Pat<(v8f64 (int_x86_avx512_mask_max_pd_512 (v8f64 VR512:$src1),
3004 (v8f64 VR512:$src2), (bc_v8f64 (v16i32 immAllZerosV)),
3005 (i8 -1), FROUND_CURRENT)),
3006 (VMAXPDZrr VR512:$src1, VR512:$src2)>;
3007
3008def : Pat<(v16f32 (int_x86_avx512_mask_min_ps_512 (v16f32 VR512:$src1),
3009 (v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)),
3010 (i16 -1), FROUND_CURRENT)),
3011 (VMINPSZrr VR512:$src1, VR512:$src2)>;
3012
3013def : Pat<(v8f64 (int_x86_avx512_mask_min_pd_512 (v8f64 VR512:$src1),
3014 (v8f64 VR512:$src2), (bc_v8f64 (v16i32 immAllZerosV)),
3015 (i8 -1), FROUND_CURRENT)),
3016 (VMINPDZrr VR512:$src1, VR512:$src2)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003017//===----------------------------------------------------------------------===//
3018// AVX-512 VPTESTM instructions
3019//===----------------------------------------------------------------------===//
3020
// Vector test producing a mask register. Defines rr (both sources in RC) and
// rm (second source loaded via memop_frag and bitconverted). The result is
// written to KRC:$dst and the pattern is OpNode applied to the two sources
// typed as vt. Both forms are EVEX_4V in the SSEPackedInt domain.
3021multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3022 RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
3023 SDNode OpNode, ValueType vt> {
Elena Demikhovskya30e4372014-02-05 07:05:03 +00003024 def rr : AVX512PI<opc, MRMSrcReg,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003025 (outs KRC:$dst), (ins RC:$src1, RC:$src2),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003026 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Elena Demikhovskya30e4372014-02-05 07:05:03 +00003027 [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))],
3028 SSEPackedInt>, EVEX_4V;
3029 def rm : AVX512PI<opc, MRMSrcMem,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003030 (outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003031 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003032 [(set KRC:$dst, (OpNode (vt RC:$src1),
Elena Demikhovskya30e4372014-02-05 07:05:03 +00003033 (bitconvert (memop_frag addr:$src2))))], SSEPackedInt>, EVEX_4V;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003034}
3035
// VPTESTM (X86testm, T8PD prefix) and VPTESTNM (X86testnm, T8XS prefix) for
// dword (VK16, v16i32) and qword (VK8, v8i64, VEX_W) elements. All four share
// opcode 0x27 and differ only in prefix and VEX_W.
// NOTE(review): the memory operand is f512mem although these are integer
// tests; the integer logic ops in this file use i512mem. Confirm whether the
// difference matters.
// NOTE(review): the VPTESTNM forms are gated on HasCDI; confirm against the
// Intel SDM which feature flag these encodings belong to.
3036defm VPTESTMDZ : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem,
Elena Demikhovskya30e4372014-02-05 07:05:03 +00003037 memopv16i32, X86testm, v16i32>, T8PD, EVEX_V512,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003038 EVEX_CD8<32, CD8VF>;
3039defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem,
Elena Demikhovsky3ebfe112014-02-23 14:28:35 +00003040 memopv8i64, X86testm, v8i64>, T8PD, EVEX_V512, VEX_W,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003041 EVEX_CD8<64, CD8VF>;
3042
Elena Demikhovskya30e4372014-02-05 07:05:03 +00003043let Predicates = [HasCDI] in {
3044defm VPTESTNMDZ : avx512_vptest<0x27, "vptestnmd", VK16, VR512, f512mem,
3045 memopv16i32, X86testnm, v16i32>, T8XS, EVEX_V512,
3046 EVEX_CD8<32, CD8VF>;
3047defm VPTESTNMQZ : avx512_vptest<0x27, "vptestnmq", VK8, VR512, f512mem,
Elena Demikhovsky3ebfe112014-02-23 14:28:35 +00003048 memopv8i64, X86testnm, v8i64>, T8XS, EVEX_V512, VEX_W,
Elena Demikhovskya30e4372014-02-05 07:05:03 +00003049 EVEX_CD8<64, CD8VF>;
3050}
3051
// Select the masked ptestm intrinsics, when called with an all-ones mask (-1),
// to the register-register VPTESTM instruction. The mask-register result is
// copied into a GPR class matching the intrinsic return type: GR16 for the
// dword form (i16) and GR8 for the qword form (i8).
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00003052def : Pat <(i16 (int_x86_avx512_mask_ptestm_d_512 (v16i32 VR512:$src1),
3053 (v16i32 VR512:$src2), (i16 -1))),
3054 (COPY_TO_REGCLASS (VPTESTMDZrr VR512:$src1, VR512:$src2), GR16)>;
3055
3056def : Pat <(i8 (int_x86_avx512_mask_ptestm_q_512 (v8i64 VR512:$src1),
3057 (v8i64 VR512:$src2), (i8 -1))),
Elena Demikhovsky3ebfe112014-02-23 14:28:35 +00003058 (COPY_TO_REGCLASS (VPTESTMQZrr VR512:$src1, VR512:$src2), GR8)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003059//===----------------------------------------------------------------------===//
3060// AVX-512 Shift instructions
3061//===----------------------------------------------------------------------===//
// Shift by an 8-bit immediate. ImmFormR / ImmFormM are the instruction
// formats used for the register-source and memory-source variants.
// Defined records:
//   ri  - register source, with selection pattern.
//   rik - register source, merge-masked by KRC:$mask (EVEX_K), no pattern.
//   mi  - memory source loaded via mem_frag, with selection pattern.
//   mik - memory source, merge-masked (EVEX_K), no pattern.
// NOTE(review): mik has an empty pattern list and no explicit mayLoad, unlike
// the masked memory forms in avx512_fp_packed; confirm whether the flag needs
// to be set here.
3062multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
3063 string OpcodeStr, SDNode OpNode, RegisterClass RC,
3064 ValueType vt, X86MemOperand x86memop, PatFrag mem_frag,
3065 RegisterClass KRC> {
3066 def ri : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
Lang Hames27839932013-10-21 17:51:24 +00003067 (ins RC:$src1, i8imm:$src2),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003068 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Lang Hames27839932013-10-21 17:51:24 +00003069 [(set RC:$dst, (vt (OpNode RC:$src1, (i8 imm:$src2))))],
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003070 SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
3071 def rik : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
Lang Hames27839932013-10-21 17:51:24 +00003072 (ins KRC:$mask, RC:$src1, i8imm:$src2),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003073 !strconcat(OpcodeStr,
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003074 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003075 [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
3076 def mi: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
Lang Hames27839932013-10-21 17:51:24 +00003077 (ins x86memop:$src1, i8imm:$src2),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003078 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003079 [(set RC:$dst, (OpNode (mem_frag addr:$src1),
Lang Hames27839932013-10-21 17:51:24 +00003080 (i8 imm:$src2)))], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003081 def mik: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
Lang Hames27839932013-10-21 17:51:24 +00003082 (ins KRC:$mask, x86memop:$src1, i8imm:$src2),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003083 !strconcat(OpcodeStr,
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003084 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003085 [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
3086}
3087
// Shift where the count comes from a 128-bit source (VR128X register or
// i128mem). SrcVT types the register count operand; bc_frag is applied to a
// memopv2i64 load for the memory count operand.
// Defined records: rr and rm carry selection patterns; rrk and rmk are the
// merge-masked (EVEX_K) variants with empty pattern lists.
// NOTE(review): rmk has an empty pattern list and no explicit mayLoad;
// confirm whether the flag needs to be set here.
3088multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
3089 RegisterClass RC, ValueType vt, ValueType SrcVT,
3090 PatFrag bc_frag, RegisterClass KRC> {
3091 // src2 is always 128-bit
3092 def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
3093 (ins RC:$src1, VR128X:$src2),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003094 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003095 [(set RC:$dst, (vt (OpNode RC:$src1, (SrcVT VR128X:$src2))))],
3096 SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
3097 def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
3098 (ins KRC:$mask, RC:$src1, VR128X:$src2),
3099 !strconcat(OpcodeStr,
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003100 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003101 [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
3102 def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
3103 (ins RC:$src1, i128mem:$src2),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003104 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003105 [(set RC:$dst, (vt (OpNode RC:$src1,
3106 (bc_frag (memopv2i64 addr:$src2)))))],
3107 SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
3108 def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
3109 (ins KRC:$mask, RC:$src1, i128mem:$src2),
3110 !strconcat(OpcodeStr,
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003111 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003112 [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
3113}
3114
// 512-bit shift instantiations. Every name is defined twice: once from
// avx512_shift_rmi (immediate count; records ri, rik, mi, mik; CD8VF) and once
// from avx512_shift_rrm (128-bit count; records rr, rrk, rm, rmk; CD8VQ).
// The record suffixes of the two multiclasses do not overlap, so both defm
// statements can share one prefix.
// Dword forms use v16i32 / VK16WM; qword forms use v8i64 / VK8WM and VEX_W.
// VPSRAQZ reuses the VPSRADZ opcodes (0x72 with MRM4r/MRM4m, and 0xE2) and
// differs from it by VEX_W and the EVEX_CD8 element size.
3115defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
3116 VR512, v16i32, i512mem, memopv16i32, VK16WM>,
3117 EVEX_V512, EVEX_CD8<32, CD8VF>;
3118defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl,
3119 VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
3120 EVEX_CD8<32, CD8VQ>;
3121
3122defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
3123 VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
3124 EVEX_CD8<64, CD8VF>, VEX_W;
3125defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl,
3126 VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
3127 EVEX_CD8<64, CD8VQ>, VEX_W;
3128
3129defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
3130 VR512, v16i32, i512mem, memopv16i32, VK16WM>, EVEX_V512,
3131 EVEX_CD8<32, CD8VF>;
3132defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl,
3133 VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
3134 EVEX_CD8<32, CD8VQ>;
3135
3136defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
3137 VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
3138 EVEX_CD8<64, CD8VF>, VEX_W;
3139defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl,
3140 VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
3141 EVEX_CD8<64, CD8VQ>, VEX_W;
3142
3143defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
3144 VR512, v16i32, i512mem, memopv16i32, VK16WM>,
3145 EVEX_V512, EVEX_CD8<32, CD8VF>;
3146defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra,
3147 VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
3148 EVEX_CD8<32, CD8VQ>;
3149
3150defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
3151 VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
3152 EVEX_CD8<64, CD8VF>, VEX_W;
3153defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra,
3154 VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
3155 EVEX_CD8<64, CD8VQ>, VEX_W;
3156
3157//===-------------------------------------------------------------------===//
3158// Variable Bit Shifts
3159//===-------------------------------------------------------------------===//
// Per-element variable shift: the count operand is a full vector of the same
// type vt as the data operand. Defines rr (count in RC) and rm (count loaded
// via mem_frag); both carry selection patterns and are EVEX_4V. No masked
// variants are defined here.
3160multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
3161 RegisterClass RC, ValueType vt,
3162 X86MemOperand x86memop, PatFrag mem_frag> {
3163 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
3164 (ins RC:$src1, RC:$src2),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003165 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003166 [(set RC:$dst,
3167 (vt (OpNode RC:$src1, (vt RC:$src2))))]>,
3168 EVEX_4V;
3169 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
3170 (ins RC:$src1, x86memop:$src2),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003171 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003172 [(set RC:$dst,
3173 (vt (OpNode RC:$src1, (mem_frag addr:$src2))))]>,
3174 EVEX_4V;
3175}
3176
// 512-bit variable shifts mapped directly onto the generic shl / srl / sra
// nodes. The dword and qword forms of each shift share an opcode (0x47 shl,
// 0x45 srl, 0x46 sra); the qword forms add VEX_W and use EVEX_CD8<64, CD8VF>.
3177defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32,
3178 i512mem, memopv16i32>, EVEX_V512,
3179 EVEX_CD8<32, CD8VF>;
3180defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64,
3181 i512mem, memopv8i64>, EVEX_V512, VEX_W,
3182 EVEX_CD8<64, CD8VF>;
3183defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32,
3184 i512mem, memopv16i32>, EVEX_V512,
3185 EVEX_CD8<32, CD8VF>;
3186defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64,
3187 i512mem, memopv8i64>, EVEX_V512, VEX_W,
3188 EVEX_CD8<64, CD8VF>;
3189defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32,
3190 i512mem, memopv16i32>, EVEX_V512,
3191 EVEX_CD8<32, CD8VF>;
3192defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64,
3193 i512mem, memopv8i64>, EVEX_V512, VEX_W,
3194 EVEX_CD8<64, CD8VF>;
3195
3196//===----------------------------------------------------------------------===//
3197// AVX-512 - MOVDDUP
3198//===----------------------------------------------------------------------===//
3199
// MOVDDUP with a fixed opcode of 0x12. Defines rr (register source) and rm
// (source loaded via memop_frag); both match X86Movddup producing VT and are
// tagged EVEX.
3200multiclass avx512_movddup<string OpcodeStr, RegisterClass RC, ValueType VT,
3201 X86MemOperand x86memop, PatFrag memop_frag> {
3202def rr : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003203 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003204 [(set RC:$dst, (VT (X86Movddup RC:$src)))]>, EVEX;
3205def rm : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003206 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003207 [(set RC:$dst,
3208 (VT (X86Movddup (memop_frag addr:$src))))]>, EVEX;
3209}
3210
// 512-bit MOVDDUP on v8f64, plus an extra pattern that selects the memory
// form when the X86Movddup operand is a scalar f64 load wrapped in
// scalar_to_vector.
3211defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, memopv8f64>,
3212 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
3213def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))),
3214 (VMOVDDUPZrm addr:$src)>;
3215
Elena Demikhovsky0a74b7d2013-11-14 11:29:27 +00003216//===---------------------------------------------------------------------===//
3217// Replicate Single FP - MOVSHDUP and MOVSLDUP
3218//===---------------------------------------------------------------------===//
// Single-source replicate operation (instantiated below for MOVSHDUP and
// MOVSLDUP). Defines rr (register source) and rm (source loaded via
// mem_frag), both tagged EVEX. The let mayLoad = 1 has no braces and so
// applies to the rm record that follows it.
3219multiclass avx512_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
3220 ValueType vt, RegisterClass RC, PatFrag mem_frag,
3221 X86MemOperand x86memop> {
3222 def rr : AVX512XSI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003223 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
Elena Demikhovsky0a74b7d2013-11-14 11:29:27 +00003224 [(set RC:$dst, (vt (OpNode RC:$src)))]>, EVEX;
3225 let mayLoad = 1 in
3226 def rm : AVX512XSI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003227 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
Elena Demikhovsky0a74b7d2013-11-14 11:29:27 +00003228 [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>, EVEX;
3229}
3230
// 512-bit VMOVSHDUP (opcode 0x16) and VMOVSLDUP (opcode 0x12) on v16f32,
// followed by patterns that reuse the same instructions when the
// X86Movshdup / X86Movsldup node is typed v16i32 (register and memopv16i32
// sources).
3231defm VMOVSHDUPZ : avx512_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
3232 v16f32, VR512, memopv16f32, f512mem>, EVEX_V512,
3233 EVEX_CD8<32, CD8VF>;
3234defm VMOVSLDUPZ : avx512_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
3235 v16f32, VR512, memopv16f32, f512mem>, EVEX_V512,
3236 EVEX_CD8<32, CD8VF>;
3237
3238def : Pat<(v16i32 (X86Movshdup VR512:$src)), (VMOVSHDUPZrr VR512:$src)>;
3239def : Pat<(v16i32 (X86Movshdup (memopv16i32 addr:$src))),
3240 (VMOVSHDUPZrm addr:$src)>;
3241def : Pat<(v16i32 (X86Movsldup VR512:$src)), (VMOVSLDUPZrr VR512:$src)>;
3242def : Pat<(v16i32 (X86Movsldup (memopv16i32 addr:$src))),
3243 (VMOVSLDUPZrm addr:$src)>;
3244
3245//===----------------------------------------------------------------------===//
3246// Move Low to High and High to Low packed FP Instructions
3247//===----------------------------------------------------------------------===//
// EVEX-encoded VMOVLHPS (opcode 0x16) and VMOVHLPS (opcode 0x12). Both are
// register-only, operate on the 128-bit VR128X class, match the v4f32
// X86Movlhps / X86Movhlps nodes and use the IIC_SSE_MOV_LH itinerary.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003248def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
3249 (ins VR128X:$src1, VR128X:$src2),
Elena Demikhovskycf088092013-12-11 14:31:04 +00003250 "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003251 [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))],
3252 IIC_SSE_MOV_LH>, EVEX_4V;
3253def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
3254 (ins VR128X:$src1, VR128X:$src2),
Elena Demikhovskycf088092013-12-11 14:31:04 +00003255 "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003256 [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))],
3257 IIC_SSE_MOV_LH>, EVEX_4V;
3258
// Under HasAVX512, reuse the FP move instructions for integer-typed nodes:
// X86Movlhps typed v4i32 or v2i64 selects VMOVLHPSZrr, and X86Movhlps typed
// v4i32 selects VMOVHLPSZrr. There is no v2i64 pattern for X86Movhlps here.
Craig Topperdbe8b7d2013-09-27 07:20:47 +00003259let Predicates = [HasAVX512] in {
3260 // MOVLHPS patterns
3261 def : Pat<(v4i32 (X86Movlhps VR128X:$src1, VR128X:$src2)),
3262 (VMOVLHPSZrr VR128X:$src1, VR128X:$src2)>;
3263 def : Pat<(v2i64 (X86Movlhps VR128X:$src1, VR128X:$src2)),
3264 (VMOVLHPSZrr (v2i64 VR128X:$src1), VR128X:$src2)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003265
Craig Topperdbe8b7d2013-09-27 07:20:47 +00003266 // MOVHLPS patterns
3267 def : Pat<(v4i32 (X86Movhlps VR128X:$src1, VR128X:$src2)),
3268 (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>;
3269}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003270
3271//===----------------------------------------------------------------------===//
3272// FMA - Fused Multiply Operations
3273//
// Packed FMA in the operand order used by the 213 instantiations below: the
// pattern is OpNode applied to $src1, $src2, $src3, with $src1 tied to $dst
// by the enclosing Constraints.
// Defined records:
//   r  - register form, generated through AVX512_masking_3src (defined
//        outside this chunk) using KRC as the mask register class.
//   m  - $src3 loaded from x86memop via mem_frag.
//   mb - $src3 is a scalar load wrapped in X86VBroadcast (EVEX_B).
// The asm strings of m and mb list only $src3, $src2 and $dst; $src1 is the
// tied operand.
// NOTE(review): let mayLoad = 1 has no braces, so it covers only m. mb relies
// on its pattern for the flag; confirm that is sufficient.
3274let Constraints = "$src1 = $dst" in {
3275multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr,
3276 RegisterClass RC, X86MemOperand x86memop,
3277 PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
Adam Nemet2e91ee52014-08-14 17:13:19 +00003278 string BrdcstStr, SDNode OpNode, ValueType OpVT,
3279 RegisterClass KRC> {
3280 defm r: AVX512_masking_3src<opc, MRMSrcReg, (outs RC:$dst),
3281 (ins RC:$src2, RC:$src3),
3282 OpcodeStr, "$src3, $src2", "$src2, $src3",
3283 (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)), OpVT, RC, KRC>,
3284 AVX512FMA3Base;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003285
3286 let mayLoad = 1 in
3287 def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3288 (ins RC:$src1, RC:$src2, x86memop:$src3),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003289 !strconcat(OpcodeStr, " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003290 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2,
3291 (mem_frag addr:$src3))))]>;
3292 def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3293 (ins RC:$src1, RC:$src2, x86scalar_mop:$src3),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003294 !strconcat(OpcodeStr, " \t{${src3}", BrdcstStr,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003295 ", $src2, $dst|$dst, $src2, ${src3}", BrdcstStr, "}"),
3296 [(set RC:$dst, (OpNode RC:$src1, RC:$src2,
3297 (OpVT (X86VBroadcast (scalar_mfrag addr:$src3)))))]>, EVEX_B;
3298}
3299} // Constraints = "$src1 = $dst"
3300
// 512-bit FMA 213 instantiations of avx512_fma3p_rm. The PS group runs in
// the SSEPackedSingle domain with v16f32 / VK16WM / {1to16}; the PD group
// runs in SSEPackedDouble with v8f64 / VK8WM / {1to8} and adds VEX_W.
// Opcodes are shared between the PS and PD flavour of each operation:
// 0xA8 fmadd, 0xAA fmsub, 0xA6 fmaddsub, 0xA7 fmsubadd, 0xAC fnmadd,
// 0xAE fnmsub.
3301let ExeDomain = SSEPackedSingle in {
3302 defm VFMADD213PSZ : avx512_fma3p_rm<0xA8, "vfmadd213ps", VR512, f512mem,
3303 memopv16f32, f32mem, loadf32, "{1to16}",
Adam Nemet2e91ee52014-08-14 17:13:19 +00003304 X86Fmadd, v16f32, VK16WM>, EVEX_V512,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003305 EVEX_CD8<32, CD8VF>;
3306 defm VFMSUB213PSZ : avx512_fma3p_rm<0xAA, "vfmsub213ps", VR512, f512mem,
3307 memopv16f32, f32mem, loadf32, "{1to16}",
Adam Nemet2e91ee52014-08-14 17:13:19 +00003308 X86Fmsub, v16f32, VK16WM>, EVEX_V512,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003309 EVEX_CD8<32, CD8VF>;
3310 defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", VR512, f512mem,
3311 memopv16f32, f32mem, loadf32, "{1to16}",
Adam Nemet2e91ee52014-08-14 17:13:19 +00003312 X86Fmaddsub, v16f32, VK16WM>,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003313 EVEX_V512, EVEX_CD8<32, CD8VF>;
3314 defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", VR512, f512mem,
3315 memopv16f32, f32mem, loadf32, "{1to16}",
Adam Nemet2e91ee52014-08-14 17:13:19 +00003316 X86Fmsubadd, v16f32, VK16WM>,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003317 EVEX_V512, EVEX_CD8<32, CD8VF>;
3318 defm VFNMADD213PSZ : avx512_fma3p_rm<0xAC, "vfnmadd213ps", VR512, f512mem,
3319 memopv16f32, f32mem, loadf32, "{1to16}",
Adam Nemet2e91ee52014-08-14 17:13:19 +00003320 X86Fnmadd, v16f32, VK16WM>, EVEX_V512,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003321 EVEX_CD8<32, CD8VF>;
3322 defm VFNMSUB213PSZ : avx512_fma3p_rm<0xAE, "vfnmsub213ps", VR512, f512mem,
3323 memopv16f32, f32mem, loadf32, "{1to16}",
Adam Nemet2e91ee52014-08-14 17:13:19 +00003324 X86Fnmsub, v16f32, VK16WM>, EVEX_V512,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003325 EVEX_CD8<32, CD8VF>;
3326}
3327let ExeDomain = SSEPackedDouble in {
3328 defm VFMADD213PDZ : avx512_fma3p_rm<0xA8, "vfmadd213pd", VR512, f512mem,
3329 memopv8f64, f64mem, loadf64, "{1to8}",
Adam Nemet2e91ee52014-08-14 17:13:19 +00003330 X86Fmadd, v8f64, VK8WM>, EVEX_V512,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003331 VEX_W, EVEX_CD8<64, CD8VF>;
3332 defm VFMSUB213PDZ : avx512_fma3p_rm<0xAA, "vfmsub213pd", VR512, f512mem,
3333 memopv8f64, f64mem, loadf64, "{1to8}",
Adam Nemet2e91ee52014-08-14 17:13:19 +00003334 X86Fmsub, v8f64, VK8WM>, EVEX_V512, VEX_W,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003335 EVEX_CD8<64, CD8VF>;
3336 defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", VR512, f512mem,
3337 memopv8f64, f64mem, loadf64, "{1to8}",
Adam Nemet2e91ee52014-08-14 17:13:19 +00003338 X86Fmaddsub, v8f64, VK8WM>,
3339 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003340 defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", VR512, f512mem,
3341 memopv8f64, f64mem, loadf64, "{1to8}",
Adam Nemet2e91ee52014-08-14 17:13:19 +00003342 X86Fmsubadd, v8f64, VK8WM>,
3343 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003344 defm VFNMADD213PDZ : avx512_fma3p_rm<0xAC, "vfnmadd213pd", VR512, f512mem,
3345 memopv8f64, f64mem, loadf64, "{1to8}",
Adam Nemet2e91ee52014-08-14 17:13:19 +00003346 X86Fnmadd, v8f64, VK8WM>, EVEX_V512, VEX_W,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003347 EVEX_CD8<64, CD8VF>;
3348 defm VFNMSUB213PDZ : avx512_fma3p_rm<0xAE, "vfnmsub213pd", VR512, f512mem,
3349 memopv8f64, f64mem, loadf64, "{1to8}",
Adam Nemet2e91ee52014-08-14 17:13:19 +00003350 X86Fnmsub, v8f64, VK8WM>, EVEX_V512, VEX_W,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003351 EVEX_CD8<64, CD8VF>;
3352}
3353
// Memory-only FMA forms used by the 132 instantiations below. The memory
// operand is $src2 and sits in the middle position of the pattern: OpNode is
// applied to $src1, the loaded $src2, then $src3. $src1 is tied to $dst by
// the enclosing Constraints.
// Defined records:
//   m  - $src2 loaded from x86memop via mem_frag.
//   mb - $src2 is a scalar load wrapped in X86VBroadcast (EVEX_B).
// No register form and no masked forms are defined here.
// NOTE(review): let mayLoad = 1 has no braces, so it covers only m. mb relies
// on its pattern for the flag; confirm that is sufficient.
3354let Constraints = "$src1 = $dst" in {
3355multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr,
3356 RegisterClass RC, X86MemOperand x86memop,
3357 PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
3358 string BrdcstStr, SDNode OpNode, ValueType OpVT> {
3359 let mayLoad = 1 in
3360 def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3361 (ins RC:$src1, RC:$src3, x86memop:$src2),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003362 !strconcat(OpcodeStr, " \t{$src2, $src3, $dst|$dst, $src3, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003363 [(set RC:$dst, (OpVT (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3)))]>;
3364 def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3365 (ins RC:$src1, RC:$src3, x86scalar_mop:$src2),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00003366 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003367 ", $src3, $dst|$dst, $src3, ${src2}", BrdcstStr, "}"),
3368 [(set RC:$dst, (OpNode RC:$src1,
3369 (OpVT (X86VBroadcast (scalar_mfrag addr:$src2))), RC:$src3))]>, EVEX_B;
3370}
3371} // Constraints = "$src1 = $dst"
3372
3373
// 512-bit packed single-precision FMA, 132 form: one avx512_fma3p_m132
// instantiation per operation, all on VR512 / v16f32 with a {1to16}
// broadcast suffix.
let ExeDomain = SSEPackedSingle in {
  defm VFMADD132PSZ    : avx512_fma3p_m132<0x98, "vfmadd132ps",
                           VR512, f512mem, memopv16f32,
                           f32mem, loadf32, "{1to16}",
                           X86Fmadd, v16f32>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;

  defm VFMSUB132PSZ    : avx512_fma3p_m132<0x9A, "vfmsub132ps",
                           VR512, f512mem, memopv16f32,
                           f32mem, loadf32, "{1to16}",
                           X86Fmsub, v16f32>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;

  defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps",
                           VR512, f512mem, memopv16f32,
                           f32mem, loadf32, "{1to16}",
                           X86Fmaddsub, v16f32>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;

  defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps",
                           VR512, f512mem, memopv16f32,
                           f32mem, loadf32, "{1to16}",
                           X86Fmsubadd, v16f32>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;

  defm VFNMADD132PSZ   : avx512_fma3p_m132<0x9C, "vfnmadd132ps",
                           VR512, f512mem, memopv16f32,
                           f32mem, loadf32, "{1to16}",
                           X86Fnmadd, v16f32>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;

  defm VFNMSUB132PSZ   : avx512_fma3p_m132<0x9E, "vfnmsub132ps",
                           VR512, f512mem, memopv16f32,
                           f32mem, loadf32, "{1to16}",
                           X86Fnmsub, v16f32>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
}
// 512-bit packed double-precision FMA, 132 form: one avx512_fma3p_m132
// instantiation per operation, all on VR512 / v8f64 with a {1to8}
// broadcast suffix and VEX_W set.
let ExeDomain = SSEPackedDouble in {
  defm VFMADD132PDZ    : avx512_fma3p_m132<0x98, "vfmadd132pd",
                           VR512, f512mem, memopv8f64,
                           f64mem, loadf64, "{1to8}",
                           X86Fmadd, v8f64>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  defm VFMSUB132PDZ    : avx512_fma3p_m132<0x9A, "vfmsub132pd",
                           VR512, f512mem, memopv8f64,
                           f64mem, loadf64, "{1to8}",
                           X86Fmsub, v8f64>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd",
                           VR512, f512mem, memopv8f64,
                           f64mem, loadf64, "{1to8}",
                           X86Fmaddsub, v8f64>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd",
                           VR512, f512mem, memopv8f64,
                           f64mem, loadf64, "{1to8}",
                           X86Fmsubadd, v8f64>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  defm VFNMADD132PDZ   : avx512_fma3p_m132<0x9C, "vfnmadd132pd",
                           VR512, f512mem, memopv8f64,
                           f64mem, loadf64, "{1to8}",
                           X86Fnmadd, v8f64>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  defm VFNMSUB132PDZ   : avx512_fma3p_m132<0x9E, "vfnmsub132pd",
                           VR512, f512mem, memopv8f64,
                           f64mem, loadf64, "{1to8}",
                           X86Fnmsub, v8f64>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
3426
3427// Scalar FMA
// Scalar FMA: register (r) and memory (m) forms. $src1 is tied to $dst.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   OpNode    - selection-DAG node matched by both forms.
//   RC / OpVT - scalar register class and its value type.
//   x86memop  - memory operand used for $src3 in the m form.
//   memop     - currently unused; kept so existing instantiations still
//               match the parameter list.
//   mem_frag  - load fragment applied to $src3 in the m form.
let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass RC, ValueType OpVT,
                           X86MemOperand x86memop, Operand memop,
                           PatFrag mem_frag> {
  let isCommutable = 1 in
  def r : AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst),
            (ins RC:$src1, RC:$src2, RC:$src3),
            !strconcat(OpcodeStr,
                       " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            [(set RC:$dst,
               (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;

  // Use the caller-supplied memory operand (f32mem / f64mem in the visible
  // instantiations) instead of a hard-coded f128mem, so the operand
  // description agrees with the scalar load performed by mem_frag.
  let mayLoad = 1 in
  def m : AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
            (ins RC:$src1, RC:$src2, x86memop:$src3),
            !strconcat(OpcodeStr,
                       " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            [(set RC:$dst,
               (OpVT (OpNode RC:$src2, RC:$src1,
                             (mem_frag addr:$src3))))]>;
}

} // Constraints = "$src1 = $dst"
3451
// Scalar 213-form FMA instantiations of avx512_fma3s_rm, in single (FR32X)
// and double (FR64X, VEX_W) flavours.
defm VFMADDSSZ  : avx512_fma3s_rm<0xA9, "vfmadd213ss", X86Fmadd, FR32X,
                                  f32, f32mem, ssmem, loadf32>,
                  EVEX_CD8<32, CD8VT1>;
defm VFMADDSDZ  : avx512_fma3s_rm<0xA9, "vfmadd213sd", X86Fmadd, FR64X,
                                  f64, f64mem, sdmem, loadf64>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;

defm VFMSUBSSZ  : avx512_fma3s_rm<0xAB, "vfmsub213ss", X86Fmsub, FR32X,
                                  f32, f32mem, ssmem, loadf32>,
                  EVEX_CD8<32, CD8VT1>;
defm VFMSUBSDZ  : avx512_fma3s_rm<0xAB, "vfmsub213sd", X86Fmsub, FR64X,
                                  f64, f64mem, sdmem, loadf64>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;

defm VFNMADDSSZ : avx512_fma3s_rm<0xAD, "vfnmadd213ss", X86Fnmadd, FR32X,
                                  f32, f32mem, ssmem, loadf32>,
                  EVEX_CD8<32, CD8VT1>;
defm VFNMADDSDZ : avx512_fma3s_rm<0xAD, "vfnmadd213sd", X86Fnmadd, FR64X,
                                  f64, f64mem, sdmem, loadf64>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;

defm VFNMSUBSSZ : avx512_fma3s_rm<0xAF, "vfnmsub213ss", X86Fnmsub, FR32X,
                                  f32, f32mem, ssmem, loadf32>,
                  EVEX_CD8<32, CD8VT1>;
defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd", X86Fnmsub, FR64X,
                                  f64, f64mem, sdmem, loadf64>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;
3468
3469//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from signed integer to float/double
3471//===----------------------------------------------------------------------===//
3472
// Scalar integer -> floating-point conversion, three-operand form.
//   rr - integer source in a SrcRC register.
//   rm - integer source in memory (x86memop).
// Both take an extra DstRC:$src1 input and carry no selection pattern.
multiclass avx512_vcvtsi<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                         X86MemOperand x86memop, string asm> {
  let hasSideEffects = 0 in {
    def rr : SI<opc, MRMSrcReg,
               (outs DstRC:$dst),
               (ins DstRC:$src1, SrcRC:$src),
               !strconcat(asm, " \t{$src, $src1, $dst|$dst, $src1, $src}"),
               []>,
             EVEX_4V;

    let mayLoad = 1 in
    def rm : SI<opc, MRMSrcMem,
               (outs DstRC:$dst),
               (ins DstRC:$src1, x86memop:$src),
               !strconcat(asm, " \t{$src, $src1, $dst|$dst, $src1, $src}"),
               []>,
             EVEX_4V;
  } // hasSideEffects = 0
}
Andrew Trick15a47742013-10-09 05:11:10 +00003486let Predicates = [HasAVX512] in {
// Signed integer (GR32 / GR64) -> scalar float (FR32X) / double (FR64X).
defm VCVTSI2SSZ   : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}">,
                    XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}">,
                    XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ   : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}">,
                    XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}">,
                    XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
3495
// sint_to_fp selection patterns. The extra first operand of each
// instruction is fed an IMPLICIT_DEF of the result type.

// Source loaded from memory.
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SSZrm   (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SDZrm   (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

// Source in a general-purpose register.
def : Pat<(f32 (sint_to_fp GR32:$src)),
          (VCVTSI2SSZrr   (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (sint_to_fp GR64:$src)),
          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (sint_to_fp GR32:$src)),
          (VCVTSI2SDZrr   (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (sint_to_fp GR64:$src)),
          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
3513
// Unsigned integer (GR32 / GR64) -> scalar float (FR32X) / double (FR64X).
defm VCVTUSI2SSZ   : avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}">,
                     XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ : avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}">,
                     XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}">,
                     XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ : avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}">,
                     XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
3522
// uint_to_fp selection patterns. The extra first operand of each
// instruction is fed an IMPLICIT_DEF of the result type.

// Source loaded from memory.
def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SSZrm   (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SDZrm   (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

// Source in a general-purpose register.
def : Pat<(f32 (uint_to_fp GR32:$src)),
          (VCVTUSI2SSZrr   (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (uint_to_fp GR64:$src)),
          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (uint_to_fp GR32:$src)),
          (VCVTUSI2SDZrr   (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (uint_to_fp GR64:$src)),
          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
Andrew Trick15a47742013-10-09 05:11:10 +00003540}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003541
3542//===----------------------------------------------------------------------===//
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003543// AVX-512 Scalar convert from float/double to integer
3544//===----------------------------------------------------------------------===//
// Scalar floating-point -> integer conversion selected through an intrinsic.
//   rr - register source; its pattern applies Int to SrcRC:$src.
//   rm - memory source (memop); declared without a selection pattern.
// mem_cpat is accepted but not referenced by either record.
multiclass avx512_cvt_s_int<bits<8> opc, RegisterClass SrcRC,
                            RegisterClass DstRC, Intrinsic Int,
                            Operand memop, ComplexPattern mem_cpat,
                            string asm> {
  let hasSideEffects = 0 in {
    def rr : SI<opc, MRMSrcReg,
               (outs DstRC:$dst), (ins SrcRC:$src),
               !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
               [(set DstRC:$dst, (Int SrcRC:$src))]>,
             EVEX, VEX_LIG, Requires<[HasAVX512]>;

    let mayLoad = 1 in
    def rm : SI<opc, MRMSrcMem,
               (outs DstRC:$dst), (ins memop:$src),
               !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
               []>,
             EVEX, VEX_LIG, Requires<[HasAVX512]>;
  } // hasSideEffects = 0
}
3559let Predicates = [HasAVX512] in {
3560// Convert float/double to signed/unsigned int 32/64
// Scalar single -> int32 / int64, signed then unsigned.
defm VCVTSS2SIZ    : avx512_cvt_s_int<0x2D, VR128X, GR32,
                                      int_x86_sse_cvtss2si,
                                      ssmem, sse_load_f32, "cvtss2si">,
                     XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z  : avx512_cvt_s_int<0x2D, VR128X, GR64,
                                      int_x86_sse_cvtss2si64,
                                      ssmem, sse_load_f32, "cvtss2si">,
                     XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ   : avx512_cvt_s_int<0x79, VR128X, GR32,
                                      int_x86_avx512_cvtss2usi,
                                      ssmem, sse_load_f32, "cvtss2usi">,
                     XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z : avx512_cvt_s_int<0x79, VR128X, GR64,
                                      int_x86_avx512_cvtss2usi64,
                                      ssmem, sse_load_f32, "cvtss2usi">,
                     XS, VEX_W, EVEX_CD8<32, CD8VT1>;

// Scalar double -> int32 / int64, signed then unsigned.
defm VCVTSD2SIZ    : avx512_cvt_s_int<0x2D, VR128X, GR32,
                                      int_x86_sse2_cvtsd2si,
                                      sdmem, sse_load_f64, "cvtsd2si">,
                     XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z  : avx512_cvt_s_int<0x2D, VR128X, GR64,
                                      int_x86_sse2_cvtsd2si64,
                                      sdmem, sse_load_f64, "cvtsd2si">,
                     XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ   : avx512_cvt_s_int<0x79, VR128X, GR32,
                                      int_x86_avx512_cvtsd2usi,
                                      sdmem, sse_load_f64, "cvtsd2usi">,
                     XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z : avx512_cvt_s_int<0x79, VR128X, GR64,
                                      int_x86_avx512_cvtsd2usi64,
                                      sdmem, sse_load_f64, "cvtsd2usi">,
                     XD, VEX_W, EVEX_CD8<64, CD8VT1>;
3587
// Intrinsic (VR128X) forms of the scalar integer -> floating-point
// conversions. These are codegen-only records built from
// sse12_cvt_sint_3addr.
//
// The signed conversions use opcode 0x2A. The unsigned conversions must use
// 0x7B, the same opcode as the non-intrinsic VCVTUSI2SSZ / VCVTUSI2SDZ
// records; with 0x2A they would be encoded as the signed instruction.
//
// NOTE(review): unlike the non-intrinsic records, none of these carry an
// EVEX_CD8 tag - confirm whether the memory forms need one.
let isCodeGenOnly = 1 in {
  // Signed.
  defm Int_VCVTSI2SSZ   : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
                            int_x86_sse_cvtsi2ss, i32mem, loadi32,
                            "cvtsi2ss{l}", SSE_CVT_Scalar, 0>,
                          XS, EVEX_4V;
  defm Int_VCVTSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
                            int_x86_sse_cvtsi642ss, i64mem, loadi64,
                            "cvtsi2ss{q}", SSE_CVT_Scalar, 0>,
                          XS, EVEX_4V, VEX_W;
  defm Int_VCVTSI2SDZ   : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
                            int_x86_sse2_cvtsi2sd, i32mem, loadi32,
                            "cvtsi2sd{l}", SSE_CVT_Scalar, 0>,
                          XD, EVEX_4V;
  defm Int_VCVTSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
                            int_x86_sse2_cvtsi642sd, i64mem, loadi64,
                            "cvtsi2sd{q}", SSE_CVT_Scalar, 0>,
                          XD, EVEX_4V, VEX_W;

  // Unsigned.
  defm Int_VCVTUSI2SSZ   : sse12_cvt_sint_3addr<0x7B, GR32, VR128X,
                             int_x86_avx512_cvtusi2ss, i32mem, loadi32,
                             "cvtusi2ss{l}", SSE_CVT_Scalar, 0>,
                           XS, EVEX_4V;
  defm Int_VCVTUSI2SS64Z : sse12_cvt_sint_3addr<0x7B, GR64, VR128X,
                             int_x86_avx512_cvtusi642ss, i64mem, loadi64,
                             "cvtusi2ss{q}", SSE_CVT_Scalar, 0>,
                           XS, EVEX_4V, VEX_W;
  defm Int_VCVTUSI2SDZ   : sse12_cvt_sint_3addr<0x7B, GR32, VR128X,
                             int_x86_avx512_cvtusi2sd, i32mem, loadi32,
                             "cvtusi2sd{l}", SSE_CVT_Scalar, 0>,
                           XD, EVEX_4V;
  defm Int_VCVTUSI2SD64Z : sse12_cvt_sint_3addr<0x7B, GR64, VR128X,
                             int_x86_avx512_cvtusi642sd, i64mem, loadi64,
                             "cvtusi2sd{q}", SSE_CVT_Scalar, 0>,
                           XD, EVEX_4V, VEX_W;
} // isCodeGenOnly = 1
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003615
3616// Convert float/double to signed/unsigned int 32/64 with truncation
// Intrinsic (VR128X) forms of the truncating scalar fp -> integer
// conversions; codegen-only.
let isCodeGenOnly = 1 in {
  // Signed results.
  defm Int_VCVTTSS2SIZ    : avx512_cvt_s_int<0x2C, VR128X, GR32,
                              int_x86_sse_cvttss2si,
                              ssmem, sse_load_f32, "cvttss2si">,
                            XS, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSS2SI64Z  : avx512_cvt_s_int<0x2C, VR128X, GR64,
                              int_x86_sse_cvttss2si64,
                              ssmem, sse_load_f32, "cvttss2si">,
                            XS, VEX_W, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSD2SIZ    : avx512_cvt_s_int<0x2C, VR128X, GR32,
                              int_x86_sse2_cvttsd2si,
                              sdmem, sse_load_f64, "cvttsd2si">,
                            XD, EVEX_CD8<64, CD8VT1>;
  defm Int_VCVTTSD2SI64Z  : avx512_cvt_s_int<0x2C, VR128X, GR64,
                              int_x86_sse2_cvttsd2si64,
                              sdmem, sse_load_f64, "cvttsd2si">,
                            XD, VEX_W, EVEX_CD8<64, CD8VT1>;

  // Unsigned results.
  defm Int_VCVTTSS2USIZ   : avx512_cvt_s_int<0x78, VR128X, GR32,
                              int_x86_avx512_cvttss2usi,
                              ssmem, sse_load_f32, "cvttss2usi">,
                            XS, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSS2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
                              int_x86_avx512_cvttss2usi64,
                              ssmem, sse_load_f32, "cvttss2usi">,
                            XS, VEX_W, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSD2USIZ   : avx512_cvt_s_int<0x78, VR128X, GR32,
                              int_x86_avx512_cvttsd2usi,
                              sdmem, sse_load_f64, "cvttsd2usi">,
                            XD, EVEX_CD8<64, CD8VT1>;
  defm Int_VCVTTSD2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
                              int_x86_avx512_cvttsd2usi64,
                              sdmem, sse_load_f64, "cvttsd2usi">,
                            XD, VEX_W, EVEX_CD8<64, CD8VT1>;
} // isCodeGenOnly = 1
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003648
// Scalar conversion selected through a generic SDNode.
//   rr - OpNode applied to a SrcRC register.
//   rm - OpNode applied to a value loaded via ld_frag from x86memop.
multiclass avx512_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                        SDNode OpNode, X86MemOperand x86memop,
                        PatFrag ld_frag, string asm> {
  def rr : SI<opc, MRMSrcReg,
             (outs DstRC:$dst), (ins SrcRC:$src),
             !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
             [(set DstRC:$dst, (OpNode SrcRC:$src))]>,
           EVEX;

  def rm : SI<opc, MRMSrcMem,
             (outs DstRC:$dst), (ins x86memop:$src),
             !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
             [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>,
           EVEX;
}
3659
// Truncating scalar single -> integer, matched from fp_to_sint / fp_to_uint.
defm VCVTTSS2SIZ    : avx512_cvt_s<0x2C, FR32X, GR32, fp_to_sint,
                                   f32mem, loadf32, "cvttss2si">,
                      XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USIZ   : avx512_cvt_s<0x78, FR32X, GR32, fp_to_uint,
                                   f32mem, loadf32, "cvttss2usi">,
                      XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z  : avx512_cvt_s<0x2C, FR32X, GR64, fp_to_sint,
                                   f32mem, loadf32, "cvttss2si">,
                      XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z : avx512_cvt_s<0x78, FR32X, GR64, fp_to_uint,
                                   f32mem, loadf32, "cvttss2usi">,
                      XS, VEX_W, EVEX_CD8<32, CD8VT1>;

// Truncating scalar double -> integer, matched from fp_to_sint / fp_to_uint.
defm VCVTTSD2SIZ    : avx512_cvt_s<0x2C, FR64X, GR32, fp_to_sint,
                                   f64mem, loadf64, "cvttsd2si">,
                      XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USIZ   : avx512_cvt_s<0x78, FR64X, GR32, fp_to_uint,
                                   f64mem, loadf64, "cvttsd2usi">,
                      XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z  : avx512_cvt_s<0x2C, FR64X, GR64, fp_to_sint,
                                   f64mem, loadf64, "cvttsd2si">,
                      XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z : avx512_cvt_s<0x78, FR64X, GR64, fp_to_uint,
                                   f64mem, loadf64, "cvttsd2usi">,
                      XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskycf088092013-12-11 14:31:04 +00003684} // HasAVX512
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003685//===----------------------------------------------------------------------===//
// AVX-512 Convert from float to double and back
3687//===----------------------------------------------------------------------===//
// Scalar float <-> double conversions. All four records are declared
// without selection patterns; the standalone Pat definitions select them.
let hasSideEffects = 0 in {
// Convert scalar single to scalar double
def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg,
                     (outs FR64X:$dst),
                     (ins FR32X:$src1, FR32X:$src2),
                     "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     []>,
                   EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
let mayLoad = 1 in
def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem,
                     (outs FR64X:$dst),
                     (ins FR32X:$src1, f32mem:$src2),
                     "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     []>,
                   EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
                   EVEX_CD8<32, CD8VT1>;

// Convert scalar double to scalar single
def VCVTSD2SSZrr : AVX512XDI<0x5A, MRMSrcReg,
                     (outs FR32X:$dst),
                     (ins FR64X:$src1, FR64X:$src2),
                     "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     []>,
                   EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>;
let mayLoad = 1 in
def VCVTSD2SSZrm : AVX512XDI<0x5A, MRMSrcMem,
                     (outs FR32X:$dst),
                     (ins FR64X:$src1, f64mem:$src2),
                     "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     []>,
                   EVEX_4V, VEX_LIG, VEX_W,
                   Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>;
}
3712
// fextend of a register: the source is passed as both instruction operands.
def : Pat<(f64 (fextend FR32X:$src)),
          (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>,
      Requires<[HasAVX512]>;

// fextend of a loaded f32: use the memory form directly.
def : Pat<(fextend (loadf32 addr:$src)),
          (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
      Requires<[HasAVX512]>;

// Extending load, when optimizing for size: single instruction with a
// memory operand.
def : Pat<(extloadf32 addr:$src),
          (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
      Requires<[HasAVX512, OptForSize]>;

// Extending load, when optimizing for speed: separate VMOVSSZrm load
// followed by the register form.
def : Pat<(extloadf32 addr:$src),
          (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
      Requires<[HasAVX512, OptForSpeed]>;

// fround of a register: the source is passed as both instruction operands.
def : Pat<(f32 (fround FR64X:$src)),
          (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>,
      Requires<[HasAVX512]>;
3728
// Packed conversion with an additional rounding-control register form.
//   rr  - register source, matched by OpNode on InVT.
//   rrb - register source plus an AVX512RC:$rc operand; no selection
//         pattern, tagged EVEX_B and EVEX_RC.
//   rm  - memory source; mem_frag's result is bitconverted to InVT before
//         OpNode is applied.
multiclass avx512_vcvt_fp_with_rc<bits<8> opc, string asm,
                                  RegisterClass SrcRC, RegisterClass DstRC,
                                  SDNode OpNode, PatFrag mem_frag,
                                  X86MemOperand x86memop,
                                  ValueType OpVT, ValueType InVT,
                                  Domain d> {
  let hasSideEffects = 0 in {
    def rr : AVX512PI<opc, MRMSrcReg,
               (outs DstRC:$dst), (ins SrcRC:$src),
               !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
               [(set DstRC:$dst,
                  (OpVT (OpNode (InVT SrcRC:$src))))],
               d>,
             EVEX;

    def rrb : AVX512PI<opc, MRMSrcReg,
                (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
                !strconcat(asm, " \t{$rc, $src, $dst|$dst, $src, $rc}"),
                [], d>,
              EVEX, EVEX_B, EVEX_RC;

    let mayLoad = 1 in
    def rm : AVX512PI<opc, MRMSrcMem,
               (outs DstRC:$dst), (ins x86memop:$src),
               !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
               [(set DstRC:$dst,
                  (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))],
               d>,
             EVEX;
  } // hasSideEffects = 0
}
3748
// Packed conversion without a rounding-control form.
//   rr - register source, matched by OpNode on InVT.
//   rm - memory source; mem_frag's result is bitconverted to InVT before
//        OpNode is applied.
multiclass avx512_vcvt_fp<bits<8> opc, string asm,
                          RegisterClass SrcRC, RegisterClass DstRC,
                          SDNode OpNode, PatFrag mem_frag,
                          X86MemOperand x86memop,
                          ValueType OpVT, ValueType InVT,
                          Domain d> {
  let hasSideEffects = 0 in {
    def rr : AVX512PI<opc, MRMSrcReg,
               (outs DstRC:$dst), (ins SrcRC:$src),
               !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
               [(set DstRC:$dst,
                  (OpVT (OpNode (InVT SrcRC:$src))))],
               d>,
             EVEX;

    let mayLoad = 1 in
    def rm : AVX512PI<opc, MRMSrcMem,
               (outs DstRC:$dst), (ins x86memop:$src),
               !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
               [(set DstRC:$dst,
                  (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))],
               d>,
             EVEX;
  } // hasSideEffects = 0
}
3765
// Packed double -> single (v8f64 -> v8f32), with a rounding-control form.
defm VCVTPD2PSZ : avx512_vcvt_fp_with_rc<0x5A, "vcvtpd2ps",
                    VR512, VR256X, fround,
                    memopv8f64, f512mem, v8f32, v8f64,
                    SSEPackedSingle>,
                  EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;

// Packed single -> double (v8f32 -> v8f64).
defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd",
                    VR256X, VR512, fextend,
                    memopv4f64, f256mem, v8f64, v8f32,
                    SSEPackedDouble>,
                  EVEX_V512, PS, EVEX_CD8<32, CD8VH>;

// An extending v8f32 load maps onto the memory form.
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
          (VCVTPS2PDZrm addr:$src)>;

// Masked cvtpd2ps intrinsic with a zero pass-through and an all-ones mask:
// FROUND_CURRENT selects the plain register form ...
def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512
                   (v8f64 VR512:$src),
                   (bc_v8f32 (v8i32 immAllZerosV)),
                   (i8 -1),
                   (i32 FROUND_CURRENT))),
          (VCVTPD2PSZrr VR512:$src)>;

// ... and any other immediate is forwarded to the rounding-control form.
def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512
                   (v8f64 VR512:$src),
                   (bc_v8f32 (v8i32 immAllZerosV)),
                   (i8 -1),
                   imm:$rc)),
          (VCVTPD2PSZrrb VR512:$src, imm:$rc)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003785
3786//===----------------------------------------------------------------------===//
// AVX-512 Vector convert from signed integer to float/double
3788//===----------------------------------------------------------------------===//
3789
// v16i32 -> v16f32 (sint_to_fp), with a rounding-control form.
defm VCVTDQ2PSZ  : avx512_vcvt_fp_with_rc<0x5B, "vcvtdq2ps",
                     VR512, VR512, sint_to_fp,
                     memopv8i64, i512mem, v16f32, v16i32,
                     SSEPackedSingle>,
                   EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

// v8i32 -> v8f64 (sint_to_fp).
defm VCVTDQ2PDZ  : avx512_vcvt_fp<0xE6, "vcvtdq2pd",
                     VR256X, VR512, sint_to_fp,
                     memopv4i64, i256mem, v8f64, v8i32,
                     SSEPackedDouble>,
                   EVEX_V512, XS, EVEX_CD8<32, CD8VH>;

// v16f32 -> v16i32 (fp_to_sint).
defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq",
                     VR512, VR512, fp_to_sint,
                     memopv16f32, f512mem, v16i32, v16f32,
                     SSEPackedSingle>,
                   EVEX_V512, XS, EVEX_CD8<32, CD8VF>;

// v8f64 -> v8i32 (fp_to_sint).
defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq",
                     VR512, VR256X, fp_to_sint,
                     memopv8f64, f512mem, v8i32, v8f64,
                     SSEPackedDouble>,
                   EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
3809
// v16f32 -> v16i32 (fp_to_uint).
defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq",
                      VR512, VR512, fp_to_uint,
                      memopv16f32, f512mem, v16i32, v16f32,
                      SSEPackedSingle>,
                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

// cvttps2udq (src, 0, mask-all-ones, sae-current)
def : Pat<(v16i32 (int_x86_avx512_mask_cvttps2udq_512
                    (v16f32 VR512:$src),
                    (v16i32 immAllZerosV),
                    (i16 -1),
                    FROUND_CURRENT)),
          (VCVTTPS2UDQZrr VR512:$src)>;

// v8f64 -> v8i32 (fp_to_uint).
defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq",
                      VR512, VR256X, fp_to_uint,
                      memopv8f64, f512mem, v8i32, v8f64,
                      SSEPackedDouble>,
                    EVEX_V512, PS, VEX_W, EVEX_CD8<64, CD8VF>;

// cvttpd2udq (src, 0, mask-all-ones, sae-current)
def : Pat<(v8i32 (int_x86_avx512_mask_cvttpd2udq_512
                   (v8f64 VR512:$src),
                   (v8i32 immAllZerosV),
                   (i8 -1),
                   FROUND_CURRENT)),
          (VCVTTPD2UDQZrr VR512:$src)>;

// v8i32 -> v8f64 (uint_to_fp).
defm VCVTUDQ2PDZ  : avx512_vcvt_fp<0x7A, "vcvtudq2pd",
                      VR256X, VR512, uint_to_fp,
                      memopv4i64, f256mem, v8f64, v8i32,
                      SSEPackedDouble>,
                    EVEX_V512, XS, EVEX_CD8<32, CD8VH>;

// v16i32 -> v16f32 (uint_to_fp), with a rounding-control form.
defm VCVTUDQ2PSZ  : avx512_vcvt_fp_with_rc<0x7A, "vcvtudq2ps",
                      VR512, VR512, uint_to_fp,
                      memopv16i32, f512mem, v16f32, v16i32,
                      SSEPackedSingle>,
                    EVEX_V512, XD, EVEX_CD8<32, CD8VF>;
3839
// Unsigned conversions on 128/256-bit vectors: place the source in a wider
// register with SUBREG_TO_REG, run the 512-bit instruction, then extract
// the sub-register holding the result.

// fp_to_uint, v8f32 and v4f32 sources.
def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
          (EXTRACT_SUBREG
            (v16i32 (VCVTTPS2UDQZrr
              (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))),
            sub_ymm)>;

def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
          (EXTRACT_SUBREG
            (v16i32 (VCVTTPS2UDQZrr
              (v16f32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))),
            sub_xmm)>;

// uint_to_fp to single precision, v8i32 and v4i32 sources.
def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
          (EXTRACT_SUBREG
            (v16f32 (VCVTUDQ2PSZrr
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))),
            sub_ymm)>;

def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG
            (v16f32 (VCVTUDQ2PSZrr
              (v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))),
            sub_xmm)>;

// uint_to_fp to double precision, v4i32 source; the result is taken from
// the ymm sub-register.
def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG
            (v8f64 (VCVTUDQ2PDZrr
              (v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))),
            sub_ymm)>;
3859
// Masked integer -> fp intrinsics with a zero pass-through and an all-ones
// mask. The ps variants forward their rounding immediate to the rrb form;
// the pd variants take no rounding operand and use the rr form.
def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512
                    (v16i32 VR512:$src),
                    (bc_v16f32 (v16i32 immAllZerosV)),
                    (i16 -1),
                    imm:$rc)),
          (VCVTDQ2PSZrrb VR512:$src, imm:$rc)>;

def : Pat<(v8f64 (int_x86_avx512_mask_cvtdq2pd_512
                   (v8i32 VR256X:$src),
                   (bc_v8f64 (v16i32 immAllZerosV)),
                   (i8 -1))),
          (VCVTDQ2PDZrr VR256X:$src)>;

def : Pat<(v16f32 (int_x86_avx512_mask_cvtudq2ps_512
                    (v16i32 VR512:$src),
                    (bc_v16f32 (v16i32 immAllZerosV)),
                    (i16 -1),
                    imm:$rc)),
          (VCVTUDQ2PSZrrb VR512:$src, imm:$rc)>;

def : Pat<(v8f64 (int_x86_avx512_mask_cvtudq2pd_512
                   (v8i32 VR256X:$src),
                   (bc_v8f64 (v16i32 immAllZerosV)),
                   (i8 -1))),
          (VCVTUDQ2PDZrr VR256X:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003872
/// avx512_vcvt_fp2int - FP to integer conversion instruction forms:
///   rr  - register source
///   rrb - register source plus an AVX512RC rounding-control operand
///   rm  - memory source
/// None of the forms carries an isel pattern, so the side-effect and load
/// flags are stated explicitly. mem_frag is accepted but not referenced here.
multiclass avx512_vcvt_fp2int<bits<8> opc, string asm, RegisterClass SrcRC,
                              RegisterClass DstRC, PatFrag mem_frag,
                              X86MemOperand x86memop, Domain d> {
  let hasSideEffects = 0 in
  def rr : AVX512PI<opc, MRMSrcReg,
                    (outs DstRC:$dst), (ins SrcRC:$src),
                    asm # " \t{$src, $dst|$dst, $src}",
                    [], d>,
           EVEX;

  let hasSideEffects = 0 in
  def rrb : AVX512PI<opc, MRMSrcReg,
                     (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
                     asm # " \t{$rc, $src, $dst|$dst, $src, $rc}",
                     [], d>,
            EVEX, EVEX_B, EVEX_RC;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : AVX512PI<opc, MRMSrcMem,
                    (outs DstRC:$dst), (ins x86memop:$src),
                    asm # " \t{$src, $dst|$dst, $src}",
                    [], d>,
           EVEX;
}
3889
// FP -> signed 32-bit integer conversions.
defm VCVTPS2DQZ
    : avx512_vcvt_fp2int<0x5B, "vcvtps2dq", VR512, VR512,
                         memopv16f32, f512mem, SSEPackedSingle>,
      PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VCVTPD2DQZ
    : avx512_vcvt_fp2int<0xE6, "vcvtpd2dq", VR512, VR256X,
                         memopv8f64, f512mem, SSEPackedDouble>,
      XD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// All-ones mask with a zero pass-through operand: select the rounding-control
// (rrb) form and forward the $rc immediate.
def : Pat<(v16i32 (int_x86_avx512_mask_cvtps2dq_512
                     (v16f32 VR512:$src),
                     (v16i32 immAllZerosV), (i16 -1), imm:$rc)),
          (VCVTPS2DQZrrb VR512:$src, imm:$rc)>;

def : Pat<(v8i32 (int_x86_avx512_mask_cvtpd2dq_512
                    (v8f64 VR512:$src),
                    (v8i32 immAllZerosV), (i8 -1), imm:$rc)),
          (VCVTPD2DQZrrb VR512:$src, imm:$rc)>;

// FP -> unsigned 32-bit integer conversions.
defm VCVTPS2UDQZ
    : avx512_vcvt_fp2int<0x79, "vcvtps2udq", VR512, VR512,
                         memopv16f32, f512mem, SSEPackedSingle>,
      PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VCVTPD2UDQZ
    : avx512_vcvt_fp2int<0x79, "vcvtpd2udq", VR512, VR256X,
                         memopv8f64, f512mem, SSEPackedDouble>,
      VEX_W, PS, EVEX_V512, EVEX_CD8<64, CD8VF>;

def : Pat<(v16i32 (int_x86_avx512_mask_cvtps2udq_512
                     (v16f32 VR512:$src),
                     (v16i32 immAllZerosV), (i16 -1), imm:$rc)),
          (VCVTPS2UDQZrrb VR512:$src, imm:$rc)>;

def : Pat<(v8i32 (int_x86_avx512_mask_cvtpd2udq_512
                    (v8f64 VR512:$src),
                    (v8i32 immAllZerosV), (i8 -1), imm:$rc)),
          (VCVTPD2UDQZrrb VR512:$src, imm:$rc)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003919
// Fold the load into the packed double<->single conversions.
let Predicates = [HasAVX512] in {
  // v8f64 load narrowed to v8f32.
  def : Pat<(v8f32 (fround (loadv8f64 addr:$src))),
            (VCVTPD2PSZrm addr:$src)>;
  // Extending load of v8f32 to v8f64.
  def : Pat<(v8f64 (extloadv8f32 addr:$src)),
            (VCVTPS2PDZrm addr:$src)>;
}
3926
Elena Demikhovskydd0794e2013-10-24 07:16:35 +00003927//===----------------------------------------------------------------------===//
3928// Half precision conversion instructions
3929//===----------------------------------------------------------------------===//
/// avx512_cvtph2ps - vcvtph2ps with a register source (rr) and a memory
/// source (rm). Neither form carries an isel pattern.
multiclass avx512_cvtph2ps<RegisterClass destRC, RegisterClass srcRC,
                           X86MemOperand x86memop> {
  def rr : AVX5128I<0x13, MRMSrcReg,
                    (outs destRC:$dst), (ins srcRC:$src),
                    "vcvtph2ps\t{$src, $dst|$dst, $src}",
                    []>,
           EVEX;

  // The pattern list is empty, so the load is declared explicitly.
  let mayLoad = 1, hasSideEffects = 0 in
  def rm : AVX5128I<0x13, MRMSrcMem,
                    (outs destRC:$dst), (ins x86memop:$src),
                    "vcvtph2ps\t{$src, $dst|$dst, $src}",
                    []>,
           EVEX;
}
3939
/// avx512_cvtps2ph - vcvtps2ph with a register destination (rr) and a memory
/// destination (mr). Both take an 8-bit immediate as $src2 and neither
/// carries an isel pattern.
multiclass avx512_cvtps2ph<RegisterClass destRC, RegisterClass srcRC,
                           X86MemOperand x86memop> {
  def rr : AVX512AIi8<0x1D, MRMDestReg,
                      (outs destRC:$dst),
                      (ins srcRC:$src1, i32i8imm:$src2),
                      "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      []>,
           EVEX;

  // The pattern list is empty, so the store is declared explicitly.
  let mayStore = 1, hasSideEffects = 0 in
  def mr : AVX512AIi8<0x1D, MRMDestMem,
                      (outs),
                      (ins x86memop:$dst, srcRC:$src1, i32i8imm:$src2),
                      "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      []>,
           EVEX;
}
3951
// 512-bit half-precision conversions: 16 x f16 held in VR256X <-> v16f32 in
// VR512.
defm VCVTPH2PSZ : avx512_cvtph2ps<VR512, VR256X, f256mem>,
                  EVEX_V512, EVEX_CD8<32, CD8VH>;
defm VCVTPS2PHZ : avx512_cvtps2ph<VR256X, VR512, f256mem>,
                  EVEX_V512, EVEX_CD8<32, CD8VH>;

// Masked intrinsics with an all-ones mask and zero pass-through map onto the
// register forms. For ps2ph the intrinsic's immediate is forwarded as the
// instruction immediate.
def : Pat<(v16i16 (int_x86_avx512_mask_vcvtps2ph_512
                     (v16f32 VR512:$src), imm:$rc,
                     (bc_v16i16(v8i32 immAllZerosV)), (i16 -1))),
          (VCVTPS2PHZrr VR512:$src, imm:$rc)>;

// Only the FROUND_CURRENT rounding argument is matched for ph2ps.
def : Pat<(v16f32 (int_x86_avx512_mask_vcvtph2ps_512
                     (v16i16 VR256X:$src),
                     (bc_v16f32(v16i32 immAllZerosV)), (i16 -1),
                     (i32 FROUND_CURRENT))),
          (VCVTPH2PSZrr VR256X:$src)>;
3964
// Scalar FP compares. Every instruction in this group is declared as
// defining EFLAGS.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  // Unordered compares on the scalar FP register classes, selected from
  // X86cmp.
  defm VUCOMISSZ
      : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32, "ucomiss">,
        PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ
      : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64, "ucomisd">,
        PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

  // Ordered compares: defined with their patterns cleared.
  let Pattern = []<dag> in {
    defm VCOMISSZ
        : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load, "comiss">,
          PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ
        : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load, "comisd">,
          PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }

  // Codegen-only variants on VR128X selected from X86ucomi / X86comi.
  let isCodeGenOnly = 1 in {
    defm Int_VUCOMISSZ
        : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem, load,
                        "ucomiss">,
          PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm Int_VUCOMISDZ
        : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem, load,
                        "ucomisd">,
          PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

    defm Int_VCOMISSZ
        : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem, load,
                        "comiss">,
          PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm Int_VCOMISDZ
        : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem, load,
                        "comisd">,
          PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}
3996
/// avx512_fp14_s - scalar rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd.
/// Two-source forms: register/register (rr) and register/memory (rm).
/// No isel patterns are attached, so the flags are set by hand.
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                         X86MemOperand x86memop> {
  let hasSideEffects = 0 in
  def rr : AVX5128I<opc, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, RC:$src2),
                    OpcodeStr #
                      " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    []>,
           EVEX_4V;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : AVX5128I<opc, MRMSrcMem,
                    (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                    OpcodeStr #
                      " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    []>,
           EVEX_4V;
}
4013
// Scalar 14-bit reciprocal / reciprocal square root instructions.
defm VRCP14SS   : avx512_fp14_s<0x4D, "vrcp14ss", FR32X, f32mem>,
                  EVEX_CD8<32, CD8VT1>;
defm VRCP14SD   : avx512_fp14_s<0x4D, "vrcp14sd", FR64X, f64mem>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;
defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", FR32X, f32mem>,
                  EVEX_CD8<32, CD8VT1>;
defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", FR64X, f64mem>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;

// The intrinsics operate on VR128X values while the instructions are defined
// on FR32X/FR64X, so operands and result are moved between register classes
// with COPY_TO_REGCLASS. Only the all-ones-mask, zero pass-through case is
// matched.
def : Pat<(v4f32 (int_x86_avx512_rcp14_ss
                    (v4f32 VR128X:$src1), (v4f32 VR128X:$src2),
                    (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
          (COPY_TO_REGCLASS
            (VRCP14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
                        (COPY_TO_REGCLASS VR128X:$src2, FR32X)),
            VR128X)>;

def : Pat<(v2f64 (int_x86_avx512_rcp14_sd
                    (v2f64 VR128X:$src1), (v2f64 VR128X:$src2),
                    (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
          (COPY_TO_REGCLASS
            (VRCP14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
                        (COPY_TO_REGCLASS VR128X:$src2, FR64X)),
            VR128X)>;

def : Pat<(v4f32 (int_x86_avx512_rsqrt14_ss
                    (v4f32 VR128X:$src1), (v4f32 VR128X:$src2),
                    (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
          (COPY_TO_REGCLASS
            (VRSQRT14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
                          (COPY_TO_REGCLASS VR128X:$src2, FR32X)),
            VR128X)>;

def : Pat<(v2f64 (int_x86_avx512_rsqrt14_sd
                    (v2f64 VR128X:$src1), (v2f64 VR128X:$src2),
                    (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
          (COPY_TO_REGCLASS
            (VRSQRT14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
                          (COPY_TO_REGCLASS VR128X:$src2, FR64X)),
            VR128X)>;
Elena Demikhovskya3a71402013-10-09 08:16:14 +00004042
/// avx512_fp14_p - packed rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd.
/// Single-source register (r) and memory (m) forms; both select OpNode on a
/// value of type OpVt, the memory form reading its operand through mem_frag.
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         RegisterClass RC, X86MemOperand x86memop,
                         PatFrag mem_frag, ValueType OpVt> {
  def r : AVX5128I<opc, MRMSrcReg,
                   (outs RC:$dst), (ins RC:$src),
                   OpcodeStr # " \t{$src, $dst|$dst, $src}",
                   [(set RC:$dst, (OpVt (OpNode RC:$src)))]>,
          EVEX;

  def m : AVX5128I<opc, MRMSrcMem,
                   (outs RC:$dst), (ins x86memop:$src),
                   OpcodeStr # " \t{$src, $dst|$dst, $src}",
                   [(set RC:$dst, (OpVt (OpNode (mem_frag addr:$src))))]>,
          EVEX;
}
// 512-bit packed 14-bit reciprocal square root / reciprocal.
defm VRSQRT14PSZ
    : avx512_fp14_p<0x4E, "vrsqrt14ps", X86frsqrt, VR512, f512mem,
                    memopv16f32, v16f32>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRSQRT14PDZ
    : avx512_fp14_p<0x4E, "vrsqrt14pd", X86frsqrt, VR512, f512mem,
                    memopv8f64, v8f64>,
      VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
defm VRCP14PSZ
    : avx512_fp14_p<0x4C, "vrcp14ps", X86frcp, VR512, f512mem,
                    memopv16f32, v16f32>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRCP14PDZ
    : avx512_fp14_p<0x4C, "vrcp14pd", X86frcp, VR512, f512mem,
                    memopv8f64, v8f64>,
      VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// Intrinsic forms with an all-ones mask and zero pass-through operand.
def : Pat<(v16f32 (int_x86_avx512_rsqrt14_ps_512
                     (v16f32 VR512:$src),
                     (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
          (VRSQRT14PSZr VR512:$src)>;
def : Pat<(v8f64 (int_x86_avx512_rsqrt14_pd_512
                    (v8f64 VR512:$src),
                    (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
          (VRSQRT14PDZr VR512:$src)>;

def : Pat<(v16f32 (int_x86_avx512_rcp14_ps_512
                     (v16f32 VR512:$src),
                     (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
          (VRCP14PSZr VR512:$src)>;
def : Pat<(v8f64 (int_x86_avx512_rcp14_pd_512
                    (v8f64 VR512:$src),
                    (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
          (VRCP14PDZr VR512:$src)>;
4079
/// avx512_fp28_s - scalar rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd.
///   rr  - register/register
///   rrb - register/register, printed with {sae} and encoded with EVEX_B
///   rm  - register/memory
/// All forms are predicated on HasERI and carry no isel pattern.
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                         X86MemOperand x86memop> {
  let Predicates = [HasERI], hasSideEffects = 0 in {
    def rr : AVX5128I<opc, MRMSrcReg,
                      (outs RC:$dst), (ins RC:$src1, RC:$src2),
                      OpcodeStr #
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      []>,
             EVEX_4V;

    def rrb : AVX5128I<opc, MRMSrcReg,
                       (outs RC:$dst), (ins RC:$src1, RC:$src2),
                       OpcodeStr #
                         " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}",
                       []>,
              EVEX_4V, EVEX_B;

    let mayLoad = 1 in
    def rm : AVX5128I<opc, MRMSrcMem,
                      (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                      OpcodeStr #
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      []>,
             EVEX_4V;
  }
}
4101
// Scalar 28-bit reciprocal / reciprocal square root instructions.
defm VRCP28SS   : avx512_fp28_s<0xCB, "vrcp28ss", FR32X, f32mem>,
                  EVEX_CD8<32, CD8VT1>;
defm VRCP28SD   : avx512_fp28_s<0xCB, "vrcp28sd", FR64X, f64mem>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;
defm VRSQRT28SS : avx512_fp28_s<0xCD, "vrsqrt28ss", FR32X, f32mem>,
                  EVEX_CD8<32, CD8VT1>;
defm VRSQRT28SD : avx512_fp28_s<0xCD, "vrsqrt28sd", FR64X, f64mem>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;

// Intrinsic forms with FROUND_NO_EXC, an all-ones mask and a zero
// pass-through operand select the {sae} (rrb) instruction. Operands and
// result are moved between VR128X and FR32X/FR64X with COPY_TO_REGCLASS.
def : Pat<(v4f32 (int_x86_avx512_rcp28_ss
                    (v4f32 VR128X:$src1), (v4f32 VR128X:$src2),
                    (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
                    FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
            (VRCP28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
                         (COPY_TO_REGCLASS VR128X:$src2, FR32X)),
            VR128X)>;

def : Pat<(v2f64 (int_x86_avx512_rcp28_sd
                    (v2f64 VR128X:$src1), (v2f64 VR128X:$src2),
                    (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
                    FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
            (VRCP28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
                         (COPY_TO_REGCLASS VR128X:$src2, FR64X)),
            VR128X)>;

def : Pat<(v4f32 (int_x86_avx512_rsqrt28_ss
                    (v4f32 VR128X:$src1), (v4f32 VR128X:$src2),
                    (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
                    FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
            (VRSQRT28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
                           (COPY_TO_REGCLASS VR128X:$src2, FR32X)),
            VR128X)>;

def : Pat<(v2f64 (int_x86_avx512_rsqrt28_sd
                    (v2f64 VR128X:$src1), (v2f64 VR128X:$src2),
                    (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
                    FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
            (VRSQRT28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
                           (COPY_TO_REGCLASS VR128X:$src2, FR64X)),
            VR128X)>;
4134
/// avx512_fp28_p - packed rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd.
///   r  - register source
///   rb - register source, printed with {sae} and encoded with EVEX_B
///   m  - memory source
/// All forms are predicated on HasERI. None carries an isel pattern, so
/// TableGen has nothing to infer instruction flags from and they are given
/// explicitly.
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr,
                         RegisterClass RC, X86MemOperand x86memop> {
  let hasSideEffects = 0, Predicates = [HasERI] in {
  def r : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                        !strconcat(OpcodeStr,
                                   " \t{$src, $dst|$dst, $src}"),
                        []>, EVEX;
  def rb : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                        !strconcat(OpcodeStr,
                                   " \t{{sae}, $src, $dst|$dst, $src, {sae}}"),
                        []>, EVEX, EVEX_B;
  // The memory form reads its source operand from memory. With an empty
  // pattern and hasSideEffects = 0 it would otherwise be described as not
  // touching memory at all, so mark the load (as avx512_fp28_s does for rm).
  let mayLoad = 1 in
  def m : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                        !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                        []>, EVEX;
  }
}
// 512-bit packed 28-bit reciprocal square root / reciprocal.
defm VRSQRT28PSZ : avx512_fp28_p<0xCC, "vrsqrt28ps", VR512, f512mem>,
                   EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRSQRT28PDZ : avx512_fp28_p<0xCC, "vrsqrt28pd", VR512, f512mem>,
                   VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
defm VRCP28PSZ   : avx512_fp28_p<0xCA, "vrcp28ps", VR512, f512mem>,
                   EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRCP28PDZ   : avx512_fp28_p<0xCA, "vrcp28pd", VR512, f512mem>,
                   VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// Intrinsic forms with FROUND_NO_EXC, an all-ones mask and a zero
// pass-through operand select the {sae} (rb) instruction.
def : Pat<(v16f32 (int_x86_avx512_rsqrt28_ps
                     (v16f32 VR512:$src),
                     (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1),
                     FROUND_NO_EXC)),
          (VRSQRT28PSZrb VR512:$src)>;
def : Pat<(v8f64 (int_x86_avx512_rsqrt28_pd
                    (v8f64 VR512:$src),
                    (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1),
                    FROUND_NO_EXC)),
          (VRSQRT28PDZrb VR512:$src)>;

def : Pat<(v16f32 (int_x86_avx512_rcp28_ps
                     (v16f32 VR512:$src),
                     (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1),
                     FROUND_NO_EXC)),
          (VRCP28PSZrb VR512:$src)>;
def : Pat<(v8f64 (int_x86_avx512_rcp28_pd
                    (v8f64 VR512:$src),
                    (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1),
                    FROUND_NO_EXC)),
          (VRCP28PDZrb VR512:$src)>;
4174
/// avx512_sqrt_packed - 512-bit packed square root, single (PSZ*) and double
/// (PDZ*) precision, each with a register (rr) and a memory (rm) source.
///   opc        - opcode byte shared by all four forms
///   OpcodeStr  - mnemonic stem; "ps"/"pd" is appended
///   OpNode     - DAG node selected by the patterns
///   itins_s/_d - itineraries for the single / double precision forms
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              OpndItins itins_s, OpndItins itins_d> {
  def PSZrr : AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
              !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
              [(set VR512:$dst, (v16f32 (OpNode VR512:$src)))], itins_s.rr>,
              EVEX, EVEX_V512;

  // The load already produces v16f32, so no bitconvert is needed around it.
  let mayLoad = 1 in
  def PSZrm : AVX512PSI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
              !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
              [(set VR512:$dst,
                (v16f32 (OpNode (memopv16f32 addr:$src))))],
              itins_s.rm>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;

  // The double precision forms operate on 64-bit elements and need VEX_W,
  // like the other 64-bit-element instructions in this file.
  def PDZrr : AVX512PDI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
              !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
              [(set VR512:$dst, (v8f64 (OpNode VR512:$src)))], itins_d.rr>,
              EVEX, EVEX_V512, VEX_W;

  // Match a v8f64 load directly. The previous pattern required a bitcast of
  // a v16f32 load, which does not match a plain v8f64 load.
  let mayLoad = 1 in
  def PDZrm : AVX512PDI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
              !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
              [(set VR512:$dst,
                (v8f64 (OpNode (memopv8f64 addr:$src))))],
              itins_d.rm>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

}
4202
/// avx512_sqrt_scalar - scalar square root, single (SSZ*) and double (SDZ*)
/// precision.
///   *r / *m         - forms on FR32X/FR64X with no isel pattern
///   *r_Int / *m_Int - codegen-only forms on VR128X selected from the
///                     F32Int / F64Int intrinsics
///   itins_s/itins_d - itineraries for the single / double precision forms
/// All forms take two sources ($src1, $src2).
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,
                          Intrinsic F32Int, Intrinsic F64Int,
                          OpndItins itins_s, OpndItins itins_d> {
  // Single precision.
  def SSZr : SI<opc, MRMSrcReg, (outs FR32X:$dst),
               (ins FR32X:$src1, FR32X:$src2),
               !strconcat(OpcodeStr,
                          "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [], itins_s.rr>, XS, EVEX_4V;
  let isCodeGenOnly = 1 in
  def SSZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
               (ins VR128X:$src1, VR128X:$src2),
               !strconcat(OpcodeStr,
                "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set VR128X:$dst,
                 (F32Int VR128X:$src1, VR128X:$src2))],
               itins_s.rr>, XS, EVEX_4V;
  let mayLoad = 1 in {
  def SSZm : SI<opc, MRMSrcMem, (outs FR32X:$dst),
               (ins FR32X:$src1, f32mem:$src2),
               !strconcat(OpcodeStr,
                          "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [], itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
  let isCodeGenOnly = 1 in
  def SSZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
                   (ins VR128X:$src1, ssmem:$src2),
                   !strconcat(OpcodeStr,
                 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set VR128X:$dst,
                     (F32Int VR128X:$src1, sse_load_f32:$src2))],
                   itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
  }

  // Double precision. These use itins_d. Previously the SD forms either had
  // no itinerary or reused the single precision one, leaving itins_d unused.
  def SDZr : SI<opc, MRMSrcReg, (outs FR64X:$dst),
               (ins FR64X:$src1, FR64X:$src2),
               !strconcat(OpcodeStr,
                          "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [], itins_d.rr>, XD, EVEX_4V, VEX_W;
  let isCodeGenOnly = 1 in
  def SDZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
               (ins VR128X:$src1, VR128X:$src2),
               !strconcat(OpcodeStr,
                "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set VR128X:$dst,
                 (F64Int VR128X:$src1, VR128X:$src2))],
               itins_d.rr>, XD, EVEX_4V, VEX_W;
  let mayLoad = 1 in {
  def SDZm : SI<opc, MRMSrcMem, (outs FR64X:$dst),
               (ins FR64X:$src1, f64mem:$src2),
               !strconcat(OpcodeStr,
                          "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [], itins_d.rm>,
               XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
  let isCodeGenOnly = 1 in
  def SDZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
                  (ins VR128X:$src1, sdmem:$src2),
                   !strconcat(OpcodeStr,
                  "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR128X:$dst,
                    (F64Int VR128X:$src1, sse_load_f64:$src2))],
                  itins_d.rm>,
               XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}
4263
4264
// Square root: one defm instantiates both the scalar and the packed
// multiclass, so all resulting records share the VSQRT name prefix. The
// scalar multiclass is given the stem "sqrt", the packed one "vsqrt".
defm VSQRT
    : avx512_sqrt_scalar<0x51, "sqrt",
                         int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd,
                         SSE_SQRTSS, SSE_SQRTSD>,
      avx512_sqrt_packed<0x51, "vsqrt", fsqrt,
                         SSE_SQRTPS, SSE_SQRTPD>;
4270
// Selection patterns for square root, reciprocal square root and reciprocal.
let Predicates = [HasAVX512] in {
  // Packed sqrt intrinsics: all-ones mask, zero pass-through, current
  // rounding mode.
  def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1),
                    (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_CURRENT)),
                   (VSQRTPSZrr VR512:$src1)>;
  def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1),
                    (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)),
                   (VSQRTPDZrr VR512:$src1)>;

  // Scalar register forms. The first source operand is left undefined.
  def : Pat<(f32 (fsqrt FR32X:$src)),
            (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
  def : Pat<(f64 (fsqrt FR64X:$src)),
            (VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>;

  def : Pat<(f32 (X86frsqrt FR32X:$src)),
            (VRSQRT14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;

  def : Pat<(f32 (X86frcp FR32X:$src)),
            (VRCP14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;

  // Legacy SSE/SSE2 scalar sqrt intrinsics. The instructions take
  // FR32X/FR64X operands, so copy into those classes rather than the
  // narrower FR32/FR64.
  def : Pat<(int_x86_sse_sqrt_ss VR128X:$src),
            (COPY_TO_REGCLASS (VSQRTSSZr (f32 (IMPLICIT_DEF)),
                                        (COPY_TO_REGCLASS VR128X:$src, FR32X)),
                              VR128X)>;
  def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
            (VSQRTSSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;

  def : Pat<(int_x86_sse2_sqrt_sd VR128X:$src),
            (COPY_TO_REGCLASS (VSQRTSDZr (f64 (IMPLICIT_DEF)),
                                        (COPY_TO_REGCLASS VR128X:$src, FR64X)),
                              VR128X)>;
  def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
            (VSQRTSDZm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
}

// Scalar load-folding forms, used only when optimizing for size. These were
// previously written as Requires<[OptForSize]> inside the HasAVX512 let
// block. Requires<> and an enclosing 'let Predicates' both assign the same
// Predicates field and do not combine, so one of the two predicates was
// lost. Listing both here makes the intended condition explicit.
let Predicates = [HasAVX512, OptForSize] in {
  def : Pat<(f32 (fsqrt (load addr:$src))),
            (VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (fsqrt (load addr:$src))),
            (VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>;

  def : Pat<(f32 (X86frsqrt (load addr:$src))),
            (VRSQRT14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;

  def : Pat<(f32 (X86frcp (load addr:$src))),
            (VRCP14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
}
4316
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004317
/// avx512_fp_unop_rm - packed unary operation taking an 8-bit immediate,
/// selected from vector intrinsics.
///   opcps/opcpd           - opcodes of the single / double precision forms
///   OpcodeStr             - mnemonic stem; "ps"/"pd" is appended
///   mem_frag32/mem_frag64 - load fragments used by the memory forms
///   V4F32Int/V2F64Int     - intrinsics selected by the ps / pd forms
///   VForm                 - vector form passed to EVEX_CD8 on memory forms
multiclass avx512_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
                             X86MemOperand x86memop, RegisterClass RC,
                             PatFrag mem_frag32, PatFrag mem_frag64,
                             Intrinsic V4F32Int, Intrinsic V2F64Int,
                             CD8VForm VForm> {
  // Single precision: register source, then memory source.
  let ExeDomain = SSEPackedSingle in {
    def PSr : AVX512AIi8<opcps, MRMSrcReg,
                         (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                         OpcodeStr #
                           "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>;

    def PSm : AVX512AIi8<opcps, MRMSrcMem,
                         (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                         OpcodeStr #
                           "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set RC:$dst,
                           (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
              EVEX_CD8<32, VForm>;
  } // ExeDomain = SSEPackedSingle

  // Double precision: register source, then memory source.
  let ExeDomain = SSEPackedDouble in {
    def PDr : AVX512AIi8<opcpd, MRMSrcReg,
                         (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                         OpcodeStr #
                           "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>;

    def PDm : AVX512AIi8<opcpd, MRMSrcMem,
                         (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                         OpcodeStr #
                           "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set RC:$dst,
                           (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
              EVEX_CD8<64, VForm>;
  } // ExeDomain = SSEPackedDouble
}
4360
/// avx512_fp_binop_rm - scalar two-source operation taking an 8-bit
/// immediate as $src3.
///   SSr / SDr         - forms on FR32X/FR64X with no isel pattern
///   SSr_Int / SDr_Int - codegen-only VR128X forms selected from the
///                       F32Int / F64Int intrinsics
///   SSm / SDm         - VR128X forms whose second source is a scalar load
/// The double precision forms additionally carry VEX_W.
multiclass avx512_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
                              string OpcodeStr,
                              Intrinsic F32Int,
                              Intrinsic F64Int> {
  let ExeDomain = GenericDomain in {
    // ---- Single precision ----
    // Plain register form, no pattern.
    let hasSideEffects = 0 in
    def SSr : AVX512AIi8<opcss, MRMSrcReg,
        (outs FR32X:$dst),
        (ins FR32X:$src1, FR32X:$src2, i32i8imm:$src3),
        OpcodeStr #
          "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        []>;

    // Intrinsic, register source.
    let isCodeGenOnly = 1 in
    def SSr_Int : AVX512AIi8<opcss, MRMSrcReg,
        (outs VR128X:$dst),
        (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
        OpcodeStr #
          "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set VR128X:$dst,
          (F32Int VR128X:$src1, VR128X:$src2, imm:$src3))]>;

    // Intrinsic, memory source.
    def SSm : AVX512AIi8<opcss, MRMSrcMem,
        (outs VR128X:$dst),
        (ins VR128X:$src1, ssmem:$src2, i32i8imm:$src3),
        OpcodeStr #
          "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set VR128X:$dst,
          (F32Int VR128X:$src1, sse_load_f32:$src2, imm:$src3))]>,
        EVEX_CD8<32, CD8VT1>;

    // ---- Double precision ----
    // Plain register form, no pattern.
    let hasSideEffects = 0 in
    def SDr : AVX512AIi8<opcsd, MRMSrcReg,
        (outs FR64X:$dst),
        (ins FR64X:$src1, FR64X:$src2, i32i8imm:$src3),
        OpcodeStr #
          "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        []>,
        VEX_W;

    // Intrinsic, register source.
    let isCodeGenOnly = 1 in
    def SDr_Int : AVX512AIi8<opcsd, MRMSrcReg,
        (outs VR128X:$dst),
        (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
        OpcodeStr #
          "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set VR128X:$dst,
          (F64Int VR128X:$src1, VR128X:$src2, imm:$src3))]>,
        VEX_W;

    // Intrinsic, memory source.
    def SDm : AVX512AIi8<opcsd, MRMSrcMem,
        (outs VR128X:$dst),
        (ins VR128X:$src1, sdmem:$src2, i32i8imm:$src3),
        OpcodeStr #
          "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set VR128X:$dst,
          (F64Int VR128X:$src1, sse_load_f64:$src2, imm:$src3))]>,
        VEX_W, EVEX_CD8<64, CD8VT1>;
  } // ExeDomain = GenericDomain
}
4418
/// avx512_rndscale - packed instruction taking an 8-bit immediate as $src2,
/// with a register source (r) and a memory source (m). Both forms are placed
/// in execution domain d and carry no isel pattern. mem_frag is accepted but
/// not referenced here.
multiclass avx512_rndscale<bits<8> opc, string OpcodeStr,
                            X86MemOperand x86memop, RegisterClass RC,
                            PatFrag mem_frag, Domain d> {
let ExeDomain = d in {
  // Vector operation, reg
  def r : AVX512AIi8<opc, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr,
                    " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, EVEX;

  // Vector operation, mem. The pattern list is empty, so TableGen cannot
  // infer the load from it; declare it explicitly, as avx512_vcvt_fp2int
  // does for its rm form.
  let mayLoad = 1 in
  def m : AVX512AIi8<opc, MRMSrcMem,
                    (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr,
                    " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, EVEX;
} // ExeDomain
}
4439
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00004440
// 512-bit packed single-precision instantiation (opcode 0x08, 32-bit CD8
// element size).
4441defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512,
4442 memopv16f32, SSEPackedSingle>, EVEX_V512,
4443 EVEX_CD8<32, CD8VF>;
4444
// Select the unmasked register form for the masked intrinsic only when the
// pass-through operand is the source itself, the mask is all-ones (i16 -1)
// and the last operand is FROUND_CURRENT.
4445def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512 (v16f32 VR512:$src1),
Elena Demikhovskye73333a2014-05-04 13:35:37 +00004446 imm:$src2, (v16f32 VR512:$src1), (i16 -1),
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00004447 FROUND_CURRENT)),
4448 (VRNDSCALEPSZr VR512:$src1, imm:$src2)>;
4449
4450
// 512-bit packed double-precision instantiation (opcode 0x09, VEX_W, 64-bit
// CD8 element size).
4451defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512,
4452 memopv8f64, SSEPackedDouble>, EVEX_V512,
4453 VEX_W, EVEX_CD8<64, CD8VF>;
4454
// Same selection rule as the single-precision pattern, with an i8 all-ones
// mask.
4455def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512 (v8f64 VR512:$src1),
Elena Demikhovskye73333a2014-05-04 13:35:37 +00004456 imm:$src2, (v8f64 VR512:$src1), (i8 -1),
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00004457 FROUND_CURRENT)),
4458 (VRNDSCALEPDZr VR512:$src1, imm:$src2)>;
4459
// Scalar VRNDSCALE: one register form ('r') and one memory form ('m').
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   x86memop  - memory operand used as $src2 of the 'm' form.
//   RC        - register class of $dst, $src1 and (in 'r') $src2.
//   d         - value assigned to ExeDomain for both forms.
// Each form has three inputs: $src1, $src2 and the 8-bit immediate $src3.
// Both defs have an empty pattern list.
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                     Operand x86memop, RegisterClass RC, Domain d> {
let ExeDomain = d in {
  // All three inputs are named in the assembly string: the immediate
  // ($src3) is first in the AT&T variant and last in the Intel variant.
  def r : AVX512AIi8<opc, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, RC:$src2, i32i8imm:$src3),
                    !strconcat(OpcodeStr,
                    " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                    []>, EVEX_4V;

  // The pattern list is empty, so the load is flagged explicitly.
  let mayLoad = 1 in
  def m : AVX512AIi8<opc, MRMSrcMem,
                    (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i32i8imm:$src3),
                    !strconcat(OpcodeStr,
                    " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                    []>, EVEX_4V;
} // ExeDomain
}
4476
// Scalar single- and double-precision instantiations (opcodes 0x0A / 0x0B).
4477defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", ssmem, FR32X,
4478 SSEPackedSingle>, EVEX_CD8<32, CD8VT1>;
4479
4480defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", sdmem, FR64X,
4481 SSEPackedDouble>, EVEX_CD8<64, CD8VT1>;
4482
// Scalar rounding nodes -> register form. The first source operand is an
// IMPLICIT_DEF; the immediate used per node is:
//   ffloor 0x1, fceil 0x2, ftrunc 0x3, frint 0x4, fnearbyint 0xC.
// NOTE(review): these immediates look like the SSE4.1 ROUNDSS/ROUNDSD
// encodings - confirm against the ISA reference.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004483def : Pat<(ffloor FR32X:$src),
4484 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>;
4485def : Pat<(f64 (ffloor FR64X:$src)),
4486 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x1))>;
4487def : Pat<(f32 (fnearbyint FR32X:$src)),
4488 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0xC))>;
4489def : Pat<(f64 (fnearbyint FR64X:$src)),
4490 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0xC))>;
4491def : Pat<(f32 (fceil FR32X:$src)),
4492 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x2))>;
4493def : Pat<(f64 (fceil FR64X:$src)),
4494 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x2))>;
4495def : Pat<(f32 (frint FR32X:$src)),
4496 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x4))>;
4497def : Pat<(f64 (frint FR64X:$src)),
4498 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x4))>;
4499def : Pat<(f32 (ftrunc FR32X:$src)),
4500 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x3))>;
4501def : Pat<(f64 (ftrunc FR64X:$src)),
4502 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>;
4503
// v16f32 rounding nodes -> VRNDSCALEPSZr, same immediates as the scalar
// patterns.
4504def : Pat<(v16f32 (ffloor VR512:$src)),
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00004505 (VRNDSCALEPSZr VR512:$src, (i32 0x1))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004506def : Pat<(v16f32 (fnearbyint VR512:$src)),
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00004507 (VRNDSCALEPSZr VR512:$src, (i32 0xC))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004508def : Pat<(v16f32 (fceil VR512:$src)),
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00004509 (VRNDSCALEPSZr VR512:$src, (i32 0x2))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004510def : Pat<(v16f32 (frint VR512:$src)),
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00004511 (VRNDSCALEPSZr VR512:$src, (i32 0x4))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004512def : Pat<(v16f32 (ftrunc VR512:$src)),
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00004513 (VRNDSCALEPSZr VR512:$src, (i32 0x3))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004514
// v8f64 rounding nodes -> VRNDSCALEPDZr, same immediates.
4515def : Pat<(v8f64 (ffloor VR512:$src)),
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00004516 (VRNDSCALEPDZr VR512:$src, (i32 0x1))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004517def : Pat<(v8f64 (fnearbyint VR512:$src)),
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00004518 (VRNDSCALEPDZr VR512:$src, (i32 0xC))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004519def : Pat<(v8f64 (fceil VR512:$src)),
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00004520 (VRNDSCALEPDZr VR512:$src, (i32 0x2))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004521def : Pat<(v8f64 (frint VR512:$src)),
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00004522 (VRNDSCALEPDZr VR512:$src, (i32 0x4))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004523def : Pat<(v8f64 (ftrunc VR512:$src)),
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00004524 (VRNDSCALEPDZr VR512:$src, (i32 0x3))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004525
4526//-------------------------------------------------
4527// Integer truncate and extend operations
4528//-------------------------------------------------
4529
// Truncating moves: three register-destination forms (plain, masked,
// zero-masked) and two memory-destination forms (plain, masked).
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   dstRC     - register class of the narrowed result.
//   srcRC     - register class of the wide source.
//   KRC       - mask register class for the masked forms.
//   x86memop  - memory operand used as destination by 'mr' / 'mrk'.
// Every def has an empty pattern list; instruction selection is driven by
// stand-alone Pat<> records.
multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr,
               RegisterClass dstRC, RegisterClass srcRC,
               RegisterClass KRC, X86MemOperand x86memop> {
  def rr : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
               (ins srcRC:$src),
               !strconcat(OpcodeStr," \t{$src, $dst|$dst, $src}"),
               []>, EVEX;

  def rrk : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
               (ins KRC:$mask, srcRC:$src),
               !strconcat(OpcodeStr,
                 " \t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
               []>, EVEX, EVEX_K;

  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
               (ins KRC:$mask, srcRC:$src),
               !strconcat(OpcodeStr,
                 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
               []>, EVEX, EVEX_KZ;

  // Memory-destination forms. They have no outputs and no pattern, so the
  // store is flagged explicitly (as avx512_scatter does).
  let mayStore = 1 in {
  def mr : AVX512XS8I<opc, MRMDestMem, (outs), (ins x86memop:$dst, srcRC:$src),
               !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
               []>, EVEX;

  def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, KRC:$mask, srcRC:$src),
               !strconcat(OpcodeStr, " \t{$src, $dst {${mask}}|${dst} {${mask}}, $src}"),
               []>, EVEX, EVEX_K;
  }

}
// 512-bit-source instantiations of avx512_trunc_sat. Each source/destination
// width pair is instantiated three times: plain, "s" and "us" mnemonic, each
// with its own opcode.
4560defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM,
4561 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
4562defm VPMOVSQB : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM,
4563 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
4564defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM,
4565 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
4566defm VPMOVQW : avx512_trunc_sat<0x34, "vpmovqw", VR128X, VR512, VK8WM,
4567 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
4568defm VPMOVSQW : avx512_trunc_sat<0x24, "vpmovsqw", VR128X, VR512, VK8WM,
4569 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
4570defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM,
4571 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
4572defm VPMOVQD : avx512_trunc_sat<0x35, "vpmovqd", VR256X, VR512, VK8WM,
4573 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
4574defm VPMOVSQD : avx512_trunc_sat<0x25, "vpmovsqd", VR256X, VR512, VK8WM,
4575 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
4576defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM,
4577 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
4578defm VPMOVDW : avx512_trunc_sat<0x33, "vpmovdw", VR256X, VR512, VK16WM,
4579 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
4580defm VPMOVSDW : avx512_trunc_sat<0x23, "vpmovsdw", VR256X, VR512, VK16WM,
4581 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
4582defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM,
4583 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
4584defm VPMOVDB : avx512_trunc_sat<0x31, "vpmovdb", VR128X, VR512, VK16WM,
4585 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
4586defm VPMOVSDB : avx512_trunc_sat<0x21, "vpmovsdb", VR128X, VR512, VK16WM,
4587 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
4588defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM,
4589 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
4590
// X86vtrunc selects the plain-mnemonic 'rr' forms.
4591def : Pat<(v16i8 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQBrr VR512:$src)>;
4592def : Pat<(v8i16 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQWrr VR512:$src)>;
4593def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr VR512:$src)>;
4594def : Pat<(v16i8 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>;
4595def : Pat<(v8i32 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQDrr VR512:$src)>;
4596
// X86vtruncm (mask, source) selects the zero-masking 'rrkz' forms. There is
// no masked pattern for the v8i64 -> v16i8 case here.
4597def : Pat<(v16i8 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
Robert Khasanov189e7fd2014-04-22 11:36:19 +00004598 (VPMOVDBrrkz VK16WM:$mask, VR512:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004599def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
Robert Khasanov189e7fd2014-04-22 11:36:19 +00004600 (VPMOVDWrrkz VK16WM:$mask, VR512:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004601def : Pat<(v8i16 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
Robert Khasanov189e7fd2014-04-22 11:36:19 +00004602 (VPMOVQWrrkz VK8WM:$mask, VR512:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004603def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
Robert Khasanov189e7fd2014-04-22 11:36:19 +00004604 (VPMOVQDrrkz VK8WM:$mask, VR512:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004605
4606
// Vector extend: register-source and memory-source forms, each in plain,
// masked ({k}) and zero-masked ({k}{z}) variants.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   KRC       - mask register class for the masked variants.
//   DstRC     - register class of the widened result.
//   SrcRC     - register class of the narrow register source.
//   OpNode    - DAG node matched by the unmasked 'rr' / 'rm' patterns.
//   mem_frag  - load fragment used by the 'rm' pattern.
//   x86memop  - memory operand of the memory-source forms.
//   OpVT/InVT - result and input value types used in the patterns.
// Only 'rr' and 'rm' carry selection patterns; the masked variants have
// empty pattern lists.
multiclass avx512_extend<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                      RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode,
                      PatFrag mem_frag, X86MemOperand x86memop,
                      ValueType OpVT, ValueType InVT> {

  def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
              (ins SrcRC:$src),
              !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (OpVT (OpNode (InVT SrcRC:$src))))]>, EVEX;

  // No space before '|': it would be printed as trailing whitespace in the
  // AT&T variant.
  def rrk : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
              (ins KRC:$mask, SrcRC:$src),
              !strconcat(OpcodeStr, " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              []>, EVEX, EVEX_K;

  def rrkz : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
              (ins KRC:$mask, SrcRC:$src),
              !strconcat(OpcodeStr, " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
              []>, EVEX, EVEX_KZ;

  let mayLoad = 1 in {
    def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins x86memop:$src),
              !strconcat(OpcodeStr," \t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst,
                (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))]>,
              EVEX;

    // Same asm-string rule as 'rrk': nothing between "}}" and '|'.
    def rmk : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins KRC:$mask, x86memop:$src),
              !strconcat(OpcodeStr," \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              []>,
              EVEX, EVEX_K;

    def rmkz : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins KRC:$mask, x86memop:$src),
              !strconcat(OpcodeStr," \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
              []>,
              EVEX, EVEX_KZ;
  }
}
4648
// Zero-extending instantiations (OpNode = X86vzext), all with a 512-bit
// destination. The mask class tracks the destination element count
// (VK16WM for v16i32 results, VK8WM for v8i64 results).
Robert Khasanov189e7fd2014-04-22 11:36:19 +00004649defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VK16WM, VR512, VR128X, X86vzext,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004650 memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
4651 EVEX_CD8<8, CD8VQ>;
Robert Khasanov189e7fd2014-04-22 11:36:19 +00004652defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VK8WM, VR512, VR128X, X86vzext,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004653 memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
4654 EVEX_CD8<8, CD8VO>;
Robert Khasanov189e7fd2014-04-22 11:36:19 +00004655defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VK16WM, VR512, VR256X, X86vzext,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004656 memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
4657 EVEX_CD8<16, CD8VH>;
Robert Khasanov189e7fd2014-04-22 11:36:19 +00004658defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VK8WM, VR512, VR128X, X86vzext,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004659 memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
4660 EVEX_CD8<16, CD8VQ>;
Robert Khasanov189e7fd2014-04-22 11:36:19 +00004661defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VK8WM, VR512, VR256X, X86vzext,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004662 memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
4663 EVEX_CD8<32, CD8VH>;
Robert Khasanov189e7fd2014-04-22 11:36:19 +00004664
// Sign-extending instantiations (OpNode = X86vsext); same shapes as the
// zero-extending group, opcodes 0x21-0x25.
4665defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VK16WM, VR512, VR128X, X86vsext,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004666 memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
4667 EVEX_CD8<8, CD8VQ>;
Robert Khasanov189e7fd2014-04-22 11:36:19 +00004668defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VK8WM, VR512, VR128X, X86vsext,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004669 memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
4670 EVEX_CD8<8, CD8VO>;
Robert Khasanov189e7fd2014-04-22 11:36:19 +00004671defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VK16WM, VR512, VR256X, X86vsext,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004672 memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
4673 EVEX_CD8<16, CD8VH>;
Robert Khasanov189e7fd2014-04-22 11:36:19 +00004674defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VK8WM, VR512, VR128X, X86vsext,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004675 memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
4676 EVEX_CD8<16, CD8VQ>;
Robert Khasanov189e7fd2014-04-22 11:36:19 +00004677defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VK8WM, VR512, VR256X, X86vsext,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004678 memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
4679 EVEX_CD8<32, CD8VH>;
4680
4681//===----------------------------------------------------------------------===//
4682// GATHER - SCATTER Operations
4683
// Masked gather: a single 'rm' form with an empty pattern list.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   KRC       - mask register class.
//   RC        - register class of $dst / $src1.
//   memop     - memory operand ($src2).
// Outputs are the destination vector and a written-back mask ($mask_wb).
// The constraint string marks $dst early-clobber, ties $src1 to $dst and
// ties the $mask input to $mask_wb.
4684multiclass avx512_gather<bits<8> opc, string OpcodeStr, RegisterClass KRC,
4685 RegisterClass RC, X86MemOperand memop> {
4686let mayLoad = 1,
4687 Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in
4688 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst, KRC:$mask_wb),
4689 (ins RC:$src1, KRC:$mask, memop:$src2),
4690 !strconcat(OpcodeStr,
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00004691 " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004692 []>, EVEX, EVEX_K;
4693}
Cameron McInally45325962014-03-26 13:50:50 +00004694
// Floating-point gathers, grouped by execution domain. The 64-bit element
// variants add VEX_W and use EVEX_CD8<64, ...>; the 32-bit element variants
// use EVEX_CD8<32, ...>.
4695let ExeDomain = SSEPackedDouble in {
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004696defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", VK8WM, VR512, vy64xmem>,
4697 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004698defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", VK8WM, VR512, vz64mem>,
4699 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
Cameron McInally45325962014-03-26 13:50:50 +00004700}
4701
4702let ExeDomain = SSEPackedSingle in {
4703defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512, vz32mem>,
4704 EVEX_V512, EVEX_CD8<32, CD8VT1>;
// The qps form uses a VK8WM mask and a VR256X data register.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004705defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>,
4706 EVEX_V512, EVEX_CD8<32, CD8VT1>;
Cameron McInally45325962014-03-26 13:50:50 +00004707}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004708
// Integer gathers (opcodes 0x90 / 0x91); no ExeDomain override is applied.
4709defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512, vy64xmem>,
4710 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4711defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>,
4712 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4713
4714defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", VK8WM, VR512, vz64mem>,
4715 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4716defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", VK8WM, VR256X, vz64mem>,
4717 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4718
// Masked scatter: a single 'mr' form with an empty pattern list.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   KRC       - mask register class.
//   RC        - register class of the data source ($src2).
//   memop     - memory operand ($dst).
// The only output is the written-back mask ($mask_wb), which is tied to the
// $mask input; the def is flagged mayStore.
4719multiclass avx512_scatter<bits<8> opc, string OpcodeStr, RegisterClass KRC,
4720 RegisterClass RC, X86MemOperand memop> {
4721let mayStore = 1, Constraints = "$mask = $mask_wb" in
4722 def mr : AVX5128I<opc, MRMDestMem, (outs KRC:$mask_wb),
4723 (ins memop:$dst, KRC:$mask, RC:$src2),
4724 !strconcat(OpcodeStr,
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00004725 " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004726 []>, EVEX, EVEX_K;
4727}
4728
// Floating-point scatters, grouped by execution domain (opcodes 0xA2 / 0xA3).
// Operand classes and encoding modifiers parallel the gather instantiations.
Cameron McInally45325962014-03-26 13:50:50 +00004729let ExeDomain = SSEPackedDouble in {
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004730defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", VK8WM, VR512, vy64xmem>,
4731 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004732defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", VK8WM, VR512, vz64mem>,
4733 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
Cameron McInally45325962014-03-26 13:50:50 +00004734}
4735
4736let ExeDomain = SSEPackedSingle in {
4737defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", VK16WM, VR512, vz32mem>,
4738 EVEX_V512, EVEX_CD8<32, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004739defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", VK8WM, VR256X, vz64mem>,
4740 EVEX_V512, EVEX_CD8<32, CD8VT1>;
Cameron McInally45325962014-03-26 13:50:50 +00004741}
4742
// Integer scatters (opcodes 0xA0 / 0xA1); no ExeDomain override is applied.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004743defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", VK8WM, VR512, vy64xmem>,
4744 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4745defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", VK16WM, VR512, vz32mem>,
4746 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4747
4748defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512, vz64mem>,
4749 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4750defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", VK8WM, VR256X, vz64mem>,
4751 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4752
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00004753// prefetch
// Shared definition for the gather and scatter prefetch instructions.
//   opc       - opcode byte.
//   F         - instruction Format supplied by each instantiation.
//   OpcodeStr - mnemonic.
//   KRC       - mask register class.
//   memop     - memory operand ($src).
// The single 'm' def has no outputs and an empty pattern list, is predicated
// on HasPFI and is marked hasSideEffects.
4754multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
4755 RegisterClass KRC, X86MemOperand memop> {
4756 let Predicates = [HasPFI], hasSideEffects = 1 in
4757 def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
4758 !strconcat(OpcodeStr, " \t{$src {${mask}}|{${mask}}, $src}"),
4759 []>, EVEX, EVEX_K;
4760}
4761
// Prefetch instantiations. All share opcodes 0xC6 (dword-index mnemonics) and
// 0xC7 (qword-index mnemonics); the Format argument tells them apart:
//   MRM1m = vgatherpf0*, MRM2m = vgatherpf1*,
//   MRM5m = vscatterpf0*, MRM6m = vscatterpf1*.
// NOTE(review): the *QPS variants use EVEX_CD8<64, ...> and the *DPD variants
// use EVEX_CD8<32, ...>, whereas the VGATHERQPSZ / VGATHERDPDZ definitions in
// this file use 32 and 64 respectively. Confirm which element size the
// compressed displacement should follow here.
4762defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
4763 VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
4764
4765defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
4766 VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
4767
4768defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
4769 VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
4770
4771defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
4772 VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4773
4774defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
4775 VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
4776
4777defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
4778 VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
4779
4780defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
4781 VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
4782
4783defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
4784 VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4785
4786defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
4787 VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
4788
4789defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
4790 VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
4791
4792defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
4793 VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
4794
4795defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
4796 VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4797
4798defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
4799 VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
4800
4801defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
4802 VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
4803
4804defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
4805 VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
4806
4807defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
4808 VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004809//===----------------------------------------------------------------------===//
4810// VSHUFPS - VSHUFPD Operations
4811
// VSHUFPS / VSHUFPD: memory-source ('rmi') and register-source ('rri') forms,
// both at opcode 0xC6 and both matching X86Shufp with an i8 immediate.
//   RC        - register class of $dst, $src1 and (in 'rri') $src2.
//   x86memop  - memory operand for $src2 of 'rmi'.
//   vt        - result value type of the matched node.
//   OpcodeStr - mnemonic.
//   mem_frag  - load fragment used by the 'rmi' pattern.
//   d         - domain passed through to AVX512PIi8.
4812multiclass avx512_shufp<RegisterClass RC, X86MemOperand x86memop,
4813 ValueType vt, string OpcodeStr, PatFrag mem_frag,
4814 Domain d> {
4815 def rmi : AVX512PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
4816 (ins RC:$src1, x86memop:$src2, i8imm:$src3),
4817 !strconcat(OpcodeStr,
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00004818 " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004819 [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
4820 (i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
Elena Demikhovskyb30371c2013-10-02 06:39:07 +00004821 EVEX_4V, Sched<[WriteShuffleLd, ReadAfterLd]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004822 def rri : AVX512PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
4823 (ins RC:$src1, RC:$src2, i8imm:$src3),
4824 !strconcat(OpcodeStr,
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00004825 " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004826 [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
4827 (i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
Elena Demikhovskyb30371c2013-10-02 06:39:07 +00004828 EVEX_4V, Sched<[WriteShuffle]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004829}
4830
// 512-bit instantiations: v16f32 (PS prefix) and v8f64 (PD prefix, VEX_W).
4831defm VSHUFPSZ : avx512_shufp<VR512, f512mem, v16f32, "vshufps", memopv16f32,
Craig Topper5ccb6172014-02-18 00:21:49 +00004832 SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004833defm VSHUFPDZ : avx512_shufp<VR512, f512mem, v8f64, "vshufpd", memopv8f64,
Craig Topperae11aed2014-01-14 07:41:20 +00004834 SSEPackedDouble>, PD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004835
// Integer-typed X86Shufp nodes reuse the floating-point instructions:
// v16i32 -> VSHUFPSZ, v8i64 -> VSHUFPDZ (register and memory operand forms).
Elena Demikhovsky462a2d22013-10-06 06:11:18 +00004836def : Pat<(v16i32 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
4837 (VSHUFPSZrri VR512:$src1, VR512:$src2, imm:$imm)>;
4838def : Pat<(v16i32 (X86Shufp VR512:$src1,
4839 (memopv16i32 addr:$src2), (i8 imm:$imm))),
4840 (VSHUFPSZrmi VR512:$src1, addr:$src2, imm:$imm)>;
4841
4842def : Pat<(v8i64 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
4843 (VSHUFPDZrri VR512:$src1, VR512:$src2, imm:$imm)>;
4844def : Pat<(v8i64 (X86Shufp VR512:$src1,
4845 (memopv8i64 addr:$src2), (i8 imm:$imm))),
4846 (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004847
// VALIGND / VALIGNQ, parameterized by an X86VectorVTInfo record ('_') that
// supplies the register class, value types, mask class, memory operand and
// mnemonic suffix.
//   rri - register form, generated through AVX512_masking. Its pattern
//         matches X86VAlign with the two register operands in the opposite
//         order from the instruction's (ins ...) list.
//   rmi - memory form, empty pattern list, flagged mayLoad; it is a plain
//         def and does not go through AVX512_masking.
// An extra Pat<> maps X86VAlign on _.FloatVT to the same 'rri' instruction,
// applying the same operand swap.
Adam Nemet5ed17da2014-08-21 19:50:07 +00004848multiclass avx512_valign<X86VectorVTInfo _> {
4849 defm rri : AVX512_masking<0x03, MRMSrcReg, (outs _.RC:$dst),
4850 (ins _.RC:$src1, _.RC:$src2, i8imm:$src3),
4851 "valign"##_.Suffix,
Adam Nemet2e2537f2014-08-07 17:53:55 +00004852 "$src3, $src2, $src1", "$src1, $src2, $src3",
Adam Nemet5ed17da2014-08-21 19:50:07 +00004853 (_.VT (X86VAlign _.RC:$src2, _.RC:$src1,
4854 (i8 imm:$src3))),
4855 _.VT, _.RC, _.KRCWM>,
Adam Nemet2e2537f2014-08-07 17:53:55 +00004856 AVX512AIi8Base, EVEX_4V;
Adam Nemetfd2161b2014-08-05 17:23:04 +00004857
Adam Nemetf92139d2014-08-05 17:22:50 +00004858 // Also match valign of packed floats.
Adam Nemet5ed17da2014-08-21 19:50:07 +00004859 def : Pat<(_.FloatVT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
4860 (!cast<Instruction>(NAME##rri) _.RC:$src2, _.RC:$src1, imm:$imm)>;
Adam Nemetf92139d2014-08-05 17:22:50 +00004861
Elena Demikhovsky0a74b7d2013-11-14 11:29:27 +00004862 let mayLoad = 1 in
Adam Nemet5ed17da2014-08-21 19:50:07 +00004863 def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs _.RC:$dst),
4864 (ins _.RC:$src1, _.MemOp:$src2, i8imm:$src3),
4865 !strconcat("valign"##_.Suffix,
Adam Nemet1c752d82014-08-05 17:22:47 +00004866 " \t{$src3, $src2, $src1, $dst|"
4867 "$dst, $src1, $src2, $src3}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004868 []>, EVEX_4V;
4869}
// Instantiations for the v16i32 and v8i64 type-info records; the 64-bit
// element variant adds VEX_W.
Adam Nemet5ed17da2014-08-21 19:50:07 +00004870defm VALIGND : avx512_valign<v16i32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
4871defm VALIGNQ : avx512_valign<v8i64_info>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004872
Elena Demikhovskybb2f6b72014-03-27 09:45:08 +00004873// Helper fragments to match sext vXi1 to vXiY.
// Each leaf matches X86vsrai of VR512:$src by (element width - 1): 31 for
// v16i32 and 63 for v8i64.
4874def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
4875def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
4876
// VPABS: register, full-width memory and broadcast-memory sources, each in
// plain, masked ({k}) and zero-masked ({k}{z}) variants.
//   opc           - opcode byte.
//   OpcodeStr     - mnemonic.
//   OpVT          - kept for interface compatibility; not referenced here.
//   KRC           - mask register class for the masked variants.
//   RC            - register class of $dst and of the register source.
//   x86memop      - full-width memory operand (rm / rmk / rmkz).
//   x86scalar_mop - scalar memory operand for the broadcast forms.
//   BrdcstStr     - broadcast decoration appended to the memory operand in
//                   the assembly string of the broadcast forms.
// Every def has an empty pattern list; selection is done by stand-alone
// Pat<> records.
multiclass avx512_vpabs<bits<8> opc, string OpcodeStr, ValueType OpVT,
                        RegisterClass KRC, RegisterClass RC,
                        X86MemOperand x86memop, X86MemOperand x86scalar_mop,
                        string BrdcstStr> {
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
            !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
            []>, EVEX;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
             !strconcat(OpcodeStr, " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
             []>, EVEX, EVEX_K;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
              !strconcat(OpcodeStr,
                         " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
              []>, EVEX, EVEX_KZ;
  // The memory forms use the RC parameter for $dst, like the register forms,
  // rather than a hard-coded register class.
  let mayLoad = 1 in {
    def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
              (ins x86memop:$src),
              !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
              []>, EVEX;
    def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
               (ins KRC:$mask, x86memop:$src),
               !strconcat(OpcodeStr,
                          " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
               []>, EVEX, EVEX_K;
    def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                (ins KRC:$mask, x86memop:$src),
                !strconcat(OpcodeStr,
                           " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
                []>, EVEX, EVEX_KZ;
    def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
               (ins x86scalar_mop:$src),
               !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                          ", $dst|$dst, ${src}", BrdcstStr, "}"),
               []>, EVEX, EVEX_B;
    def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                (ins KRC:$mask, x86scalar_mop:$src),
                !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                           ", $dst {${mask}}|$dst {${mask}}, ${src}", BrdcstStr, "}"),
                []>, EVEX, EVEX_B, EVEX_K;
    def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                 (ins KRC:$mask, x86scalar_mop:$src),
                 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                            ", $dst {${mask}} {z}|$dst {${mask}} {z}, ${src}",
                            BrdcstStr, "}"),
                 []>, EVEX, EVEX_B, EVEX_KZ;
  }
}
4924
// 512-bit instantiations: dword (opcode 0x1E, {1to16} broadcast) and qword
// (opcode 0x1F, VEX_W, {1to8} broadcast).
Elena Demikhovskybb2f6b72014-03-27 09:45:08 +00004925defm VPABSDZ : avx512_vpabs<0x1E, "vpabsd", v16i32, VK16WM, VR512,
4926 i512mem, i32mem, "{1to16}">, EVEX_V512,
4927 EVEX_CD8<32, CD8VF>;
4928defm VPABSQZ : avx512_vpabs<0x1F, "vpabsq", v8i64, VK8WM, VR512,
4929 i512mem, i64mem, "{1to8}">, EVEX_V512, VEX_W,
4930 EVEX_CD8<64, CD8VF>;
4931
// Select the register form for xor(s, add(x, s)), where s is the
// arithmetic-shift-right leaf defined by v16i1sextv16i32 / v8i1sextv8i64.
4932def : Pat<(xor
4933 (bc_v16i32 (v16i1sextv16i32)),
4934 (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
4935 (VPABSDZrr VR512:$src)>;
4936def : Pat<(xor
4937 (bc_v8i64 (v8i1sextv8i64)),
4938 (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
4939 (VPABSQZrr VR512:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004940
// Select the register form for the masked intrinsics when the second operand
// is all-zeros and the mask is all-ones.
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00004941def : Pat<(v16i32 (int_x86_avx512_mask_pabs_d_512 (v16i32 VR512:$src),
4942 (v16i32 immAllZerosV), (i16 -1))),
Elena Demikhovskybb2f6b72014-03-27 09:45:08 +00004943 (VPABSDZrr VR512:$src)>;
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00004944def : Pat<(v8i64 (int_x86_avx512_mask_pabs_q_512 (v8i64 VR512:$src),
4945 (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
Elena Demikhovskybb2f6b72014-03-27 09:45:08 +00004946 (VPABSQZrr VR512:$src)>;
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00004947
// VPCONFLICT: register, full-width memory and broadcast-memory sources, each
// in plain, zero-masked ({k}{z}) and merge-masked ({k}) variants.
//   opc           - opcode byte.
//   OpcodeStr     - mnemonic.
//   RC            - register class of $dst and of the register sources.
//   KRC           - mask register class for the masked variants.
//   x86memop      - full-width memory operand.
//   x86scalar_mop - scalar memory operand for the broadcast forms.
//   BrdcstStr     - broadcast decoration appended to the memory operand in
//                   the assembly string of the broadcast forms.
// Every def has an empty pattern list, so each memory-source form is flagged
// mayLoad explicitly.
multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
                        RegisterClass RC, RegisterClass KRC,
                        X86MemOperand x86memop,
                        X86MemOperand x86scalar_mop, string BrdcstStr> {
  // No space before '|': it would be printed as trailing whitespace in the
  // AT&T variant.
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src),
       !strconcat(OpcodeStr, " \t{$src, ${dst}|${dst}, $src}"),
       []>, EVEX;
  let mayLoad = 1 in
  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins x86memop:$src),
       !strconcat(OpcodeStr, " \t{$src, ${dst}|${dst}, $src}"),
       []>, EVEX;
  let mayLoad = 1 in
  def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins x86scalar_mop:$src),
       !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                  ", ${dst}|${dst}, ${src}", BrdcstStr, "}"),
       []>, EVEX, EVEX_B;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
       (ins KRC:$mask, RC:$src),
       !strconcat(OpcodeStr,
                  " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
       []>, EVEX, EVEX_KZ;
  let mayLoad = 1 in
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, x86memop:$src),
       !strconcat(OpcodeStr,
                  " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
       []>, EVEX, EVEX_KZ;
  let mayLoad = 1 in
  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, x86scalar_mop:$src),
       !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                  ", ${dst} {${mask}} {z}|${dst} {${mask}} {z}, ${src}",
                  BrdcstStr, "}"),
       []>, EVEX, EVEX_KZ, EVEX_B;

  // Merge-masked forms: $src1 is tied to $dst.
  let Constraints = "$src1 = $dst" in {
  def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, KRC:$mask, RC:$src2),
       !strconcat(OpcodeStr,
                  " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
       []>, EVEX, EVEX_K;
  let mayLoad = 1 in
  def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, KRC:$mask, x86memop:$src2),
       !strconcat(OpcodeStr,
                  " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
       []>, EVEX, EVEX_K;
  let mayLoad = 1 in
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, KRC:$mask, x86scalar_mop:$src2),
       !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                  ", ${dst} {${mask}}|${dst} {${mask}}, ${src2}", BrdcstStr, "}"),
       []>, EVEX, EVEX_K, EVEX_B;
  }
}
5000
// VPCONFLICTD / VPCONFLICTQ, available only with the CDI feature.
// Both share opcode 0xC4 and are instantiated from avx512_conflict, which
// supplies the rr/rm/rmb forms together with their zero-masked (kz) and
// merge-masked (k) variants.
let Predicates = [HasCDI] in {
  // Dword form: VK16WM write-mask, i32mem broadcast operand printed with
  // the "{1to16}" suffix.
  defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM,
                                     i512mem, i32mem, "{1to16}">,
                     EVEX_V512, EVEX_CD8<32, CD8VF>;

  // Qword form: VK8WM write-mask, i64mem broadcast operand printed with
  // the "{1to8}" suffix; additionally sets VEX_W.
  defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM,
                                     i512mem, i64mem, "{1to8}">,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
Elena Demikhovsky6270b382013-12-10 11:58:35 +00005012
// Lower the masked conflict intrinsics to the merge-masked register form.
//
// The intrinsic's second vector operand becomes $src1, which the rrk form
// ties to $dst; the first vector operand becomes $src2. The GPR mask is
// copied into the write-mask register class expected by the instruction.
//
// The patterns are guarded by HasCDI, matching the VPCONFLICT* definitions
// they select: without the predicate they could pick a CDI-only instruction
// on a subtarget that does not provide it.
let Predicates = [HasCDI] in {
def : Pat<(int_x86_avx512_mask_conflict_d_512 VR512:$src2, VR512:$src1,
                                              GR16:$mask),
          (VPCONFLICTDrrk VR512:$src1,
           (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;

def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1,
                                              GR8:$mask),
          (VPCONFLICTQrrk VR512:$src1,
           (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
}
Elena Demikhovskycf0b9ba2014-04-09 12:37:50 +00005022
// VPLZCNTD / VPLZCNTQ, available only with the CDI feature.
// They share opcode 0x44 and reuse the avx512_conflict multiclass, so they
// get the same rr/rm/rmb forms and masked (k / kz) variants as VPCONFLICT*.
let Predicates = [HasCDI] in {
  // Dword form: VK16WM write-mask, i32mem broadcast operand printed with
  // the "{1to16}" suffix.
  defm VPLZCNTD : avx512_conflict<0x44, "vplzcntd", VR512, VK16WM,
                                  i512mem, i32mem, "{1to16}">,
                  EVEX_V512, EVEX_CD8<32, CD8VF>;

  // Qword form: VK8WM write-mask, i64mem broadcast operand printed with
  // the "{1to8}" suffix; additionally sets VEX_W.
  defm VPLZCNTQ : avx512_conflict<0x44, "vplzcntq", VR512, VK8WM,
                                  i512mem, i64mem, "{1to8}">,
                  EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
5034
// Lower the masked lzcnt intrinsics to the merge-masked register form.
//
// The intrinsic's second vector operand becomes $src1, which the rrk form
// ties to $dst; the first vector operand becomes $src2. The GPR mask is
// copied into the write-mask register class expected by the instruction.
//
// The patterns are guarded by HasCDI, matching the VPLZCNT* definitions
// they select: without the predicate they could pick a CDI-only instruction
// on a subtarget that does not provide it.
let Predicates = [HasCDI] in {
def : Pat<(int_x86_avx512_mask_lzcnt_d_512 VR512:$src2, VR512:$src1,
                                           GR16:$mask),
          (VPLZCNTDrrk VR512:$src1,
           (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;

def : Pat<(int_x86_avx512_mask_lzcnt_q_512 VR512:$src2, VR512:$src1,
                                           GR8:$mask),
          (VPLZCNTQrrk VR512:$src1,
           (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
}
5044
// Select the generic ctlz node on 512-bit integer vectors to the unmasked
// VPLZCNT forms: a memop source uses the rm form, a register source uses
// the rr form.
//
// Guarded by HasCDI because the selected VPLZCNT* instructions are only
// defined under that predicate; an unguarded pattern could otherwise match
// on a subtarget without CDI.
let Predicates = [HasCDI] in {
def : Pat<(v16i32 (ctlz (memopv16i32 addr:$src))),
          (VPLZCNTDrm addr:$src)>;
def : Pat<(v16i32 (ctlz (v16i32 VR512:$src))),
          (VPLZCNTDrr VR512:$src)>;
def : Pat<(v8i64 (ctlz (memopv8i64 addr:$src))),
          (VPLZCNTQrm addr:$src)>;
def : Pat<(v8i64 (ctlz (v8i64 VR512:$src))),
          (VPLZCNTQrr VR512:$src)>;
}
5053
// Stores of constant i1 values become byte-immediate stores.
// Both encodings of "true" (-1 and 1) are written as the byte 1.
def : Pat<(store (i1 -1), addr:$dst),
          (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 1), addr:$dst),
          (MOV8mi addr:$dst, (i8 1))>;
// "false" is written as the byte 0.
def : Pat<(store (i1 0), addr:$dst),
          (MOV8mi addr:$dst, (i8 0))>;
Elena Demikhovskyacc5c9e2014-04-22 14:13:10 +00005057
// Store a single mask bit held in VK1: re-class the register as VK16 and
// store it with KMOVWmk.
// NOTE(review): the KMOVW mnemonic suggests a 16-bit store, which would be
// wider than the i1 being stored -- confirm against the KMOVWmk definition.
def : Pat<(store VK1:$src, addr:$dst),
          (KMOVWmk addr:$dst,
                   (COPY_TO_REGCLASS VK1:$src, VK16))>;
5060
// Pattern fragment matching a truncstore whose in-memory type is i1.
def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
                           (truncstore node:$val, node:$ptr), [{
  // Only accept the store when its memory VT is exactly i1.
  const StoreSDNode *Store = cast<StoreSDNode>(N);
  return Store->getMemoryVT() == MVT::i1;
}]>;
5065
// A truncating i1 store of an 8-bit GPR is emitted as a plain byte store of
// that register.
def : Pat<(truncstorei1 GR8:$src, addr:$dst),
          (MOV8mr addr:$dst, GR8:$src)>;
5068