blob: d11a1d34998f131f5d9bb62c835bc3e6fc0a379b [file] [log] [blame]
// Collects the template arguments that are fully determined by a vector type
// (NumElts x EltVT), such as the write-mask register class. Instruction
// definitions take a single record of this class instead of a long list of
// individual arguments.
class X86VectorVTInfo<int NumElts, ValueType EltVT, RegisterClass rc,
                      string suffix = ""> {
  // Register class supplied by the instantiation (e.g. VR512).
  RegisterClass RC = rc;

  // Mask register class for this element count: VK<NumElts>.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Write-mask variant of the mask register class: VK<NumElts>WM.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // GPR register class able to hold the write mask. TableGen has no !lt, so
  // "NumElts < 8" is spelled "(NumElts >> 3) == 0"; those cases use GR8 and
  // everything else uses GR<NumElts>.
  RegisterClass MRC =
    !cast<RegisterClass>(
      "GR" # !if(!eq(!srl(NumElts, 3), 0), 8, NumElts));

  // Suffix appended to the instruction mnemonic.
  string Suffix = suffix;

  // Name of the vector type, e.g. "v16i32".
  string VTName = "v" # NumElts # EltVT;

  // The vector value type itself.
  ValueType VT = !cast<ValueType>(VTName);

  // Name of the element type, e.g. "i32".
  string EltTypeName = !cast<string>(EltVT);

  // Element size in bits as a string, obtained by stripping the "i"/"f"
  // from the element type name, e.g. "32" for v16i32.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  // Element size in bits as an integer.
  int EltSize = EltVT.Size;

  // "i" for integer element types, "f" for floating-point element types.
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of the vector type in bits, e.g. 512 for v16i32.
  int Size = VT.Size;

  // Memory operand for the whole vector, e.g. i512mem for v16i32.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  // Memory operand for a single element, e.g. i32mem.
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");

  // Load fragments.
  // 128- and 256-bit integer vectors use loadv2i64 / loadv4i64 because of
  // load promotion during legalization.
  PatFrag LdFrag = !cast<PatFrag>("load" #
                                  !if(!eq(TypeVariantName, "i"),
                                      !if(!eq(Size, 128), "v2i64",
                                          !if(!eq(Size, 256), "v4i64",
                                              VTName)),
                                      VTName));
  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);

  // Matching floating-point vector type, e.g. v16f32 for v16i32.
  // For EltSize < 32 the float type would be illegal and TableGen would fail
  // to compile, so VT itself is used in that case.
  ValueType FloatVT = !cast<ValueType>(
                        !if(!eq(!srl(EltSize, 5), 0),
                            VTName,
                            !if(!eq(TypeVariantName, "i"),
                                "v" # NumElts # "f" # EltSize,
                                VTName)));

  // Embedded-broadcast decoration used in assembly, e.g. "{1to16}".
  string BroadcastStr = "{1to" # NumElts # "}";
}
68
// Integer vector types held in VR512.
def v64i8_info   : X86VectorVTInfo<64, i8,  VR512, "b">;
def v32i16_info  : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info  : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info   : X86VectorVTInfo<8,  i64, VR512, "q">;

// The trailing "x" in the record name (as in v32i8x_info) means RC = VR256X.
def v32i8x_info  : X86VectorVTInfo<32, i8,  VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;

// Same naming scheme for the records whose RC is VR128X.
def v16i8x_info  : X86VectorVTInfo<16, i8,  VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
84
// Groups three X86VectorVTInfo records so that they can be passed around as
// a single template argument; they are exposed as info512, info256 and
// info128.
class AVX512VLVectorVTInfo<X86VectorVTInfo i512,
                           X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}
91
// One grouping per integer element type: <VR512 info, VR256X info, VR128X info>.
def avx512vl_i8_info  :
  AVX512VLVectorVTInfo<v64i8_info,  v32i8x_info,  v16i8x_info>;
def avx512vl_i16_info :
  AVX512VLVectorVTInfo<v32i16_info, v16i16x_info, v8i16x_info>;
def avx512vl_i32_info :
  AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,  v4i32x_info>;
def avx512vl_i64_info :
  AVX512VLVectorVTInfo<v8i64_info,  v4i64x_info,  v2i64x_info>;
100
101
// Shared implementation behind AVX512_masking and AVX512_masking_3src.
// Emits three records per instantiation:
//   NAME    - the unmasked instruction,
//   NAME#k  - the merge-masking variant (pattern given by MaskingRHS),
//   NAME#kz - the zero-masking variant (masked-off elements become zero).
multiclass AVX512_masking_common<bits<8> O, Format F, dag Outs, dag Ins,
                                 dag MaskingIns, dag ZeroMaskingIns,
                                 string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskingRHS, ValueType OpVT,
                                 RegisterClass RC, RegisterClass KRC,
                                 string MaskingConstraint = ""> {
  // Unmasked form.
  def NAME: AVX512<O, F, Outs, Ins,
                   OpcodeStr # " \t{" # AttSrcAsm # ", $dst|$dst, " #
                     IntelSrcAsm # "}",
                   [(set RC:$dst, RHS)]>;

  // Merge-masking form. The added complexity makes it win over the
  // VMOV*rrk Pat<>.
  let AddedComplexity = 20 in
  def NAME#k: AVX512<O, F, Outs, MaskingIns,
                     OpcodeStr # " \t{" # AttSrcAsm #
                       ", $dst {${mask}}|$dst {${mask}}, " #
                       IntelSrcAsm # "}",
                     [(set RC:$dst, MaskingRHS)]>,
              EVEX_K {
    // The 3src subclass overrides this with a let.
    string Constraints = MaskingConstraint;
  }

  // Zero-masking form. The added complexity makes it win over the
  // VMOV*rrkz Pat<>.
  let AddedComplexity = 30 in
  def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                      OpcodeStr # " \t{" # AttSrcAsm #
                        ", $dst {${mask}} {z}|$dst {${mask}} {z}, " #
                        IntelSrcAsm # "}",
                      [(set RC:$dst,
                            (vselect KRC:$mask, RHS,
                                     (OpVT (bitconvert
                                             (v16i32 immAllZerosV)))))]>,
               EVEX_KZ;
}
135
// Produces the unconditional (non-masking), the masking and the zero-masking
// variant of an instruction. For the masking variant the preserved vector
// elements are supplied by an extra dummy input operand, $src0, which is
// tied to $dst.
multiclass AVX512_masking<bits<8> O, Format F, dag Outs, dag Ins,
                          string OpcodeStr,
                          string AttSrcAsm, string IntelSrcAsm,
                          dag RHS, ValueType OpVT, RegisterClass RC,
                          RegisterClass KRC> :
  AVX512_masking_common<O, F, Outs,
                        // Unmasked inputs.
                        Ins,
                        // Masking inputs: pass-through value and mask first.
                        !con((ins RC:$src0, KRC:$mask), Ins),
                        // Zero-masking inputs: mask first.
                        !con((ins KRC:$mask), Ins),
                        OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                        (vselect KRC:$mask, RHS, RC:$src0),
                        OpVT, RC, KRC,
                        "$src0 = $dst">;
151
// Like AVX512_masking, except that one of the source operands ($src1) is
// already tied to $dst, so it doubles as the source of the preserved vector
// elements. NOTE: NonTiedIns (the ins dag) must not contain $src1; it is
// prepended here.
multiclass AVX512_masking_3src<bits<8> O, Format F, dag Outs, dag NonTiedIns,
                               string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, ValueType OpVT,
                               RegisterClass RC, RegisterClass KRC> :
  AVX512_masking_common<O, F, Outs,
                        // Unmasked inputs.
                        !con((ins RC:$src1), NonTiedIns),
                        // Masking inputs.
                        !con((ins RC:$src1),
                             !con((ins KRC:$mask), NonTiedIns)),
                        // Zero-masking inputs (same operand list).
                        !con((ins RC:$src1),
                             !con((ins KRC:$mask), NonTiedIns)),
                        OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                        (vselect KRC:$mask, RHS, RC:$src1),
                        OpVT, RC, KRC>;
169
// Bitcasts between vector types that live in the same register class. Each
// pattern returns the source register retyped, since no instruction is needed
// for the conversion. All patterns are guarded by HasAVX512.
let Predicates = [HasAVX512] in {
  // Bitcasts between 512-bit vector types (VR512).
  def : Pat<(v8f64  (bitconvert (v8i64  VR512:$src))), (v8f64  VR512:$src)>;
  def : Pat<(v8f64  (bitconvert (v16i32 VR512:$src))), (v8f64  VR512:$src)>;
  def : Pat<(v8f64  (bitconvert (v32i16 VR512:$src))), (v8f64  VR512:$src)>;
  def : Pat<(v8f64  (bitconvert (v64i8  VR512:$src))), (v8f64  VR512:$src)>;
  def : Pat<(v8f64  (bitconvert (v16f32 VR512:$src))), (v8f64  VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v8i64  VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v64i8  VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v8f64  VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v8i64  (bitconvert (v16i32 VR512:$src))), (v8i64  VR512:$src)>;
  def : Pat<(v8i64  (bitconvert (v32i16 VR512:$src))), (v8i64  VR512:$src)>;
  def : Pat<(v8i64  (bitconvert (v64i8  VR512:$src))), (v8i64  VR512:$src)>;
  def : Pat<(v8i64  (bitconvert (v8f64  VR512:$src))), (v8i64  VR512:$src)>;
  def : Pat<(v8i64  (bitconvert (v16f32 VR512:$src))), (v8i64  VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v8i64  VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v64i8  VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v8f64  VR512:$src))), (v16i32 VR512:$src)>;
  // The v32i16 <- v16f32 pattern used to be listed twice; once is enough.
  def : Pat<(v32i16 (bitconvert (v8i64  VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v64i8  VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v8f64  VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v64i8  (bitconvert (v8i64  VR512:$src))), (v64i8  VR512:$src)>;
  def : Pat<(v64i8  (bitconvert (v16i32 VR512:$src))), (v64i8  VR512:$src)>;
  def : Pat<(v64i8  (bitconvert (v32i16 VR512:$src))), (v64i8  VR512:$src)>;
  def : Pat<(v64i8  (bitconvert (v8f64  VR512:$src))), (v64i8  VR512:$src)>;
  def : Pat<(v64i8  (bitconvert (v16f32 VR512:$src))), (v64i8  VR512:$src)>;

  // Bitcasts between 128-bit vector types (VR128X).
  def : Pat<(v2i64 (bitconvert (v4i32 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v2f64 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v4f32 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v4f32 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v2i64 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v4i32 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v2i64 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 VR128X:$src))), (v2f64 VR128X:$src)>;

  // Bitcasts between 256-bit vector types (VR256X).
  def : Pat<(v4f64  (bitconvert (v8f32  VR256X:$src))), (v4f64  VR256X:$src)>;
  def : Pat<(v4f64  (bitconvert (v8i32  VR256X:$src))), (v4f64  VR256X:$src)>;
  def : Pat<(v4f64  (bitconvert (v4i64  VR256X:$src))), (v4f64  VR256X:$src)>;
  def : Pat<(v4f64  (bitconvert (v16i16 VR256X:$src))), (v4f64  VR256X:$src)>;
  def : Pat<(v4f64  (bitconvert (v32i8  VR256X:$src))), (v4f64  VR256X:$src)>;
  def : Pat<(v8f32  (bitconvert (v8i32  VR256X:$src))), (v8f32  VR256X:$src)>;
  def : Pat<(v8f32  (bitconvert (v4i64  VR256X:$src))), (v8f32  VR256X:$src)>;
  def : Pat<(v8f32  (bitconvert (v4f64  VR256X:$src))), (v8f32  VR256X:$src)>;
  def : Pat<(v8f32  (bitconvert (v32i8  VR256X:$src))), (v8f32  VR256X:$src)>;
  def : Pat<(v8f32  (bitconvert (v16i16 VR256X:$src))), (v8f32  VR256X:$src)>;
  def : Pat<(v4i64  (bitconvert (v8f32  VR256X:$src))), (v4i64  VR256X:$src)>;
  def : Pat<(v4i64  (bitconvert (v8i32  VR256X:$src))), (v4i64  VR256X:$src)>;
  def : Pat<(v4i64  (bitconvert (v4f64  VR256X:$src))), (v4i64  VR256X:$src)>;
  def : Pat<(v4i64  (bitconvert (v32i8  VR256X:$src))), (v4i64  VR256X:$src)>;
  def : Pat<(v4i64  (bitconvert (v16i16 VR256X:$src))), (v4i64  VR256X:$src)>;
  def : Pat<(v32i8  (bitconvert (v4f64  VR256X:$src))), (v32i8  VR256X:$src)>;
  def : Pat<(v32i8  (bitconvert (v4i64  VR256X:$src))), (v32i8  VR256X:$src)>;
  def : Pat<(v32i8  (bitconvert (v8f32  VR256X:$src))), (v32i8  VR256X:$src)>;
  def : Pat<(v32i8  (bitconvert (v8i32  VR256X:$src))), (v32i8  VR256X:$src)>;
  def : Pat<(v32i8  (bitconvert (v16i16 VR256X:$src))), (v32i8  VR256X:$src)>;
  def : Pat<(v8i32  (bitconvert (v32i8  VR256X:$src))), (v8i32  VR256X:$src)>;
  def : Pat<(v8i32  (bitconvert (v16i16 VR256X:$src))), (v8i32  VR256X:$src)>;
  def : Pat<(v8i32  (bitconvert (v8f32  VR256X:$src))), (v8i32  VR256X:$src)>;
  def : Pat<(v8i32  (bitconvert (v4i64  VR256X:$src))), (v8i32  VR256X:$src)>;
  def : Pat<(v8i32  (bitconvert (v4f64  VR256X:$src))), (v8i32  VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v8f32  VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v8i32  VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v4i64  VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v4f64  VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v32i8  VR256X:$src))), (v16i16 VR256X:$src)>;
}
269
//
// AVX-512: the VPXOR instruction writes zero to the upper part of its
// destination, so it is safe to use it to build zeros.
//
// Pseudo instruction producing an all-zeros VR512 value.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512] in
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                        [(set VR512:$dst, (v16f32 immAllZerosV))]>;
279
// All-zeros vectors of these 512-bit types are selected to the
// AVX512_512_SET0 pseudo as well (its own pattern covers v16f32).
let Predicates = [HasAVX512] in {
  def : Pat<(v8i64  immAllZerosV), (AVX512_512_SET0)>;
  def : Pat<(v16i32 immAllZerosV), (AVX512_512_SET0)>;
  def : Pat<(v8f64  immAllZerosV), (AVX512_512_SET0)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000285
286//===----------------------------------------------------------------------===//
287// AVX-512 - VECTOR INSERT
288//
// -- 32x4 fp form --
// vinsertf32x4: insert a 128-bit value ($src2, register or memory) into a
// 512-bit register; $src3 is the immediate operand. No selection patterns are
// attached here, so the memory flags are set by hand.
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
  // Register source.
  def VINSERTF32x4rr : AVX512AIi8<0x18, MRMSrcReg, (outs VR512:$dst),
      (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
      "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      []>,
      EVEX_4V, EVEX_V512;

  // Memory source.
  let mayLoad = 1 in {
    def VINSERTF32x4rm : AVX512AIi8<0x18, MRMSrcMem, (outs VR512:$dst),
        (ins VR512:$src1, f128mem:$src2, i8imm:$src3),
        "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        []>,
        EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
  }
}
301
// -- 64x4 fp form --
// vinsertf64x4: insert a 256-bit value ($src2, register or memory) into a
// 512-bit register; $src3 is the immediate operand. No selection patterns are
// attached here, so the memory flags are set by hand.
let hasSideEffects = 0, ExeDomain = SSEPackedDouble in {
def VINSERTF64x4rr : AVX512AIi8<0x1a, MRMSrcReg, (outs VR512:$dst),
          (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
          "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, EVEX_4V, EVEX_V512, VEX_W;
// The memory operand is f256mem (not i256mem) so that this FP-domain form is
// consistent with VINSERTF32x4rm (f128mem) and VEXTRACTF64x4mr (f256mem).
let mayLoad = 1 in
def VINSERTF64x4rm : AVX512AIi8<0x1a, MRMSrcMem, (outs VR512:$dst),
          (ins VR512:$src1, f256mem:$src2, i8imm:$src3),
          "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
}
// -- 32x4 integer form --
// vinserti32x4: integer counterpart of vinsertf32x4 (no ExeDomain is set).
let hasSideEffects = 0 in {
  // Register source.
  def VINSERTI32x4rr : AVX512AIi8<0x38, MRMSrcReg, (outs VR512:$dst),
      (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
      "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      []>,
      EVEX_4V, EVEX_V512;

  // Memory source.
  let mayLoad = 1 in {
    def VINSERTI32x4rm : AVX512AIi8<0x38, MRMSrcMem, (outs VR512:$dst),
        (ins VR512:$src1, i128mem:$src2, i8imm:$src3),
        "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        []>,
        EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
  }
}
326
// -- 64x4 integer form --
// vinserti64x4: insert a 256-bit value ($src2, register or memory) into a
// 512-bit register; $src3 is the immediate operand.
let hasSideEffects = 0 in {
  // Register source.
  def VINSERTI64x4rr : AVX512AIi8<0x3a, MRMSrcReg, (outs VR512:$dst),
      (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
      "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      []>,
      EVEX_4V, EVEX_V512, VEX_W;

  // Memory source.
  let mayLoad = 1 in {
    def VINSERTI64x4rm : AVX512AIi8<0x3a, MRMSrcMem, (outs VR512:$dst),
        (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
        "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        []>,
        EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
  }
}
339
// 128-bit subvector insert from a register. The FP types select
// VINSERTF32x4rr and the integer types select VINSERTI32x4rr; the immediate
// is produced by the INSERT_get_vinsert128_imm transform.
def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1),
                                  (v4f32 VR128X:$src2), (iPTR imm)),
          (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1),
                                  (v2f64 VR128X:$src2), (iPTR imm)),
          (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1),
                                  (v2i64 VR128X:$src2), (iPTR imm)),
          (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1),
                                  (v4i32 VR128X:$src2), (iPTR imm)),
          (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
Robert Khasanoved0b2e92014-03-31 16:01:38 +0000352
// 128-bit subvector insert with the inserted value loaded from memory; these
// select the rm forms. The v16i32 case matches a loadv2i64 wrapped in
// bc_v4i32.
def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1),
                                  (loadv4f32 addr:$src2), (iPTR imm)),
          (VINSERTF32x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1),
                                  (bc_v4i32 (loadv2i64 addr:$src2)),
                                  (iPTR imm)),
          (VINSERTI32x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1),
                                  (loadv2f64 addr:$src2), (iPTR imm)),
          (VINSERTF32x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1),
                                  (loadv2i64 addr:$src2), (iPTR imm)),
          (VINSERTI32x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
366
// 256-bit subvector insert from a register. The FP types select
// VINSERTF64x4rr and the integer types select VINSERTI64x4rr; the immediate
// is produced by the INSERT_get_vinsert256_imm transform.
def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (v8f32 VR256X:$src2),
           (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
           (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (v4f64 VR256X:$src2),
           (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
           (INSERT_get_vinsert256_imm VR512:$ins))>;
// The two integer patterns previously used the vinsert128_insert fragment
// even though they insert a 256-bit value and use the 256-bit immediate
// transform. They now use vinsert256_insert, matching the FP patterns above
// and the integer load patterns.
def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (v4i64 VR256X:$src2),
           (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
           (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1), (v8i32 VR256X:$src2),
           (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
           (INSERT_get_vinsert256_imm VR512:$ins))>;
379
// 256-bit subvector insert with the inserted value loaded from memory; these
// select the rm forms. The v16i32 case matches a loadv4i64 wrapped in
// bc_v8i32.
def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1),
                                  (loadv8f32 addr:$src2), (iPTR imm)),
          (VINSERTF64x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1),
                                  (loadv4f64 addr:$src2), (iPTR imm)),
          (VINSERTF64x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1),
                                  (loadv4i64 addr:$src2), (iPTR imm)),
          (VINSERTI64x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1),
                                  (bc_v8i32 (loadv4i64 addr:$src2)),
                                  (iPTR imm)),
          (VINSERTI64x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
393
// vinsertps - insert an f32 into an XMM register.
// Register form: selected for X86insertps with two VR128X operands.
def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
    (ins VR128X:$src1, VR128X:$src2, i8imm:$src3),
    "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
    [(set VR128X:$dst,
          (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
    EVEX_4V;

// Memory form: the second operand is an f32 load turned into a v4f32 with
// scalar_to_vector.
def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
    (ins VR128X:$src1, f32mem:$src2, i8imm:$src3),
    "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
    [(set VR128X:$dst,
          (X86insertps VR128X:$src1,
                       (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                       imm:$src3))]>,
    EVEX_4V, EVEX_CD8<32, CD8VT1>;
406
407//===----------------------------------------------------------------------===//
408// AVX-512 VECTOR EXTRACT
409//---
// FP-domain subvector extracts from a 512-bit register. None of these carry a
// selection pattern, so TableGen cannot infer memory behaviour: every form
// that writes memory must be flagged mayStore explicitly, otherwise
// hasSideEffects = 0 leaves the store unmodelled.
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
// -- 32x4 form --
def VEXTRACTF32x4rr : AVX512AIi8<0x19, MRMDestReg, (outs VR128X:$dst),
          (ins VR512:$src1, i8imm:$src2),
          "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, EVEX, EVEX_V512;
// mayStore was missing here although the 64x4 memory form below had it.
let mayStore = 1 in
def VEXTRACTF32x4mr : AVX512AIi8<0x19, MRMDestMem, (outs),
          (ins f128mem:$dst, VR512:$src1, i8imm:$src2),
          "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;

// -- 64x4 form --
def VEXTRACTF64x4rr : AVX512AIi8<0x1b, MRMDestReg, (outs VR256X:$dst),
          (ins VR512:$src1, i8imm:$src2),
          "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, EVEX, EVEX_V512, VEX_W;
let mayStore = 1 in
def VEXTRACTF64x4mr : AVX512AIi8<0x1b, MRMDestMem, (outs),
          (ins f256mem:$dst, VR512:$src1, i8imm:$src2),
          "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
}
432
// Integer subvector extracts from a 512-bit register. None of these carry a
// selection pattern, so TableGen cannot infer memory behaviour: every form
// that writes memory must be flagged mayStore explicitly, otherwise
// hasSideEffects = 0 leaves the store unmodelled.
let hasSideEffects = 0 in {
// -- 32x4 form --
def VEXTRACTI32x4rr : AVX512AIi8<0x39, MRMDestReg, (outs VR128X:$dst),
          (ins VR512:$src1, i8imm:$src2),
          "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, EVEX, EVEX_V512;
// mayStore was missing here although the 64x4 memory form below had it.
let mayStore = 1 in
def VEXTRACTI32x4mr : AVX512AIi8<0x39, MRMDestMem, (outs),
          (ins i128mem:$dst, VR512:$src1, i8imm:$src2),
          "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;

// -- 64x4 form --
def VEXTRACTI64x4rr : AVX512AIi8<0x3b, MRMDestReg, (outs VR256X:$dst),
          (ins VR512:$src1, i8imm:$src2),
          "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, EVEX, EVEX_V512, VEX_W;
let mayStore = 1 in
def VEXTRACTI64x4mr : AVX512AIi8<0x3b, MRMDestMem, (outs),
          (ins i256mem:$dst, VR512:$src1, i8imm:$src2),
          "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
}
455
// 128-bit subvector extracts; the immediate is produced by the
// EXTRACT_get_vextract128_imm transform.
def : Pat<(vextract128_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
          (v4f32 (VEXTRACTF32x4rr
                   VR512:$src1,
                   (EXTRACT_get_vextract128_imm VR128X:$ext)))>;

// NOTE(review): the source operand carries no explicit type here and the
// v4i32 result is produced with the FP-domain instruction - confirm that
// this is intended.
def : Pat<(vextract128_extract:$ext VR512:$src1, (iPTR imm)),
          (v4i32 (VEXTRACTF32x4rr
                   VR512:$src1,
                   (EXTRACT_get_vextract128_imm VR128X:$ext)))>;

def : Pat<(vextract128_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
          (v2f64 (VEXTRACTF32x4rr
                   VR512:$src1,
                   (EXTRACT_get_vextract128_imm VR128X:$ext)))>;

def : Pat<(vextract128_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
          (v2i64 (VEXTRACTI32x4rr
                   VR512:$src1,
                   (EXTRACT_get_vextract128_imm VR128X:$ext)))>;


// 256-bit subvector extracts; the immediate is produced by the
// EXTRACT_get_vextract256_imm transform.
def : Pat<(vextract256_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
          (v8f32 (VEXTRACTF64x4rr
                   VR512:$src1,
                   (EXTRACT_get_vextract256_imm VR256X:$ext)))>;

def : Pat<(vextract256_extract:$ext (v16i32 VR512:$src1), (iPTR imm)),
          (v8i32 (VEXTRACTI64x4rr
                   VR512:$src1,
                   (EXTRACT_get_vextract256_imm VR256X:$ext)))>;

def : Pat<(vextract256_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
          (v4f64 (VEXTRACTF64x4rr
                   VR512:$src1,
                   (EXTRACT_get_vextract256_imm VR256X:$ext)))>;

def : Pat<(vextract256_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
          (v4i64 (VEXTRACTI64x4rr
                   VR512:$src1,
                   (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
488
// Extracting the 256-bit subvector at index 0 of a 512-bit vector is just a
// subregister copy (sub_ymm); no instruction is needed.
def : Pat<(v8i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
          (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm))>;
def : Pat<(v8f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
          (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm))>;
def : Pat<(v4i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
          (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm))>;
def : Pat<(v4f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
          (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm))>;

// zmm -> xmm: the same for the 128-bit subvector at index 0 (sub_xmm).
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
          (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
          (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
509
510
// Inserting a 128-bit subvector at index 0 of an undef 512-bit vector is a
// subregister copy and needs no instruction. It is expressed as two nested
// INSERT_SUBREGs: xmm into an IMPLICIT_DEF 256-bit value, then that value
// into an IMPLICIT_DEF 512-bit value.
def : Pat<(insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG
            (v8i64 (IMPLICIT_DEF)),
            (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
            sub_ymm)>;
def : Pat<(insert_subvector undef, (v2f64 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG
            (v8f64 (IMPLICIT_DEF)),
            (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
            sub_ymm)>;
def : Pat<(insert_subvector undef, (v4i32 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG
            (v16i32 (IMPLICIT_DEF)),
            (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
            sub_ymm)>;
def : Pat<(insert_subvector undef, (v4f32 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG
            (v16f32 (IMPLICIT_DEF)),
            (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
            sub_ymm)>;

// The 256-bit case needs a single INSERT_SUBREG through sub_ymm.
def : Pat<(insert_subvector undef, (v4i64 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v4f64 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
538
// vextractps - extract 32 bits from an XMM register.
// Register form: the selected element of the v4f32 source, viewed as v4i32,
// is written to a GR32.
def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
    (ins VR128X:$src1, i32i8imm:$src2),
    "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
    [(set GR32:$dst,
          (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
    EVEX;

// Memory form: the same element is stored to $dst.
def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
    (ins f32mem:$dst, VR128X:$src1, i32i8imm:$src2),
    "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
    [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
            addr:$dst)]>,
    EVEX, EVEX_CD8<32, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000551
552//===---------------------------------------------------------------------===//
553// AVX-512 BROADCAST
554//---
// Defines the register-source (rr) and memory-source (rm) forms of a
// floating-point broadcast. Neither form has a selection pattern attached
// here.
multiclass avx512_fp_broadcast<bits<8> opc, string OpcodeStr,
                               RegisterClass DestRC,
                               RegisterClass SrcRC,
                               X86MemOperand x86memop> {
  def rr : AVX5128I<opc, MRMSrcReg, (outs DestRC:$dst), (ins SrcRC:$src),
                    OpcodeStr # " \t{$src, $dst|$dst, $src}",
                    []>,
           EVEX;
  def rm : AVX5128I<opc, MRMSrcMem, (outs DestRC:$dst), (ins x86memop:$src),
                    OpcodeStr # " \t{$src, $dst|$dst, $src}",
                    []>,
           EVEX;
}
// vbroadcastss: 32-bit source, VR512 destination.
let ExeDomain = SSEPackedSingle in
defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss", VR512,
                                         VR128X, f32mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;

// vbroadcastsd: 64-bit source, VR512 destination.
let ExeDomain = SSEPackedDouble in
defm VBROADCASTSDZ : avx512_fp_broadcast<0x19, "vbroadcastsd", VR512,
                                         VR128X, f64mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
575
576def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))),
577 (VBROADCASTSSZrm addr:$src)>;
578def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))),
579 (VBROADCASTSDZrm addr:$src)>;
580
Quentin Colombet4bf1c282013-10-25 17:47:18 +0000581def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
582 (VBROADCASTSSZrm addr:$src)>;
583def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
584 (VBROADCASTSDZrm addr:$src)>;
585
// Integer broadcast from a general-purpose register into a 512-bit vector.
//   Zrr  - unmasked form.
//   Zkrr - zero-masking form taking a write mask in KRC.
// Neither def carries a selection pattern.
multiclass avx512_int_broadcast_reg<bits<8> opc, string OpcodeStr,
                                    RegisterClass SrcRC, RegisterClass KRC> {
  def Zrr : AVX5128I<opc, MRMSrcReg,
                     (outs VR512:$dst), (ins SrcRC:$src),
                     !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                     []>,
            EVEX, EVEX_V512;
  def Zkrr : AVX5128I<opc, MRMSrcReg,
                      (outs VR512:$dst), (ins KRC:$mask, SrcRC:$src),
                      !strconcat(OpcodeStr,
                        " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
                      []>,
             EVEX, EVEX_V512, EVEX_KZ;
}
597
defm VPBROADCASTDr : avx512_int_broadcast_reg<0x7C, "vpbroadcastd",
                                              GR32, VK16WM>;
defm VPBROADCASTQr : avx512_int_broadcast_reg<0x7C, "vpbroadcastq",
                                              GR64, VK8WM>,
                     VEX_W;

// Zero-extending a mask to a vector: broadcast the constant 1 under the mask
// using the zero-masking form.
def : Pat<(v16i32 (X86vzext VK16WM:$mask)),
          (VPBROADCASTDrZkrr VK16WM:$mask, (i32 (MOV32ri 0x1)))>;

def : Pat<(v8i64 (X86vzext VK8WM:$mask)),
          (VPBROADCASTQrZkrr VK8WM:$mask, (i64 (MOV64ri 0x1)))>;

// Plain and masked broadcasts of a GPR value.
def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))),
          (VPBROADCASTDrZrr GR32:$src)>;
def : Pat<(v16i32 (X86VBroadcastm VK16WM:$mask, (i32 GR32:$src))),
          (VPBROADCASTDrZkrr VK16WM:$mask, GR32:$src)>;
def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))),
          (VPBROADCASTQrZrr GR64:$src)>;
def : Pat<(v8i64 (X86VBroadcastm VK8WM:$mask, (i64 GR64:$src))),
          (VPBROADCASTQrZkrr VK8WM:$mask, GR64:$src)>;

// Unmasked GPR broadcast intrinsics.
def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_i32_512 (i32 GR32:$src))),
          (VPBROADCASTDrZrr GR32:$src)>;
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_i64_512 (i64 GR64:$src))),
          (VPBROADCASTQrZrr GR64:$src)>;

// Masked GPR broadcast intrinsics with an all-zeros pass-through operand: the
// GPR mask is copied into the write-mask class and the zero-masking form is
// used.
def : Pat<(v16i32 (int_x86_avx512_mask_pbroadcast_d_gpr_512
                     (i32 GR32:$src), (v16i32 immAllZerosV),
                     (i16 GR16:$mask))),
          (VPBROADCASTDrZkrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
                             GR32:$src)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pbroadcast_q_gpr_512
                    (i64 GR64:$src), (bc_v8i64 (v16i32 immAllZerosV)),
                    (i8 GR8:$mask))),
          (VPBROADCASTQrZkrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
                             GR64:$src)>;
628
// Integer broadcast from the low element of an XMM register or from memory.
//   rr / krr - register source, unmasked / zero-masked.
//   rm / krm - memory source (loaded through ld_frag), unmasked / zero-masked.
multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
                                   X86MemOperand x86memop, PatFrag ld_frag,
                                   RegisterClass DstRC, ValueType OpVT,
                                   ValueType SrcVT, RegisterClass KRC> {
  def rr : AVX5128I<opc, MRMSrcReg,
                    (outs DstRC:$dst), (ins VR128X:$src),
                    !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                    [(set DstRC:$dst,
                          (OpVT (X86VBroadcast (SrcVT VR128X:$src))))]>,
           EVEX;
  def krr : AVX5128I<opc, MRMSrcReg,
                     (outs DstRC:$dst), (ins KRC:$mask, VR128X:$src),
                     !strconcat(OpcodeStr,
                       " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
                     [(set DstRC:$dst,
                           (OpVT (X86VBroadcastm KRC:$mask,
                                                 (SrcVT VR128X:$src))))]>,
            EVEX, EVEX_KZ;

  let mayLoad = 1 in
  def rm : AVX5128I<opc, MRMSrcMem,
                    (outs DstRC:$dst), (ins x86memop:$src),
                    !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                    [(set DstRC:$dst,
                          (OpVT (X86VBroadcast (ld_frag addr:$src))))]>,
           EVEX;

  let mayLoad = 1 in
  def krm : AVX5128I<opc, MRMSrcMem,
                     (outs DstRC:$dst), (ins KRC:$mask, x86memop:$src),
                     !strconcat(OpcodeStr,
                       " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
                     [(set DstRC:$dst,
                           (OpVT (X86VBroadcastm KRC:$mask,
                                                 (ld_frag addr:$src))))]>,
            EVEX, EVEX_KZ;
}
657
// 512-bit dword / qword broadcasts from an XMM register or a scalar load.
defm VPBROADCASTDZ : avx512_int_broadcast_rm<0x58, "vpbroadcastd", i32mem,
                                             loadi32, VR512, v16i32, v4i32,
                                             VK16WM>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem,
                                             loadi64, VR512, v8i64, v2i64,
                                             VK8WM>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
664
// Sub-vector broadcast from memory into a 512-bit register: an unmasked form
// (rm) and a zero-masking form (krm). Both are marked mayLoad and carry no
// selection pattern; ld_frag is accepted but not referenced by either def.
multiclass avx512_int_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                          X86MemOperand x86memop,
                                          PatFrag ld_frag,
                                          RegisterClass KRC> {
  let mayLoad = 1 in
  def rm : AVX5128I<opc, MRMSrcMem,
                    (outs VR512:$dst), (ins x86memop:$src),
                    !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                    []>,
           EVEX;

  let mayLoad = 1 in
  def krm : AVX5128I<opc, MRMSrcMem,
                     (outs VR512:$dst), (ins KRC:$mask, x86memop:$src),
                     !strconcat(OpcodeStr,
                       " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
                     []>,
            EVEX, EVEX_KZ;
}
679
// 128-bit sub-vector broadcast with 32-bit element granularity: sixteen
// elements in the 512-bit result, so the write mask is VK16WM.
defm VBROADCASTI32X4 : avx512_int_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                                                      i128mem, loadv2i64,
                                                      VK16WM>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;

// 256-bit sub-vector broadcast with 64-bit element granularity: eight
// elements in the 512-bit result, so the write mask is VK8WM, matching the
// other 8 x 64-bit instructions in this file.
defm VBROADCASTI64X4 : avx512_int_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                                                      i256mem, loadv4i64,
                                                      VK8WM>,
                       VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
686
// Integer broadcast intrinsics taking an XMM source.
def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))),
          (VPBROADCASTDZrr VR128X:$src)>;
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
          (VPBROADCASTQZrr VR128X:$src)>;

// Floating-point broadcast of an XMM source.
def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))),
          (VBROADCASTSSZrr VR128X:$src)>;
def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))),
          (VBROADCASTSDZrr VR128X:$src)>;

// Floating-point broadcast intrinsics taking an XMM source.
def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))),
          (VBROADCASTSSZrr VR128X:$src)>;
def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
          (VBROADCASTSDZrr VR128X:$src)>;

// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
          (VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
          (VBROADCASTSDZrr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;

709
// A masked v8i32 broadcast of a loaded i32 is performed with the 512-bit
// zero-masking instruction (mask copied to VK16WM) and the low sub_ymm of the
// result is extracted.
let Predicates = [HasAVX512] in
def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))),
          (EXTRACT_SUBREG
            (v16i32 (VPBROADCASTDZkrm (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
                                      addr:$src)),
            sub_ymm)>;
716//===----------------------------------------------------------------------===//
717// AVX-512 BROADCAST MASK TO VECTOR REGISTER
718//---
719
// Broadcast of a mask register into a vector register (VPBROADCASTM*).
//
// The vector destination is encoded in ModRM.reg and the mask source in
// ModRM.r/m, so the instruction form is MRMSrcReg (MRMDestReg would swap the
// two fields in the emitted encoding).
//
// OpVT and SrcVT are accepted for interface compatibility but are not
// referenced: the instruction carries no selection pattern.
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 RegisterClass DstRC, RegisterClass KRC,
                                 ValueType OpVT, ValueType SrcVT> {
  def rr : AVX512XS8I<opc, MRMSrcReg,
                      (outs DstRC:$dst), (ins KRC:$src),
                      !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                      []>,
           EVEX;
}
727
// Mask-to-vector broadcasts are only available with the CDI feature.
let Predicates = [HasCDI] in {
  defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                               VR512, VK16, v16i32, v16i1>,
                         EVEX_V512;
  defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                               VR512, VK8, v8i64, v8i1>,
                         EVEX_V512, VEX_W;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000734
735//===----------------------------------------------------------------------===//
736// AVX-512 - VPERM
737//
738// -- immediate form --
// Permute controlled by an 8-bit immediate.
//   ri - register source.
//   mi - memory source, loaded through mem_frag.
multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                           SDNode OpNode, PatFrag mem_frag,
                           X86MemOperand x86memop, ValueType OpVT> {
  def ri : AVX512AIi8<opc, MRMSrcReg,
                      (outs RC:$dst), (ins RC:$src1, i8imm:$src2),
                      !strconcat(OpcodeStr,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [(set RC:$dst,
                            (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
           EVEX;
  def mi : AVX512AIi8<opc, MRMSrcMem,
                      (outs RC:$dst), (ins x86memop:$src1, i8imm:$src2),
                      !strconcat(OpcodeStr,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [(set RC:$dst,
                            (OpVT (OpNode (mem_frag addr:$src1),
                                          (i8 imm:$src2))))]>,
           EVEX;
}
757
// Immediate-controlled 64-bit element permutes (integer and double).
defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", VR512, X86VPermi,
                               memopv8i64, i512mem, v8i64>,
               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

let ExeDomain = SSEPackedDouble in
defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", VR512, X86VPermi,
                                memopv8f64, f512mem, v8f64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
763
// -- VPERM - register form --
// Two-operand permute selected from X86VPermv.
//   rr - both operands in registers.
//   rm - second operand loaded from memory through mem_frag.
multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                       PatFrag mem_frag, X86MemOperand x86memop,
                       ValueType OpVT> {
  def rr : AVX5128I<opc, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, RC:$src2),
                    !strconcat(OpcodeStr,
                      " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst,
                          (OpVT (X86VPermv RC:$src1, RC:$src2)))]>,
           EVEX_4V;

  def rm : AVX5128I<opc, MRMSrcMem,
                    (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                    !strconcat(OpcodeStr,
                      " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst,
                          (OpVT (X86VPermv RC:$src1,
                                           (mem_frag addr:$src2))))]>,
           EVEX_4V;
}
783
// Register-controlled permutes for 32/64-bit integer and FP elements.
defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, memopv16i32, i512mem,
                           v16i32>,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, memopv8i64, i512mem,
                           v8i64>,
               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

let ExeDomain = SSEPackedSingle in
defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, memopv16f32, f512mem,
                            v16f32>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;

let ExeDomain = SSEPackedDouble in
defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem,
                            v8f64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
794
// -- VPERM2I - 3 source operands form --
// Three-source permute in which $src1 is tied to $dst, so $src1 is omitted
// from the assembly strings.
//   rr   / rm   - unmasked, register / memory third source.
//   rrk  / rmk  - merge-masking: elements not selected by $mask keep $src1.
//   rrkz / rmkz - zero-masking: elements not selected by $mask are zero.
multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC,
                            PatFrag mem_frag, X86MemOperand x86memop,
                            SDNode OpNode, ValueType OpVT, RegisterClass KRC> {
let Constraints = "$src1 = $dst" in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                    (ins RC:$src1, RC:$src2, RC:$src3),
                    !strconcat(OpcodeStr,
                      " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                    [(set RC:$dst,
                          (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>,
           EVEX_4V;

  def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                     (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
                     !strconcat(OpcodeStr,
                       " \t{$src3, $src2, $dst {${mask}}|"
                       "$dst {${mask}}, $src2, $src3}"),
                     [(set RC:$dst,
                           (OpVT (vselect KRC:$mask,
                                          (OpNode RC:$src1, RC:$src2,
                                                  RC:$src3),
                                          RC:$src1)))]>,
            EVEX_4V, EVEX_K;

  // The AT&T half of the string must not end in a space before '|': the
  // memory variant (rmkz) has none, and a stray space is printed verbatim.
  let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                      (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
                      !strconcat(OpcodeStr,
                        " \t{$src3, $src2, $dst {${mask}} {z}|"
                        "$dst {${mask}} {z}, $src2, $src3}"),
                      [(set RC:$dst,
                            (OpVT (vselect KRC:$mask,
                                           (OpNode RC:$src1, RC:$src2,
                                                   RC:$src3),
                                           (OpVT (bitconvert
                                             (v16i32 immAllZerosV))))))]>,
             EVEX_4V, EVEX_KZ;

  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                    (ins RC:$src1, RC:$src2, x86memop:$src3),
                    !strconcat(OpcodeStr,
                      " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                    [(set RC:$dst,
                          (OpVT (OpNode RC:$src1, RC:$src2,
                                        (mem_frag addr:$src3))))]>,
           EVEX_4V;

  def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                     (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
                     !strconcat(OpcodeStr,
                       " \t{$src3, $src2, $dst {${mask}}|"
                       "$dst {${mask}}, $src2, $src3}"),
                     [(set RC:$dst,
                           (OpVT (vselect KRC:$mask,
                                          (OpNode RC:$src1, RC:$src2,
                                                  (mem_frag addr:$src3)),
                                          RC:$src1)))]>,
            EVEX_4V, EVEX_K;

  let AddedComplexity = 10 in // Prefer over the rrkz variant
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                      (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
                      !strconcat(OpcodeStr,
                        " \t{$src3, $src2, $dst {${mask}} {z}|"
                        "$dst {${mask}} {z}, $src2, $src3}"),
                      [(set RC:$dst,
                            (OpVT (vselect KRC:$mask,
                                           (OpNode RC:$src1, RC:$src2,
                                                   (mem_frag addr:$src3)),
                                           (OpVT (bitconvert
                                             (v16i32 immAllZerosV))))))]>,
             EVEX_4V, EVEX_KZ;
  }
}
// VPERMI2*: three-source permutes selected from X86VPermiv3.
defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32,
                                 i512mem, X86VPermiv3, v16i32, VK16WM>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64,
                                 i512mem, X86VPermiv3, v8i64, VK8WM>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32,
                                  i512mem, X86VPermiv3, v16f32, VK16WM>,
                 EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64,
                                  i512mem, X86VPermiv3, v8f64, VK8WM>,
                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000879
// VPERMT2*: instantiates avx512_perm_3src with the mnemonic "vpermt2"<Suffix>
// and adds patterns for the int_x86_avx512_mask_vpermt_<Suffix>_512 intrinsic.
// The intrinsic's operand order is (idx, src1, src2, mask); the instruction
// takes (src1, idx, src2), so the first two operands are swapped.
multiclass avx512_perm_table_3src<bits<8> opc, string Suffix, RegisterClass RC,
                                  PatFrag mem_frag, X86MemOperand x86memop,
                                  SDNode OpNode, ValueType OpVT,
                                  RegisterClass KRC, ValueType MaskVT,
                                  RegisterClass MRC> :
        avx512_perm_3src<opc, "vpermt2"##Suffix, RC, mem_frag, x86memop, OpNode,
                         OpVT, KRC> {
  // Mask operand of -1: select the unmasked register form.
  def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512")
                     VR512:$idx, VR512:$src1, VR512:$src2, -1)),
            (!cast<Instruction>(NAME#rr) VR512:$src1, VR512:$idx, VR512:$src2)>;

  // Mask held in MRC: copy it into KRC and select the merge-masking form.
  def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512")
                     VR512:$idx, VR512:$src1, VR512:$src2, MRC:$mask)),
            (!cast<Instruction>(NAME#rrk)
               VR512:$src1,
               (MaskVT (COPY_TO_REGCLASS MRC:$mask, KRC)),
               VR512:$idx, VR512:$src2)>;
}
895
// VPERMT2* instantiations, selected from X86VPermv3.
defm VPERMT2D : avx512_perm_table_3src<0x7E, "d", VR512, memopv16i32,
                                       i512mem, X86VPermv3, v16i32,
                                       VK16WM, v16i1, GR16>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q : avx512_perm_table_3src<0x7E, "q", VR512, memopv8i64,
                                       i512mem, X86VPermv3, v8i64,
                                       VK8WM, v8i1, GR8>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2PS : avx512_perm_table_3src<0x7F, "ps", VR512, memopv16f32,
                                        i512mem, X86VPermv3, v16f32,
                                        VK16WM, v16i1, GR16>,
                 EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_table_3src<0x7F, "pd", VR512, memopv8f64,
                                        i512mem, X86VPermv3, v8f64,
                                        VK8WM, v8i1, GR8>,
                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky299cf5112014-04-29 09:09:15 +0000908
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000909//===----------------------------------------------------------------------===//
910// AVX-512 - BLEND using mask
911//
// Mask-controlled blend.
//   rr - register form; the pattern is (OpNode mask, src2, src1), i.e. the
//        two sources appear in the DAG node in the opposite order from the
//        instruction's operands.
//   rm - memory form; marked mayLoad and carries no selection pattern
//        (mem_frag is accepted but not referenced).
multiclass avx512_blendmask<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, RegisterClass RC,
                            X86MemOperand x86memop, PatFrag mem_frag,
                            SDNode OpNode, ValueType vt> {
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                    (ins KRC:$mask, RC:$src1, RC:$src2),
                    !strconcat(OpcodeStr,
                      " \t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
                    [(set RC:$dst,
                          (OpNode KRC:$mask, (vt RC:$src2), (vt RC:$src1)))]>,
           EVEX_4V, EVEX_K;

  let mayLoad = 1 in
  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                    (ins KRC:$mask, RC:$src1, x86memop:$src2),
                    !strconcat(OpcodeStr,
                      " \t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
                    []>,
           EVEX_4V, EVEX_K;
}
929
// Floating-point mask blends, selected from vselect.
let ExeDomain = SSEPackedSingle in
defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps",
                                   VK16WM, VR512, f512mem,
                                   memopv16f32, vselect, v16f32>,
                  EVEX_CD8<32, CD8VF>, EVEX_V512;

let ExeDomain = SSEPackedDouble in
defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd",
                                   VK8WM, VR512, f512mem,
                                   memopv8f64, vselect, v8f64>,
                  VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;

// Blend intrinsics: the GPR mask is copied into the write-mask class before
// being used by the register form.
def : Pat<(v16f32 (int_x86_avx512_mask_blend_ps_512 (v16f32 VR512:$src1),
                                                    (v16f32 VR512:$src2),
                                                    (i16 GR16:$mask))),
          (VBLENDMPSZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
                        VR512:$src1, VR512:$src2)>;

def : Pat<(v8f64 (int_x86_avx512_mask_blend_pd_512 (v8f64 VR512:$src1),
                                                   (v8f64 VR512:$src2),
                                                   (i8 GR8:$mask))),
          (VBLENDMPDZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
                        VR512:$src1, VR512:$src2)>;
950
// Integer mask blends, selected from vselect.
defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd",
                                   VK16WM, VR512, f512mem,
                                   memopv16i32, vselect, v16i32>,
                  EVEX_CD8<32, CD8VF>, EVEX_V512;

defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq",
                                   VK8WM, VR512, f512mem,
                                   memopv8i64, vselect, v8i64>,
                  VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;

// Blend intrinsics. The $mask operand of the rr instructions is declared
// with the write-mask classes (VK16WM / VK8WM), so the GPR mask is copied
// into those classes, the same way the floating-point blend patterns do,
// rather than into the plain VK16 / VK8 classes.
def : Pat<(v16i32 (int_x86_avx512_mask_blend_d_512 (v16i32 VR512:$src1),
                                                   (v16i32 VR512:$src2),
                                                   (i16 GR16:$mask))),
          (VPBLENDMDZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
                        VR512:$src1, VR512:$src2)>;

def : Pat<(v8i64 (int_x86_avx512_mask_blend_q_512 (v8i64 VR512:$src1),
                                                  (v8i64 VR512:$src2),
                                                  (i8 GR8:$mask))),
          (VPBLENDMQZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
                        VR512:$src1, VR512:$src2)>;

970
// 256-bit vselect is performed with the 512-bit blend: both sources are
// placed in 512-bit registers via SUBREG_TO_REG, the v8i1 mask is copied to
// VK16WM, and the low sub_ymm of the result is extracted. Note that $src2 is
// passed before $src1 to the blend instruction.
let Predicates = [HasAVX512] in {
def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask),
                          (v8f32 VR256X:$src1),
                          (v8f32 VR256X:$src2))),
          (EXTRACT_SUBREG
            (v16f32 (VBLENDMPSZrr
              (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
              (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
              (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))),
            sub_ymm)>;

def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask),
                          (v8i32 VR256X:$src1),
                          (v8i32 VR256X:$src2))),
          (EXTRACT_SUBREG
            (v16i32 (VPBLENDMDZrr
              (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))),
            sub_ymm)>;
}
Elena Demikhovsky47fc44e2013-12-16 13:52:35 +0000986//===----------------------------------------------------------------------===//
987// Compare Instructions
988//===----------------------------------------------------------------------===//
989
// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
// Scalar FP compare producing a VK1 result.
//   rr / rm           - condition code given as a CC operand; selected from
//                       OpNode.
//   rri_alt / rmi_alt - assembler-only forms taking a raw i8 immediate and
//                       using the alternate asm string.
multiclass avx512_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
                             Operand CC, SDNode OpNode, ValueType VT,
                             PatFrag ld_frag, string asm, string asm_alt> {
  def rr : AVX512Ii8<0xC2, MRMSrcReg,
                     (outs VK1:$dst), (ins RC:$src1, RC:$src2, CC:$cc),
                     asm,
                     [(set VK1:$dst,
                           (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
                     IIC_SSE_ALU_F32S_RR>,
           EVEX_4V;
  def rm : AVX512Ii8<0xC2, MRMSrcMem,
                     (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc),
                     asm,
                     [(set VK1:$dst,
                           (OpNode (VT RC:$src1), (ld_frag addr:$src2),
                                   imm:$cc))],
                     IIC_SSE_ALU_F32P_RM>,
           EVEX_4V;

  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512Ii8<0xC2, MRMSrcReg,
                            (outs VK1:$dst),
                            (ins RC:$src1, RC:$src2, i8imm:$cc),
                            asm_alt, [], IIC_SSE_ALU_F32S_RR>,
                  EVEX_4V;
    def rmi_alt : AVX512Ii8<0xC2, MRMSrcMem,
                            (outs VK1:$dst),
                            (ins RC:$src1, x86memop:$src2, i8imm:$cc),
                            asm_alt, [], IIC_SSE_ALU_F32P_RM>,
                  EVEX_4V;
  }
}
1011
// Scalar single / double compares, selected from X86cmpms.
let Predicates = [HasAVX512] in {
  defm VCMPSSZ : avx512_cmp_scalar<FR32X, f32mem, AVXCC, X86cmpms, f32,
                   loadf32,
                   "vcmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   "vcmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
                 XS;
  defm VCMPSDZ : avx512_cmp_scalar<FR64X, f64mem, AVXCC, X86cmpms, f64,
                   loadf64,
                   "vcmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   "vcmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
                 XD, VEX_W;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001022
// Packed integer compare writing a mask register, parameterized by an
// X86VectorVTInfo.
//   rr  / rm  - unmasked register / memory forms.
//   rrk / rmk - forms whose result is ANDed with the write mask $mask.
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86VectorVTInfo _> {
  def rr : AVX512BI<opc, MRMSrcReg,
                    (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
                    !strconcat(OpcodeStr,
                      "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set _.KRC:$dst,
                          (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))],
                    IIC_SSE_ALU_F32P_RR>,
           EVEX_4V;

  let mayLoad = 1 in
  def rm : AVX512BI<opc, MRMSrcMem,
                    (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
                    !strconcat(OpcodeStr,
                      "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set _.KRC:$dst,
                          (OpNode (_.VT _.RC:$src1),
                                  (_.VT (bitconvert
                                    (_.LdFrag addr:$src2)))))],
                    IIC_SSE_ALU_F32P_RM>,
           EVEX_4V;

  def rrk : AVX512BI<opc, MRMSrcReg,
                     (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                     !strconcat(OpcodeStr,
                       "\t{$src2, $src1, $dst {${mask}}|",
                       "$dst {${mask}}, $src1, $src2}"),
                     [(set _.KRC:$dst,
                           (and _.KRCWM:$mask,
                                (OpNode (_.VT _.RC:$src1),
                                        (_.VT _.RC:$src2))))],
                     IIC_SSE_ALU_F32P_RR>,
            EVEX_4V, EVEX_K;

  let mayLoad = 1 in
  def rmk : AVX512BI<opc, MRMSrcMem,
                     (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
                     !strconcat(OpcodeStr,
                       "\t{$src2, $src1, $dst {${mask}}|",
                       "$dst {${mask}}, $src1, $src2}"),
                     [(set _.KRC:$dst,
                           (and _.KRCWM:$mask,
                                (OpNode (_.VT _.RC:$src1),
                                        (_.VT (bitconvert
                                          (_.LdFrag addr:$src2))))))],
                     IIC_SSE_ALU_F32P_RM>,
            EVEX_4V, EVEX_K;
}
1055
// Extends avx512_icmp_packed with embedded-broadcast memory forms (EVEX_B):
//   rmb  - second operand is a scalar load broadcast via X86VBroadcast.
//   rmbk - same, with the result ANDed with the write mask $mask.
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, X86VectorVTInfo _> :
           avx512_icmp_packed<opc, OpcodeStr, OpNode, _> {
  let mayLoad = 1 in
  def rmb : AVX512BI<opc, MRMSrcMem,
                     (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2),
                     !strconcat(OpcodeStr,
                       "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
                       "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
                     [(set _.KRC:$dst,
                           (OpNode (_.VT _.RC:$src1),
                                   (X86VBroadcast
                                     (_.ScalarLdFrag addr:$src2))))],
                     IIC_SSE_ALU_F32P_RM>,
            EVEX_4V, EVEX_B;

  let mayLoad = 1 in
  def rmbk : AVX512BI<opc, MRMSrcMem,
                      (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
                      !strconcat(OpcodeStr,
                        "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                        "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
                      [(set _.KRC:$dst,
                            (and _.KRCWM:$mask,
                                 (OpNode (_.VT _.RC:$src1),
                                         (X86VBroadcast
                                           (_.ScalarLdFrag addr:$src2)))))],
                      IIC_SSE_ALU_F32P_RM>,
             EVEX_4V, EVEX_K, EVEX_B;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001080
// Instantiates avx512_icmp_packed at all three vector lengths: Z (512-bit)
// is guarded by prd alone; Z256 and Z128 additionally require HasVLX.
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info128>,
                EVEX_V128;
  }
}
1094
// Instantiates avx512_icmp_packed_rmb (compare forms plus embedded-broadcast
// forms) at all three vector lengths: Z (512-bit) is guarded by prd alone;
// Z256 and Z128 additionally require HasVLX.
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     SDNode OpNode,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode,
                                       VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode,
                                       VTInfo.info128>,
                EVEX_V128;
  }
}
1109
// Packed integer equality compares. Byte and word forms are guarded by
// HasBWI; dword and qword forms are guarded by HasAVX512 and also get the
// embedded-broadcast variants.
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
                                      avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>;

defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm,
                                      avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>;

defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm,
                                          avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm,
                                          avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Packed integer greater-than compares, with the same predicate split.
defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
                                      avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>;

defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
                                      avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>;

defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
                                          avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
                                          avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001141
// 256-bit dword compares are performed with the 512-bit instruction: both
// sources are placed in 512-bit registers via SUBREG_TO_REG and the resulting
// mask is copied to VK8.
def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
          (COPY_TO_REGCLASS
            (VPCMPGTDZrr
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))),
            VK8)>;

def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
          (COPY_TO_REGCLASS
            (VPCMPEQDZrr
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))),
            VK8)>;

1151
// avx512_icmp_cc - integer compare-into-mask instructions whose comparison
// predicate is carried in an 8-bit immediate.
//   opc    - opcode byte shared by every form.
//   Suffix - text appended to the "vpcmp" mnemonic.
//   OpNode - compare node matched by the selectable forms.
//   _      - X86VectorVTInfo bundle; RC, KRC, KRCWM, VT, MemOp and LdFrag are
//            read from it.
// Forms:
//   rri / rmi         - register / memory second operand.
//   rrik / rmik       - same, with the result ANDed with a write-mask.
//   *_alt             - assembler-only variants that take the raw immediate
//                       instead of a condition-code mnemonic.
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
                          X86VectorVTInfo _> {
  def rri : AVX512AIi8<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
             !strconcat("vpcmp${cc}", Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                       imm:$cc))],
             IIC_SSE_ALU_F32P_RR>, EVEX_4V;
  let mayLoad = 1 in
  def rmi : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
             !strconcat("vpcmp${cc}", Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                              (_.VT (bitconvert (_.LdFrag addr:$src2))),
                              imm:$cc))],
             IIC_SSE_ALU_F32P_RM>, EVEX_4V;
  def rrik : AVX512AIi8<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                      AVXCC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                         "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                  (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                          imm:$cc)))],
              IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
  let mayLoad = 1 in
  def rmik : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                      AVXCC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                         "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                   (OpNode (_.VT _.RC:$src1),
                                      (_.VT (bitconvert
                                             (_.LdFrag addr:$src2))),
                                      imm:$cc)))],
              IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512AIi8<opc, MRMSrcReg,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, i8imm:$cc),
               !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
                          "$dst, $src1, $src2, $cc}"),
               [], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
    // These memory forms have no pattern from which mayLoad could be inferred,
    // so it is set explicitly to match rmi/rmik.
    let mayLoad = 1 in
    def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, i8imm:$cc),
               !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
                          "$dst, $src1, $src2, $cc}"),
               [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
    def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                       i8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2, $cc}"),
               [], IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
    let mayLoad = 1 in
    def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                       i8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2, $cc}"),
               [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
  }
}
1221
// avx512_icmp_cc_rmb - everything avx512_icmp_cc defines, plus forms whose
// second operand is a scalar memory location broadcast to all elements
// (EVEX_B):
//   rmib / rmibk         - selectable broadcast forms (unmasked / masked).
//   rmib_alt / rmibk_alt - assembler-only forms taking the raw immediate.
// The broadcast decoration printed after the memory operand comes from
// _.BroadcastStr.
multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
                              X86VectorVTInfo _> :
           avx512_icmp_cc<opc, Suffix, OpNode, _> {
  let mayLoad = 1 in {
  def rmib : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                     AVXCC:$cc),
             !strconcat("vpcmp${cc}", Suffix,
                        "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
                        "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                               (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                               imm:$cc))],
             IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
  def rmibk : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2, AVXCC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                       "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                       "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                  (OpNode (_.VT _.RC:$src1),
                                    (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                                    imm:$cc)))],
              IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
  }

  // Accept explicit immediate argument form instead of comparison code.
  // Both definitions read memory and have no pattern to infer that from, so
  // mayLoad is set explicitly alongside hasSideEffects = 0.
  let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
    def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                       i8imm:$cc),
               !strconcat("vpcmp", Suffix,
                   "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
                   "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
               [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
    def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2, i8imm:$cc),
               !strconcat("vpcmp", Suffix,
                  "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                  "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
               [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
  }
}
1267
// Instantiates avx512_icmp_cc for all three vector lengths of VTInfo.
// The 512-bit form (Z) needs only `prd`; the 256-bit (Z256) and 128-bit
// (Z128) forms additionally need HasVLX.
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,
                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info128>,
                EVEX_V128;
  }
}
1278
// Instantiates avx512_icmp_cc_rmb (compare forms including the broadcast
// memory variants) for all three vector lengths of VTInfo. Z is guarded by
// `prd`; Z256 and Z128 are guarded by `prd` and HasVLX.
multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, SDNode OpNode,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info128>,
                EVEX_V128;
  }
}
1292
// Byte compares (signed via X86cmpm, unsigned via X86cmpmu); guarded by HasBWI.
// These use avx512_icmp_cc_vl, i.e. without broadcast memory forms.
defm VPCMPB  : avx512_icmp_cc_vl<0x3F, "b",  X86cmpm,  avx512vl_i8_info,
                                 HasBWI>,
               EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, avx512vl_i8_info,
                                 HasBWI>,
               EVEX_CD8<8, CD8VF>;

// Word compares; guarded by HasBWI, VEX_W set.
defm VPCMPW  : avx512_icmp_cc_vl<0x3F, "w",  X86cmpm,  avx512vl_i16_info,
                                 HasBWI>,
               VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, avx512vl_i16_info,
                                 HasBWI>,
               VEX_W, EVEX_CD8<16, CD8VF>;

// Doubleword compares; guarded by HasAVX512, broadcast forms included.
defm VPCMPD  : avx512_icmp_cc_rmb_vl<0x1F, "d",  X86cmpm,  avx512vl_i32_info,
                                     HasAVX512>,
               EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, avx512vl_i32_info,
                                     HasAVX512>,
               EVEX_CD8<32, CD8VF>;

// Quadword compares; guarded by HasAVX512, VEX_W set, broadcast forms included.
defm VPCMPQ  : avx512_icmp_cc_rmb_vl<0x1F, "q",  X86cmpm,  avx512vl_i64_info,
                                     HasAVX512>,
               VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info,
                                     HasAVX512>,
               VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001312
// avx512_cmp_packed - compare packed instructions
//   KRC      - mask register class receiving the result.
//   RC       - vector register class of the sources.
//   x86memop - memory operand used by the memory forms.
//   vt       - vector value type matched by the selectable patterns.
//   suffix   - text appended to the "vcmp" mnemonic.
//   d        - execution domain passed to AVX512PIi8.
// Forms:
//   rri      - register-register, condition code printed in the mnemonic.
//   rrib     - register-register with {sae} (EVEX_B), no pattern.
//   rmi      - register-memory, condition code printed in the mnemonic.
//   *_alt    - assembler-only forms taking the raw immediate as an operand.
multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
                           X86MemOperand x86memop, ValueType vt,
                           string suffix, Domain d> {
  def rri : AVX512PIi8<0xC2, MRMSrcReg,
             (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
             !strconcat("vcmp${cc}", suffix,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set KRC:$dst, (X86cmpm (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>;
  def rrib: AVX512PIi8<0xC2, MRMSrcReg,
             (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
     !strconcat("vcmp${cc}", suffix,
                " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
                [], d>, EVEX_B;
  // The condition code is already part of the mnemonic (${cc}), so the Intel
  // operand list must not print $cc again; it mirrors the rri form.
  def rmi : AVX512PIi8<0xC2, MRMSrcMem,
             (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc),
              !strconcat("vcmp${cc}", suffix,
                         " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set KRC:$dst,
              (X86cmpm (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512PIi8<0xC2, MRMSrcReg,
               (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
              !strconcat("vcmp", suffix,
                        " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
    // Pattern-less memory form: mayLoad cannot be inferred, so state it.
    let mayLoad = 1 in
    def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem,
               (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
              !strconcat("vcmp", suffix,
                        " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
  }
}
1346
// 512-bit packed single-precision compare into a 16-bit mask.
defm VCMPPSZ : avx512_cmp_packed<VK16, VR512, f512mem, v16f32, "ps",
                                 SSEPackedSingle>,
               PS, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VF>;

// 512-bit packed double-precision compare into an 8-bit mask.
defm VCMPPDZ : avx512_cmp_packed<VK8, VR512, f512mem, v8f64, "pd",
                                 SSEPackedDouble>,
               PD, EVEX_4V, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
1353
// 256-bit compares with an immediate predicate, selected onto the 512-bit
// instructions: each VR256X operand is inserted through sub_ymm into a
// 512-bit value and the resulting mask is copied into VK8.

// v8f32 compare -> VCMPPSZrri.
def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
          (COPY_TO_REGCLASS
            (VCMPPSZrri
              (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
              imm:$cc),
            VK8)>;

// v8i32 compare through X86cmpm -> VPCMPDZrri.
def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
          (COPY_TO_REGCLASS
            (VPCMPDZrri
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
              imm:$cc),
            VK8)>;

// v8i32 compare through X86cmpmu -> VPCMPUDZrri.
def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
          (COPY_TO_REGCLASS
            (VPCMPUDZrri
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
              imm:$cc),
            VK8)>;
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00001369
// Selection of the 512-bit mask_cmp intrinsics when the mask argument is
// all-ones (-1). The rounding argument picks the instruction form:
// FROUND_NO_EXC maps to the rrib definition, FROUND_CURRENT to rri.
// The mask result is copied into a GPR class of the intrinsic's result width.

// FROUND_NO_EXC, single precision.
def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512
                 (v16f32 VR512:$src1), (v16f32 VR512:$src2),
                 imm:$cc, (i16 -1), FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
            (VCMPPSZrrib VR512:$src1, VR512:$src2, (I8Imm imm:$cc)),
            GR16)>;

// FROUND_NO_EXC, double precision.
def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512
                (v8f64 VR512:$src1), (v8f64 VR512:$src2),
                imm:$cc, (i8 -1), FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
            (VCMPPDZrrib VR512:$src1, VR512:$src2, (I8Imm imm:$cc)),
            GR8)>;

// FROUND_CURRENT, single precision.
def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512
                 (v16f32 VR512:$src1), (v16f32 VR512:$src2),
                 imm:$cc, (i16 -1), FROUND_CURRENT)),
          (COPY_TO_REGCLASS
            (VCMPPSZrri VR512:$src1, VR512:$src2, (I8Imm imm:$cc)),
            GR16)>;

// FROUND_CURRENT, double precision.
def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512
                (v8f64 VR512:$src1), (v8f64 VR512:$src2),
                imm:$cc, (i8 -1), FROUND_CURRENT)),
          (COPY_TO_REGCLASS
            (VCMPPDZrri VR512:$src1, VR512:$src2, (I8Imm imm:$cc)),
            GR8)>;
1393
// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
// - copy from GPR to mask register and vice versa
//
// avx512_mask_mov covers the first two groups:
//   kk - mask register to mask register (no pattern).
//   km - load from x86memop; matches a load of `ivt` bitconverted to `vvt`.
//   mk - store to x86memop (no pattern).
// All three are marked hasSideEffects = 0.
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                           string OpcodeStr, RegisterClass KRC,
                           ValueType vvt, ValueType ivt,
                           X86MemOperand x86memop> {
  let hasSideEffects = 0 in
  def kk : I<opc_kk, MRMSrcReg,
             (outs KRC:$dst), (ins KRC:$src),
             OpcodeStr # " \t{$src, $dst|$dst, $src}",
             []>;

  let hasSideEffects = 0, mayLoad = 1 in
  def km : I<opc_km, MRMSrcMem,
             (outs KRC:$dst), (ins x86memop:$src),
             OpcodeStr # " \t{$src, $dst|$dst, $src}",
             [(set KRC:$dst, (vvt (bitconvert (ivt (load addr:$src)))))]>;

  let hasSideEffects = 0, mayStore = 1 in
  def mk : I<opc_mk, MRMDestMem,
             (outs), (ins x86memop:$dst, KRC:$src),
             OpcodeStr # " \t{$src, $dst|$dst, $src}",
             []>;
}
1414
// Moves between a general-purpose register class (GRC) and a mask register
// class (KRC). Neither definition carries a selection pattern; both are
// marked hasSideEffects = 0.
//   kr - GPR to mask register.
//   rk - mask register to GPR.
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                               string OpcodeStr,
                               RegisterClass KRC, RegisterClass GRC> {
  let hasSideEffects = 0 in
  def kr : I<opc_kr, MRMSrcReg,
             (outs KRC:$dst), (ins GRC:$src),
             OpcodeStr # " \t{$src, $dst|$dst, $src}",
             []>;

  let hasSideEffects = 0 in
  def rk : I<opc_rk, MRMSrcReg,
             (outs GRC:$dst), (ins KRC:$src),
             OpcodeStr # " \t{$src, $dst|$dst, $src}",
             []>;
}
1425
// KMOVB: 8-bit mask moves, guarded by HasDQI. The GPR forms use GR32.
let Predicates = [HasDQI] in {
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8,
                               i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;
}

// KMOVW: 16-bit mask moves, guarded by HasAVX512. The GPR forms use GR32.
let Predicates = [HasAVX512] in {
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16,
                               i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;
}

// KMOVD / KMOVQ: 32- and 64-bit mask moves, guarded by HasBWI. The mask/memory
// forms and the GPR forms are instantiated separately because they take
// different prefix classes.
let Predicates = [HasBWI] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32,
                               i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;

  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64,
                               i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, XD, VEX_W;
}
1451
// GR from/to mask register
// Bitconverts between a scalar integer and the same-width vXi1 mask are
// selected onto the KMOV kr/rk forms. For the 8- and 16-bit cases the GPR
// operand of the instruction is 32 bits wide, so the narrow register is
// wrapped with SUBREG_TO_REG on the way in and EXTRACT_SUBREG on the way out.
let Predicates = [HasDQI] in {
  def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
            (KMOVBkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit))>;
  def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
            (EXTRACT_SUBREG (KMOVBrk VK8:$src), sub_8bit)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
            (KMOVWkr (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit))>;
  def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
            (EXTRACT_SUBREG (KMOVWrk VK16:$src), sub_16bit)>;
}

// 32- and 64-bit masks map directly onto GR32 / GR64.
let Predicates = [HasBWI] in {
  def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
            (KMOVDkr GR32:$src)>;
  def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
            (KMOVDrk VK32:$src)>;

  def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
            (KMOVQkr GR64:$src)>;
  def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
            (KMOVQrk VK64:$src)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001473
// Load/store kreg
// Stores of a mask bitconverted to the same-width integer are selected onto
// the KMOV*mk forms where a store of matching width exists.
let Predicates = [HasDQI] in {
  def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
            (KMOVBmk addr:$dst, VK8:$src)>;
}
let Predicates = [HasAVX512] in {
  def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
            (KMOVWmk addr:$dst, VK16:$src)>;
  // Without KMOVB there is no 8-bit mask store. KMOVWmk would write 16 bits
  // and overwrite the byte following $dst, so move the mask to a GPR and
  // store only its low 8 bits.
  def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
            (MOV8mr addr:$dst,
             (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
              sub_8bit))>;
  // NOTE(review): the two load patterns below use the 16-bit KMOVWkm for an
  // i1 / i8 memory location, which presumably reads past the addressed
  // object - confirm whether a zero-extending GPR load should be used instead.
  def : Pat<(i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK1)>;
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK8)>;
}
let Predicates = [HasBWI] in {
  def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst),
            (KMOVDmk addr:$dst, VK32:$src)>;
}
let Predicates = [HasBWI] in {
  def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst),
            (KMOVQmk addr:$dst, VK64:$src)>;
}
Elena Demikhovskyc5f67262013-12-17 08:33:15 +00001497
// Conversions between scalar integers and i1 values held in VK1, all guarded
// by HasAVX512.
let Predicates = [HasAVX512] in {
  // trunc to i1: mask the source down to bit 0 with AND32ri, move it into a
  // mask register with KMOVWkr, then copy to VK1. Sources that are not 32 bits
  // wide are first brought to 32 bits.
  def : Pat<(i1 (trunc (i64 GR64:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr (AND32ri (EXTRACT_SUBREG $src, sub_32bit), (i32 1))),
              VK1)>;

  def : Pat<(i1 (trunc (i32 GR32:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr (AND32ri $src, (i32 1))),
              VK1)>;

  def : Pat<(i1 (trunc (i8 GR8:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr
                (AND32ri (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit), (i32 1))),
              VK1)>;
  def : Pat<(i1 (trunc (i16 GR16:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr
                (AND32ri (SUBREG_TO_REG (i32 0), $src, sub_16bit), (i32 1))),
              VK1)>;

  // zext from i1: widen VK1 to VK16, move to a GPR with KMOVWrk, keep bit 0,
  // then narrow or widen to the requested integer type.
  def : Pat<(i32 (zext VK1:$src)),
            (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
  def : Pat<(i8 (zext VK1:$src)),
            (EXTRACT_SUBREG
              (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
              sub_8bit)>;
  def : Pat<(i64 (zext VK1:$src)),
            (AND64ri8
              (SUBREG_TO_REG (i64 0),
                             (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)),
                             sub_32bit),
              (i64 1))>;
  def : Pat<(i16 (zext VK1:$src)),
            (EXTRACT_SUBREG
              (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
              sub_16bit)>;

  // scalar_to_vector from i1 is a plain register-class copy.
  def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
            (COPY_TO_REGCLASS VK1:$src, VK16)>;
  def : Pat<(v8i1 (scalar_to_vector VK1:$src)),
            (COPY_TO_REGCLASS VK1:$src, VK8)>;
}
// scalar_to_vector from an i1 in VK1 to the 32- and 64-element mask types is
// a register-class copy; guarded by HasBWI.
let Predicates = [HasBWI] in {
  def : Pat<(v32i1 (scalar_to_vector VK1:$src)),
            (COPY_TO_REGCLASS VK1:$src, VK32)>;

  def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
            (COPY_TO_REGCLASS VK1:$src, VK64)>;
}
1539
1540
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
let Predicates = [HasAVX512] in {
  // GR from/to 8-bit mask without native support: go through the 16-bit
  // KMOVW forms and copy between VK8 and VK16 as needed.
  def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
              VK8)>;
  def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
            (EXTRACT_SUBREG
              (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
              sub_8bit)>;

  // Extracting element 0 of a mask as an i1 is a register-class copy to VK1.
  def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK16:$src, VK1)>;
  def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK8:$src, VK1)>;
}
// Extracting element 0 of a 32- or 64-element mask as an i1 is a
// register-class copy to VK1; guarded by HasBWI.
let Predicates = [HasBWI] in {
  def : Pat<(i1 (X86Vextract VK32:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK32:$src, VK1)>;

  def : Pat<(i1 (X86Vextract VK64:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK64:$src, VK1)>;
}
1564
// Mask unary operation
// - KNOT
//
// Defines a single register-to-register instruction `rr` on mask class KRC
// that matches (OpNode KRC:$src) and is guarded by predicate `prd`.
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            Predicate prd> {
  let Predicates = [prd] in {
    def rr : I<opc, MRMSrcReg,
               (outs KRC:$dst), (ins KRC:$src),
               OpcodeStr # " \t{$src, $dst|$dst, $src}",
               [(set KRC:$dst, (OpNode KRC:$src))]>;
  }
}
1575
// Instantiates avx512_mask_unop for every mask width. Each width appends its
// own letter to the mnemonic and has its own predicate and prefix classes:
//   B -> VK8  / HasDQI,  W -> VK16 / HasAVX512,
//   D -> VK32 / HasBWI,  Q -> VK64 / HasBWI.
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode> {
  defm B : avx512_mask_unop<opc, OpcodeStr # "b", VK8, OpNode, HasDQI>,
           VEX, PD;
  defm W : avx512_mask_unop<opc, OpcodeStr # "w", VK16, OpNode, HasAVX512>,
           VEX, PS;
  defm D : avx512_mask_unop<opc, OpcodeStr # "d", VK32, OpNode, HasBWI>,
           VEX, PD, VEX_W;
  defm Q : avx512_mask_unop<opc, OpcodeStr # "q", VK64, OpNode, HasBWI>,
           VEX, PS, VEX_W;
}

// KNOT{B,W,D,Q}: mask complement, matched from `not`.
defm KNOT : avx512_mask_unop_all<0x44, "knot", not>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001589
// Selects the 16-bit mask unary intrinsic int_x86_avx512_<IntName>_w onto the
// instruction <InstName>Wrr. The i16 argument is copied from GR16 into VK16,
// and the instruction result is copied back to GR16.
multiclass avx512_mask_unop_int<string IntName, string InstName> {
  let Predicates = [HasAVX512] in {
    def : Pat<(!cast<Intrinsic>("int_x86_avx512_" # IntName # "_w")
                (i16 GR16:$src)),
              (COPY_TO_REGCLASS
                (!cast<Instruction>(InstName # "Wrr")
                  (v16i1 (COPY_TO_REGCLASS GR16:$src, VK16))),
                GR16)>;
  }
}
defm : avx512_mask_unop_int<"knot", "KNOT">;
1598
// xor of a mask with an all-ones vector is a complement: select the KNOT of
// the matching width, each under the predicate that provides it.
let Predicates = [HasDQI] in {
  def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
            (KNOTBrr VK8:$src1)>;
}
let Predicates = [HasAVX512] in {
  def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)),
            (KNOTWrr VK16:$src1)>;
}
let Predicates = [HasBWI] in {
  def : Pat<(xor VK32:$src1, (v32i1 immAllOnesV)),
            (KNOTDrr VK32:$src1)>;
  def : Pat<(xor VK64:$src1, (v64i1 immAllOnesV)),
            (KNOTQrr VK64:$src1)>;
}
1607
// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
// The 8-bit complement is performed with KNOTWrr on a VK16 copy of the
// operand, and the result is copied back to VK8.
let Predicates = [HasAVX512] in {
  def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
            (COPY_TO_REGCLASS
              (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)),
              VK8)>;

  def : Pat<(not VK8:$src),
            (COPY_TO_REGCLASS
              (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)),
              VK8)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001617
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
//
// Defines a single three-operand instruction `rr` on mask class KRC that
// matches (OpNode KRC:$src1, KRC:$src2) and is guarded by predicate `prd`.
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC, SDPatternOperator OpNode,
                             Predicate prd> {
  let Predicates = [prd] in {
    def rr : I<opc, MRMSrcReg,
               (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
               OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>;
  }
}
1629
// Instantiates avx512_mask_binop for every mask width. Each width appends its
// own letter to the mnemonic and has its own predicate and prefix classes:
//   B -> VK8  / HasDQI,  W -> VK16 / HasAVX512,
//   D -> VK32 / HasBWI,  Q -> VK64 / HasBWI.
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode> {
  defm B : avx512_mask_binop<opc, OpcodeStr # "b", VK8, OpNode, HasDQI>,
           VEX_4V, VEX_L, PD;
  defm W : avx512_mask_binop<opc, OpcodeStr # "w", VK16, OpNode, HasAVX512>,
           VEX_4V, VEX_L, PS;
  defm D : avx512_mask_binop<opc, OpcodeStr # "d", VK32, OpNode, HasBWI>,
           VEX_4V, VEX_L, VEX_W, PD;
  defm Q : avx512_mask_binop<opc, OpcodeStr # "q", VK64, OpNode, HasBWI>,
           VEX_4V, VEX_L, VEX_W, PS;
}
1641
// andn(i0, i1) = (~i0) & i1
def andn : PatFrag<(ops node:$i0, node:$i1),
                   (and (not node:$i0), node:$i1)>;
// xnor(i0, i1) = ~(i0 ^ i1)
def xnor : PatFrag<(ops node:$i0, node:$i1),
                   (not (xor node:$i0, node:$i1))>;
1644
// Commutable mask logic operations, instantiated for all mask widths.
let isCommutable = 1 in {
  defm KAND  : avx512_mask_binop_all<0x41, "kand",  and>;
  defm KOR   : avx512_mask_binop_all<0x45, "kor",   or>;
  defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor>;
  defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor>;
}

// KANDN complements only its first operand (see andn), so it is explicitly
// marked non-commutable.
let isCommutable = 0 in {
  defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001653
// Logic operations on single-bit masks (VK1): both operands are copied to
// VK16, the 16-bit mask instruction is applied, and the result is copied
// back to VK1.
def : Pat<(xor VK1:$src1, VK1:$src2),
          (COPY_TO_REGCLASS
            (KXORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
                     (COPY_TO_REGCLASS VK1:$src2, VK16)),
            VK1)>;

def : Pat<(or VK1:$src1, VK1:$src2),
          (COPY_TO_REGCLASS
            (KORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
                    (COPY_TO_REGCLASS VK1:$src2, VK16)),
            VK1)>;

def : Pat<(and VK1:$src1, VK1:$src2),
          (COPY_TO_REGCLASS
            (KANDWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
                     (COPY_TO_REGCLASS VK1:$src2, VK16)),
            VK1)>;
1665
// Selects the 16-bit mask binary intrinsic int_x86_avx512_<IntName>_w onto
// the instruction <InstName>Wrr. Both i16 arguments are copied from GR16 into
// VK16, and the instruction result is copied back to GR16.
multiclass avx512_mask_binop_int<string IntName, string InstName> {
  let Predicates = [HasAVX512] in {
    def : Pat<(!cast<Intrinsic>("int_x86_avx512_" # IntName # "_w")
                (i16 GR16:$src1), (i16 GR16:$src2)),
              (COPY_TO_REGCLASS
                (!cast<Instruction>(InstName # "Wrr")
                  (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
                  (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))),
                GR16)>;
  }
}

defm : avx512_mask_binop_int<"kand",  "KAND">;
defm : avx512_mask_binop_int<"kandn", "KANDN">;
defm : avx512_mask_binop_int<"kor",   "KOR">;
defm : avx512_mask_binop_int<"kxnor", "KXNOR">;
defm : avx512_mask_binop_int<"kxor",  "KXOR">;
Elena Demikhovskye382c3f2013-12-10 13:53:10 +00001680
// With AVX-512, 8-bit mask is promoted to 16-bit mask.
// For a binary node on VK8 operands, copy both operands to VK16, apply the
// given 16-bit instruction, and copy the result back to VK8.
multiclass avx512_binop_pat<SDPatternOperator OpNode, Instruction Inst> {
  let Predicates = [HasAVX512] in {
    def : Pat<(OpNode VK8:$src1, VK8:$src2),
              (COPY_TO_REGCLASS
                (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                      (COPY_TO_REGCLASS VK8:$src2, VK16)),
                VK8)>;
  }
}

defm : avx512_binop_pat<and,  KANDWrr>;
defm : avx512_binop_pat<andn, KANDNWrr>;
defm : avx512_binop_pat<or,   KORWrr>;
defm : avx512_binop_pat<xnor, KXNORWrr>;
defm : avx512_binop_pat<xor,  KXORWrr>;
1695
// Mask unpacking
// Defines a three-operand instruction `rr` on mask class KRC with no
// selection pattern, guarded by HasAVX512.
multiclass avx512_mask_unpck<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC> {
  let Predicates = [HasAVX512] in {
    def rr : I<opc, MRMSrcReg,
               (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
               OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
               []>;
  }
}
1704
// Byte-to-word variant of the mask unpack: appends "bw" to the mnemonic and
// operates on VK16.
multiclass avx512_mask_unpck_bw<bits<8> opc, string OpcodeStr> {
  defm BW : avx512_mask_unpck<opc, OpcodeStr # "bw", VK16>,
            VEX_4V, VEX_L, PD;
}
1709
defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">;

// Concatenating two v8i1 masks into a v16i1 selects KUNPCKBWrr. Note the
// operand order: $src2 is passed as the first instruction operand and $src1
// as the second.
def : Pat<(v16i1 (concat_vectors (v8i1 VK8:$src1), (v8i1 VK8:$src2))),
          (KUNPCKBWrr
            (COPY_TO_REGCLASS VK8:$src2, VK16),
            (COPY_TO_REGCLASS VK8:$src1, VK16))>;
1714
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001715
// Selects the intrinsic int_x86_avx512_<IntName>_bw onto the instruction
// <InstName>BWrr. Both i16 arguments are copied from GR16 into VK16, and the
// instruction result is copied back to GR16.
multiclass avx512_mask_unpck_int<string IntName, string InstName> {
  let Predicates = [HasAVX512] in {
    def : Pat<(!cast<Intrinsic>("int_x86_avx512_" # IntName # "_bw")
                (i16 GR16:$src1), (i16 GR16:$src2)),
              (COPY_TO_REGCLASS
                (!cast<Instruction>(InstName # "BWrr")
                  (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
                  (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))),
                GR16)>;
  }
}
defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001725
// Mask bit testing
// Defines a two-source instruction `rr` on mask class KRC that has no
// register result; it defines EFLAGS from (OpNode KRC:$src1, KRC:$src2).
// Guarded by HasAVX512.
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode> {
  let Predicates = [HasAVX512], Defs = [EFLAGS] in {
    def rr : I<opc, MRMSrcReg,
               (outs), (ins KRC:$src1, KRC:$src2),
               OpcodeStr # " \t{$src2, $src1|$src1, $src2}",
               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>;
  }
}
1734
// 16-bit variant of the mask test: appends "w" to the mnemonic and operates
// on VK16.
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  defm W : avx512_mask_testop<opc, OpcodeStr # "w", VK16, OpNode>,
           VEX, PS;
}
1739
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;

// Compare of a VK1 value against the i1 constant 0: select KORTESTWrr with
// the same source, copied into VK16, supplied as both operands.
def : Pat<(X86cmp VK1:$src1, (i1 0)),
          (KORTESTWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
                      (COPY_TO_REGCLASS VK1:$src1, VK16))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001745
// Mask shift
//
// Defines one "ri" instruction: a KRC source shifted by an 8-bit immediate,
// producing a KRC result selected through OpNode.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic placed in front of the operand string.
//   KRC       - mask register class of source and destination.
//   OpNode    - SelectionDAG node used in the selection pattern.
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode> {
  let Predicates = [HasAVX512] in
    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst),
                 (ins KRC:$src, i8imm:$imm),
                 !strconcat(OpcodeStr,
                            " \t{$imm, $src, $dst|$dst, $src, $imm}"),
                 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>;
}
1755
// 16-bit ("w") flavour of avx512_mask_shiftop: appends "w" to the mnemonic,
// uses VK16 and tags the result with VEX, TAPD and VEX_W.
// Only opc1 is forwarded; opc2 is accepted but not referenced by any
// definition in this multiclass.
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode> {
  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
           VEX, TAPD, VEX_W;
}
1761
// Left and right mask shifts, selected from X86vshli / X86vsrli respectively.
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86vsrli>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001764
// Mask setting all 0s or 1s
//
// Defines a pseudo instruction (opcode 0, Pseudo format, empty asm string,
// no inputs) whose KRC result is the constant (VT Val).  The record takes the
// name of the instantiating defm (#NAME#).  It is flagged rematerializable
// and as cheap as a move.
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
  let Predicates = [HasAVX512],
      isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1 in
    def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                   [(set KRC:$dst, (VT Val))]>;
}
1772
// Instantiates avx512_mask_setop for both the 8-bit (B: VK8/v8i1) and the
// 16-bit (W: VK16/v16i1) mask types with the same constant fragment Val.
multiclass avx512_mask_setop_w<PatFrag Val> {
  defm B : avx512_mask_setop<VK8,  v8i1,  Val>;
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
}
1777
// KSET0{B,W}: all-zeros mask pseudo.  KSET1{B,W}: all-ones mask pseudo.
defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
1780
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// Constant v8i1 and i1 values are therefore produced with the 16-bit KSET
// pseudos and then copied into the narrower register class (VK8 or VK1).
let Predicates = [HasAVX512] in {
  // v8i1 constants.
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;

  // i1 constants; both 1 and -1 map to the all-ones pseudo.
  def : Pat<(i1 0),  (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(i1 1),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
  def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSET1W), VK1)>;
}
// Low v8i1 half of a v16i1: a plain register-class copy.
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0))),
          (v8i1 (COPY_TO_REGCLASS VK16:$src, VK8))>;

// v8i1 inserted at index 0 of an undef v16i1: a plain register-class copy.
def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))),
          (v16i1 (COPY_TO_REGCLASS VK8:$src, VK16))>;

// High v8i1 half of a v16i1: shift right by 8 with KSHIFTRWri, then copy
// the result into VK8.
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
          (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;

// v8i1 immediate shifts are done with the 16-bit shift instructions: copy the
// source into VK16, shift, and copy the result back into VK8.
def : Pat<(v8i1 (X86vshli VK8:$src, (i8 imm:$imm))),
          (v8i1 (COPY_TO_REGCLASS
                    (KSHIFTLWri (COPY_TO_REGCLASS VK8:$src, VK16),
                                (I8Imm $imm)),
                    VK8))>;

def : Pat<(v8i1 (X86vsrli VK8:$src, (i8 imm:$imm))),
          (v8i1 (COPY_TO_REGCLASS
                    (KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16),
                                (I8Imm $imm)),
                    VK8))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001803//===----------------------------------------------------------------------===//
1804// AVX-512 - Aligned and unaligned load and store
1805//
1806
// Register-move and load forms of a packed AVX-512 move instruction.
//
// Parameters:
//   opc       - opcode byte shared by all forms.
//   OpcodeStr - instruction mnemonic.
//   ld_frag   - load fragment used in the memory-form patterns.
//   KRC       - mask register class of the $mask operand.
//   RC        - vector register class of sources and destination.
//   vt        - value type used in the selection patterns.
//   zvt       - value type of the all-zeros vector used by the rmkz pattern
//               (bitconverted to vt).
//   memop     - memory operand class of the memory forms.
//   d         - execution domain passed to AVX512PI.
//   IsReMaterializable - value given to isReMaterializable on the rm form.
//
// Definitions produced:
//   rr   - unmasked register move (no pattern).
//   rrkz - zero-masked register move (no pattern).
//   rm   - unmasked load.
//   rrk  - merge-masked register move; $src0 is tied to $dst.
//   rmk  - merge-masked load; $src0 is tied to $dst.
//   rmkz - zero-masked load.
multiclass avx512_load<bits<8> opc, string OpcodeStr, PatFrag ld_frag,
                       RegisterClass KRC, RegisterClass RC,
                       ValueType vt, ValueType zvt, X86MemOperand memop,
                       Domain d, bit IsReMaterializable = 1> {
  // Pattern-less register forms: state explicitly that they have no side
  // effects.
  let hasSideEffects = 0 in {
    def rr : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
                      d>, EVEX;
    def rrkz : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
                        (ins KRC:$mask, RC:$src),
                        !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                   "${dst} {${mask}} {z}, $src}"), [], d>,
               EVEX, EVEX_KZ;
  }

  let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable,
      SchedRW = [WriteLoad] in
    def rm : AVX512PI<opc, MRMSrcMem, (outs RC:$dst), (ins memop:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set RC:$dst, (vt (bitconvert (ld_frag addr:$src))))],
                      d>, EVEX;

  // The masked forms carry vselect patterns and get AddedComplexity = 20.
  let AddedComplexity = 20 in {
    // Merge-masking forms: the pass-through value $src0 shares a register
    // with $dst.  hasSideEffects = 0 is set once here for both definitions;
    // the original repeated it in a nested 'let' on rrk, which was redundant.
    let Constraints = "$src0 = $dst", hasSideEffects = 0 in {
      def rrk : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
                         (ins RC:$src0, KRC:$mask, RC:$src1),
                         !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                    "${dst} {${mask}}, $src1}"),
                         [(set RC:$dst, (vt (vselect KRC:$mask,
                                                     (vt RC:$src1),
                                                     (vt RC:$src0))))],
                         d>, EVEX, EVEX_K;
      let mayLoad = 1, SchedRW = [WriteLoad] in
        def rmk : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
                           (ins RC:$src0, KRC:$mask, memop:$src1),
                           !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                      "${dst} {${mask}}, $src1}"),
                           [(set RC:$dst, (vt
                               (vselect KRC:$mask,
                                        (vt (bitconvert (ld_frag addr:$src1))),
                                        (vt RC:$src0))))],
                           d>, EVEX, EVEX_K;
    }

    // Zero-masking load: the false operand of the vselect is an all-zeros
    // vector of type zvt bitconverted to vt.
    let mayLoad = 1, SchedRW = [WriteLoad] in
      def rmkz : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
                          (ins KRC:$mask, memop:$src),
                          !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                     "${dst} {${mask}} {z}, $src}"),
                          [(set RC:$dst, (vt
                              (vselect KRC:$mask,
                                       (vt (bitconvert (ld_frag addr:$src))),
                                       (vt (bitconvert (zvt immAllZerosV))))))],
                          d>, EVEX, EVEX_KZ;
  }
}
1860
// Instantiates avx512_load for the 512-bit (Z), 256-bit (Z256) and 128-bit
// (Z128) vector lengths.
//
// Parameters:
//   opc, OpcodeStr - forwarded unchanged to avx512_load.
//   ld_pat  - prefix of the load fragment name.
//   elty    - element type letter; compared against "f" below and also used
//             as the prefix of the memory operand name (<elty>512mem etc.).
//   elsz    - element size, as a string.
//   vsz512, vsz256, vsz128 - element count for each vector length, as strings.
//   d       - execution domain.
//   prd     - predicate guarding the 512-bit form; the 256/128-bit forms
//             additionally require HasVLX.
//   IsReMaterializable - forwarded unchanged to avx512_load.
//
// Name construction:
//   mask class  : "VK" <vsz> "WM"
//   value type  : "v" <vsz> <elty> <elsz>
//   load frag   : 512-bit uses <ld_pat> "v" <vsz512> <elty> <elsz>;
//                 256/128-bit use the same scheme when elty is "f", and
//                 <ld_pat> "v4i64" / <ld_pat> "v2i64" otherwise.
//   zero vector : v16i32 / v8i32 / v4i32.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, string ld_pat,
                          string elty, string elsz, string vsz512,
                          string vsz256, string vsz128, Domain d,
                          Predicate prd, bit IsReMaterializable = 1> {
  let Predicates = [prd] in
    defm Z : avx512_load<opc, OpcodeStr,
                         !cast<PatFrag>(ld_pat##"v"##vsz512##elty##elsz),
                         !cast<RegisterClass>("VK"##vsz512##"WM"), VR512,
                         !cast<ValueType>("v"##vsz512##elty##elsz), v16i32,
                         !cast<X86MemOperand>(elty##"512mem"), d,
                         IsReMaterializable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_load<opc, OpcodeStr,
                            !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
                                "v"##vsz256##elty##elsz, "v4i64")),
                            !cast<RegisterClass>("VK"##vsz256##"WM"), VR256X,
                            !cast<ValueType>("v"##vsz256##elty##elsz), v8i32,
                            !cast<X86MemOperand>(elty##"256mem"), d,
                            IsReMaterializable>, EVEX_V256;

    defm Z128 : avx512_load<opc, OpcodeStr,
                            !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
                                "v"##vsz128##elty##elsz, "v2i64")),
                            !cast<RegisterClass>("VK"##vsz128##"WM"), VR128X,
                            !cast<ValueType>("v"##vsz128##elty##elsz), v4i32,
                            !cast<X86MemOperand>(elty##"128mem"), d,
                            IsReMaterializable>, EVEX_V128;
  }
}
1891
1892
// Store-direction forms of a packed AVX-512 move instruction.
//
// Parameters:
//   opc       - opcode byte shared by all forms.
//   OpcodeStr - instruction mnemonic.
//   st_frag   - store fragment used by the "mr" pattern.
//   OpVT      - value type of the stored register in the "mr" pattern.
//   KRC       - mask register class of the $mask operand.
//   RC        - vector register class.
//   memop     - memory operand class of the memory forms.
//   d         - execution domain passed to AVX512PI.
//
// Definitions produced:
//   rr_alt, rrk_alt, rrkz_alt - MRMDestReg register-to-register forms, marked
//                               isAsmParserOnly and without patterns.
//   mr  - unmasked store with a selection pattern.
//   mrk - masked store without a pattern.
multiclass avx512_store<bits<8> opc, string OpcodeStr, PatFrag st_frag,
                        ValueType OpVT, RegisterClass KRC, RegisterClass RC,
                        X86MemOperand memop, Domain d> {
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rr_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst), (ins RC:$src),
                          !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                          [], d>,
                 EVEX;

    // Merge-masking form: $src1 is tied to $dst.
    let Constraints = "$src1 = $dst" in
      def rrk_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
                             (ins RC:$src1, KRC:$mask, RC:$src2),
                             !strconcat(OpcodeStr,
                               "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
                             [], d>,
                    EVEX, EVEX_K;

    def rrkz_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
                            (ins KRC:$mask, RC:$src),
                            !strconcat(OpcodeStr,
                              "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
                            [], d>,
                   EVEX, EVEX_KZ;
  }

  let mayStore = 1 in {
    def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(st_frag (OpVT RC:$src), addr:$dst)], d>,
             EVEX;

    def mrk : AVX512PI<opc, MRMDestMem, (outs),
                       (ins memop:$dst, KRC:$mask, RC:$src),
                       !strconcat(OpcodeStr,
                         "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
                       [], d>,
              EVEX, EVEX_K;
  }
}
1923
Robert Khasanov7ca7df02014-08-04 14:35:15 +00001924
// Instantiates avx512_store for the 512-bit (Z), 256-bit (Z256) and 128-bit
// (Z128) vector lengths.
//
// Parameters:
//   opc, OpcodeStr - forwarded unchanged to avx512_store.
//   st_pat  - prefix of the store fragment name.
//   st_suff_512, st_suff_256, st_suff_128 - suffix appended to st_pat to form
//             the store fragment name for each vector length.
//   elty    - element type letter, also the prefix of the memory operand name.
//   elsz    - element size, as a string.
//   vsz512, vsz256, vsz128 - element count for each vector length, as strings.
//   d       - execution domain.
//   prd     - predicate guarding the 512-bit form; the 256/128-bit forms
//             additionally require HasVLX.
multiclass avx512_store_vl<bits<8> opc, string OpcodeStr, string st_pat,
                           string st_suff_512, string st_suff_256,
                           string st_suff_128, string elty, string elsz,
                           string vsz512, string vsz256, string vsz128,
                           Domain d, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_store<opc, OpcodeStr,
                          !cast<PatFrag>(st_pat##st_suff_512),
                          !cast<ValueType>("v"##vsz512##elty##elsz),
                          !cast<RegisterClass>("VK"##vsz512##"WM"), VR512,
                          !cast<X86MemOperand>(elty##"512mem"), d>,
             EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr,
                             !cast<PatFrag>(st_pat##st_suff_256),
                             !cast<ValueType>("v"##vsz256##elty##elsz),
                             !cast<RegisterClass>("VK"##vsz256##"WM"), VR256X,
                             !cast<X86MemOperand>(elty##"256mem"), d>,
                EVEX_V256;

    defm Z128 : avx512_store<opc, OpcodeStr,
                             !cast<PatFrag>(st_pat##st_suff_128),
                             !cast<ValueType>("v"##vsz128##elty##elsz),
                             !cast<RegisterClass>("VK"##vsz128##"WM"), VR128X,
                             !cast<X86MemOperand>(elty##"128mem"), d>,
                EVEX_V128;
  }
}
1948
// Packed floating-point moves.  Each defm combines the load forms (opcode
// 0x28 / 0x10) with the store forms (opcode 0x29 / 0x11).
//
// The aligned variants use the "alignedload" fragments for loads and the
// store fragments "alignedstore512", "alignedstore256" and "alignedstore".
// The unaligned variants use the "load" fragments and the plain "store"
// fragment for every vector length.

// Aligned, single precision.
defm VMOVAPS : avx512_load_vl<0x28, "vmovaps", "alignedload", "f", "32",
                              "16", "8", "4", SSEPackedSingle, HasAVX512>,
               avx512_store_vl<0x29, "vmovaps", "alignedstore",
                               "512", "256", "", "f", "32", "16", "8", "4",
                               SSEPackedSingle, HasAVX512>,
               PS, EVEX_CD8<32, CD8VF>;

// Aligned, double precision.
defm VMOVAPD : avx512_load_vl<0x28, "vmovapd", "alignedload", "f", "64",
                              "8", "4", "2", SSEPackedDouble, HasAVX512>,
               avx512_store_vl<0x29, "vmovapd", "alignedstore",
                               "512", "256", "", "f", "64", "8", "4", "2",
                               SSEPackedDouble, HasAVX512>,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned, single precision.
defm VMOVUPS : avx512_load_vl<0x10, "vmovups", "load", "f", "32",
                              "16", "8", "4", SSEPackedSingle, HasAVX512>,
               avx512_store_vl<0x11, "vmovups", "store", "", "", "", "f", "32",
                               "16", "8", "4", SSEPackedSingle, HasAVX512>,
               PS, EVEX_CD8<32, CD8VF>;

// Unaligned, double precision.  This is the only instantiation here that
// passes IsReMaterializable = 0 to avx512_load_vl.
defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", "load", "f", "64",
                              "8", "4", "2", SSEPackedDouble, HasAVX512, 0>,
               avx512_store_vl<0x11, "vmovupd", "store", "", "", "", "f", "64",
                               "8", "4", "2", SSEPackedDouble, HasAVX512>,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;
1974
// Masked unaligned FP load intrinsics with an all-zeros pass-through operand
// select the zero-masking load form (rmkz).  The GPR mask is copied into the
// write-mask register class first.
def : Pat<(v8f64 (int_x86_avx512_mask_loadu_pd_512 addr:$ptr,
                     (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
          (VMOVUPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                        addr:$ptr)>;

def : Pat<(v16f32 (int_x86_avx512_mask_loadu_ps_512 addr:$ptr,
                      (bc_v16f32 (v16i32 immAllZerosV)), GR16:$mask)),
          (VMOVUPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                        addr:$ptr)>;

// Masked unaligned FP store intrinsics select the masked store form (mrk).
def : Pat<(int_x86_avx512_mask_storeu_ps_512 addr:$ptr, (v16f32 VR512:$src),
              GR16:$mask),
          (VMOVUPSZmrk addr:$ptr,
                       (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                       VR512:$src)>;

def : Pat<(int_x86_avx512_mask_storeu_pd_512 addr:$ptr, (v8f64 VR512:$src),
              GR8:$mask),
          (VMOVUPDZmrk addr:$ptr,
                       (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                       VR512:$src)>;
Elena Demikhovsky1f3ed412013-10-22 09:19:28 +00001991
// Packed integer moves.  Loads use opcode 0x6F and stores opcode 0x7F; the
// variants differ in fragment names, element size, predicate and prefix.

// Aligned, 32-bit and 64-bit elements (HasAVX512, PD prefix).
defm VMOVDQA32 : avx512_load_vl<0x6F, "vmovdqa32", "alignedload", "i", "32",
                                "16", "8", "4", SSEPackedInt, HasAVX512>,
                 avx512_store_vl<0x7F, "vmovdqa32", "alignedstore",
                                 "512", "256", "", "i", "32", "16", "8", "4",
                                 SSEPackedInt, HasAVX512>,
                 PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 : avx512_load_vl<0x6F, "vmovdqa64", "alignedload", "i", "64",
                                "8", "4", "2", SSEPackedInt, HasAVX512>,
                 avx512_store_vl<0x7F, "vmovdqa64", "alignedstore",
                                 "512", "256", "", "i", "64", "8", "4", "2",
                                 SSEPackedInt, HasAVX512>,
                 PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned, 8-bit and 16-bit elements (HasBWI, XD prefix).
defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", "load", "i", "8",
                               "64", "32", "16", SSEPackedInt, HasBWI>,
                avx512_store_vl<0x7F, "vmovdqu8", "store", "", "", "",
                                "i", "8", "64", "32", "16", SSEPackedInt,
                                HasBWI>,
                XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", "load", "i", "16",
                                "32", "16", "8", SSEPackedInt, HasBWI>,
                 avx512_store_vl<0x7F, "vmovdqu16", "store", "", "", "",
                                 "i", "16", "32", "16", "8", SSEPackedInt,
                                 HasBWI>,
                 XD, VEX_W, EVEX_CD8<16, CD8VF>;

// Unaligned, 32-bit and 64-bit elements (HasAVX512, XS prefix).
defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", "load", "i", "32",
                                "16", "8", "4", SSEPackedInt, HasAVX512>,
                 avx512_store_vl<0x7F, "vmovdqu32", "store", "", "", "",
                                 "i", "32", "16", "8", "4", SSEPackedInt,
                                 HasAVX512>,
                 XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", "load", "i", "64",
                                "8", "4", "2", SSEPackedInt, HasAVX512>,
                 avx512_store_vl<0x7F, "vmovdqu64", "store", "", "", "",
                                 "i", "64", "8", "4", "2", SSEPackedInt,
                                 HasAVX512>,
                 XS, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky1f3ed412013-10-22 09:19:28 +00002029
// Masked unaligned integer load intrinsics with an all-zeros pass-through
// operand select the zero-masking load form (rmkz).  The GPR mask is copied
// into the write-mask register class first.
def : Pat<(v16i32 (int_x86_avx512_mask_loadu_d_512 addr:$ptr,
                      (v16i32 immAllZerosV), GR16:$mask)),
          (VMOVDQU32Zrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                          addr:$ptr)>;

def : Pat<(v8i64 (int_x86_avx512_mask_loadu_q_512 addr:$ptr,
                     (bc_v8i64 (v16i32 immAllZerosV)), GR8:$mask)),
          (VMOVDQU64Zrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                          addr:$ptr)>;

// Masked unaligned integer store intrinsics select the masked store form
// (mrk).
def : Pat<(int_x86_avx512_mask_storeu_d_512 addr:$ptr, (v16i32 VR512:$src),
              GR16:$mask),
          (VMOVDQU32Zmrk addr:$ptr,
                         (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                         VR512:$src)>;

def : Pat<(int_x86_avx512_mask_storeu_q_512 addr:$ptr, (v8i64 VR512:$src),
              GR8:$mask),
          (VMOVDQU64Zmrk addr:$ptr,
                         (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                         VR512:$src)>;
2046
// vselect between a 512-bit integer vector and an all-zeros vector is
// selected as a zero-masking register move (rrkz).  When the zeros are the
// "true" operand, the mask is inverted with KNOTWrr first.
let AddedComplexity = 20 in {
  // v8i64: mask ? src : 0
  def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src),
                            (bc_v8i64 (v16i32 immAllZerosV)))),
            (VMOVDQU64Zrrkz VK8WM:$mask, VR512:$src)>;

  // v8i64: mask ? 0 : src
  // The 8-bit mask is copied into VK16 for the 16-bit KNOT and the result is
  // copied back into VK8.  $mask is written with the same register class
  // (VK8WM) as in the source DAG; the original result DAG spelled it
  // VK8:$mask, which did not agree with the source DAG or with the sibling
  // patterns.  The operand is bound by name, so matching is unaffected.
  def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
                            (v8i64 VR512:$src))),
            (VMOVDQU64Zrrkz
                (COPY_TO_REGCLASS
                    (KNOTWrr (COPY_TO_REGCLASS VK8WM:$mask, VK16)), VK8),
                VR512:$src)>;

  // v16i32: mask ? src : 0
  def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src),
                             (v16i32 immAllZerosV))),
            (VMOVDQU32Zrrkz VK16WM:$mask, VR512:$src)>;

  // v16i32: mask ? 0 : src
  def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
                             (v16i32 VR512:$src))),
            (VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
}
Robert Khasanov7ca7df02014-08-04 14:35:15 +00002065
// Move Int Doubleword to Packed Double Int
//
// 32-bit GPR -> low element of a v4i32 in VR128X.
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg,
                             (outs VR128X:$dst), (ins GR32:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set VR128X:$dst,
                                   (v4i32 (scalar_to_vector GR32:$src)))],
                             IIC_SSE_MOVDQ>,
                    EVEX, VEX_LIG;

// 32-bit memory -> low element of a v4i32 in VR128X.
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem,
                             (outs VR128X:$dst), (ins i32mem:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set VR128X:$dst,
                                   (v4i32 (scalar_to_vector
                                              (loadi32 addr:$src))))],
                             IIC_SSE_MOVDQ>,
                    EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;

// 64-bit GPR -> low element of a v2i64 in VR128X.
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg,
                              (outs VR128X:$dst), (ins GR64:$src),
                              "vmovq\t{$src, $dst|$dst, $src}",
                              [(set VR128X:$dst,
                                    (v2i64 (scalar_to_vector GR64:$src)))],
                              IIC_SSE_MOVDQ>,
                     EVEX, VEX_W, VEX_LIG;

// Bit-preserving moves between GR64 and FR64, for code generation only.
let isCodeGenOnly = 1 in {
  def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg,
                               (outs FR64:$dst), (ins GR64:$src),
                               "vmovq\t{$src, $dst|$dst, $src}",
                               [(set FR64:$dst, (bitconvert GR64:$src))],
                               IIC_SSE_MOVDQ>,
                      EVEX, VEX_W, Sched<[WriteMove]>;

  def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg,
                               (outs GR64:$dst), (ins FR64:$src),
                               "vmovq\t{$src, $dst|$dst, $src}",
                               [(set GR64:$dst, (bitconvert FR64:$src))],
                               IIC_SSE_MOVDQ>,
                      EVEX, VEX_W, Sched<[WriteMove]>;
}

// Store the bits of an FR64 value to 64-bit memory.
def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem,
                             (outs), (ins i64mem:$dst, FR64:$src),
                             "vmovq\t{$src, $dst|$dst, $src}",
                             [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
                             IIC_SSE_MOVDQ>,
                    EVEX, VEX_W, Sched<[WriteStore]>,
                    EVEX_CD8<64, CD8VT1>;
2098
// Move Int Doubleword to Single Scalar
//
// Bit-preserving moves of a 32-bit integer (from GR32 or from memory) into
// FR32X, for code generation only.
let isCodeGenOnly = 1 in {
  def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg,
                              (outs FR32X:$dst), (ins GR32:$src),
                              "vmovd\t{$src, $dst|$dst, $src}",
                              [(set FR32X:$dst, (bitconvert GR32:$src))],
                              IIC_SSE_MOVDQ>,
                     EVEX, VEX_LIG;

  def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem,
                              (outs FR32X:$dst), (ins i32mem:$src),
                              "vmovd\t{$src, $dst|$dst, $src}",
                              [(set FR32X:$dst,
                                    (bitconvert (loadi32 addr:$src)))],
                              IIC_SSE_MOVDQ>,
                     EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002112
// Move doubleword from xmm register to r/m32
//
// Both forms extract element 0 of a v4i32 held in VR128X.
def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg,
                             (outs GR32:$dst), (ins VR128X:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set GR32:$dst,
                                   (vector_extract (v4i32 VR128X:$src),
                                                   (iPTR 0)))],
                             IIC_SSE_MOVD_ToGP>,
                    EVEX, VEX_LIG;

def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem,
                             (outs), (ins i32mem:$dst, VR128X:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(store (i32 (vector_extract (v4i32 VR128X:$src),
                                                          (iPTR 0))),
                                     addr:$dst)],
                             IIC_SSE_MOVDQ>,
                    EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
2126
// Move quadword from xmm1 register to r/m64
//
// Both forms extract element 0 of a v2i64 held in VR128X and require
// HasAVX512 together with In64BitMode.  Note the differing opcodes: 0x7E for
// the register form and 0xD6 for the memory form.
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg,
                       (outs GR64:$dst), (ins VR128X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
                                                    (iPTR 0)))],
                       IIC_SSE_MOVD_ToGP>,
                     PD, EVEX, VEX_LIG, VEX_W,
                     Requires<[HasAVX512, In64BitMode]>;

def VMOVPQIto64Zmr : I<0xD6, MRMDestMem,
                       (outs), (ins i64mem:$dst, VR128X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
                               addr:$dst)],
                       IIC_SSE_MOVDQ>,
                     EVEX, PD, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>,
                     Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
2143
// Move Scalar Single to Double Int
//
// Bit-preserving moves of an FR32X value into GR32 or into 32-bit memory,
// for code generation only.
let isCodeGenOnly = 1 in {
  def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg,
                              (outs GR32:$dst), (ins FR32X:$src),
                              "vmovd\t{$src, $dst|$dst, $src}",
                              [(set GR32:$dst, (bitconvert FR32X:$src))],
                              IIC_SSE_MOVD_ToGP>,
                     EVEX, VEX_LIG;

  def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem,
                              (outs), (ins i32mem:$dst, FR32X:$src),
                              "vmovd\t{$src, $dst|$dst, $src}",
                              [(store (i32 (bitconvert FR32X:$src)),
                                      addr:$dst)],
                              IIC_SSE_MOVDQ>,
                     EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002158
// Move Quadword Int to Packed Quadword Int
//
// 64-bit memory -> low element of a v2i64 in VR128X.  No itinerary argument
// is passed for this definition.
def VMOVQI2PQIZrm : AVX512BI<0x6E, MRMSrcMem,
                             (outs VR128X:$dst), (ins i64mem:$src),
                             "vmovq\t{$src, $dst|$dst, $src}",
                             [(set VR128X:$dst,
                                   (v2i64 (scalar_to_vector
                                              (loadi64 addr:$src))))]>,
                    EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
2167
2168//===----------------------------------------------------------------------===//
2169// AVX-512 MOVSS, MOVSD
2170//===----------------------------------------------------------------------===//
2171
// Scalar move forms (opcode 0x10 for register/load, 0x11 for store).
//
// Parameters:
//   asm      - mnemonic placed in front of the operand string.
//   RC       - scalar register class.
//   OpNode   - SelectionDAG node used by the "rr" pattern.
//   vt       - vector value type produced by the "rr" pattern.
//   x86memop - memory operand class of the memory forms.
//   mem_pat  - load fragment used by the "rm" pattern.
//
// Definitions produced:
//   rr  - register form: (OpNode $src1, (scalar_to_vector $src2)).
//   rrk - masked register form, $src1 tied to $dst, no pattern.
//   rm  - load into RC.
//   mr  - store from RC.
//   mrk - masked store, no pattern.
multiclass avx512_move_scalar <string asm, RegisterClass RC,
                               SDNode OpNode, ValueType vt,
                               X86MemOperand x86memop, PatFrag mem_pat> {
  let hasSideEffects = 0 in {
    def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst),
                (ins VR128X:$src1, RC:$src2),
                !strconcat(asm, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set VR128X:$dst,
                      (vt (OpNode VR128X:$src1,
                                  (scalar_to_vector RC:$src2))))],
                IIC_SSE_MOV_S_RR>,
             EVEX_4V, VEX_LIG;

    // The tie constraint applies to rrk only.
    let Constraints = "$src1 = $dst" in
      def rrk : SI<0x10, MRMSrcReg, (outs VR128X:$dst),
                   (ins VR128X:$src1, VK1WM:$mask, RC:$src2, RC:$src3),
                   !strconcat(asm,
                     " \t{$src3, $src2, $dst {${mask}}|$dst {${mask}}, $src2, $src3}"),
                   [], IIC_SSE_MOV_S_RR>,
                EVEX_4V, VEX_LIG, EVEX_K;

    def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
                [(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>,
             EVEX, VEX_LIG;

    let mayStore = 1 in {
      def mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
                  !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
                  [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
               EVEX, VEX_LIG;

      def mrk : SI<0x11, MRMDestMem, (outs),
                   (ins x86memop:$dst, VK1WM:$mask, RC:$src),
                   !strconcat(asm,
                     " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
                   [], IIC_SSE_MOV_S_MR>,
                EVEX, VEX_LIG, EVEX_K;
    } // mayStore
  } // hasSideEffects = 0
}
2203
// Single-precision scalar move: FR32X, v4f32, 32-bit memory operand.
let ExeDomain = SSEPackedSingle in
  defm VMOVSSZ : avx512_move_scalar<"movss", FR32X, X86Movss, v4f32, f32mem,
                                    loadf32>,
                 XS, EVEX_CD8<32, CD8VT1>;

// Double-precision scalar move: FR64X, v2f64, 64-bit memory operand.
let ExeDomain = SSEPackedDouble in
  defm VMOVSDZ : avx512_move_scalar<"movsd", FR64X, X86Movsd, v2f64, f64mem,
                                    loadf64>,
                 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
2211
// Scalar X86select is selected as the masked register move (rrk).  $src2 is
// copied into VR128X and used as the tied first operand, the middle scalar
// operand is IMPLICIT_DEF, $src1 is the last operand, and the result is
// copied back into the scalar register class.
def : Pat<(f32 (X86select VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS
              (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
                          VK1WM:$mask, (f32 (IMPLICIT_DEF)), FR32X:$src1),
              FR32X)>;

def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS
              (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
                          VK1WM:$mask, (f64 (IMPLICIT_DEF)), FR64X:$src1),
              FR64X)>;

// The masked scalar store intrinsic is selected as VMOVSSZmrk; the GR8 mask
// is copied into VK1WM and the VR128X source into FR32X.
def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
          (VMOVSSZmrk addr:$dst,
                      (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)),
                      (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
2223
// For the disassembler
//
// Register-to-register scalar moves in the MRMDestReg form with opcode 0x11.
// They carry no patterns and are marked isCodeGenOnly with ForceDisassemble.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
  def VMOVSSZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
                         (ins VR128X:$src1, FR32X:$src2),
                         "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [], IIC_SSE_MOV_S_RR>,
                      XS, EVEX_4V, VEX_LIG;

  def VMOVSDZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
                         (ins VR128X:$src1, FR64X:$src2),
                         "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [], IIC_SSE_MOV_S_RR>,
                      XD, EVEX_4V, VEX_LIG, VEX_W;
}
2237
2238let Predicates = [HasAVX512] in {
2239 let AddedComplexity = 15 in {
2240 // Move scalar to XMM zero-extended, zeroing a VR128X then do a
2241 // MOVS{S,D} to the lower bits.
2242 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))),
2243 (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)>;
2244 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
2245 (VMOVSSZrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
2246 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
2247 (VMOVSSZrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
2248 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
2249 (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)>;
2250
2251 // Move low f32 and clear high bits.
2252 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
2253 (SUBREG_TO_REG (i32 0),
2254 (VMOVSSZrr (v4f32 (V_SET0)),
2255 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
2256 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
2257 (SUBREG_TO_REG (i32 0),
2258 (VMOVSSZrr (v4i32 (V_SET0)),
2259 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
2260 }
2261
2262 let AddedComplexity = 20 in {
2263 // MOVSSrm zeros the high parts of the register; represent this
2264 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
2265 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
2266 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
2267 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
2268 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
2269 def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
2270 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
2271
2272 // MOVSDrm zeros the high parts of the register; represent this
2273 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
2274 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
2275 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2276 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
2277 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2278 def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
2279 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2280 def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
2281 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2282 def : Pat<(v2f64 (X86vzload addr:$src)),
2283 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2284
2285 // Represent the same patterns above but in the form they appear for
2286 // 256-bit types
2287 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
2288 (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
Elena Demikhovsky34586e72013-10-02 12:20:42 +00002289 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002290 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
2291 (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
2292 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
2293 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
2294 (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
2295 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
2296 }
2297 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
2298 (v4f32 (scalar_to_vector FR32X:$src)), (iPTR 0)))),
2299 (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSZrr (v4f32 (V_SET0)),
2300 FR32X:$src)), sub_xmm)>;
2301 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
2302 (v2f64 (scalar_to_vector FR64X:$src)), (iPTR 0)))),
2303 (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDZrr (v2f64 (V_SET0)),
2304 FR64X:$src)), sub_xmm)>;
2305 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
2306 (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
Elena Demikhovsky34586e72013-10-02 12:20:42 +00002307 (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002308
2309 // Move low f64 and clear high bits.
2310 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
2311 (SUBREG_TO_REG (i32 0),
2312 (VMOVSDZrr (v2f64 (V_SET0)),
2313 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
2314
2315 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
2316 (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (V_SET0)),
2317 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
2318
2319 // Extract and store.
2320 def : Pat<(store (f32 (vector_extract (v4f32 VR128X:$src), (iPTR 0))),
2321 addr:$dst),
2322 (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
2323 def : Pat<(store (f64 (vector_extract (v2f64 VR128X:$src), (iPTR 0))),
2324 addr:$dst),
2325 (VMOVSDZmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X))>;
2326
2327 // Shuffle with VMOVSS
2328 def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
2329 (VMOVSSZrr (v4i32 VR128X:$src1),
2330 (COPY_TO_REGCLASS (v4i32 VR128X:$src2), FR32X))>;
2331 def : Pat<(v4f32 (X86Movss VR128X:$src1, VR128X:$src2)),
2332 (VMOVSSZrr (v4f32 VR128X:$src1),
2333 (COPY_TO_REGCLASS (v4f32 VR128X:$src2), FR32X))>;
2334
2335 // 256-bit variants
2336 def : Pat<(v8i32 (X86Movss VR256X:$src1, VR256X:$src2)),
2337 (SUBREG_TO_REG (i32 0),
2338 (VMOVSSZrr (EXTRACT_SUBREG (v8i32 VR256X:$src1), sub_xmm),
2339 (EXTRACT_SUBREG (v8i32 VR256X:$src2), sub_xmm)),
2340 sub_xmm)>;
2341 def : Pat<(v8f32 (X86Movss VR256X:$src1, VR256X:$src2)),
2342 (SUBREG_TO_REG (i32 0),
2343 (VMOVSSZrr (EXTRACT_SUBREG (v8f32 VR256X:$src1), sub_xmm),
2344 (EXTRACT_SUBREG (v8f32 VR256X:$src2), sub_xmm)),
2345 sub_xmm)>;
2346
2347 // Shuffle with VMOVSD
2348 def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
2349 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2350 def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)),
2351 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2352 def : Pat<(v4f32 (X86Movsd VR128X:$src1, VR128X:$src2)),
2353 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2354 def : Pat<(v4i32 (X86Movsd VR128X:$src1, VR128X:$src2)),
2355 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2356
2357 // 256-bit variants
2358 def : Pat<(v4i64 (X86Movsd VR256X:$src1, VR256X:$src2)),
2359 (SUBREG_TO_REG (i32 0),
2360 (VMOVSDZrr (EXTRACT_SUBREG (v4i64 VR256X:$src1), sub_xmm),
2361 (EXTRACT_SUBREG (v4i64 VR256X:$src2), sub_xmm)),
2362 sub_xmm)>;
2363 def : Pat<(v4f64 (X86Movsd VR256X:$src1, VR256X:$src2)),
2364 (SUBREG_TO_REG (i32 0),
2365 (VMOVSDZrr (EXTRACT_SUBREG (v4f64 VR256X:$src1), sub_xmm),
2366 (EXTRACT_SUBREG (v4f64 VR256X:$src2), sub_xmm)),
2367 sub_xmm)>;
2368
2369 def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
2370 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2371 def : Pat<(v2i64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
2372 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2373 def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
2374 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2375 def : Pat<(v4i32 (X86Movlps VR128X:$src1, VR128X:$src2)),
2376 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2377}
2378
// Register form of vmovq: selected for X86vzmovl applied to a v2i64 value
// held in a VR128X register.
let AddedComplexity = 15 in {
  def VMOVZPQILo2PQIZrr
      : AVX512XSI<0x7E, MRMSrcReg,
                  (outs VR128X:$dst), (ins VR128X:$src),
                  "vmovq\t{$src, $dst|$dst, $src}",
                  [(set VR128X:$dst,
                        (v2i64 (X86vzmovl (v2i64 VR128X:$src))))],
                  IIC_SSE_MOVQ_RR>,
        EVEX, VEX_W;
}
2386
// Memory form of vmovq: selected for X86vzmovl applied to a v2i64 load.
let AddedComplexity = 20 in {
  def VMOVZPQILo2PQIZrm
      : AVX512XSI<0x7E, MRMSrcMem,
                  (outs VR128X:$dst), (ins i128mem:$src),
                  "vmovq\t{$src, $dst|$dst, $src}",
                  [(set VR128X:$dst,
                        (v2i64 (X86vzmovl (loadv2i64 addr:$src))))],
                  IIC_SSE_MOVDQ>,
        EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
}
2395
// The AVX 128-bit movd/movq instructions write zeros into the upper part of
// the destination, so X86vzmovl of a scalar can be selected to them directly.
let Predicates = [HasAVX512], AddedComplexity = 20 in {
  // Scalar (memory or GPR) moved into element 0, remaining elements zeroed.
  def : Pat<(v4i32 (X86vzmovl
                       (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIZrr GR64:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIZrr GR32:$src)>;

  // Full-vector loads of which only the low element is kept.
  def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
            (VMOVZPQILo2PQIZrm addr:$src)>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
            (VMOVZPQILo2PQIZrr VR128X:$src)>;
  def : Pat<(v2i64 (X86vzload addr:$src)),
            (VMOVZPQILo2PQIZrm addr:$src)>;
}

// Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i32 (X86vzmovl
                       (insert_subvector undef,
                                         (v4i32 (scalar_to_vector GR32:$src)),
                                         (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl
                       (insert_subvector undef,
                                         (v2i64 (scalar_to_vector GR64:$src)),
                                         (iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
}
2426
// Inserting a GPR into element 0 of an all-zeros 512-bit vector is selected
// as a 128-bit GPR-to-XMM move wrapped in SUBREG_TO_REG.
def : Pat<(v16i32 (X86Vinsert (v16i32 immAllZerosV), GR32:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0),
                         (VMOVDI2PDIZrr GR32:$src2),
                         sub_xmm)>;

def : Pat<(v8i64 (X86Vinsert (bc_v8i64 (v16i32 immAllZerosV)),
                             GR64:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0),
                         (VMOV64toPQIZrr GR64:$src2),
                         sub_xmm)>;

// Same selection when the vector being inserted into is undef.
def : Pat<(v16i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0),
                         (VMOVDI2PDIZrr GR32:$src2),
                         sub_xmm)>;

def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0),
                         (VMOV64toPQIZrr GR64:$src2),
                         sub_xmm)>;
2438
2439//===----------------------------------------------------------------------===//
Adam Nemet7f62b232014-06-10 16:39:53 +00002440// AVX-512 - Non-temporals
2441//===----------------------------------------------------------------------===//
// vmovntdqa loads. Only the 512-bit form carries a selection pattern (the
// int_x86_avx512_movntdqa intrinsic); the 256/128-bit forms have empty
// pattern lists and additionally require HasVLX.
let SchedRW = [WriteLoad] in
def VMOVNTDQAZrm
    : AVX512PI<0x2A, MRMSrcMem,
               (outs VR512:$dst), (ins i512mem:$src),
               "vmovntdqa\t{$src, $dst|$dst, $src}",
               [(set VR512:$dst, (int_x86_avx512_movntdqa addr:$src))],
               SSEPackedInt>,
      EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;

let SchedRW = [WriteLoad], Predicates = [HasAVX512, HasVLX] in {
  def VMOVNTDQAZ256rm
      : AVX512PI<0x2A, MRMSrcMem,
                 (outs VR256X:$dst), (ins i256mem:$src),
                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                 [], SSEPackedInt>,
        EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

  def VMOVNTDQAZ128rm
      : AVX512PI<0x2A, MRMSrcMem,
                 (outs VR128X:$dst), (ins i128mem:$src),
                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                 [], SSEPackedInt>,
        EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}
2463
// Emits the register-to-memory ("mr") form of a non-temporal store.
//   st_frag  - store fragment matched by the selection pattern.
//   OpVT, RC - value type and register class of the stored operand.
//   memop    - memory operand used for the destination.
//   d, itin  - execution domain and itinerary forwarded to AVX512PI.
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, PatFrag st_frag,
                        ValueType OpVT, RegisterClass RC, X86MemOperand memop,
                        Domain d, InstrItinClass itin = IIC_SSE_MOVNT> {
  let SchedRW = [WriteStore], mayStore = 1, AddedComplexity = 400 in {
    def mr : AVX512PI<opc, MRMDestMem,
                      (outs), (ins memop:$dst, RC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      [(st_frag (OpVT RC:$src), addr:$dst)],
                      d, itin>,
             EVEX;
  }
}
2473
// Instantiates avx512_movnt for the 512-, 256- and 128-bit vector lengths.
// The value type is looked up by name as "v" + <element count> + elty + elsz
// and the memory operand as elty + "<bits>mem". The 512-bit form is guarded
// by `prd`; the 256/128-bit forms additionally require HasVLX.
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr, PatFrag st_frag,
                           string elty, string elsz, string vsz512,
                           string vsz256, string vsz128, Domain d,
                           Predicate prd, InstrItinClass itin = IIC_SSE_MOVNT> {
  let Predicates = [prd] in {
    defm Z : avx512_movnt<opc, OpcodeStr, st_frag,
                          !cast<ValueType>(
                              !strconcat("v", vsz512, elty, elsz)),
                          VR512,
                          !cast<X86MemOperand>(!strconcat(elty, "512mem")),
                          d, itin>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, st_frag,
                             !cast<ValueType>(
                                 !strconcat("v", vsz256, elty, elsz)),
                             VR256X,
                             !cast<X86MemOperand>(!strconcat(elty, "256mem")),
                             d, itin>,
                EVEX_V256;

    defm Z128 : avx512_movnt<opc, OpcodeStr, st_frag,
                             !cast<ValueType>(
                                 !strconcat("v", vsz128, elty, elsz)),
                             VR128X,
                             !cast<X86MemOperand>(!strconcat(elty, "128mem")),
                             d, itin>,
                EVEX_V128;
  }
}
2496
// Non-temporal stores, all matched through alignednontemporalstore.
// String arguments: element kind, element size, then the element counts for
// the 512-, 256- and 128-bit vectors.

// Packed 64-bit integers.
defm VMOVNTDQ
    : avx512_movnt_vl<0xE7, "vmovntdq", alignednontemporalstore,
                      "i", "64", "8", "4", "2",
                      SSEPackedInt, HasAVX512>,
      PD, EVEX_CD8<64, CD8VF>;

// Packed double-precision floats.
defm VMOVNTPD
    : avx512_movnt_vl<0x2B, "vmovntpd", alignednontemporalstore,
                      "f", "64", "8", "4", "2",
                      SSEPackedDouble, HasAVX512>,
      PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Packed single-precision floats.
defm VMOVNTPS
    : avx512_movnt_vl<0x2B, "vmovntps", alignednontemporalstore,
                      "f", "32", "16", "8", "4",
                      SSEPackedSingle, HasAVX512>,
      PS, EVEX_CD8<32, CD8VF>;
2508
Adam Nemet7f62b232014-06-10 16:39:53 +00002509//===----------------------------------------------------------------------===//
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002510// AVX-512 - Integer arithmetic
2511//
// Packed integer binary operation with selection patterns for every form:
//   rr / rm / rmb        - unmasked register, memory and broadcast-memory
//   rrk / rmk / rmbk     - merge-masked; $src0 is tied to $dst and supplies
//                          the elements not selected by the mask
//   rrkz / rmkz / rmbkz  - zero-masked; unselected elements come from
//                          immAllZerosV
// BrdcstStr is the text appended to the memory operand in the broadcast
// forms (callers pass e.g. "{1to16}").
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           ValueType OpVT, RegisterClass KRC,
                           RegisterClass RC, PatFrag memop_frag,
                           X86MemOperand x86memop, PatFrag scalar_mfrag,
                           X86MemOperand x86scalar_mop, string BrdcstStr,
                           OpndItins itins, bit IsCommutable = 0> {
  //===-- Register-register forms ----------------------------------------===//
  let isCommutable = IsCommutable in
  def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                    (ins RC:$src1, RC:$src2),
                    OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set RC:$dst,
                          (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
                    itins.rr>,
           EVEX_4V;

  let AddedComplexity = 30, Constraints = "$src0 = $dst" in
  def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                     (ins RC:$src0, KRC:$mask, RC:$src1, RC:$src2),
                     OpcodeStr #
                       " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set RC:$dst,
                           (OpVT (vselect KRC:$mask,
                                          (OpNode (OpVT RC:$src1),
                                                  (OpVT RC:$src2)),
                                          RC:$src0)))],
                     itins.rr>,
            EVEX_4V, EVEX_K;

  let AddedComplexity = 30 in
  def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                      (ins KRC:$mask, RC:$src1, RC:$src2),
                      OpcodeStr #
                        " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
                      [(set RC:$dst,
                            (OpVT (vselect KRC:$mask,
                                           (OpNode (OpVT RC:$src1),
                                                   (OpVT RC:$src2)),
                                           (OpVT immAllZerosV))))],
                      itins.rr>,
             EVEX_4V, EVEX_KZ;

  //===-- Register-memory and broadcast forms ----------------------------===//
  let mayLoad = 1 in {
    def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                      (ins RC:$src1, x86memop:$src2),
                      OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set RC:$dst,
                            (OpVT (OpNode (OpVT RC:$src1),
                                          (memop_frag addr:$src2))))],
                      itins.rm>,
             EVEX_4V;

    let AddedComplexity = 30, Constraints = "$src0 = $dst" in
    def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                       (ins RC:$src0, KRC:$mask, RC:$src1, x86memop:$src2),
                       OpcodeStr #
                         " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                       [(set RC:$dst,
                             (OpVT (vselect KRC:$mask,
                                            (OpNode (OpVT RC:$src1),
                                                    (memop_frag addr:$src2)),
                                            RC:$src0)))],
                       itins.rm>,
              EVEX_4V, EVEX_K;

    let AddedComplexity = 30 in
    def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                        (ins KRC:$mask, RC:$src1, x86memop:$src2),
                        OpcodeStr #
                          " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
                        [(set RC:$dst,
                              (OpVT (vselect KRC:$mask,
                                             (OpNode (OpVT RC:$src1),
                                                     (memop_frag addr:$src2)),
                                             (OpVT immAllZerosV))))],
                        itins.rm>,
               EVEX_4V, EVEX_KZ;

    def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                       (ins RC:$src1, x86scalar_mop:$src2),
                       OpcodeStr # " \t{${src2}" # BrdcstStr #
                         ", $src1, $dst|$dst, $src1, ${src2}" # BrdcstStr # "}",
                       [(set RC:$dst,
                             (OpNode RC:$src1,
                                     (OpVT (X86VBroadcast
                                               (scalar_mfrag addr:$src2)))))],
                       itins.rm>,
              EVEX_4V, EVEX_B;

    let AddedComplexity = 30, Constraints = "$src0 = $dst" in
    def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                        (ins RC:$src0, KRC:$mask, RC:$src1,
                             x86scalar_mop:$src2),
                        OpcodeStr # " \t{${src2}" # BrdcstStr #
                          ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}" #
                          BrdcstStr # "}",
                        [(set RC:$dst,
                              (OpVT (vselect KRC:$mask,
                                             (OpNode (OpVT RC:$src1),
                                                     (OpVT (X86VBroadcast
                                                               (scalar_mfrag addr:$src2)))),
                                             RC:$src0)))],
                        itins.rm>,
               EVEX_4V, EVEX_B, EVEX_K;

    let AddedComplexity = 30 in
    def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                         (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
                         OpcodeStr # " \t{${src2}" # BrdcstStr #
                           ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}" #
                           BrdcstStr # "}",
                         [(set RC:$dst,
                               (OpVT (vselect KRC:$mask,
                                              (OpNode (OpVT RC:$src1),
                                                      (OpVT (X86VBroadcast
                                                                (scalar_mfrag addr:$src2)))),
                                              (OpVT immAllZerosV))))],
                         itins.rm>,
                EVEX_4V, EVEX_B, EVEX_KZ;
  }
}
Elena Demikhovskybb2f6b72014-03-27 09:45:08 +00002601
// Packed integer binary operation whose instruction forms carry no selection
// patterns (every pattern list below is empty); users attach patterns
// separately with `def : Pat`. It produces the same nine forms as
// avx512_binop_rm: rr/rm/rmb plus their merge-masked (k) and zero-masked (kz)
// variants.
//
// DstVT, SrcVT, memop_frag and scalar_mfrag are accepted for signature
// parity but are not referenced by any definition in this multiclass.
//
// The unmasked rr/rm forms now pass itins.rr/itins.rm like every other form
// here; previously they omitted the argument and so did not use the
// itinerary supplied by the caller.
//
// NOTE(review): unlike avx512_binop_rm, the merge-masked forms here take no
// tied $src0 pass-through operand. Confirm whether that is intentional
// before attaching selection patterns to them.
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, ValueType DstVT,
                            ValueType SrcVT, RegisterClass KRC, RegisterClass RC,
                            PatFrag memop_frag, X86MemOperand x86memop,
                            PatFrag scalar_mfrag, X86MemOperand x86scalar_mop,
                            string BrdcstStr, OpndItins itins, bit IsCommutable = 0> {
  let isCommutable = IsCommutable in
  {
    def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [], itins.rr>, EVEX_4V;
    def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
               (ins KRC:$mask, RC:$src1, RC:$src2),
               !strconcat(OpcodeStr,
                  " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
               [], itins.rr>, EVEX_4V, EVEX_K;
    def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                (ins KRC:$mask, RC:$src1, RC:$src2),
                !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
                    "|$dst {${mask}} {z}, $src1, $src2}"),
                [], itins.rr>, EVEX_4V, EVEX_KZ;
  }
  let mayLoad = 1 in {
    def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
              (ins RC:$src1, x86memop:$src2),
              !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [], itins.rm>, EVEX_4V;
    def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
               (ins KRC:$mask, RC:$src1, x86memop:$src2),
               !strconcat(OpcodeStr,
                   " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
               [], itins.rm>, EVEX_4V, EVEX_K;
    def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                (ins KRC:$mask, RC:$src1, x86memop:$src2),
                !strconcat(OpcodeStr,
                    " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
                [], itins.rm>, EVEX_4V, EVEX_KZ;
    def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86scalar_mop:$src2),
               !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                          ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
               [], itins.rm>, EVEX_4V, EVEX_B;
    def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
                !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                           ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
                           BrdcstStr, "}"),
                [], itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
    def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
                 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                            ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
                            BrdcstStr, "}"),
                 [], itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
  }
}
2658
// 512-bit packed integer add / subtract / low multiply. The final template
// argument is IsCommutable: 1 for add and mul, 0 for sub.

// 32-bit elements, 16 lanes, VK16WM write mask.
defm VPADDDZ
    : avx512_binop_rm<0xFE, "vpaddd", add, v16i32, VK16WM, VR512,
                      memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                      SSE_INTALU_ITINS_P, 1>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPSUBDZ
    : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VK16WM, VR512,
                      memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                      SSE_INTALU_ITINS_P, 0>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPMULLDZ
    : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VK16WM, VR512,
                      memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                      SSE_INTALU_ITINS_P, 1>,
      T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;

// 64-bit elements, 8 lanes, VK8WM write mask.
defm VPADDQZ
    : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VK8WM, VR512,
                      memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                      SSE_INTALU_ITINS_P, 1>,
      EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W;

defm VPSUBQZ
    : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VK8WM, VR512,
                      memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                      SSE_INTALU_ITINS_P, 0>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002678
// Multiplies taking v16i32 sources and producing v8i64 results. They are
// built from avx512_binop_rm2, which defines no selection patterns; separate
// `def : Pat` records select the rr forms.
//
// Both use the integer-multiply itinerary. VPMULDQZ previously used the ALU
// itinerary (SSE_INTALU_ITINS_P), inconsistent with its unsigned counterpart.
defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTMUL_ITINS_P, 1>, T8PD, EVEX_V512,
                   EVEX_CD8<64, CD8VF>, VEX_W;

defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTMUL_ITINS_P, 1>, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002687
// Select the X86pmuludq node to the unmasked register form.
def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
          (VPMULUDQZrr VR512:$src1, VR512:$src2)>;

// Masked multiply intrinsics called with an all-zeros pass-through operand
// and an all-ones (i8 -1) mask are selected to the unmasked register forms.
def : Pat<(v8i64 (int_x86_avx512_mask_pmulu_dq_512
                     (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                     (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512
                     (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                     (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMULDQZrr VR512:$src1, VR512:$src2)>;
2697
// 512-bit packed integer max/min, signed and unsigned, for 32-bit (D) and
// 64-bit (Q) elements.
//
// Integer min/max is commutative, so every variant passes IsCommutable = 1.
// The Q variants previously passed 0 while the otherwise identical D variants
// passed 1.
defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky199c8232013-10-27 08:18:37 +00002733
// Masked min/max intrinsics called with an all-zeros pass-through operand and
// an all-ones mask (i16 -1 for 16 lanes, i8 -1 for 8 lanes) are selected to
// the unmasked register forms.

// Signed / unsigned max.
def : Pat<(v16i32 (int_x86_avx512_mask_pmaxs_d_512
                      (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                      (v16i32 immAllZerosV), (i16 -1))),
          (VPMAXSDZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v16i32 (int_x86_avx512_mask_pmaxu_d_512
                      (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                      (v16i32 immAllZerosV), (i16 -1))),
          (VPMAXUDZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pmaxs_q_512
                     (v8i64 VR512:$src1), (v8i64 VR512:$src2),
                     (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMAXSQZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pmaxu_q_512
                     (v8i64 VR512:$src1), (v8i64 VR512:$src2),
                     (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMAXUQZrr VR512:$src1, VR512:$src2)>;

// Signed / unsigned min.
def : Pat<(v16i32 (int_x86_avx512_mask_pmins_d_512
                      (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                      (v16i32 immAllZerosV), (i16 -1))),
          (VPMINSDZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v16i32 (int_x86_avx512_mask_pminu_d_512
                      (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                      (v16i32 immAllZerosV), (i16 -1))),
          (VPMINUDZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pmins_q_512
                     (v8i64 VR512:$src1), (v8i64 VR512:$src2),
                     (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMINSQZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pminu_q_512
                     (v8i64 VR512:$src1), (v8i64 VR512:$src2),
                     (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMINUQZrr VR512:$src1, VR512:$src2)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002758//===----------------------------------------------------------------------===//
2759// AVX-512 - Unpack Instructions
2760//===----------------------------------------------------------------------===//
2761
// Floating-point unpack: a register-register (rr) and a register-memory (rm)
// form. `asm` is the complete assembly string supplied by the caller. The
// memory pattern bitconverts the mem_frag load before handing it to OpNode.
multiclass avx512_unpack_fp<bits<8> opc, SDNode OpNode, ValueType vt,
                            PatFrag mem_frag, RegisterClass RC,
                            X86MemOperand x86memop, string asm,
                            Domain d> {
  def rr : AVX512PI<opc, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, RC:$src2),
                    asm,
                    [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))],
                    d>,
           EVEX_4V;

  def rm : AVX512PI<opc, MRMSrcMem,
                    (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                    asm,
                    [(set RC:$dst,
                          (vt (OpNode RC:$src1,
                                      (bitconvert (mem_frag addr:$src2)))))],
                    d>,
           EVEX_4V;
}
2778
// 512-bit floating-point unpack high / low. All four pass memopv8f64 as the
// load fragment; avx512_unpack_fp bitconverts the loaded value.

// Unpack high.
defm VUNPCKHPSZ
    : avx512_unpack_fp<0x15, X86Unpckh, v16f32, memopv8f64, VR512, f512mem,
                       "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       SSEPackedSingle>,
      PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VUNPCKHPDZ
    : avx512_unpack_fp<0x15, X86Unpckh, v8f64, memopv8f64, VR512, f512mem,
                       "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       SSEPackedDouble>,
      PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Unpack low.
defm VUNPCKLPSZ
    : avx512_unpack_fp<0x14, X86Unpckl, v16f32, memopv8f64, VR512, f512mem,
                       "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       SSEPackedSingle>,
      PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VUNPCKLPDZ
    : avx512_unpack_fp<0x14, X86Unpckl, v8f64, memopv8f64, VR512, f512mem,
                       "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       SSEPackedDouble>,
      PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002791
// Integer unpack: a register-register (rr) and a register-memory (rm) form,
// both using the IIC_SSE_UNPCK itinerary. The memory pattern bitconverts the
// memop_frag load before handing it to OpNode.
multiclass avx512_unpack_int<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             ValueType OpVT, RegisterClass RC,
                             PatFrag memop_frag, X86MemOperand x86memop> {
  def rr : AVX512BI<opc, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, RC:$src2),
                    OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set RC:$dst,
                          (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
                    IIC_SSE_UNPCK>,
           EVEX_4V;

  def rm : AVX512BI<opc, MRMSrcMem,
                    (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                    OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set RC:$dst,
                          (OpVT (OpNode (OpVT RC:$src1),
                                        (bitconvert (memop_frag addr:$src2)))))],
                    IIC_SSE_UNPCK>,
           EVEX_4V;
}
// 512-bit integer unpack low / high for 32-bit (DQ) and 64-bit (QDQ) elements.

// Unpack low.
defm VPUNPCKLDQZ
    : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32,
                        VR512, memopv16i32, i512mem>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPUNPCKLQDQZ
    : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64,
                        VR512, memopv8i64, i512mem>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Unpack high.
defm VPUNPCKHDQZ
    : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32,
                        VR512, memopv16i32, i512mem>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPUNPCKHQDQZ
    : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64,
                        VR512, memopv8i64, i512mem>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2819//===----------------------------------------------------------------------===//
2820// AVX-512 - PSHUFD
2821//
2822
// Shuffle controlled by an 8-bit immediate:
//   ri - source vector in a register
//   mi - source vector loaded through mem_frag
// Both match (OpNode <src>, (i8 imm)) producing OpVT.
multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                            SDNode OpNode, PatFrag mem_frag,
                            X86MemOperand x86memop, ValueType OpVT> {
  def ri : AVX512Ii8<opc, MRMSrcReg,
                     (outs RC:$dst), (ins RC:$src1, i8imm:$src2),
                     OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set RC:$dst,
                           (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
           EVEX;

  def mi : AVX512Ii8<opc, MRMSrcMem,
                     (outs RC:$dst), (ins x86memop:$src1, i8imm:$src2),
                     OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set RC:$dst,
                           (OpVT (OpNode (mem_frag addr:$src1),
                                         (i8 imm:$src2))))]>,
           EVEX;
}
2841
// Immediate-controlled shuffles on 512-bit vectors.
defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32,
                      i512mem, v16i32>, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;

let ExeDomain = SSEPackedSingle in
defm VPERMILPSZ : avx512_pshuf_imm<0x04, "vpermilps", VR512, X86VPermilp,
                      memopv16f32, i512mem, v16f32>, TAPD, EVEX_V512,
                      EVEX_CD8<32, CD8VF>;

// vpermilpd is the VEX_W form with 64-bit elements, so its EVEX_CD8 element
// size is 64. It was previously declared as 32, unlike every other VEX_W
// definition in this file.
let ExeDomain = SSEPackedDouble in
defm VPERMILPDZ : avx512_pshuf_imm<0x05, "vpermilpd", VR512, X86VPermilp,
                      memopv8f64, i512mem, v8f64>, TAPD, EVEX_V512,
                      VEX_W, EVEX_CD8<64, CD8VF>;

// Integer-typed X86VPermilp nodes are selected to the register-immediate
// forms of the floating-point instructions.
def : Pat<(v16i32 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
          (VPERMILPSZri VR512:$src1, imm:$imm)>;
def : Pat<(v8i64 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
          (VPERMILPDZri VR512:$src1, imm:$imm)>;
2858
2859//===----------------------------------------------------------------------===//
2860// AVX-512 Logical Instructions
2861//===----------------------------------------------------------------------===//
2862
// 512-bit bitwise logic for 32-bit (D) and 64-bit (Q) elements, all using
// SSE_BIT_ITINS_P. The final template argument is IsCommutable: 1 for
// and/or/xor, 0 for and-not.

// AND
defm VPANDDZ
    : avx512_binop_rm<0xDB, "vpandd", and, v16i32, VK16WM, VR512,
                      memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                      SSE_BIT_ITINS_P, 1>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPANDQZ
    : avx512_binop_rm<0xDB, "vpandq", and, v8i64, VK8WM, VR512,
                      memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                      SSE_BIT_ITINS_P, 1>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// OR
defm VPORDZ
    : avx512_binop_rm<0xEB, "vpord", or, v16i32, VK16WM, VR512,
                      memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                      SSE_BIT_ITINS_P, 1>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPORQZ
    : avx512_binop_rm<0xEB, "vporq", or, v8i64, VK8WM, VR512,
                      memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                      SSE_BIT_ITINS_P, 1>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// XOR
defm VPXORDZ
    : avx512_binop_rm<0xEF, "vpxord", xor, v16i32, VK16WM, VR512,
                      memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                      SSE_BIT_ITINS_P, 1>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPXORQZ
    : avx512_binop_rm<0xEF, "vpxorq", xor, v8i64, VK8WM, VR512,
                      memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                      SSE_BIT_ITINS_P, 1>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// AND-NOT
defm VPANDNDZ
    : avx512_binop_rm<0xDF, "vpandnd", X86andnp, v16i32, VK16WM, VR512,
                      memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                      SSE_BIT_ITINS_P, 0>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPANDNQZ
    : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VK8WM, VR512,
                      memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                      SSE_BIT_ITINS_P, 0>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002887
//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
//===----------------------------------------------------------------------===//
2891
// Scalar FP binary operation: instantiates sse12_fp_scalar twice, once for
// single precision (suffix "ss", FR32X, XS prefix) and once for double
// precision (suffix "sd", FR64X, XD prefix plus VEX_W).
//   opc       - opcode byte shared by both precisions.
//   OpcodeStr - mnemonic stem; the "ss"/"sd" suffix is appended here.
//   OpNode    - SelectionDAG node implemented by the instruction.
//   itins     - itineraries; .s feeds the SS form and .d the SD form.
multiclass avx512_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          SizeItins itins> {
  // Single precision.
  defm SSZ : sse12_fp_scalar<opc, OpcodeStr # "ss", OpNode, FR32X, f32mem,
                             itins.s, 0>,
             XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

  // Double precision.
  defm SDZ : sse12_fp_scalar<opc, OpcodeStr # "sd", OpNode, FR64X, f64mem,
                             itins.d, 0>,
             XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
2901
// Scalar AVX-512 FP arithmetic (SS and SD forms of each operation).
//
// Only add and mul may be commuted. X86fmin / X86fmax model the x86 min/max
// instructions, whose result depends on operand order when an input is NaN
// or when both inputs are zeros of opposite sign, so they must not be marked
// commutable (they were previously grouped with add/mul).
let isCommutable = 1 in {
defm VADD : avx512_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>;
defm VMUL : avx512_binop_s<0x59, "mul", fmul, SSE_ALU_ITINS_S>;
}
let isCommutable = 0 in {
defm VMIN : avx512_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>;
defm VMAX : avx512_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>;
defm VSUB : avx512_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>;
defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>;
}
2912
// Packed FP binary operation with register, memory and broadcast-memory
// second operands, each in unmasked, masked ({k}) and zero-masked ({k}{z})
// variants.
//   opc            - opcode byte.
//   OpcodeStr      - mnemonic.
//   OpNode         - SelectionDAG node matched by the unmasked forms.
//   KRC            - register class of the $mask operand.
//   RC / vt        - vector register class and value type.
//   x86memop / mem_frag            - full-vector memory operand and its load
//                                    fragment.
//   x86scalar_mop / scalar_mfrag   - scalar memory operand and load fragment
//                                    used by the broadcast (EVEX_B) forms.
//   BrdcstStr      - broadcast decoration appended to the memory operand in
//                    the asm string (e.g. "{1to16}").
//   d / itins      - execution domain and itineraries.
//   commutable     - value assigned to isCommutable on the register forms.
//
// Only rr, rm and rmb carry ISel patterns; every masked form has an empty
// pattern list.
//
// All asm strings use the same " \t" mnemonic separator and have no blank
// before the '|' syntax separator (rmk/rmkz previously used a bare "\t" and
// rrk/rrkz emitted a trailing space in the first syntax variant).
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            RegisterClass KRC,
                            RegisterClass RC, ValueType vt,
                            X86MemOperand x86memop, PatFrag mem_frag,
                            X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
                            string BrdcstStr,
                            Domain d, OpndItins itins, bit commutable> {
  // Register-register forms.
  let isCommutable = commutable in {
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
       EVEX_4V;

    def rrk: PI<opc, MRMSrcReg, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, RC:$src2),
       !strconcat(OpcodeStr,
         " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
       [], itins.rr, d>, EVEX_4V, EVEX_K;

    def rrkz: PI<opc, MRMSrcReg, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, RC:$src2),
       !strconcat(OpcodeStr,
         " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
       [], itins.rr, d>, EVEX_4V, EVEX_KZ;
  }

  // Memory forms. mayLoad is set explicitly because the masked variants have
  // no pattern from which it could be inferred.
  let mayLoad = 1 in {
    def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
       itins.rm, d>, EVEX_4V;

    def rmb : PI<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86scalar_mop:$src2),
       !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
         ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
       [(set RC:$dst, (OpNode RC:$src1,
                       (vt (X86VBroadcast (scalar_mfrag addr:$src2)))))],
       itins.rm, d>, EVEX_4V, EVEX_B;

    def rmk : PI<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
         " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
       [], itins.rm, d>, EVEX_4V, EVEX_K;

    def rmkz : PI<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
         " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
       [], itins.rm, d>, EVEX_4V, EVEX_KZ;

    def rmbk : PI<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
         " \t{${src2}", BrdcstStr,
         ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}", BrdcstStr, "}"),
       [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_K;

    def rmbkz : PI<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
         " \t{${src2}", BrdcstStr,
         ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
         BrdcstStr, "}"),
       [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_KZ;
  }
}
2975
// Packed add and multiply on 512-bit vectors; both are instantiated with the
// commutable bit set.

// Add.
defm VADDPSZ : avx512_fp_packed<0x58, "addps", fadd, VK16WM, VR512, v16f32,
                                f512mem, memopv16f32, f32mem, loadf32,
                                "{1to16}", SSEPackedSingle,
                                SSE_ALU_ITINS_P.s, 1>,
               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

defm VADDPDZ : avx512_fp_packed<0x58, "addpd", fadd, VK8WM, VR512, v8f64,
                                f512mem, memopv8f64, f64mem, loadf64,
                                "{1to8}", SSEPackedDouble,
                                SSE_ALU_ITINS_P.d, 1>,
               EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Multiply.
defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VK16WM, VR512, v16f32,
                                f512mem, memopv16f32, f32mem, loadf32,
                                "{1to16}", SSEPackedSingle,
                                SSE_ALU_ITINS_P.s, 1>,
               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm VMULPDZ : avx512_fp_packed<0x59, "mulpd", fmul, VK8WM, VR512, v8f64,
                                f512mem, memopv8f64, f64mem, loadf64,
                                "{1to8}", SSEPackedDouble,
                                SSE_ALU_ITINS_P.d, 1>,
               EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002992
// Packed min and max on 512-bit vectors.
//
// The commutable bit is 0: X86fmin / X86fmax model the x86 min/max
// instructions, whose result depends on operand order when an element is NaN
// or when both elements are zeros of opposite sign, so the operands must not
// be swapped (these were previously instantiated as commutable).
defm VMINPSZ : avx512_fp_packed<0x5D, "minps", X86fmin, VK16WM, VR512, v16f32,
                                f512mem, memopv16f32, f32mem, loadf32,
                                "{1to16}", SSEPackedSingle,
                                SSE_ALU_ITINS_P.s, 0>,
               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm VMAXPSZ : avx512_fp_packed<0x5F, "maxps", X86fmax, VK16WM, VR512, v16f32,
                                f512mem, memopv16f32, f32mem, loadf32,
                                "{1to16}", SSEPackedSingle,
                                SSE_ALU_ITINS_P.s, 0>,
               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

defm VMINPDZ : avx512_fp_packed<0x5D, "minpd", X86fmin, VK8WM, VR512, v8f64,
                                f512mem, memopv8f64, f64mem, loadf64,
                                "{1to8}", SSEPackedDouble,
                                SSE_ALU_ITINS_P.d, 0>,
               EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VMAXPDZ : avx512_fp_packed<0x5F, "maxpd", X86fmax, VK8WM, VR512, v8f64,
                                f512mem, memopv8f64, f64mem, loadf64,
                                "{1to8}", SSEPackedDouble,
                                SSE_ALU_ITINS_P.d, 0>,
               EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003010
// Packed subtract and divide on 512-bit vectors; both are instantiated with
// the commutable bit clear.

// Single precision.
defm VSUBPSZ : avx512_fp_packed<0x5C, "subps", fsub, VK16WM, VR512, v16f32,
                                f512mem, memopv16f32, f32mem, loadf32,
                                "{1to16}", SSEPackedSingle,
                                SSE_ALU_ITINS_P.s, 0>,
               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm VDIVPSZ : avx512_fp_packed<0x5E, "divps", fdiv, VK16WM, VR512, v16f32,
                                f512mem, memopv16f32, f32mem, loadf32,
                                "{1to16}", SSEPackedSingle,
                                SSE_ALU_ITINS_P.s, 0>,
               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

// Double precision.
defm VSUBPDZ : avx512_fp_packed<0x5C, "subpd", fsub, VK8WM, VR512, v8f64,
                                f512mem, memopv8f64, f64mem, loadf64,
                                "{1to8}", SSEPackedDouble,
                                SSE_ALU_ITINS_P.d, 0>,
               EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VK8WM, VR512, v8f64,
                                f512mem, memopv8f64, f64mem, loadf64,
                                "{1to8}", SSEPackedDouble,
                                SSE_ALU_ITINS_P.d, 0>,
               EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003026
// Select the masked max/min intrinsics to the plain register-register
// instructions when the call is effectively unmasked: all-zero pass-through
// vector, all-ones mask (-1) and FROUND_CURRENT as the rounding operand.

// max
def : Pat<(v16f32 (int_x86_avx512_mask_max_ps_512
                     (v16f32 VR512:$src1), (v16f32 VR512:$src2),
                     (bc_v16f32 (v16i32 immAllZerosV)),
                     (i16 -1), FROUND_CURRENT)),
          (VMAXPSZrr VR512:$src1, VR512:$src2)>;

def : Pat<(v8f64 (int_x86_avx512_mask_max_pd_512
                    (v8f64 VR512:$src1), (v8f64 VR512:$src2),
                    (bc_v8f64 (v16i32 immAllZerosV)),
                    (i8 -1), FROUND_CURRENT)),
          (VMAXPDZrr VR512:$src1, VR512:$src2)>;

// min
def : Pat<(v16f32 (int_x86_avx512_mask_min_ps_512
                     (v16f32 VR512:$src1), (v16f32 VR512:$src2),
                     (bc_v16f32 (v16i32 immAllZerosV)),
                     (i16 -1), FROUND_CURRENT)),
          (VMINPSZrr VR512:$src1, VR512:$src2)>;

def : Pat<(v8f64 (int_x86_avx512_mask_min_pd_512
                    (v8f64 VR512:$src1), (v8f64 VR512:$src2),
                    (bc_v8f64 (v16i32 immAllZerosV)),
                    (i8 -1), FROUND_CURRENT)),
          (VMINPDZrr VR512:$src1, VR512:$src2)>;
//===----------------------------------------------------------------------===//
// AVX-512 VPTESTM instructions
//===----------------------------------------------------------------------===//
3049
// Vector test producing a mask register: rr (register second source) and rm
// (memory second source) forms.
//   opc / OpcodeStr       - opcode byte and mnemonic.
//   KRC                   - mask register class of the result.
//   RC / vt               - vector register class and value type of the
//                           sources.
//   x86memop / memop_frag - memory operand and load fragment for rm; the
//                           loaded value goes through a bitconvert before it
//                           reaches OpNode.
//   OpNode                - SelectionDAG node being matched.
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                         RegisterClass RC, X86MemOperand x86memop,
                         PatFrag memop_frag, SDNode OpNode, ValueType vt> {
  def rr : AVX512PI<opc, MRMSrcReg,
                    (outs KRC:$dst), (ins RC:$src1, RC:$src2),
                    OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))],
                    SSEPackedInt>,
           EVEX_4V;

  def rm : AVX512PI<opc, MRMSrcMem,
                    (outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
                    OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set KRC:$dst,
                          (OpNode (vt RC:$src1),
                                  (bitconvert (memop_frag addr:$src2))))],
                    SSEPackedInt>,
           EVEX_4V;
}
3064
// VPTESTM: dword form writes VK16, qword form (VEX_W) writes VK8.
defm VPTESTMDZ : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem,
                               memopv16i32, X86testm, v16i32>,
                 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem,
                               memopv8i64, X86testm, v8i64>,
                 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// VPTESTNM: same opcode byte under the T8XS prefix; gated on HasCDI here.
let Predicates = [HasCDI] in {
  defm VPTESTNMDZ : avx512_vptest<0x27, "vptestnmd", VK16, VR512, f512mem,
                                  memopv16i32, X86testnm, v16i32>,
                    T8XS, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VPTESTNMQZ : avx512_vptest<0x27, "vptestnmq", VK8, VR512, f512mem,
                                  memopv8i64, X86testnm, v8i64>,
                    T8XS, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
3080
// Unmasked (mask == -1) ptestm intrinsics: emit the rr instruction and copy
// its mask-register result into the GPR class matching the intrinsic's
// integer return type (GR16 for the dword form, GR8 for the qword form).
def : Pat<(i16 (int_x86_avx512_mask_ptestm_d_512
                  (v16i32 VR512:$src1), (v16i32 VR512:$src2), (i16 -1))),
          (COPY_TO_REGCLASS (VPTESTMDZrr VR512:$src1, VR512:$src2), GR16)>;

def : Pat<(i8 (int_x86_avx512_mask_ptestm_q_512
                 (v8i64 VR512:$src1), (v8i64 VR512:$src2), (i8 -1))),
          (COPY_TO_REGCLASS (VPTESTMQZrr VR512:$src1, VR512:$src2), GR8)>;
//===----------------------------------------------------------------------===//
// AVX-512 Shift instructions
//===----------------------------------------------------------------------===//
// Vector shift by an 8-bit immediate. The shifted value is either a register
// (ri, rik) or a memory operand (mi, mik); the *k forms take an additional
// $mask operand and have no ISel pattern.
//   opc                 - opcode byte.
//   ImmFormR / ImmFormM - instruction formats for the register and memory
//                         forms.
//   OpcodeStr / OpNode  - mnemonic and SelectionDAG node.
//   RC / vt             - vector register class and value type.
//   x86memop / mem_frag - memory operand and load fragment.
//   KRC                 - register class of the $mask operand.
//
// The memory forms are wrapped in mayLoad = 1: mik has an empty pattern, so
// the flag cannot be inferred for it (this mirrors avx512_fp_packed, which
// flags its pattern-less memory forms the same way).
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                            string OpcodeStr, SDNode OpNode, RegisterClass RC,
                            ValueType vt, X86MemOperand x86memop,
                            PatFrag mem_frag, RegisterClass KRC> {
  def ri : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
       (ins RC:$src1, i8imm:$src2),
       !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set RC:$dst, (vt (OpNode RC:$src1, (i8 imm:$src2))))],
       SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
  def rik : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, i8imm:$src2),
       !strconcat(OpcodeStr,
         " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
       [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;

  let mayLoad = 1 in {
  def mi: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
       (ins x86memop:$src1, i8imm:$src2),
       !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set RC:$dst, (OpNode (mem_frag addr:$src1),
                     (i8 imm:$src2)))], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
  def mik: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
       (ins KRC:$mask, x86memop:$src1, i8imm:$src2),
       !strconcat(OpcodeStr,
         " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
       [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
  }
}
3116
// Vector shift where the count comes from a 128-bit source: a VR128X
// register (rr, rrk) or an i128mem operand (rm, rmk). The *k forms take an
// additional $mask operand and have no ISel pattern.
//   opc / OpcodeStr / OpNode - opcode byte, mnemonic, SelectionDAG node.
//   RC / vt                  - register class and value type of the shifted
//                              vector.
//   SrcVT                    - value type given to the 128-bit count
//                              register.
//   bc_frag                  - fragment applied to the memopv2i64 load of
//                              the count in the rm form.
//   KRC                      - register class of the $mask operand.
//
// The memory forms are wrapped in mayLoad = 1: rmk has an empty pattern, so
// the flag cannot be inferred for it (this mirrors avx512_fp_packed, which
// flags its pattern-less memory forms the same way).
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            RegisterClass RC, ValueType vt, ValueType SrcVT,
                            PatFrag bc_frag, RegisterClass KRC> {
  // src2 is always 128-bit
  def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, VR128X:$src2),
       !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set RC:$dst, (vt (OpNode RC:$src1, (SrcVT VR128X:$src2))))],
       SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
  def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, VR128X:$src2),
       !strconcat(OpcodeStr,
         " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
       [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;

  let mayLoad = 1 in {
  def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, i128mem:$src2),
       !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set RC:$dst, (vt (OpNode RC:$src1,
                       (bc_frag (memopv2i64 addr:$src2)))))],
       SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
  def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, i128mem:$src2),
       !strconcat(OpcodeStr,
         " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
       [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
  }
}
3143
// 512-bit shifts. Every mnemonic is instantiated twice under the same
// record prefix: avx512_shift_rmi supplies the shift-by-immediate forms and
// avx512_shift_rrm the forms whose count is a 128-bit register or memory
// operand. Q variants add VEX_W and use 64-bit elements.

// Logical right shift.
defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
                                VR512, v16i32, i512mem, memopv16i32, VK16WM>,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl,
                                VR512, v16i32, v4i32, bc_v4i32, VK16WM>,
               EVEX_V512, EVEX_CD8<32, CD8VQ>;

defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
                                VR512, v8i64, i512mem, memopv8i64, VK8WM>,
               EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl,
                                VR512, v8i64, v2i64, bc_v2i64, VK8WM>,
               EVEX_V512, EVEX_CD8<64, CD8VQ>, VEX_W;

// Left shift.
defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
                                VR512, v16i32, i512mem, memopv16i32, VK16WM>,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl,
                                VR512, v16i32, v4i32, bc_v4i32, VK16WM>,
               EVEX_V512, EVEX_CD8<32, CD8VQ>;

defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
                                VR512, v8i64, i512mem, memopv8i64, VK8WM>,
               EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl,
                                VR512, v8i64, v2i64, bc_v2i64, VK8WM>,
               EVEX_V512, EVEX_CD8<64, CD8VQ>, VEX_W;

// Arithmetic right shift.
defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
                                VR512, v16i32, i512mem, memopv16i32, VK16WM>,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra,
                                VR512, v16i32, v4i32, bc_v4i32, VK16WM>,
               EVEX_V512, EVEX_CD8<32, CD8VQ>;

defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
                                VR512, v8i64, i512mem, memopv8i64, VK8WM>,
               EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra,
                                VR512, v8i64, v2i64, bc_v2i64, VK8WM>,
               EVEX_V512, EVEX_CD8<64, CD8VQ>, VEX_W;
3185
//===-------------------------------------------------------------------===//
// Variable Bit Shifts
//===-------------------------------------------------------------------===//
// Shift where both the value and the count are vectors of type vt: rr takes
// the count in a register, rm loads it through mem_frag.
//   opc / OpcodeStr / OpNode - opcode byte, mnemonic, SelectionDAG node.
//   RC / vt                  - vector register class and value type.
//   x86memop / mem_frag      - memory operand and load fragment for rm.
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            RegisterClass RC, ValueType vt,
                            X86MemOperand x86memop, PatFrag mem_frag> {
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
                    OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set RC:$dst, (vt (OpNode RC:$src1, (vt RC:$src2))))]>,
           EVEX_4V;

  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                    (ins RC:$src1, x86memop:$src2),
                    OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set RC:$dst,
                          (vt (OpNode RC:$src1, (mem_frag addr:$src2))))]>,
           EVEX_4V;
}
3205
// 512-bit variable shifts matched from the generic shl / srl / sra nodes.
// The D and Q forms of each shift share an opcode; Q adds VEX_W.

// shl
defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32,
                                 i512mem, memopv16i32>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64,
                                 i512mem, memopv8i64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// srl
defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32,
                                 i512mem, memopv16i32>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64,
                                 i512mem, memopv8i64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// sra
defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32,
                                 i512mem, memopv16i32>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64,
                                 i512mem, memopv8i64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
3224
//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//
3228
// MOVDDUP: rr (register source) and rm (memory source) forms matching
// X86Movddup on value type VT.
//   OpcodeStr             - mnemonic.
//   RC / VT               - vector register class and value type.
//   x86memop / memop_frag - memory operand and load fragment for rm.
//
// MOVDDUP is encoded with the F2 (XD) mandatory prefix and opcode 0x12; the
// defs are therefore built on AVX512XDI rather than AVX512PDI, whose PD (66)
// prefix with opcode 0x12 is a different instruction's encoding.
// NOTE(review): this relies on AVX512XDI from the instruction-format file
// and swaps in whatever execution domain that class sets -- confirm both.
multiclass avx512_movddup<string OpcodeStr, RegisterClass RC, ValueType VT,
                        X86MemOperand x86memop, PatFrag memop_frag> {
def rr  : AVX512XDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                    !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                    [(set RC:$dst, (VT (X86Movddup RC:$src)))]>, EVEX;
def rm  : AVX512XDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                    !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                    [(set RC:$dst,
                      (VT (X86Movddup (memop_frag addr:$src))))]>, EVEX;
}
3239
// 512-bit vmovddup over v8f64.
defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem,
                                memopv8f64>,
                 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// A movddup whose input is a scalar f64 load placed into a vector is
// selected directly to the memory form.
def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))),
          (VMOVDDUPZrm addr:$src)>;
3244
//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//
// Single-source replicate instruction (XS-prefixed) with a register form
// (rr) and a memory form (rm, explicitly flagged mayLoad).
//   op / OpcodeStr      - opcode byte and mnemonic.
//   OpNode              - SelectionDAG node being matched.
//   vt / RC             - value type and vector register class.
//   mem_frag / x86memop - load fragment and memory operand for rm.
multiclass avx512_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
                                ValueType vt, RegisterClass RC,
                                PatFrag mem_frag, X86MemOperand x86memop> {
  def rr : AVX512XSI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                     OpcodeStr # " \t{$src, $dst|$dst, $src}",
                     [(set RC:$dst, (vt (OpNode RC:$src)))]>,
           EVEX;

  let mayLoad = 1 in {
    def rm : AVX512XSI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                       OpcodeStr # " \t{$src, $dst|$dst, $src}",
                       [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>,
             EVEX;
  }
}
3259
// 512-bit vmovshdup / vmovsldup over v16f32.
defm VMOVSHDUPZ : avx512_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
                                       v16f32, VR512, memopv16f32, f512mem>,
                  EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VMOVSLDUPZ : avx512_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
                                       v16f32, VR512, memopv16f32, f512mem>,
                  EVEX_V512, EVEX_CD8<32, CD8VF>;

// The v16i32 flavours of the same nodes reuse the FP instructions, for both
// register and memory sources.
def : Pat<(v16i32 (X86Movshdup VR512:$src)),
          (VMOVSHDUPZrr VR512:$src)>;
def : Pat<(v16i32 (X86Movshdup (memopv16i32 addr:$src))),
          (VMOVSHDUPZrm addr:$src)>;

def : Pat<(v16i32 (X86Movsldup VR512:$src)),
          (VMOVSLDUPZrr VR512:$src)>;
def : Pat<(v16i32 (X86Movsldup (memopv16i32 addr:$src))),
          (VMOVSLDUPZrm addr:$src)>;
3273
//===----------------------------------------------------------------------===//
// Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//
// EVEX-encoded vmovlhps on 128-bit registers (VR128X), matching the v4f32
// X86Movlhps node.
def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg,
                    (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2),
                    "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128X:$dst,
                          (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))],
                    IIC_SSE_MOV_LH>,
                  EVEX_4V;

// EVEX-encoded vmovhlps on 128-bit registers (VR128X), matching the v4f32
// X86Movhlps node.
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg,
                    (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2),
                    "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128X:$dst,
                          (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))],
                    IIC_SSE_MOV_LH>,
                  EVEX_4V;
3287
// Integer-typed variants of the move-low/high nodes are selected to the FP
// instructions defined above when AVX-512 is available.
let Predicates = [HasAVX512] in {
  // X86Movlhps on v4i32 and v2i64.
  def : Pat<(v4i32 (X86Movlhps VR128X:$src1, VR128X:$src2)),
            (VMOVLHPSZrr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v2i64 (X86Movlhps VR128X:$src1, VR128X:$src2)),
            (VMOVLHPSZrr (v2i64 VR128X:$src1), VR128X:$src2)>;

  // X86Movhlps on v4i32.
  def : Pat<(v4i32 (X86Movhlps VR128X:$src1, VR128X:$src2)),
            (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003299
//===----------------------------------------------------------------------===//
// FMA - Fused Multiply Operations
//
// Packed FMA with $src1 tied to $dst. The third source is a register (r,
// generated through AVX512_masking_3src), a full-vector memory operand (m)
// or a broadcast scalar memory operand (mb, EVEX_B).
//   opc / OpcodeStr              - opcode byte and mnemonic.
//   RC / OpVT                    - vector register class and value type.
//   x86memop / mem_frag          - full-vector memory operand and load
//                                  fragment.
//   x86scalar_mop / scalar_mfrag - scalar memory operand and load fragment
//                                  for the broadcast form.
//   BrdcstStr                    - broadcast decoration appended to the
//                                  memory operand in the asm string.
//   OpNode                       - three-operand SelectionDAG node.
//   KRC                          - mask register class handed to
//                                  AVX512_masking_3src.
let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr,
                           RegisterClass RC, X86MemOperand x86memop,
                           PatFrag mem_frag, X86MemOperand x86scalar_mop,
                           PatFrag scalar_mfrag, string BrdcstStr,
                           SDNode OpNode, ValueType OpVT,
                           RegisterClass KRC> {
  // Register form(s).
  defm r: AVX512_masking_3src<opc, MRMSrcReg, (outs RC:$dst),
                              (ins RC:$src2, RC:$src3),
                              OpcodeStr, "$src3, $src2", "$src2, $src3",
                              (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)),
                              OpVT, RC, KRC>,
          AVX512FMA3Base;

  // Full-vector memory form.
  let mayLoad = 1 in {
    def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
                      (ins RC:$src1, RC:$src2, x86memop:$src3),
                      OpcodeStr # " \t{$src3, $src2, $dst|$dst, $src2, $src3}",
                      [(set RC:$dst,
                            (OpVT (OpNode RC:$src1, RC:$src2,
                                          (mem_frag addr:$src3))))]>;
  }

  // Broadcast memory form.
  def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
                     (ins RC:$src1, RC:$src2, x86scalar_mop:$src3),
                     OpcodeStr # " \t{${src3}" # BrdcstStr #
                       ", $src2, $dst|$dst, $src2, ${src3}" # BrdcstStr # "}",
                     [(set RC:$dst,
                           (OpNode RC:$src1, RC:$src2,
                                   (OpVT (X86VBroadcast
                                           (scalar_mfrag addr:$src3)))))]>,
          EVEX_B;
}
} // Constraints = "$src1 = $dst"
3329
// Single-precision 213-form FMA family on 512-bit vectors (v16f32, VK16WM
// mask class, "{1to16}" broadcast decoration).
let ExeDomain = SSEPackedSingle in {
  defm VFMADD213PSZ    : avx512_fma3p_rm<0xA8, "vfmadd213ps", VR512, f512mem,
                                         memopv16f32, f32mem, loadf32,
                                         "{1to16}", X86Fmadd, v16f32, VK16WM>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFMSUB213PSZ    : avx512_fma3p_rm<0xAA, "vfmsub213ps", VR512, f512mem,
                                         memopv16f32, f32mem, loadf32,
                                         "{1to16}", X86Fmsub, v16f32, VK16WM>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", VR512,
                                         f512mem, memopv16f32, f32mem, loadf32,
                                         "{1to16}", X86Fmaddsub, v16f32,
                                         VK16WM>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", VR512,
                                         f512mem, memopv16f32, f32mem, loadf32,
                                         "{1to16}", X86Fmsubadd, v16f32,
                                         VK16WM>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFNMADD213PSZ   : avx512_fma3p_rm<0xAC, "vfnmadd213ps", VR512, f512mem,
                                         memopv16f32, f32mem, loadf32,
                                         "{1to16}", X86Fnmadd, v16f32, VK16WM>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFNMSUB213PSZ   : avx512_fma3p_rm<0xAE, "vfnmsub213ps", VR512, f512mem,
                                         memopv16f32, f32mem, loadf32,
                                         "{1to16}", X86Fnmsub, v16f32, VK16WM>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
}
// Double-precision 213-form FMA family on 512-bit vectors (v8f64, VK8WM mask
// class, "{1to8}" broadcast decoration, VEX_W set).
let ExeDomain = SSEPackedDouble in {
  defm VFMADD213PDZ    : avx512_fma3p_rm<0xA8, "vfmadd213pd", VR512, f512mem,
                                         memopv8f64, f64mem, loadf64,
                                         "{1to8}", X86Fmadd, v8f64, VK8WM>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFMSUB213PDZ    : avx512_fma3p_rm<0xAA, "vfmsub213pd", VR512, f512mem,
                                         memopv8f64, f64mem, loadf64,
                                         "{1to8}", X86Fmsub, v8f64, VK8WM>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", VR512,
                                         f512mem, memopv8f64, f64mem, loadf64,
                                         "{1to8}", X86Fmaddsub, v8f64, VK8WM>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", VR512,
                                         f512mem, memopv8f64, f64mem, loadf64,
                                         "{1to8}", X86Fmsubadd, v8f64, VK8WM>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFNMADD213PDZ   : avx512_fma3p_rm<0xAC, "vfnmadd213pd", VR512, f512mem,
                                         memopv8f64, f64mem, loadf64,
                                         "{1to8}", X86Fnmadd, v8f64, VK8WM>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFNMSUB213PDZ   : avx512_fma3p_rm<0xAE, "vfnmsub213pd", VR512, f512mem,
                                         memopv8f64, f64mem, loadf64,
                                         "{1to8}", X86Fnmsub, v8f64, VK8WM>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
3382
// Memory-operand forms for the packed FMA instructions instantiated with
// "132" mnemonics.  $src1 is tied to $dst for both definitions.
//   m  - $src2 is loaded through mem_frag from an x86memop operand.
//   mb - $src2 is loaded through scalar_mfrag from an x86scalar_mop operand
//        and wrapped in X86VBroadcast; BrdcstStr is appended to the operand
//        in the assembly string and EVEX_B is applied.
let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr,
                             RegisterClass RC, X86MemOperand x86memop,
                             PatFrag mem_frag, X86MemOperand x86scalar_mop,
                             PatFrag scalar_mfrag, string BrdcstStr,
                             SDNode OpNode, ValueType OpVT> {
  let mayLoad = 1 in
  def m : AVX512FMA3<
      opc, MRMSrcMem, (outs RC:$dst),
      (ins RC:$src1, RC:$src3, x86memop:$src2),
      !strconcat(OpcodeStr, " \t{$src2, $src3, $dst|$dst, $src3, $src2}"),
      [(set RC:$dst,
            (OpVT (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3)))]>;

  def mb : AVX512FMA3<
      opc, MRMSrcMem, (outs RC:$dst),
      (ins RC:$src1, RC:$src3, x86scalar_mop:$src2),
      !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                 ", $src3, $dst|$dst, $src3, ${src2}", BrdcstStr, "}"),
      [(set RC:$dst,
            (OpNode RC:$src1,
                    (OpVT (X86VBroadcast (scalar_mfrag addr:$src2))),
                    RC:$src3))]>,
      EVEX_B;
}
} // Constraints = "$src1 = $dst"
3401
3402
// Packed single-precision FMA definitions with "132" mnemonics (memory forms
// from avx512_fma3p_m132).  Only opcode, mnemonic and selection node vary.
let ExeDomain = SSEPackedSingle in {
  defm VFMADD132PSZ :
      avx512_fma3p_m132<0x98, "vfmadd132ps", VR512, f512mem, memopv16f32,
                        f32mem, loadf32, "{1to16}", X86Fmadd, v16f32>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;

  defm VFMSUB132PSZ :
      avx512_fma3p_m132<0x9A, "vfmsub132ps", VR512, f512mem, memopv16f32,
                        f32mem, loadf32, "{1to16}", X86Fmsub, v16f32>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;

  defm VFMADDSUB132PSZ :
      avx512_fma3p_m132<0x96, "vfmaddsub132ps", VR512, f512mem, memopv16f32,
                        f32mem, loadf32, "{1to16}", X86Fmaddsub, v16f32>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;

  defm VFMSUBADD132PSZ :
      avx512_fma3p_m132<0x97, "vfmsubadd132ps", VR512, f512mem, memopv16f32,
                        f32mem, loadf32, "{1to16}", X86Fmsubadd, v16f32>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;

  defm VFNMADD132PSZ :
      avx512_fma3p_m132<0x9C, "vfnmadd132ps", VR512, f512mem, memopv16f32,
                        f32mem, loadf32, "{1to16}", X86Fnmadd, v16f32>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;

  defm VFNMSUB132PSZ :
      avx512_fma3p_m132<0x9E, "vfnmsub132ps", VR512, f512mem, memopv16f32,
                        f32mem, loadf32, "{1to16}", X86Fnmsub, v16f32>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;
}
// Packed double-precision FMA definitions with "132" mnemonics (memory forms
// from avx512_fma3p_m132).  Only opcode, mnemonic and selection node vary.
let ExeDomain = SSEPackedDouble in {
  defm VFMADD132PDZ :
      avx512_fma3p_m132<0x98, "vfmadd132pd", VR512, f512mem, memopv8f64,
                        f64mem, loadf64, "{1to8}", X86Fmadd, v8f64>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  defm VFMSUB132PDZ :
      avx512_fma3p_m132<0x9A, "vfmsub132pd", VR512, f512mem, memopv8f64,
                        f64mem, loadf64, "{1to8}", X86Fmsub, v8f64>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  defm VFMADDSUB132PDZ :
      avx512_fma3p_m132<0x96, "vfmaddsub132pd", VR512, f512mem, memopv8f64,
                        f64mem, loadf64, "{1to8}", X86Fmaddsub, v8f64>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  defm VFMSUBADD132PDZ :
      avx512_fma3p_m132<0x97, "vfmsubadd132pd", VR512, f512mem, memopv8f64,
                        f64mem, loadf64, "{1to8}", X86Fmsubadd, v8f64>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  defm VFNMADD132PDZ :
      avx512_fma3p_m132<0x9C, "vfnmadd132pd", VR512, f512mem, memopv8f64,
                        f64mem, loadf64, "{1to8}", X86Fnmadd, v8f64>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  defm VFNMSUB132PDZ :
      avx512_fma3p_m132<0x9E, "vfnmsub132pd", VR512, f512mem, memopv8f64,
                        f64mem, loadf64, "{1to8}", X86Fnmsub, v8f64>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
3455
// Scalar FMA
//
// avx512_fma3s_rm defines the register (r) and memory (m) forms of a scalar
// FMA instruction.  $src1 is tied to $dst.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic placed in front of the operand list.
//   OpNode    - selection node matched by both forms.
//   RC / OpVT - register class and value type of the scalar operands.
//   x86memop  - memory operand class used for $src3 in the memory form.
//   memop     - not referenced by this multiclass; kept so existing
//               instantiations keep their argument list.
//   mem_frag  - load fragment applied to addr:$src3 in the memory form.
let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                 RegisterClass RC, ValueType OpVT,
                 X86MemOperand x86memop, Operand memop,
                 PatFrag mem_frag> {
  let isCommutable = 1 in
  def r     : AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2, RC:$src3),
                   !strconcat(OpcodeStr,
                              " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
  // Use the caller-supplied operand class (f32mem / f64mem in the existing
  // instantiations) rather than a fixed f128mem, so the memory operand width
  // follows the scalar element type instead of always being 128 bits.
  let mayLoad = 1 in
  def m     : AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2, x86memop:$src3),
                   !strconcat(OpcodeStr,
                              " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (OpNode RC:$src2, RC:$src1,
                            (mem_frag addr:$src3))))]>;
}

} // Constraints = "$src1 = $dst"
3480
// Scalar FMA instantiations.  Each opcode is defined once for single
// precision (FR32X / f32) and once for double precision (FR64X / f64, with
// VEX_W).
defm VFMADDSSZ :
    avx512_fma3s_rm<0xA9, "vfmadd213ss", X86Fmadd, FR32X, f32, f32mem, ssmem,
                    loadf32>,
    EVEX_CD8<32, CD8VT1>;
defm VFMADDSDZ :
    avx512_fma3s_rm<0xA9, "vfmadd213sd", X86Fmadd, FR64X, f64, f64mem, sdmem,
                    loadf64>,
    VEX_W, EVEX_CD8<64, CD8VT1>;

defm VFMSUBSSZ :
    avx512_fma3s_rm<0xAB, "vfmsub213ss", X86Fmsub, FR32X, f32, f32mem, ssmem,
                    loadf32>,
    EVEX_CD8<32, CD8VT1>;
defm VFMSUBSDZ :
    avx512_fma3s_rm<0xAB, "vfmsub213sd", X86Fmsub, FR64X, f64, f64mem, sdmem,
                    loadf64>,
    VEX_W, EVEX_CD8<64, CD8VT1>;

defm VFNMADDSSZ :
    avx512_fma3s_rm<0xAD, "vfnmadd213ss", X86Fnmadd, FR32X, f32, f32mem, ssmem,
                    loadf32>,
    EVEX_CD8<32, CD8VT1>;
defm VFNMADDSDZ :
    avx512_fma3s_rm<0xAD, "vfnmadd213sd", X86Fnmadd, FR64X, f64, f64mem, sdmem,
                    loadf64>,
    VEX_W, EVEX_CD8<64, CD8VT1>;

defm VFNMSUBSSZ :
    avx512_fma3s_rm<0xAF, "vfnmsub213ss", X86Fnmsub, FR32X, f32, f32mem, ssmem,
                    loadf32>,
    EVEX_CD8<32, CD8VT1>;
defm VFNMSUBSDZ :
    avx512_fma3s_rm<0xAF, "vfnmsub213sd", X86Fnmsub, FR64X, f64, f64mem, sdmem,
                    loadf64>,
    VEX_W, EVEX_CD8<64, CD8VT1>;
3497
3498//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from signed integer to float/double
3500//===----------------------------------------------------------------------===//
3501
// Scalar integer-to-FP conversion, three-operand EVEX form.  Defines a
// register-source (rr) and a memory-source (rm) instruction; neither carries
// a selection pattern, so both are marked side-effect free explicitly and the
// memory form additionally sets mayLoad.
multiclass avx512_vcvtsi<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                         X86MemOperand x86memop, string asm> {
  let hasSideEffects = 0 in
  def rr : SI<opc, MRMSrcReg,
              (outs DstRC:$dst),
              (ins DstRC:$src1, SrcRC:$src),
              !strconcat(asm," \t{$src, $src1, $dst|$dst, $src1, $src}"),
              []>,
           EVEX_4V;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : SI<opc, MRMSrcMem,
              (outs DstRC:$dst),
              (ins DstRC:$src1, x86memop:$src),
              !strconcat(asm," \t{$src, $src1, $dst|$dst, $src1, $src}"),
              []>,
           EVEX_4V;
}
let Predicates = [HasAVX512] in {
// Signed integer -> scalar float/double instructions.
defm VCVTSI2SSZ :
    avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}">,
    XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ :
    avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}">,
    XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ :
    avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}">,
    XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ :
    avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}">,
    XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;

// sint_to_fp selection: the tied first source is an IMPLICIT_DEF of the
// result type.  Memory-source patterns first, then register-source ones.
def : Pat<
    (f32 (sint_to_fp (loadi32 addr:$src))),
    (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<
    (f32 (sint_to_fp (loadi64 addr:$src))),
    (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<
    (f64 (sint_to_fp (loadi32 addr:$src))),
    (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<
    (f64 (sint_to_fp (loadi64 addr:$src))),
    (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<
    (f32 (sint_to_fp GR32:$src)),
    (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<
    (f32 (sint_to_fp GR64:$src)),
    (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<
    (f64 (sint_to_fp GR32:$src)),
    (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<
    (f64 (sint_to_fp GR64:$src)),
    (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

// Unsigned integer -> scalar float/double instructions.
defm VCVTUSI2SSZ :
    avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}">,
    XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ :
    avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}">,
    XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SDZ :
    avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}">,
    XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ :
    avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}">,
    XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;

// uint_to_fp selection, same layout as the signed patterns.
def : Pat<
    (f32 (uint_to_fp (loadi32 addr:$src))),
    (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<
    (f32 (uint_to_fp (loadi64 addr:$src))),
    (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<
    (f64 (uint_to_fp (loadi32 addr:$src))),
    (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<
    (f64 (uint_to_fp (loadi64 addr:$src))),
    (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<
    (f32 (uint_to_fp GR32:$src)),
    (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<
    (f32 (uint_to_fp GR64:$src)),
    (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<
    (f64 (uint_to_fp GR32:$src)),
    (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<
    (f64 (uint_to_fp GR64:$src)),
    (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003570
3571//===----------------------------------------------------------------------===//
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003572// AVX-512 Scalar convert from float/double to integer
3573//===----------------------------------------------------------------------===//
// Scalar FP-to-integer conversion selected through an intrinsic.
//   rr - register source; matched by applying Int to SrcRC:$src.
//   rm - memory source (memop operand); defined without a pattern, so
//        mayLoad is set explicitly.
// mem_cpat is accepted but not referenced by either definition.
multiclass avx512_cvt_s_int<bits<8> opc, RegisterClass SrcRC,
                            RegisterClass DstRC, Intrinsic Int, Operand memop,
                            ComplexPattern mem_cpat, string asm> {
  let hasSideEffects = 0 in
  def rr : SI<opc, MRMSrcReg,
              (outs DstRC:$dst),
              (ins SrcRC:$src),
              !strconcat(asm," \t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (Int SrcRC:$src))]>,
           EVEX, VEX_LIG, Requires<[HasAVX512]>;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : SI<opc, MRMSrcMem,
              (outs DstRC:$dst),
              (ins memop:$src),
              !strconcat(asm," \t{$src, $dst|$dst, $src}"),
              []>,
           EVEX, VEX_LIG, Requires<[HasAVX512]>;
}
3588let Predicates = [HasAVX512] in {
// Convert float/double to signed/unsigned int 32/64
// Single-precision sources.
defm VCVTSS2SIZ :
    avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse_cvtss2si, ssmem,
                     sse_load_f32, "cvtss2si">,
    XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z :
    avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse_cvtss2si64, ssmem,
                     sse_load_f32, "cvtss2si">,
    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ :
    avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtss2usi, ssmem,
                     sse_load_f32, "cvtss2usi">,
    XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z :
    avx512_cvt_s_int<0x79, VR128X, GR64, int_x86_avx512_cvtss2usi64, ssmem,
                     sse_load_f32, "cvtss2usi">,
    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
// Double-precision sources.
defm VCVTSD2SIZ :
    avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si, sdmem,
                     sse_load_f64, "cvtsd2si">,
    XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z :
    avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse2_cvtsd2si64, sdmem,
                     sse_load_f64, "cvtsd2si">,
    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ :
    avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtsd2usi, sdmem,
                     sse_load_f64, "cvtsd2usi">,
    XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z :
    avx512_cvt_s_int<0x79, VR128X, GR64, int_x86_avx512_cvtsd2usi64, sdmem,
                     sse_load_f64, "cvtsd2usi">,
    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
3616
// Codegen-only intrinsic forms of the scalar integer-to-FP conversions,
// built on sse12_cvt_sint_3addr with EVEX_4V encoding.
//
// The signed conversions (cvtsi2ss / cvtsi2sd) use opcode 0x2A.  The unsigned
// conversions (cvtusi2ss / cvtusi2sd) use opcode 0x7B, matching the
// VCVTUSI2SSZ / VCVTUSI2SDZ definitions earlier in this file; encoding them
// as 0x2A would emit the signed conversion for the unsigned intrinsics.
//
// NOTE(review): unlike the non-intrinsic VCVT(U)SI2S*Z definitions, these
// carry no EVEX_CD8 mixin - confirm how disp8 is encoded for the memory forms.
let isCodeGenOnly = 1 in {
  defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
            int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
            SSE_CVT_Scalar, 0>, XS, EVEX_4V;
  defm Int_VCVTSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
            int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}",
            SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
  defm Int_VCVTSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
            int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}",
            SSE_CVT_Scalar, 0>, XD, EVEX_4V;
  defm Int_VCVTSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
            int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
            SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;

  defm Int_VCVTUSI2SSZ : sse12_cvt_sint_3addr<0x7B, GR32, VR128X,
            int_x86_avx512_cvtusi2ss, i32mem, loadi32, "cvtusi2ss{l}",
            SSE_CVT_Scalar, 0>, XS, EVEX_4V;
  defm Int_VCVTUSI2SS64Z : sse12_cvt_sint_3addr<0x7B, GR64, VR128X,
            int_x86_avx512_cvtusi642ss, i64mem, loadi64, "cvtusi2ss{q}",
            SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
  defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x7B, GR32, VR128X,
            int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}",
            SSE_CVT_Scalar, 0>, XD, EVEX_4V;
  defm Int_VCVTUSI2SD64Z : sse12_cvt_sint_3addr<0x7B, GR64, VR128X,
            int_x86_avx512_cvtusi642sd, i64mem, loadi64, "cvtusi2sd{q}",
            SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
} // isCodeGenOnly = 1
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003644
// Convert float/double to signed/unsigned int 32/64 with truncation
// (codegen-only intrinsic forms).
let isCodeGenOnly = 1 in {
  // Signed results.
  defm Int_VCVTTSS2SIZ :
      avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse_cvttss2si, ssmem,
                       sse_load_f32, "cvttss2si">,
      XS, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSS2SI64Z :
      avx512_cvt_s_int<0x2C, VR128X, GR64, int_x86_sse_cvttss2si64, ssmem,
                       sse_load_f32, "cvttss2si">,
      XS, VEX_W, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSD2SIZ :
      avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse2_cvttsd2si, sdmem,
                       sse_load_f64, "cvttsd2si">,
      XD, EVEX_CD8<64, CD8VT1>;
  defm Int_VCVTTSD2SI64Z :
      avx512_cvt_s_int<0x2C, VR128X, GR64, int_x86_sse2_cvttsd2si64, sdmem,
                       sse_load_f64, "cvttsd2si">,
      XD, VEX_W, EVEX_CD8<64, CD8VT1>;
  // Unsigned results.
  defm Int_VCVTTSS2USIZ :
      avx512_cvt_s_int<0x78, VR128X, GR32, int_x86_avx512_cvttss2usi, ssmem,
                       sse_load_f32, "cvttss2usi">,
      XS, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSS2USI64Z :
      avx512_cvt_s_int<0x78, VR128X, GR64, int_x86_avx512_cvttss2usi64, ssmem,
                       sse_load_f32, "cvttss2usi">,
      XS, VEX_W, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSD2USIZ :
      avx512_cvt_s_int<0x78, VR128X, GR32, int_x86_avx512_cvttsd2usi, sdmem,
                       sse_load_f64, "cvttsd2usi">,
      XD, EVEX_CD8<64, CD8VT1>;
  defm Int_VCVTTSD2USI64Z :
      avx512_cvt_s_int<0x78, VR128X, GR64, int_x86_avx512_cvttsd2usi64, sdmem,
                       sse_load_f64, "cvttsd2usi">,
      XD, VEX_W, EVEX_CD8<64, CD8VT1>;
} // isCodeGenOnly = 1
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003677
// Scalar FP-to-integer conversion selected through a generic SDNode.
//   rr - applies OpNode to the register source.
//   rm - applies OpNode to the value loaded through ld_frag from addr:$src.
multiclass avx512_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                        SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
                        string asm> {
  def rr : SI<opc, MRMSrcReg,
              (outs DstRC:$dst),
              (ins SrcRC:$src),
              !strconcat(asm," \t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (OpNode SrcRC:$src))]>,
           EVEX;

  def rm : SI<opc, MRMSrcMem,
              (outs DstRC:$dst),
              (ins x86memop:$src),
              !strconcat(asm," \t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>,
           EVEX;
}
3688
// Truncating scalar FP -> integer conversions matched from fp_to_sint /
// fp_to_uint.  Single-precision sources first, then double-precision.
defm VCVTTSS2SIZ :
    avx512_cvt_s<0x2C, FR32X, GR32, fp_to_sint, f32mem, loadf32, "cvttss2si">,
    XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USIZ :
    avx512_cvt_s<0x78, FR32X, GR32, fp_to_uint, f32mem, loadf32, "cvttss2usi">,
    XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z :
    avx512_cvt_s<0x2C, FR32X, GR64, fp_to_sint, f32mem, loadf32, "cvttss2si">,
    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z :
    avx512_cvt_s<0x78, FR32X, GR64, fp_to_uint, f32mem, loadf32, "cvttss2usi">,
    XS, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VCVTTSD2SIZ :
    avx512_cvt_s<0x2C, FR64X, GR32, fp_to_sint, f64mem, loadf64, "cvttsd2si">,
    XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USIZ :
    avx512_cvt_s<0x78, FR64X, GR32, fp_to_uint, f64mem, loadf64, "cvttsd2usi">,
    XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z :
    avx512_cvt_s<0x2C, FR64X, GR64, fp_to_sint, f64mem, loadf64, "cvttsd2si">,
    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z :
    avx512_cvt_s<0x78, FR64X, GR64, fp_to_uint, f64mem, loadf64, "cvttsd2usi">,
    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskycf088092013-12-11 14:31:04 +00003713} // HasAVX512
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003714//===----------------------------------------------------------------------===//
// AVX-512 Convert from float to double and back
3716//===----------------------------------------------------------------------===//
// Scalar single <-> double conversions.  None of these definitions carries a
// selection pattern, so hasSideEffects (and mayLoad for the memory forms) is
// set explicitly; selection is done by separate Pat records.

// Convert scalar single to scalar double
let hasSideEffects = 0 in
def VCVTSS2SDZrr :
    AVX512XSI<0x5A, MRMSrcReg,
              (outs FR64X:$dst),
              (ins FR32X:$src1, FR32X:$src2),
              "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              []>,
    EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;

let hasSideEffects = 0, mayLoad = 1 in
def VCVTSS2SDZrm :
    AVX512XSI<0x5A, MRMSrcMem,
              (outs FR64X:$dst),
              (ins FR32X:$src1, f32mem:$src2),
              "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              []>,
    EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
    EVEX_CD8<32, CD8VT1>;

// Convert scalar double to scalar single
let hasSideEffects = 0 in
def VCVTSD2SSZrr :
    AVX512XDI<0x5A, MRMSrcReg,
              (outs FR32X:$dst),
              (ins FR64X:$src1, FR64X:$src2),
              "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              []>,
    EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>;

let hasSideEffects = 0, mayLoad = 1 in
def VCVTSD2SSZrm :
    AVX512XDI<0x5A, MRMSrcMem,
              (outs FR32X:$dst),
              (ins FR64X:$src1, f64mem:$src2),
              "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              []>,
    EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
    EVEX_CD8<64, CD8VT1>;
3741
// Selection patterns for scalar fextend / fround.

// Register fextend: the same register is passed as both sources.
def : Pat<
    (f64 (fextend FR32X:$src)),
    (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>,
    Requires<[HasAVX512]>;

// fextend of a loaded f32: the register source is an IMPLICIT_DEF.
def : Pat<
    (fextend (loadf32 addr:$src)),
    (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
    Requires<[HasAVX512]>;

// Extending load under OptForSize: use the memory form directly.
def : Pat<
    (extloadf32 addr:$src),
    (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
    Requires<[HasAVX512, OptForSize]>;

// Extending load under OptForSpeed: load with VMOVSSZrm, then convert from
// the register.
def : Pat<
    (extloadf32 addr:$src),
    (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
    Requires<[HasAVX512, OptForSpeed]>;

// Register fround: the same register is passed as both sources.
def : Pat<
    (f32 (fround FR64X:$src)),
    (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>,
    Requires<[HasAVX512]>;
3757
// Packed conversion with an additional rounding-control form.
//   rr  - register source, matched by OpNode on (InVT SrcRC:$src).
//   rrb - register source plus an AVX512RC:$rc operand; no pattern, tagged
//         EVEX_B and EVEX_RC.
//   rm  - memory source; mem_frag result is bitconverted to InVT before
//         OpNode is applied.
multiclass avx512_vcvt_fp_with_rc<bits<8> opc, string asm,
                                  RegisterClass SrcRC, RegisterClass DstRC,
                                  SDNode OpNode, PatFrag mem_frag,
                                  X86MemOperand x86memop, ValueType OpVT,
                                  ValueType InVT, Domain d> {
  let hasSideEffects = 0 in
  def rr : AVX512PI<opc, MRMSrcReg,
                    (outs DstRC:$dst),
                    (ins SrcRC:$src),
                    !strconcat(asm," \t{$src, $dst|$dst, $src}"),
                    [(set DstRC:$dst, (OpVT (OpNode (InVT SrcRC:$src))))],
                    d>,
           EVEX;

  let hasSideEffects = 0 in
  def rrb : AVX512PI<opc, MRMSrcReg,
                     (outs DstRC:$dst),
                     (ins SrcRC:$src, AVX512RC:$rc),
                     !strconcat(asm," \t{$rc, $src, $dst|$dst, $src, $rc}"),
                     [],
                     d>,
            EVEX, EVEX_B, EVEX_RC;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : AVX512PI<opc, MRMSrcMem,
                    (outs DstRC:$dst),
                    (ins x86memop:$src),
                    !strconcat(asm," \t{$src, $dst|$dst, $src}"),
                    [(set DstRC:$dst,
                          (OpVT (OpNode (InVT (bitconvert
                                                (mem_frag addr:$src))))))],
                    d>,
           EVEX;
}
3777
// Packed conversion without a rounding-control form.
//   rr - register source, matched by OpNode on (InVT SrcRC:$src).
//   rm - memory source; mem_frag result is bitconverted to InVT before
//        OpNode is applied.
multiclass avx512_vcvt_fp<bits<8> opc, string asm, RegisterClass SrcRC,
                          RegisterClass DstRC, SDNode OpNode,
                          PatFrag mem_frag, X86MemOperand x86memop,
                          ValueType OpVT, ValueType InVT, Domain d> {
  let hasSideEffects = 0 in
  def rr : AVX512PI<opc, MRMSrcReg,
                    (outs DstRC:$dst),
                    (ins SrcRC:$src),
                    !strconcat(asm," \t{$src, $dst|$dst, $src}"),
                    [(set DstRC:$dst, (OpVT (OpNode (InVT SrcRC:$src))))],
                    d>,
           EVEX;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : AVX512PI<opc, MRMSrcMem,
                    (outs DstRC:$dst),
                    (ins x86memop:$src),
                    !strconcat(asm," \t{$src, $dst|$dst, $src}"),
                    [(set DstRC:$dst,
                          (OpVT (OpNode (InVT (bitconvert
                                                (mem_frag addr:$src))))))],
                    d>,
           EVEX;
}
3794
// Packed double -> single (fround), with a rounding-control form.
defm VCVTPD2PSZ :
    avx512_vcvt_fp_with_rc<0x5A, "vcvtpd2ps", VR512, VR256X, fround,
                           memopv8f64, f512mem, v8f32, v8f64,
                           SSEPackedSingle>,
    EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;

// Packed single -> double (fextend).
defm VCVTPS2PDZ :
    avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend,
                   memopv4f64, f256mem, v8f64, v8f32,
                   SSEPackedDouble>,
    EVEX_V512, PS, EVEX_CD8<32, CD8VH>;

// An extending v8f32 load selects the memory form directly.
def : Pat<
    (v8f64 (extloadv8f32 addr:$src)),
    (VCVTPS2PDZrm addr:$src)>;

// cvtpd2ps intrinsic with zero pass-through and all-ones mask: the
// FROUND_CURRENT case selects rr, any other immediate selects rrb.
def : Pat<
    (v8f32 (int_x86_avx512_mask_cvtpd2ps_512
               (v8f64 VR512:$src), (bc_v8f32(v8i32 immAllZerosV)), (i8 -1),
               (i32 FROUND_CURRENT))),
    (VCVTPD2PSZrr VR512:$src)>;

def : Pat<
    (v8f32 (int_x86_avx512_mask_cvtpd2ps_512
               (v8f64 VR512:$src), (bc_v8f32(v8i32 immAllZerosV)), (i8 -1),
               imm:$rc)),
    (VCVTPD2PSZrrb VR512:$src, imm:$rc)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003814
3815//===----------------------------------------------------------------------===//
// AVX-512 Vector convert from signed integer to float/double
3817//===----------------------------------------------------------------------===//
3818
// Packed signed integer -> FP.
defm VCVTDQ2PSZ :
    avx512_vcvt_fp_with_rc<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp,
                           memopv8i64, i512mem, v16f32, v16i32,
                           SSEPackedSingle>,
    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

defm VCVTDQ2PDZ :
    avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp,
                   memopv4i64, i256mem, v8f64, v8i32,
                   SSEPackedDouble>,
    EVEX_V512, XS, EVEX_CD8<32, CD8VH>;

// Packed FP -> signed integer, truncating.
defm VCVTTPS2DQZ :
    avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint,
                   memopv16f32, f512mem, v16i32, v16f32,
                   SSEPackedSingle>,
    EVEX_V512, XS, EVEX_CD8<32, CD8VF>;

defm VCVTTPD2DQZ :
    avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint,
                   memopv8f64, f512mem, v8i32, v8f64,
                   SSEPackedDouble>,
    EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Packed FP -> unsigned integer, truncating.
defm VCVTTPS2UDQZ :
    avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint,
                   memopv16f32, f512mem, v16i32, v16f32,
                   SSEPackedSingle>,
    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

// cvttps2udq (src, 0, mask-all-ones, sae-current)
def : Pat<
    (v16i32 (int_x86_avx512_mask_cvttps2udq_512
                (v16f32 VR512:$src), (v16i32 immAllZerosV), (i16 -1),
                FROUND_CURRENT)),
    (VCVTTPS2UDQZrr VR512:$src)>;

defm VCVTTPD2UDQZ :
    avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint,
                   memopv8f64, f512mem, v8i32, v8f64,
                   SSEPackedDouble>,
    EVEX_V512, PS, VEX_W, EVEX_CD8<64, CD8VF>;

// cvttpd2udq (src, 0, mask-all-ones, sae-current)
def : Pat<
    (v8i32 (int_x86_avx512_mask_cvttpd2udq_512
               (v8f64 VR512:$src), (v8i32 immAllZerosV), (i8 -1),
               FROUND_CURRENT)),
    (VCVTTPD2UDQZrr VR512:$src)>;

// Packed unsigned integer -> FP.
defm VCVTUDQ2PDZ :
    avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp,
                   memopv4i64, f256mem, v8f64, v8i32,
                   SSEPackedDouble>,
    EVEX_V512, XS, EVEX_CD8<32, CD8VH>;

defm VCVTUDQ2PSZ :
    avx512_vcvt_fp_with_rc<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp,
                           memopv16i32, f512mem, v16f32, v16i32,
                           SSEPackedSingle>,
    EVEX_V512, XD, EVEX_CD8<32, CD8VF>;
3868
// fp_to_uint / uint_to_fp on 128- and 256-bit vectors.  Each pattern places
// the narrow source into a 512-bit register with SUBREG_TO_REG, runs the
// 512-bit register-register instruction, and extracts the low subregister of
// the result.
def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
          (EXTRACT_SUBREG
            (v16i32 (VCVTTPS2UDQZrr
              (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))),
            sub_ymm)>;

def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
          (EXTRACT_SUBREG
            (v16i32 (VCVTTPS2UDQZrr
              (v16f32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))),
            sub_xmm)>;

def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
          (EXTRACT_SUBREG
            (v16f32 (VCVTUDQ2PSZrr
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))),
            sub_ymm)>;

def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG
            (v16f32 (VCVTUDQ2PSZrr
              (v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))),
            sub_xmm)>;

// v4i32 -> v4f64: the source is widened only to v8i32 (the pd instruction
// takes a 256-bit input) and the result is taken from the low ymm half.
def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG
            (v8f64 (VCVTUDQ2PDZrr
              (v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))),
            sub_ymm)>;
3888
// Masked int -> fp conversion intrinsics with a zero pass-through vector and
// an all-ones mask.  The ps intrinsics carry a rounding-control immediate and
// select the rrb form; the pd intrinsics have no such operand and select rr.
def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512
                     (v16i32 VR512:$src),
                     (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)),
          (VCVTDQ2PSZrrb VR512:$src, imm:$rc)>;
def : Pat<(v8f64 (int_x86_avx512_mask_cvtdq2pd_512
                    (v8i32 VR256X:$src),
                    (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
          (VCVTDQ2PDZrr VR256X:$src)>;
def : Pat<(v16f32 (int_x86_avx512_mask_cvtudq2ps_512
                     (v16i32 VR512:$src),
                     (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)),
          (VCVTUDQ2PSZrrb VR512:$src, imm:$rc)>;
def : Pat<(v8f64 (int_x86_avx512_mask_cvtudq2pd_512
                    (v8i32 VR256X:$src),
                    (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
          (VCVTUDQ2PDZrr VR256X:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003901
// avx512_vcvt_fp2int - pattern-less packed conversion instructions.
//   rr  : register source.
//   rrb : register source plus an AVX512RC:$rc operand (EVEX_B, EVEX_RC).
//   rm  : memory source (x86memop).
// All three are marked hasSideEffects = 0; rm is additionally mayLoad.
// Note: mem_frag is accepted but not referenced by any definition here.
multiclass avx512_vcvt_fp2int<bits<8> opc, string asm, RegisterClass SrcRC,
                              RegisterClass DstRC, PatFrag mem_frag,
                              X86MemOperand x86memop, Domain d> {
  let hasSideEffects = 0 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
                    !strconcat(asm," \t{$src, $dst|$dst, $src}"),
                    [], d>, EVEX;

  let hasSideEffects = 0 in
  def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst),
                     (ins SrcRC:$src, AVX512RC:$rc),
                     !strconcat(asm," \t{$rc, $src, $dst|$dst, $src, $rc}"),
                     [], d>, EVEX, EVEX_B, EVEX_RC;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
                    !strconcat(asm," \t{$src, $dst|$dst, $src}"),
                    [], d>, EVEX;
}
3918
// Packed fp -> signed dword conversions built from avx512_vcvt_fp2int.
defm VCVTPS2DQZ : avx512_vcvt_fp2int<0x5B, "vcvtps2dq", VR512, VR512,
                                     memopv16f32, f512mem, SSEPackedSingle>,
                  PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VCVTPD2DQZ : avx512_vcvt_fp2int<0xE6, "vcvtpd2dq", VR512, VR256X,
                                     memopv8f64, f512mem, SSEPackedDouble>,
                  XD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// The masked intrinsics (zero pass-through, all-ones mask) select the rrb
// form, forwarding the rounding-control immediate.
def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2dq_512
                      (v16f32 VR512:$src),
                      (v16i32 immAllZerosV), (i16 -1), imm:$rc)),
           (VCVTPS2DQZrrb VR512:$src, imm:$rc)>;

def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2dq_512
                     (v8f64 VR512:$src),
                     (v8i32 immAllZerosV), (i8 -1), imm:$rc)),
           (VCVTPD2DQZrrb VR512:$src, imm:$rc)>;
3933
// Packed fp -> unsigned dword conversions built from avx512_vcvt_fp2int.
defm VCVTPS2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtps2udq", VR512, VR512,
                                      memopv16f32, f512mem, SSEPackedSingle>,
                   PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VCVTPD2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtpd2udq", VR512, VR256X,
                                      memopv8f64, f512mem, SSEPackedDouble>,
                   VEX_W, PS, EVEX_V512, EVEX_CD8<64, CD8VF>;

// The masked intrinsics (zero pass-through, all-ones mask) select the rrb
// form, forwarding the rounding-control immediate.
def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2udq_512
                      (v16f32 VR512:$src),
                      (v16i32 immAllZerosV), (i16 -1), imm:$rc)),
           (VCVTPS2UDQZrrb VR512:$src, imm:$rc)>;

def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2udq_512
                     (v8f64 VR512:$src),
                     (v8i32 immAllZerosV), (i8 -1), imm:$rc)),
           (VCVTPD2UDQZrrb VR512:$src, imm:$rc)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003948
// Fold a 512-bit load into the pd<->ps conversion when AVX-512 is available.
def : Pat<(v8f32 (fround (loadv8f64 addr:$src))),
          (VCVTPD2PSZrm addr:$src)>, Requires<[HasAVX512]>;
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
          (VCVTPS2PDZrm addr:$src)>, Requires<[HasAVX512]>;
3955
Elena Demikhovskydd0794e2013-10-24 07:16:35 +00003956//===----------------------------------------------------------------------===//
3957// Half precision conversion instructions
3958//===----------------------------------------------------------------------===//
// avx512_cvtph2ps - pattern-less vcvtph2ps definitions.
//   rr : register source (srcRC) -> destRC.
//   rm : memory source (x86memop) -> destRC.
// Both forms are marked hasSideEffects = 0.  Neither has a selection
// pattern, so the flag cannot be inferred; previously only rm carried it,
// which left the register form with unmodeled side effects while its memory
// counterpart had none.
multiclass avx512_cvtph2ps<RegisterClass destRC, RegisterClass srcRC,
                           X86MemOperand x86memop> {
  let hasSideEffects = 0 in {
    def rr : AVX5128I<0x13, MRMSrcReg, (outs destRC:$dst), (ins srcRC:$src),
                      "vcvtph2ps\t{$src, $dst|$dst, $src}",
                      []>, EVEX;
    let mayLoad = 1 in
    def rm : AVX5128I<0x13, MRMSrcMem, (outs destRC:$dst),
                      (ins x86memop:$src),
                      "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, EVEX;
  } // hasSideEffects = 0
}
3968
// avx512_cvtps2ph - pattern-less vcvtps2ph definitions.
//   rr : srcRC + 8-bit immediate -> destRC.
//   mr : srcRC + 8-bit immediate -> memory (x86memop).
// Both forms are marked hasSideEffects = 0.  Neither has a selection
// pattern, so the flag cannot be inferred; previously only mr carried it,
// which left the register form with unmodeled side effects while its store
// counterpart had none.
multiclass avx512_cvtps2ph<RegisterClass destRC, RegisterClass srcRC,
                           X86MemOperand x86memop> {
  let hasSideEffects = 0 in {
    def rr : AVX512AIi8<0x1D, MRMDestReg, (outs destRC:$dst),
                        (ins srcRC:$src1, i32i8imm:$src2),
                        "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        []>, EVEX;
    let mayStore = 1 in
    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
                        (ins x86memop:$dst, srcRC:$src1, i32i8imm:$src2),
                        "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        []>, EVEX;
  } // hasSideEffects = 0
}
3980
// 512-bit half-precision conversions: the half-precision side lives in a
// 256-bit register / f256mem operand, the single-precision side in VR512.
defm VCVTPH2PSZ : avx512_cvtph2ps<VR512, VR256X, f256mem>,
                  EVEX_V512, EVEX_CD8<32, CD8VH>;
defm VCVTPS2PHZ : avx512_cvtps2ph<VR256X, VR512, f256mem>,
                  EVEX_V512, EVEX_CD8<32, CD8VH>;

// Masked intrinsics with a zero pass-through and an all-ones mask map onto
// the register forms.
def : Pat<(v16i16 (int_x86_avx512_mask_vcvtps2ph_512
                     (v16f32 VR512:$src), imm:$rc,
                     (bc_v16i16(v8i32 immAllZerosV)), (i16 -1))),
          (VCVTPS2PHZrr VR512:$src, imm:$rc)>;

def : Pat<(v16f32 (int_x86_avx512_mask_vcvtph2ps_512
                     (v16i16 VR256X:$src),
                     (bc_v16f32(v16i32 immAllZerosV)), (i16 -1),
                     (i32 FROUND_CURRENT))),
          (VCVTPH2PSZrr VR256X:$src)>;
3993
// EVEX-encoded scalar compares.  Every definition below defines EFLAGS and
// is predicated on HasAVX512.

// Ordinary FR32X / FR64X forms, selected through X86cmp.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
                                 "ucomiss">,
                   PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
                                 "ucomisd">,
                   PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
}

// comiss / comisd: the selection pattern is explicitly cleared.
let Defs = [EFLAGS], Predicates = [HasAVX512], Pattern = []<dag> in {
  defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load,
                                "comiss">,
                  PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load,
                                "comisd">,
                  PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
}

// Codegen-only VR128X variants selected through X86ucomi / X86comi.
let Defs = [EFLAGS], Predicates = [HasAVX512], isCodeGenOnly = 1 in {
  defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem,
                                     load, "ucomiss">,
                       PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem,
                                     load, "ucomisd">,
                       PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

  defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem,
                                    load, "comiss">,
                      PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm Int_VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem,
                                    load, "comisd">,
                      PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
}
4025
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
/// Pattern-less three-operand scalar forms: rr takes two RC registers, rm
/// takes an RC register and an x86memop.  Both are hasSideEffects = 0 and
/// rm is additionally mayLoad.
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                         X86MemOperand x86memop> {
  let hasSideEffects = 0 in
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                    (ins RC:$src1, RC:$src2),
                    !strconcat(OpcodeStr,
                      " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, EVEX_4V;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                    (ins RC:$src1, x86memop:$src2),
                    !strconcat(OpcodeStr,
                      " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, EVEX_4V;
}
4042
// Scalar rcp14 / rsqrt14 instantiations; the sd variants add VEX_W and use
// 64-bit operands.
defm VRCP14SS   : avx512_fp14_s<0x4D, "vrcp14ss", FR32X, f32mem>,
                  EVEX_CD8<32, CD8VT1>;
defm VRCP14SD   : avx512_fp14_s<0x4D, "vrcp14sd", FR64X, f64mem>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;
defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", FR32X, f32mem>,
                  EVEX_CD8<32, CD8VT1>;
defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", FR64X, f64mem>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004051
// Scalar rcp14 / rsqrt14 intrinsics (zero pass-through, all-ones mask).
// The intrinsic operates on VR128X values while the instructions take
// FR32X / FR64X, so both inputs and the result go through COPY_TO_REGCLASS.
def : Pat <(v4f32 (int_x86_avx512_rcp14_ss
                     (v4f32 VR128X:$src1), (v4f32 VR128X:$src2),
                     (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
           (COPY_TO_REGCLASS
             (VRCP14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
                         (COPY_TO_REGCLASS VR128X:$src2, FR32X)),
             VR128X)>;

def : Pat <(v2f64 (int_x86_avx512_rcp14_sd
                     (v2f64 VR128X:$src1), (v2f64 VR128X:$src2),
                     (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
           (COPY_TO_REGCLASS
             (VRCP14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
                         (COPY_TO_REGCLASS VR128X:$src2, FR64X)),
             VR128X)>;

def : Pat <(v4f32 (int_x86_avx512_rsqrt14_ss
                     (v4f32 VR128X:$src1), (v4f32 VR128X:$src2),
                     (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
           (COPY_TO_REGCLASS
             (VRSQRT14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
                           (COPY_TO_REGCLASS VR128X:$src2, FR32X)),
             VR128X)>;

def : Pat <(v2f64 (int_x86_avx512_rsqrt14_sd
                     (v2f64 VR128X:$src1), (v2f64 VR128X:$src2),
                     (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
           (COPY_TO_REGCLASS
             (VRSQRT14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
                           (COPY_TO_REGCLASS VR128X:$src2, FR64X)),
             VR128X)>;
Elena Demikhovskya3a71402013-10-09 08:16:14 +00004071
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
/// Packed unary forms with selection patterns: r applies OpNode to a
/// register, m applies it to a value loaded through mem_frag.  OpVt is the
/// result type used in both patterns.
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         RegisterClass RC, X86MemOperand x86memop,
                         PatFrag mem_frag, ValueType OpVt> {
  def r : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                   !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                   [(set RC:$dst, (OpVt (OpNode RC:$src)))]>,
          EVEX;

  def m : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                   !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                   [(set RC:$dst, (OpVt (OpNode (mem_frag addr:$src))))]>,
          EVEX;
}
// 512-bit packed rsqrt14 / rcp14, selected from X86frsqrt / X86frcp.
defm VRSQRT14PSZ : avx512_fp14_p<0x4E, "vrsqrt14ps", X86frsqrt, VR512,
                                 f512mem, memopv16f32, v16f32>,
                   EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRSQRT14PDZ : avx512_fp14_p<0x4E, "vrsqrt14pd", X86frsqrt, VR512,
                                 f512mem, memopv8f64, v8f64>,
                   VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
defm VRCP14PSZ   : avx512_fp14_p<0x4C, "vrcp14ps", X86frcp, VR512,
                                 f512mem, memopv16f32, v16f32>,
                   EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRCP14PDZ   : avx512_fp14_p<0x4C, "vrcp14pd", X86frcp, VR512,
                                 f512mem, memopv8f64, v8f64>,
                   VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
4094
// Packed rsqrt14 / rcp14 intrinsics with a zero pass-through and an all-ones
// mask map onto the register forms.
def : Pat <(v16f32 (int_x86_avx512_rsqrt14_ps_512
                      (v16f32 VR512:$src),
                      (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
           (VRSQRT14PSZr VR512:$src)>;
def : Pat <(v8f64 (int_x86_avx512_rsqrt14_pd_512
                     (v8f64 VR512:$src),
                     (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
           (VRSQRT14PDZr VR512:$src)>;

def : Pat <(v16f32 (int_x86_avx512_rcp14_ps_512
                      (v16f32 VR512:$src),
                      (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
           (VRCP14PSZr VR512:$src)>;
def : Pat <(v8f64 (int_x86_avx512_rcp14_pd_512
                     (v8f64 VR512:$src),
                     (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
           (VRCP14PDZr VR512:$src)>;
4108
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
/// Pattern-less scalar forms predicated on HasERI:
///   rr  - two register sources.
///   rrb - two register sources, printed with {sae} and encoded with EVEX_B.
///   rm  - register + memory source (mayLoad).
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                         X86MemOperand x86memop> {
  let hasSideEffects = 0, Predicates = [HasERI] in
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                    (ins RC:$src1, RC:$src2),
                    !strconcat(OpcodeStr,
                      " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, EVEX_4V;

  let hasSideEffects = 0, Predicates = [HasERI] in
  def rrb : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                     (ins RC:$src1, RC:$src2),
                     !strconcat(OpcodeStr,
                       " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
                     []>, EVEX_4V, EVEX_B;

  let hasSideEffects = 0, Predicates = [HasERI], mayLoad = 1 in
  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                    (ins RC:$src1, x86memop:$src2),
                    !strconcat(OpcodeStr,
                      " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, EVEX_4V;
}
4130
// Scalar rcp28 / rsqrt28 instantiations; the sd variants add VEX_W and use
// 64-bit operands.
defm VRCP28SS   : avx512_fp28_s<0xCB, "vrcp28ss", FR32X, f32mem>,
                  EVEX_CD8<32, CD8VT1>;
defm VRCP28SD   : avx512_fp28_s<0xCB, "vrcp28sd", FR64X, f64mem>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;
defm VRSQRT28SS : avx512_fp28_s<0xCD, "vrsqrt28ss", FR32X, f32mem>,
                  EVEX_CD8<32, CD8VT1>;
defm VRSQRT28SD : avx512_fp28_s<0xCD, "vrsqrt28sd", FR64X, f64mem>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskya3a71402013-10-09 08:16:14 +00004139
// Scalar rcp28 / rsqrt28 intrinsics (zero pass-through, all-ones mask,
// FROUND_NO_EXC) select the {sae} rrb forms.  Operands and result are moved
// between VR128X and FR32X / FR64X with COPY_TO_REGCLASS.
def : Pat <(v4f32 (int_x86_avx512_rcp28_ss
                     (v4f32 VR128X:$src1), (v4f32 VR128X:$src2),
                     (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
                     FROUND_NO_EXC)),
           (COPY_TO_REGCLASS
             (VRCP28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
                          (COPY_TO_REGCLASS VR128X:$src2, FR32X)),
             VR128X)>;

def : Pat <(v2f64 (int_x86_avx512_rcp28_sd
                     (v2f64 VR128X:$src1), (v2f64 VR128X:$src2),
                     (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
                     FROUND_NO_EXC)),
           (COPY_TO_REGCLASS
             (VRCP28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
                          (COPY_TO_REGCLASS VR128X:$src2, FR64X)),
             VR128X)>;

def : Pat <(v4f32 (int_x86_avx512_rsqrt28_ss
                     (v4f32 VR128X:$src1), (v4f32 VR128X:$src2),
                     (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
                     FROUND_NO_EXC)),
           (COPY_TO_REGCLASS
             (VRSQRT28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
                            (COPY_TO_REGCLASS VR128X:$src2, FR32X)),
             VR128X)>;

def : Pat <(v2f64 (int_x86_avx512_rsqrt28_sd
                     (v2f64 VR128X:$src1), (v2f64 VR128X:$src2),
                     (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
                     FROUND_NO_EXC)),
           (COPY_TO_REGCLASS
             (VRSQRT28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
                            (COPY_TO_REGCLASS VR128X:$src2, FR64X)),
             VR128X)>;
4163
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
/// Pattern-less packed forms predicated on HasERI:
///   r  - register source.
///   rb - register source, printed with {sae} and encoded with EVEX_B.
///   m  - memory source.
/// None of the definitions has a selection pattern, so memory behaviour
/// cannot be inferred.  The m form is therefore marked mayLoad explicitly,
/// as avx512_fp28_s does for its rm form; with only hasSideEffects = 0 it
/// would be treated as an instruction that does not touch memory.
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr,
                         RegisterClass RC, X86MemOperand x86memop> {
  let hasSideEffects = 0, Predicates = [HasERI] in {
    def r : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                     !strconcat(OpcodeStr,
                       " \t{$src, $dst|$dst, $src}"),
                     []>, EVEX;
    def rb : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                      !strconcat(OpcodeStr,
                        " \t{{sae}, $src, $dst|$dst, $src, {sae}}"),
                      []>, EVEX, EVEX_B;
    let mayLoad = 1 in
    def m : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                     !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                     []>, EVEX;
  }
}
// 512-bit packed rsqrt28 / rcp28 instantiations; the pd variants add VEX_W.
defm VRSQRT28PSZ : avx512_fp28_p<0xCC, "vrsqrt28ps", VR512, f512mem>,
                   EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRSQRT28PDZ : avx512_fp28_p<0xCC, "vrsqrt28pd", VR512, f512mem>,
                   VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
defm VRCP28PSZ   : avx512_fp28_p<0xCA, "vrcp28ps", VR512, f512mem>,
                   EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRCP28PDZ   : avx512_fp28_p<0xCA, "vrcp28pd", VR512, f512mem>,
                   VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
4189
// Packed rsqrt28 / rcp28 intrinsics (zero pass-through, all-ones mask,
// FROUND_NO_EXC) select the {sae} rb forms.
def : Pat <(v16f32 (int_x86_avx512_rsqrt28_ps
                      (v16f32 VR512:$src),
                      (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1),
                      FROUND_NO_EXC)),
           (VRSQRT28PSZrb VR512:$src)>;
def : Pat <(v8f64 (int_x86_avx512_rsqrt28_pd
                     (v8f64 VR512:$src),
                     (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1),
                     FROUND_NO_EXC)),
           (VRSQRT28PDZrb VR512:$src)>;

def : Pat <(v16f32 (int_x86_avx512_rcp28_ps
                      (v16f32 VR512:$src),
                      (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1),
                      FROUND_NO_EXC)),
           (VRCP28PSZrb VR512:$src)>;
def : Pat <(v8f64 (int_x86_avx512_rcp28_pd
                     (v8f64 VR512:$src),
                     (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1),
                     FROUND_NO_EXC)),
           (VRCP28PDZrb VR512:$src)>;
4203
// avx512_sqrt_packed - 512-bit packed unary operation selected from OpNode.
// Emits PSZrr / PSZrm (v16f32) and PDZrr / PDZrm (v8f64); itins_s supplies
// the itineraries for the ps forms and itins_d for the pd forms.
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              OpndItins itins_s, OpndItins itins_d> {
  // Single precision.
  def PSZrr : AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
                        !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                        [(set VR512:$dst, (v16f32 (OpNode VR512:$src)))],
                        itins_s.rr>,
              EVEX, EVEX_V512;

  let mayLoad = 1 in
  def PSZrm : AVX512PSI<opc, MRMSrcMem, (outs VR512:$dst),
                        (ins f512mem:$src),
                        !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                        [(set VR512:$dst,
                          (OpNode (v16f32
                                    (bitconvert (memopv16f32 addr:$src)))))],
                        itins_s.rm>,
              EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;

  // Double precision.
  def PDZrr : AVX512PDI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
                        !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                        [(set VR512:$dst, (v8f64 (OpNode VR512:$src)))],
                        itins_d.rr>,
              EVEX, EVEX_V512;

  // NOTE(review): the pd load pattern goes through memopv16f32 plus a
  // bitconvert to v8f64 rather than a v8f64 load fragment - confirm this is
  // intended.
  let mayLoad = 1 in
  def PDZrm : AVX512PDI<opc, MRMSrcMem, (outs VR512:$dst),
                        (ins f512mem:$src),
                        !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                        [(set VR512:$dst,
                          (OpNode (v8f64
                                    (bitconvert (memopv16f32 addr:$src)))))],
                        itins_d.rm>,
              EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
}
4231
// avx512_sqrt_scalar - scalar three-operand forms for ss (F32Int, itins_s)
// and sd (F64Int, itins_d).
//   S?Zr / S?Zm         : pattern-less FR32X / FR64X forms.
//   S?Zr_Int / S?Zm_Int : codegen-only VR128X forms selected from the
//                         F32Int / F64Int intrinsics.
// Every sd form uses the double-precision itineraries (itins_d).
// Previously SDZr_Int used itins_s.rr and the remaining sd forms had no
// itinerary at all, unlike their ss counterparts.
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,
                              Intrinsic F32Int, Intrinsic F64Int,
                              OpndItins itins_s, OpndItins itins_d> {
  // ---- Single precision ----
  def SSZr : SI<opc, MRMSrcReg, (outs FR32X:$dst),
                (ins FR32X:$src1, FR32X:$src2),
                !strconcat(OpcodeStr,
                  "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [], itins_s.rr>, XS, EVEX_4V;
  let isCodeGenOnly = 1 in
  def SSZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
                      (ins VR128X:$src1, VR128X:$src2),
                      !strconcat(OpcodeStr,
                        "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [(set VR128X:$dst,
                        (F32Int VR128X:$src1, VR128X:$src2))],
                      itins_s.rr>, XS, EVEX_4V;
  let mayLoad = 1 in {
    def SSZm : SI<opc, MRMSrcMem, (outs FR32X:$dst),
                  (ins FR32X:$src1, f32mem:$src2),
                  !strconcat(OpcodeStr,
                    "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [], itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
    let isCodeGenOnly = 1 in
    def SSZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
                        (ins VR128X:$src1, ssmem:$src2),
                        !strconcat(OpcodeStr,
                          "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                        [(set VR128X:$dst,
                          (F32Int VR128X:$src1, sse_load_f32:$src2))],
                        itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
  }

  // ---- Double precision ----
  def SDZr : SI<opc, MRMSrcReg, (outs FR64X:$dst),
                (ins FR64X:$src1, FR64X:$src2),
                !strconcat(OpcodeStr,
                  "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [], itins_d.rr>, XD, EVEX_4V, VEX_W;
  let isCodeGenOnly = 1 in
  def SDZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
                      (ins VR128X:$src1, VR128X:$src2),
                      !strconcat(OpcodeStr,
                        "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [(set VR128X:$dst,
                        (F64Int VR128X:$src1, VR128X:$src2))],
                      itins_d.rr>, XD, EVEX_4V, VEX_W;
  let mayLoad = 1 in {
    def SDZm : SI<opc, MRMSrcMem, (outs FR64X:$dst),
                  (ins FR64X:$src1, f64mem:$src2),
                  !strconcat(OpcodeStr,
                    "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [], itins_d.rm>, XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
    let isCodeGenOnly = 1 in
    def SDZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
                        (ins VR128X:$src1, sdmem:$src2),
                        !strconcat(OpcodeStr,
                          "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                        [(set VR128X:$dst,
                          (F64Int VR128X:$src1, sse_load_f64:$src2))],
                        itins_d.rm>,
                   XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}
4292
4293
// VSQRT combines the scalar (SS/SD) and packed (PS/PD) square-root
// definitions under one prefix.  The scalar multiclass is given "sqrt" and
// the packed one "vsqrt".
// NOTE(review): presumably the SI/SIi8 instruction classes used by the
// scalar forms prepend the "v" themselves - confirm against their
// definitions.
defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
                                int_x86_avx512_sqrt_ss,
                                int_x86_avx512_sqrt_sd,
                                SSE_SQRTSS, SSE_SQRTSD>,
             avx512_sqrt_packed<0x51, "vsqrt", fsqrt,
                                SSE_SQRTPS, SSE_SQRTPD>;
4299
// Selection patterns for sqrt / rsqrt / rcp when AVX-512 is available.
//
// The load-folding scalar patterns are meant to apply only when optimizing
// for size.  They used to say so with Requires<[OptForSize]>, but a
// top-level `let Predicates = ...` is applied after a def's superclasses and
// therefore overwrote that list, leaving just [HasAVX512].  Each of those
// patterns is now wrapped in an inner `let` that names both predicates; the
// innermost let is applied last and so takes effect.
let Predicates = [HasAVX512] in {
  // Packed sqrt intrinsics: zero pass-through, all-ones mask, current
  // rounding mode.
  def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1),
                     (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1),
                     FROUND_CURRENT)),
            (VSQRTPSZrr VR512:$src1)>;
  def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1),
                    (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1),
                    FROUND_CURRENT)),
            (VSQRTPDZrr VR512:$src1)>;

  // Scalar fsqrt.  The first source operand is an IMPLICIT_DEF.
  def : Pat<(f32 (fsqrt FR32X:$src)),
            (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
  let Predicates = [HasAVX512, OptForSize] in
  def : Pat<(f32 (fsqrt (load addr:$src))),
            (VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (fsqrt FR64X:$src)),
            (VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>;
  let Predicates = [HasAVX512, OptForSize] in
  def : Pat<(f64 (fsqrt (load addr:$src))),
            (VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>;

  // Scalar reciprocal square root estimate.
  def : Pat<(f32 (X86frsqrt FR32X:$src)),
            (VRSQRT14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
  let Predicates = [HasAVX512, OptForSize] in
  def : Pat<(f32 (X86frsqrt (load addr:$src))),
            (VRSQRT14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;

  // Scalar reciprocal estimate.
  def : Pat<(f32 (X86frcp FR32X:$src)),
            (VRCP14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
  let Predicates = [HasAVX512, OptForSize] in
  def : Pat<(f32 (X86frcp (load addr:$src))),
            (VRCP14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;

  // Legacy SSE / SSE2 scalar sqrt intrinsics.
  // NOTE(review): the register forms copy into FR32 / FR64 although the
  // instructions take FR32X / FR64X operands - confirm the narrower class
  // is intended.
  def : Pat<(int_x86_sse_sqrt_ss VR128X:$src),
            (COPY_TO_REGCLASS (VSQRTSSZr (f32 (IMPLICIT_DEF)),
                                         (COPY_TO_REGCLASS VR128X:$src, FR32)),
                              VR128X)>;
  def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
            (VSQRTSSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;

  def : Pat<(int_x86_sse2_sqrt_sd VR128X:$src),
            (COPY_TO_REGCLASS (VSQRTSDZr (f64 (IMPLICIT_DEF)),
                                         (COPY_TO_REGCLASS VR128X:$src, FR64)),
                              VR128X)>;
  def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
            (VSQRTSDZm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
}
4345
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004346
// avx512_fp_unop_rm - packed intrinsic operation taking a source plus an
// 8-bit immediate.  Emits ps forms (opcps, V4F32Int, SSEPackedSingle domain)
// and pd forms (opcpd, V2F64Int, SSEPackedDouble domain), each with a
// register (r) and a memory (m) variant.  The memory variants load through
// mem_frag32 / mem_frag64 and carry EVEX_CD8 with the supplied VForm.
multiclass avx512_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
                             X86MemOperand x86memop, RegisterClass RC,
                             PatFrag mem_frag32, PatFrag mem_frag64,
                             Intrinsic V4F32Int, Intrinsic V2F64Int,
                             CD8VForm VForm> {
  let ExeDomain = SSEPackedSingle in {
    // ps, register source.
    def PSr : AVX512AIi8<opcps, MRMSrcReg,
                         (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                         !strconcat(OpcodeStr,
                           "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                         [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>;

    // ps, memory source.
    def PSm : AVX512AIi8<opcps, MRMSrcMem,
                         (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                         !strconcat(OpcodeStr,
                           "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                         [(set RC:$dst,
                           (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
              EVEX_CD8<32, VForm>;
  } // ExeDomain = SSEPackedSingle

  let ExeDomain = SSEPackedDouble in {
    // pd, register source.
    def PDr : AVX512AIi8<opcpd, MRMSrcReg,
                         (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                         !strconcat(OpcodeStr,
                           "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                         [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>;

    // pd, memory source.
    def PDm : AVX512AIi8<opcpd, MRMSrcMem,
                         (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                         !strconcat(OpcodeStr,
                           "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                         [(set RC:$dst,
                           (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
              EVEX_CD8<64, VForm>;
  } // ExeDomain = SSEPackedDouble
}
4389
// avx512_fp_binop_rm - scalar two-source operation with an 8-bit immediate,
// all in GenericDomain.  For each of ss (opcss, F32Int) and sd (opcsd,
// F64Int; VEX_W) it emits:
//   S?r     : pattern-less FR32X / FR64X form, hasSideEffects = 0.
//   S?r_Int : codegen-only VR128X form selected from the intrinsic.
//   S?m     : VR128X form whose second source is loaded from memory, also
//             selected from the intrinsic.
multiclass avx512_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
                              string OpcodeStr,
                              Intrinsic F32Int,
                              Intrinsic F64Int> {
  let ExeDomain = GenericDomain in {
    // ---- ss ----
    let hasSideEffects = 0 in
    def SSr : AVX512AIi8<opcss, MRMSrcReg, (outs FR32X:$dst),
                (ins FR32X:$src1, FR32X:$src2, i32i8imm:$src3),
                !strconcat(OpcodeStr,
                  "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                []>;

    let isCodeGenOnly = 1 in
    def SSr_Int : AVX512AIi8<opcss, MRMSrcReg, (outs VR128X:$dst),
                (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
                !strconcat(OpcodeStr,
                  "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                [(set VR128X:$dst,
                  (F32Int VR128X:$src1, VR128X:$src2, imm:$src3))]>;

    def SSm : AVX512AIi8<opcss, MRMSrcMem, (outs VR128X:$dst),
                (ins VR128X:$src1, ssmem:$src2, i32i8imm:$src3),
                !strconcat(OpcodeStr,
                  "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                [(set VR128X:$dst,
                  (F32Int VR128X:$src1, sse_load_f32:$src2, imm:$src3))]>,
              EVEX_CD8<32, CD8VT1>;

    // ---- sd ----
    let hasSideEffects = 0 in
    def SDr : AVX512AIi8<opcsd, MRMSrcReg, (outs FR64X:$dst),
                (ins FR64X:$src1, FR64X:$src2, i32i8imm:$src3),
                !strconcat(OpcodeStr,
                  "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                []>, VEX_W;

    let isCodeGenOnly = 1 in
    def SDr_Int : AVX512AIi8<opcsd, MRMSrcReg, (outs VR128X:$dst),
                (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
                !strconcat(OpcodeStr,
                  "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                [(set VR128X:$dst,
                  (F64Int VR128X:$src1, VR128X:$src2, imm:$src3))]>,
              VEX_W;

    def SDm : AVX512AIi8<opcsd, MRMSrcMem, (outs VR128X:$dst),
                (ins VR128X:$src1, sdmem:$src2, i32i8imm:$src3),
                !strconcat(OpcodeStr,
                  "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                [(set VR128X:$dst,
                  (F64Int VR128X:$src1, sse_load_f64:$src2, imm:$src3))]>,
              VEX_W, EVEX_CD8<64, CD8VT1>;
  } // ExeDomain = GenericDomain
}
4447
// Packed VRNDSCALE with an 8-bit immediate.
//   opc       - opcode byte.
//   OpcodeStr - full mnemonic (e.g. "vrndscaleps").
//   x86memop  - memory operand used by the "m" form.
//   RC        - vector register class of source and destination.
//   mem_frag  - currently unused: neither record has a selection pattern.
//   d         - execution domain.
// Both records have empty pattern lists; selection is done by stand-alone Pat
// definitions that reference the "r" record.
multiclass avx512_rndscale<bits<8> opc, string OpcodeStr,
                           X86MemOperand x86memop, RegisterClass RC,
                           PatFrag mem_frag, Domain d> {
  let ExeDomain = d in {
    // Register source.
    def r : AVX512AIi8<opc, MRMSrcReg,
              (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
              !strconcat(OpcodeStr,
                         " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              []>, EVEX;

    // Memory source. With an empty pattern TableGen cannot infer that this
    // record reads memory, so the flag is set explicitly.
    let mayLoad = 1 in
    def m : AVX512AIi8<opc, MRMSrcMem,
              (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
              !strconcat(OpcodeStr,
                         " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              []>, EVEX;
  } // ExeDomain
}
4468
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00004469
// 512-bit packed VRNDSCALE instructions.
defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512,
                                    memopv16f32, SSEPackedSingle>,
                    EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512,
                                    memopv8f64, SSEPackedDouble>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Select the masked rndscale intrinsics when the mask is all ones, the
// pass-through operand is the source itself and the rounding operand is
// FROUND_CURRENT: the unmasked register form is sufficient.
def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512
                     (v16f32 VR512:$src1), imm:$src2,
                     (v16f32 VR512:$src1), (i16 -1), FROUND_CURRENT)),
          (VRNDSCALEPSZr VR512:$src1, imm:$src2)>;

def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512
                    (v8f64 VR512:$src1), imm:$src2,
                    (v8f64 VR512:$src1), (i8 -1), FROUND_CURRENT)),
          (VRNDSCALEPDZr VR512:$src1, imm:$src2)>;
4488
// Scalar VRNDSCALE: two sources plus an 8-bit immediate.
//   opc       - opcode byte.
//   OpcodeStr - full mnemonic (e.g. "vrndscaless").
//   x86memop  - memory operand used for $src2 in the "m" form.
//   RC        - scalar FP register class of $dst, $src1 and reg $src2.
//   d         - execution domain.
// Both records have empty pattern lists; selection is done by stand-alone Pat
// definitions that reference the "r" record.
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                  Operand x86memop, RegisterClass RC,
                                  Domain d> {
  let ExeDomain = d in {
    // Register source. The asm string must mention every operand, including
    // the $src3 immediate; otherwise the immediate is neither printed nor
    // parsed.
    def r : AVX512AIi8<opc, MRMSrcReg,
              (outs RC:$dst), (ins RC:$src1, RC:$src2, i32i8imm:$src3),
              !strconcat(OpcodeStr,
                " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
              []>, EVEX_4V;

    // Memory source. With an empty pattern TableGen cannot infer that this
    // record reads memory, so the flag is set explicitly.
    let mayLoad = 1 in
    def m : AVX512AIi8<opc, MRMSrcMem,
              (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i32i8imm:$src3),
              !strconcat(OpcodeStr,
                " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
              []>, EVEX_4V;
  } // ExeDomain
}
4505
// Scalar VRNDSCALE instructions.
defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", ssmem, FR32X,
                                SSEPackedSingle>, EVEX_CD8<32, CD8VT1>;

// The double-precision form is encoded with EVEX.W = 1, like the other
// 64-bit element instructions in this file (e.g. VRNDSCALEPDZ).
defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", sdmem, FR64X,
                                SSEPackedDouble>, VEX_W,
                                EVEX_CD8<64, CD8VT1>;
4511
// Lower the generic rounding nodes to VRNDSCALE with a fixed immediate:
//   ffloor -> 0x1, fceil -> 0x2, ftrunc -> 0x3, frint -> 0x4,
//   fnearbyint -> 0xC.
// The scalar instruction takes two register sources; the first one is fed an
// IMPLICIT_DEF.

// Scalar, single precision.
def : Pat<(f32 (ffloor FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>;
def : Pat<(f32 (fnearbyint FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0xC))>;
def : Pat<(f32 (fceil FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x2))>;
def : Pat<(f32 (frint FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x4))>;
def : Pat<(f32 (ftrunc FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x3))>;

// Scalar, double precision.
def : Pat<(f64 (ffloor FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x1))>;
def : Pat<(f64 (fnearbyint FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0xC))>;
def : Pat<(f64 (fceil FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x2))>;
def : Pat<(f64 (frint FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x4))>;
def : Pat<(f64 (ftrunc FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>;

// Packed, 16 x f32.
def : Pat<(v16f32 (ffloor VR512:$src)),
          (VRNDSCALEPSZr VR512:$src, (i32 0x1))>;
def : Pat<(v16f32 (fnearbyint VR512:$src)),
          (VRNDSCALEPSZr VR512:$src, (i32 0xC))>;
def : Pat<(v16f32 (fceil VR512:$src)),
          (VRNDSCALEPSZr VR512:$src, (i32 0x2))>;
def : Pat<(v16f32 (frint VR512:$src)),
          (VRNDSCALEPSZr VR512:$src, (i32 0x4))>;
def : Pat<(v16f32 (ftrunc VR512:$src)),
          (VRNDSCALEPSZr VR512:$src, (i32 0x3))>;

// Packed, 8 x f64.
def : Pat<(v8f64 (ffloor VR512:$src)),
          (VRNDSCALEPDZr VR512:$src, (i32 0x1))>;
def : Pat<(v8f64 (fnearbyint VR512:$src)),
          (VRNDSCALEPDZr VR512:$src, (i32 0xC))>;
def : Pat<(v8f64 (fceil VR512:$src)),
          (VRNDSCALEPDZr VR512:$src, (i32 0x2))>;
def : Pat<(v8f64 (frint VR512:$src)),
          (VRNDSCALEPDZr VR512:$src, (i32 0x4))>;
def : Pat<(v8f64 (ftrunc VR512:$src)),
          (VRNDSCALEPDZr VR512:$src, (i32 0x3))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004554
4555//-------------------------------------------------
4556// Integer truncate and extend operations
4557//-------------------------------------------------
4558
// Down-converting moves (VPMOV*, VPMOVS*, VPMOVUS*).
//   opc       - opcode byte.
//   OpcodeStr - full mnemonic.
//   dstRC     - register class of the narrowed result.
//   srcRC     - register class of the wide source.
//   KRC       - write-mask register class.
//   x86memop  - memory operand used by the store forms.
// Records: rr (unmasked), rrk (masked), rrkz (zero-masked), mr (store) and
// mrk (masked store). None of them has a selection pattern; the register
// forms are selected through stand-alone Pat definitions.
multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr,
                            RegisterClass dstRC, RegisterClass srcRC,
                            RegisterClass KRC, X86MemOperand x86memop> {
  def rr : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
               (ins srcRC:$src),
               !strconcat(OpcodeStr," \t{$src, $dst|$dst, $src}"),
               []>, EVEX;

  def rrk : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
               (ins KRC:$mask, srcRC:$src),
               !strconcat(OpcodeStr,
                 " \t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
               []>, EVEX, EVEX_K;

  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
               (ins KRC:$mask, srcRC:$src),
               !strconcat(OpcodeStr,
                 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
               []>, EVEX, EVEX_KZ;

  // The store forms have no pattern from which TableGen could infer that
  // they write memory, so the flag is set explicitly.
  let mayStore = 1 in {
    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, srcRC:$src),
               !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
               []>, EVEX;

    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, KRC:$mask, srcRC:$src),
               !strconcat(OpcodeStr,
                 " \t{$src, $dst {${mask}}|${dst} {${mask}}, $src}"),
               []>, EVEX, EVEX_K;
  }
}
// Truncating / saturating down-converts from a 512-bit source.

// 8 x i64 -> 8 x i8.
defm VPMOVQB   : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVSQB  : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VO>;

// 8 x i64 -> 8 x i16.
defm VPMOVQW   : avx512_trunc_sat<0x34, "vpmovqw", VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVSQW  : avx512_trunc_sat<0x24, "vpmovsqw", VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VQ>;

// 8 x i64 -> 8 x i32.
defm VPMOVQD   : avx512_trunc_sat<0x35, "vpmovqd", VR256X, VR512, VK8WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<32, CD8VH>;
defm VPMOVSQD  : avx512_trunc_sat<0x25, "vpmovsqd", VR256X, VR512, VK8WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<32, CD8VH>;
defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<32, CD8VH>;

// 16 x i32 -> 16 x i16.
defm VPMOVDW   : avx512_trunc_sat<0x33, "vpmovdw", VR256X, VR512, VK16WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVSDW  : avx512_trunc_sat<0x23, "vpmovsdw", VR256X, VR512, VK16WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VH>;

// 16 x i32 -> 16 x i8.
defm VPMOVDB   : avx512_trunc_sat<0x31, "vpmovdb", VR128X, VR512, VK16WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVSDB  : avx512_trunc_sat<0x21, "vpmovsdb", VR128X, VR512, VK16WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VQ>;

// Unmasked X86vtrunc selects the plain register form.
def : Pat<(v16i8  (X86vtrunc (v8i64  VR512:$src))), (VPMOVQBrr VR512:$src)>;
def : Pat<(v8i16  (X86vtrunc (v8i64  VR512:$src))), (VPMOVQWrr VR512:$src)>;
def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr VR512:$src)>;
def : Pat<(v16i8  (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>;
def : Pat<(v8i32  (X86vtrunc (v8i64  VR512:$src))), (VPMOVQDrr VR512:$src)>;

// Masked X86vtruncm selects the zero-masking register form.
def : Pat<(v16i8 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
          (VPMOVDBrrkz VK16WM:$mask, VR512:$src)>;
def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
          (VPMOVDWrrkz VK16WM:$mask, VR512:$src)>;
def : Pat<(v8i16 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
          (VPMOVQWrrkz VK8WM:$mask, VR512:$src)>;
def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
          (VPMOVQDrrkz VK8WM:$mask, VR512:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004634
4635
// Zero/sign extending moves (VPMOVZX*, VPMOVSX*).
//   opc       - opcode byte.
//   OpcodeStr - full mnemonic.
//   KRC       - write-mask register class.
//   DstRC     - register class of the widened result.
//   SrcRC     - register class of the narrow source.
//   OpNode    - DAG node matched by the unmasked forms.
//   mem_frag  - load fragment folded by the "rm" form.
//   x86memop  - memory operand of the load forms.
//   OpVT/InVT - result and source vector types used in the patterns.
// Only the unmasked rr/rm records carry selection patterns; the masked
// (k) and zero-masked (kz) records are assembler/disassembler only.
multiclass avx512_extend<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                         RegisterClass DstRC, RegisterClass SrcRC,
                         SDNode OpNode, PatFrag mem_frag,
                         X86MemOperand x86memop,
                         ValueType OpVT, ValueType InVT> {

  def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
              (ins SrcRC:$src),
              !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (OpVT (OpNode (InVT SrcRC:$src))))]>, EVEX;

  // No blank before '|': it would end up as trailing whitespace in the
  // AT&T-syntax output.
  def rrk : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
              (ins KRC:$mask, SrcRC:$src),
              !strconcat(OpcodeStr,
                " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              []>, EVEX, EVEX_K;

  def rrkz : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
              (ins KRC:$mask, SrcRC:$src),
              !strconcat(OpcodeStr,
                " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
              []>, EVEX, EVEX_KZ;

  let mayLoad = 1 in {
    def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins x86memop:$src),
              !strconcat(OpcodeStr," \t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst,
                (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))]>,
              EVEX;

    def rmk : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins KRC:$mask, x86memop:$src),
              !strconcat(OpcodeStr,
                " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              []>,
              EVEX, EVEX_K;

    def rmkz : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins KRC:$mask, x86memop:$src),
              !strconcat(OpcodeStr,
                " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
              []>,
              EVEX, EVEX_KZ;
  }
}
4677
// Zero extension to a 512-bit destination.
defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VK16WM, VR512, VR128X,
                               X86vzext, memopv2i64, i128mem, v16i32, v16i8>,
                 EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VK8WM, VR512, VR128X,
                               X86vzext, memopv2i64, i128mem, v8i64, v16i8>,
                 EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VK16WM, VR512, VR256X,
                               X86vzext, memopv4i64, i256mem, v16i32, v16i16>,
                 EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VK8WM, VR512, VR128X,
                               X86vzext, memopv2i64, i128mem, v8i64, v8i16>,
                 EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VK8WM, VR512, VR256X,
                               X86vzext, memopv4i64, i256mem, v8i64, v8i32>,
                 EVEX_V512, EVEX_CD8<32, CD8VH>;

// Sign extension to a 512-bit destination.
defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VK16WM, VR512, VR128X,
                               X86vsext, memopv2i64, i128mem, v16i32, v16i8>,
                 EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VK8WM, VR512, VR128X,
                               X86vsext, memopv2i64, i128mem, v8i64, v16i8>,
                 EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VK16WM, VR512, VR256X,
                               X86vsext, memopv4i64, i256mem, v16i32, v16i16>,
                 EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VK8WM, VR512, VR128X,
                               X86vsext, memopv2i64, i128mem, v8i64, v8i16>,
                 EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VK8WM, VR512, VR256X,
                               X86vsext, memopv4i64, i256mem, v8i64, v8i32>,
                 EVEX_V512, EVEX_CD8<32, CD8VH>;
4709
4710//===----------------------------------------------------------------------===//
4711// GATHER - SCATTER Operations
4712
// Masked gather load.
//   opc       - opcode byte.
//   OpcodeStr - full mnemonic.
//   KRC       - write-mask register class.
//   RC        - register class of the gathered data.
//   memop     - vector-indexed memory operand.
// The single "rm" record has two results: the data ($dst, tied to $src1 and
// early-clobber) and the updated mask ($mask_wb, tied to $mask). It has no
// selection pattern.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                         RegisterClass RC, X86MemOperand memop> {
  let mayLoad = 1 in {
    let Constraints =
          "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in
    def rm : AVX5128I<opc, MRMSrcMem,
               (outs RC:$dst, KRC:$mask_wb),
               (ins RC:$src1, KRC:$mask, memop:$src2),
               OpcodeStr #
                 " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}",
               []>,
             EVEX, EVEX_K;
  }
}
Cameron McInally45325962014-03-26 13:50:50 +00004723
// Floating-point gathers, double precision.
let ExeDomain = SSEPackedDouble in {
  defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", VK8WM, VR512,
                                   vy64xmem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", VK8WM, VR512,
                                   vz64mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
}

// Floating-point gathers, single precision.
let ExeDomain = SSEPackedSingle in {
  defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512,
                                   vz32mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;
  defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X,
                                   vz64mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;
}

// Integer gathers with 32-bit indices.
defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512,
                                 vy64xmem>,
                   EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512,
                                 vz32mem>,
                   EVEX_V512, EVEX_CD8<32, CD8VT1>;

// Integer gathers with 64-bit indices.
defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", VK8WM, VR512,
                                 vz64mem>,
                   EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", VK8WM, VR256X,
                                 vz64mem>,
                   EVEX_V512, EVEX_CD8<32, CD8VT1>;
4747
// Masked scatter store.
//   opc       - opcode byte.
//   OpcodeStr - full mnemonic.
//   KRC       - write-mask register class.
//   RC        - register class of the data being stored.
//   memop     - vector-indexed memory operand.
// The single "mr" record returns the updated mask ($mask_wb, tied to $mask)
// and has no selection pattern.
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                          RegisterClass RC, X86MemOperand memop> {
  let mayStore = 1 in {
    let Constraints = "$mask = $mask_wb" in
    def mr : AVX5128I<opc, MRMDestMem,
               (outs KRC:$mask_wb),
               (ins memop:$dst, KRC:$mask, RC:$src2),
               OpcodeStr #
                 " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}",
               []>,
             EVEX, EVEX_K;
  }
}
4757
// Floating-point scatters, double precision.
let ExeDomain = SSEPackedDouble in {
  defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", VK8WM, VR512,
                                     vy64xmem>,
                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", VK8WM, VR512,
                                     vz64mem>,
                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
}

// Floating-point scatters, single precision.
let ExeDomain = SSEPackedSingle in {
  defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", VK16WM, VR512,
                                     vz32mem>,
                      EVEX_V512, EVEX_CD8<32, CD8VT1>;
  defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", VK8WM, VR256X,
                                     vz64mem>,
                      EVEX_V512, EVEX_CD8<32, CD8VT1>;
}

// Integer scatters with 32-bit indices.
defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", VK8WM, VR512,
                                   vy64xmem>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", VK16WM, VR512,
                                   vz32mem>,
                    EVEX_V512, EVEX_CD8<32, CD8VT1>;

// Integer scatters with 64-bit indices.
defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512,
                                   vz64mem>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", VK8WM, VR256X,
                                   vz64mem>,
                    EVEX_V512, EVEX_CD8<32, CD8VT1>;
4781
// Gather / scatter prefetch.
//   opc       - opcode byte.
//   F         - instruction format; the instantiations pass an MRM<n>m form.
//   OpcodeStr - full mnemonic.
//   KRC       - write-mask register class.
//   memop     - vector-indexed memory operand.
// The single "m" record has no results and no selection pattern, is marked
// as having side effects and is predicated on HasPFI.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F,
                                          string OpcodeStr,
                                          RegisterClass KRC,
                                          X86MemOperand memop> {
  let Predicates = [HasPFI] in {
    let hasSideEffects = 1 in
    def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
              OpcodeStr # " \t{$src {${mask}}|{${mask}}, $src}",
              []>,
            EVEX, EVEX_K;
  }
}
4790
// NOTE(review): the *QPS records use EVEX_CD8<64, ...> and the *DPD records
// use EVEX_CD8<32, ...>, i.e. the compressed-displacement element size does
// not follow the data element size implied by the mnemonic. Confirm against
// the ISA reference before relying on disp8 encodings of these records.

// Gather prefetch, hint 0 (/1).
defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m,
                      "vgatherpf0dps", VK16WM, vz32mem>,
                    EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m,
                      "vgatherpf0qps", VK8WM, vz64mem>,
                    EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m,
                      "vgatherpf0dpd", VK8WM, vy32mem>,
                    EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m,
                      "vgatherpf0qpd", VK8WM, vz64mem>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Gather prefetch, hint 1 (/2).
defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m,
                      "vgatherpf1dps", VK16WM, vz32mem>,
                    EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m,
                      "vgatherpf1qps", VK8WM, vz64mem>,
                    EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m,
                      "vgatherpf1dpd", VK8WM, vy32mem>,
                    EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m,
                      "vgatherpf1qpd", VK8WM, vz64mem>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Scatter prefetch, hint 0 (/5).
defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m,
                       "vscatterpf0dps", VK16WM, vz32mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m,
                       "vscatterpf0qps", VK8WM, vz64mem>,
                     EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m,
                       "vscatterpf0dpd", VK8WM, vy32mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m,
                       "vscatterpf0qpd", VK8WM, vz64mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Scatter prefetch, hint 1 (/6).
defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m,
                       "vscatterpf1dps", VK16WM, vz32mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m,
                       "vscatterpf1qps", VK8WM, vz64mem>,
                     EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m,
                       "vscatterpf1dpd", VK8WM, vy32mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m,
                       "vscatterpf1qpd", VK8WM, vz64mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004838//===----------------------------------------------------------------------===//
4839// VSHUFPS - VSHUFPD Operations
4840
// VSHUFPS / VSHUFPD (opcode 0xC6) with an 8-bit shuffle immediate.
//   RC        - vector register class of all register operands.
//   x86memop  - memory operand of the "rmi" form.
//   vt        - vector type produced by the X86Shufp node.
//   OpcodeStr - full mnemonic.
//   mem_frag  - load fragment folded by the "rmi" form.
//   d         - execution domain.
multiclass avx512_shufp<RegisterClass RC, X86MemOperand x86memop,
                        ValueType vt, string OpcodeStr, PatFrag mem_frag,
                        Domain d> {
  // Register, register, immediate.
  def rri : AVX512PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
              (ins RC:$src1, RC:$src2, i8imm:$src3),
              OpcodeStr #
                " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
              [(set RC:$dst,
                    (vt (X86Shufp RC:$src1, RC:$src2, (i8 imm:$src3))))],
              d, IIC_SSE_SHUFP>,
            EVEX_4V, Sched<[WriteShuffle]>;

  // Register, memory, immediate.
  def rmi : AVX512PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
              (ins RC:$src1, x86memop:$src2, i8imm:$src3),
              OpcodeStr #
                " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
              [(set RC:$dst,
                    (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
                                  (i8 imm:$src3))))],
              d, IIC_SSE_SHUFP>,
            EVEX_4V, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
4859
// 512-bit VSHUFPS / VSHUFPD.
defm VSHUFPSZ : avx512_shufp<VR512, f512mem, v16f32, "vshufps", memopv16f32,
                             SSEPackedSingle>,
                PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VSHUFPDZ : avx512_shufp<VR512, f512mem, v8f64, "vshufpd", memopv8f64,
                             SSEPackedDouble>,
                PD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// Integer vectors with the same element width reuse the FP shuffles.
def : Pat<(v16i32 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
          (VSHUFPSZrri VR512:$src1, VR512:$src2, imm:$imm)>;
def : Pat<(v16i32 (X86Shufp VR512:$src1, (memopv16i32 addr:$src2),
                            (i8 imm:$imm))),
          (VSHUFPSZrmi VR512:$src1, addr:$src2, imm:$imm)>;

def : Pat<(v8i64 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
          (VSHUFPDZrri VR512:$src1, VR512:$src2, imm:$imm)>;
def : Pat<(v8i64 (X86Shufp VR512:$src1, (memopv8i64 addr:$src2),
                           (i8 imm:$imm))),
          (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004876
// VALIGND / VALIGNQ (opcode 0x03), parameterised by an X86VectorVTInfo.
// Produces:
//   rri (+ masked variants via AVX512_masking) - register form selected from
//        X86VAlign; note the node takes its two vector operands in the
//        opposite order to the instruction.
//   an anonymous Pat mapping the floating-point typed X86VAlign onto rri.
//   rmi - memory form without a selection pattern.
multiclass avx512_valign<X86VectorVTInfo _> {
  defm rri : AVX512_masking<0x03, MRMSrcReg, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2, i8imm:$src3),
                            "valign"##_.Suffix,
                            "$src3, $src2, $src1", "$src1, $src2, $src3",
                            (_.VT (X86VAlign _.RC:$src2, _.RC:$src1,
                                             (i8 imm:$src3))),
                            _.VT, _.RC, _.KRCWM>,
             AVX512AIi8Base, EVEX_4V;

  // Memory-source form.
  let mayLoad = 1 in
  def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs _.RC:$dst),
              (ins _.RC:$src1, _.MemOp:$src2, i8imm:$src3),
              !strconcat("valign"##_.Suffix,
                " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
              []>,
            EVEX_4V;

  // Packed-float vectors of the same shape use the same instruction.
  def : Pat<(_.FloatVT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
            (!cast<Instruction>(NAME##rri) _.RC:$src2, _.RC:$src1, imm:$imm)>;
}
// 512-bit VALIGN instantiations.
defm VALIGND : avx512_valign<v16i32_info>,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VALIGNQ : avx512_valign<v8i64_info>,
               VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// Leaf fragments matching an arithmetic right shift of VR512:$src by
// (element width - 1), i.e. the per-element sign mask.
def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
def v8i1sextv8i64   : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
4905
// Packed absolute value (VPABSD / VPABSQ).
//   opc           - opcode byte.
//   OpcodeStr     - full mnemonic.
//   OpVT          - vector type (not referenced by any record here).
//   KRC           - write-mask register class.
//   RC            - vector register class of source and destination.
//   x86memop      - full-width memory operand.
//   x86scalar_mop - scalar memory operand for the broadcast forms.
//   BrdcstStr     - broadcast decoration appended to the memory operand
//                   (e.g. "{1to16}").
// Records: rr/rm/rmb, each with masked (k) and zero-masked (kz) variants.
// None carries a selection pattern. Every form uses RC for its destination,
// so the multiclass is not tied to VR512.
multiclass avx512_vpabs<bits<8> opc, string OpcodeStr, ValueType OpVT,
                        RegisterClass KRC, RegisterClass RC,
                        X86MemOperand x86memop, X86MemOperand x86scalar_mop,
                        string BrdcstStr> {
  // Register source.
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
             !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
             []>, EVEX;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
             !strconcat(OpcodeStr,
               " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
             []>, EVEX, EVEX_K;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
             !strconcat(OpcodeStr,
               " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
             []>, EVEX, EVEX_KZ;

  let mayLoad = 1 in {
    // Full-width memory source.
    def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
               (ins x86memop:$src),
               !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
               []>, EVEX;
    def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
               (ins KRC:$mask, x86memop:$src),
               !strconcat(OpcodeStr,
                 " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
               []>, EVEX, EVEX_K;
    def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
               (ins KRC:$mask, x86memop:$src),
               !strconcat(OpcodeStr,
                 " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
               []>, EVEX, EVEX_KZ;

    // Broadcast memory source.
    def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
               (ins x86scalar_mop:$src),
               !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                          ", $dst|$dst, ${src}", BrdcstStr, "}"),
               []>, EVEX, EVEX_B;
    def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
               (ins KRC:$mask, x86scalar_mop:$src),
               !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                          ", $dst {${mask}}|$dst {${mask}}, ${src}",
                          BrdcstStr, "}"),
               []>, EVEX, EVEX_B, EVEX_K;
    def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
               (ins KRC:$mask, x86scalar_mop:$src),
               !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                          ", $dst {${mask}} {z}|$dst {${mask}} {z}, ${src}",
                          BrdcstStr, "}"),
               []>, EVEX, EVEX_B, EVEX_KZ;
  }
}
4953
// 512-bit VPABSD / VPABSQ.
defm VPABSDZ : avx512_vpabs<0x1E, "vpabsd", v16i32, VK16WM, VR512,
                            i512mem, i32mem, "{1to16}">,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPABSQZ : avx512_vpabs<0x1F, "vpabsq", v8i64, VK8WM, VR512,
                            i512mem, i64mem, "{1to8}">,
               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Match the expanded form xor(sign, add(x, sign)), where "sign" is the
// per-element arithmetic shift matched by the v*i1sext* leaf fragments.
def : Pat<(xor (bc_v16i32 (v16i1sextv16i32)),
               (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
          (VPABSDZrr VR512:$src)>;
def : Pat<(xor (bc_v8i64 (v8i1sextv8i64)),
               (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
          (VPABSQZrr VR512:$src)>;

// Masked pabs intrinsics with an all-zero pass-through and an all-ones mask
// select the unmasked register form.
def : Pat<(v16i32 (int_x86_avx512_mask_pabs_d_512
                     (v16i32 VR512:$src), (v16i32 immAllZerosV), (i16 -1))),
          (VPABSDZrr VR512:$src)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pabs_q_512
                    (v8i64 VR512:$src), (bc_v8i64 (v16i32 immAllZerosV)),
                    (i8 -1))),
          (VPABSQZrr VR512:$src)>;
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00004976
// Unary AVX-512 vector operation (instantiated for VPCONFLICT and VPLZCNT)
// in unmasked, zero-masked ({z}) and merge-masked forms, each with a
// register, a full-vector memory and a broadcast-scalar memory source.
//
//   opc           - opcode byte.
//   OpcodeStr     - instruction mnemonic.
//   RC            - vector register class of the source and destination.
//   KRC           - mask register class of the $mask operand.
//   x86memop      - full-width vector memory operand.
//   x86scalar_mop - scalar memory operand of the broadcast (EVEX_B) forms.
//   BrdcstStr     - broadcast decoration appended to the memory operand in
//                   the assembly string, e.g. "{1to16}".
//
// Every record below has an empty selection pattern, so TableGen has nothing
// to infer instruction properties from.  They are therefore stated
// explicitly: no form has unmodeled side effects, and every memory-source
// form may load.
multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
                        RegisterClass RC, RegisterClass KRC,
                        X86MemOperand x86memop,
                        X86MemOperand x86scalar_mop, string BrdcstStr> {
  let hasSideEffects = 0 in {
  // Unmasked forms.
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src),
       // No blank before '|', consistent with the rm form below (avoids a
       // trailing space in the AT&T variant).
       !strconcat(OpcodeStr, " \t{$src, ${dst}|${dst}, $src}"),
       []>, EVEX;
  let mayLoad = 1 in
  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins x86memop:$src),
       !strconcat(OpcodeStr, " \t{$src, ${dst}|${dst}, $src}"),
       []>, EVEX;
  let mayLoad = 1 in
  def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins x86scalar_mop:$src),
       !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                  ", ${dst}|${dst}, ${src}", BrdcstStr, "}"),
       []>, EVEX, EVEX_B;

  // Zero-masking forms.
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
       (ins KRC:$mask, RC:$src),
       !strconcat(OpcodeStr,
                  " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
       []>, EVEX, EVEX_KZ;
  let mayLoad = 1 in
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, x86memop:$src),
       !strconcat(OpcodeStr,
                  " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
       []>, EVEX, EVEX_KZ;
  let mayLoad = 1 in
  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, x86scalar_mop:$src),
       !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                  ", ${dst} {${mask}} {z}|${dst} {${mask}} {z}, ${src}",
                  BrdcstStr, "}"),
       []>, EVEX, EVEX_KZ, EVEX_B;

  // Merge-masking forms: $src1 is tied to $dst and supplies the pass-through
  // value.
  let Constraints = "$src1 = $dst" in {
  def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, KRC:$mask, RC:$src2),
       !strconcat(OpcodeStr,
                  " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
       []>, EVEX, EVEX_K;
  let mayLoad = 1 in
  def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, KRC:$mask, x86memop:$src2),
       !strconcat(OpcodeStr,
                  " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
       []>, EVEX, EVEX_K;
  let mayLoad = 1 in
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, KRC:$mask, x86scalar_mop:$src2),
       !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                  ", ${dst} {${mask}}|${dst} {${mask}}, ${src2}", BrdcstStr, "}"),
       []>, EVEX, EVEX_K, EVEX_B;
  }
  } // hasSideEffects = 0
}
5029
// VPCONFLICTD / VPCONFLICTQ on 512-bit vectors, available only with HasCDI.
// Both share opcode 0xC4; the Q variant additionally carries VEX_W and uses
// 64-bit element operands.
let Predicates = [HasCDI] in {
  defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM,
                                     i512mem, i32mem, "{1to16}">,
                     EVEX_V512, EVEX_CD8<32, CD8VF>;

  defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM,
                                     i512mem, i64mem, "{1to8}">,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
Elena Demikhovsky6270b382013-12-10 11:58:35 +00005041
// Masked conflict intrinsics -> merge-masked register form (rrk).
// The intrinsic's first operand is the value operated on ($src2 of the
// instruction); its second operand becomes $src1, which rrk ties to $dst.
// The GPR mask is moved into the write-mask register class.
//
// These patterns emit VPCONFLICT*, which is only defined under HasCDI, so
// the patterns carry the same predicate; otherwise they would remain
// selectable on subtargets without that feature.
let Predicates = [HasCDI] in {
def : Pat<(int_x86_avx512_mask_conflict_d_512 VR512:$src2, VR512:$src1,
                                              GR16:$mask),
          (VPCONFLICTDrrk VR512:$src1,
           (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;

def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1,
                                              GR8:$mask),
          (VPCONFLICTQrrk VR512:$src1,
           (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
}
Elena Demikhovskycf0b9ba2014-04-09 12:37:50 +00005051
// VPLZCNTD / VPLZCNTQ on 512-bit vectors, available only with HasCDI.
// They reuse the avx512_conflict multiclass with opcode 0x44; the Q variant
// additionally carries VEX_W and uses 64-bit element operands.
let Predicates = [HasCDI] in {
  defm VPLZCNTD : avx512_conflict<0x44, "vplzcntd", VR512, VK16WM,
                                  i512mem, i32mem, "{1to16}">,
                  EVEX_V512, EVEX_CD8<32, CD8VF>;

  defm VPLZCNTQ : avx512_conflict<0x44, "vplzcntq", VR512, VK8WM,
                                  i512mem, i64mem, "{1to8}">,
                  EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
5063
// Masked lzcnt intrinsics -> merge-masked register form (rrk).
// The intrinsic's first operand is the value operated on ($src2 of the
// instruction); its second operand becomes $src1, which rrk ties to $dst.
// The GPR mask is moved into the write-mask register class.
//
// These patterns emit VPLZCNT*, which is only defined under HasCDI, so the
// patterns carry the same predicate; otherwise they would remain selectable
// on subtargets without that feature.
let Predicates = [HasCDI] in {
def : Pat<(int_x86_avx512_mask_lzcnt_d_512 VR512:$src2, VR512:$src1,
                                           GR16:$mask),
          (VPLZCNTDrrk VR512:$src1,
           (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;

def : Pat<(int_x86_avx512_mask_lzcnt_q_512 VR512:$src2, VR512:$src1,
                                           GR8:$mask),
          (VPLZCNTQrrk VR512:$src1,
           (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
}
5073
// Generic vector ctlz on 512-bit types -> VPLZCNTD / VPLZCNTQ, with the
// memory-operand form listed before the register form for each type.
//
// VPLZCNT* is only defined under HasCDI, so the patterns carry the same
// predicate rather than being selectable unconditionally.
let Predicates = [HasCDI] in {
def : Pat<(v16i32 (ctlz (memopv16i32 addr:$src))),
          (VPLZCNTDrm addr:$src)>;
def : Pat<(v16i32 (ctlz (v16i32 VR512:$src))),
          (VPLZCNTDrr VR512:$src)>;
def : Pat<(v8i64 (ctlz (memopv8i64 addr:$src))),
          (VPLZCNTQrm addr:$src)>;
def : Pat<(v8i64 (ctlz (v8i64 VR512:$src))),
          (VPLZCNTQrr VR512:$src)>;
}
5082
// Stores of constant i1 values become byte immediate stores.  Both spellings
// of "true", (i1 -1) and (i1 1), write the byte 1; (i1 0) writes the byte 0.
def : Pat<(store (i1 -1), addr:$dst),
          (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 1), addr:$dst),
          (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 0), addr:$dst),
          (MOV8mi addr:$dst, (i8 0))>;
Elena Demikhovskyacc5c9e2014-04-22 14:13:10 +00005086
// Store a single mask bit (VK1) to memory as one byte holding 0 or 1.
//
// A KMOVW store writes 16 bits, i.e. it would also overwrite the byte that
// follows $dst.  Instead the mask is moved to a 32-bit GPR, reduced to bit 0
// (so the stored byte matches the 0/1 convention of the constant i1 stores)
// and written with an 8-bit move.
// NOTE(review): relies on KMOVWrk (mask -> GR32), AND32ri and sub_8bit being
// defined elsewhere in the X86 target description - confirm against the rest
// of the file.
def : Pat<(store VK1:$src, addr:$dst),
          (MOV8mr addr:$dst,
           (EXTRACT_SUBREG
            (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
            sub_8bit))>;
5089
// Pattern fragment matching a truncating store whose memory value type is
// i1.
def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
                           (truncstore node:$val, node:$ptr), [{
  const StoreSDNode *Store = cast<StoreSDNode>(N);
  return Store->getMemoryVT() == MVT::i1;
}]>;
5094
// A truncating i1 store of an 8-bit register value is emitted as a plain
// byte store of that register.
def : Pat<(truncstorei1 GR8:$src, addr:$dst),
          (MOV8mr addr:$dst, GR8:$src)>;
5097