blob: dbb58bb8ba8f8337689c1bd3241214c4ecfa6f08 [file] [log] [blame]
// Bundles everything that can be derived from a vector type
// (NumElts x EltVT): the mask register classes, memory operands, load
// fragments and so on.  Multiclasses can then take one of these records as a
// single template argument instead of a long list of individual ones.
class X86VectorVTInfo<int NumElts, ValueType EltVT, RegisterClass rc,
                      string suffix = ""> {
  // Vector register class, as supplied by the instantiation.
  RegisterClass RC = rc;

  // Mask register class named VK<NumElts>.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Write-mask register class named VK<NumElts>WM.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // GPR register class that can hold the write mask: GR8 when there are
  // fewer than 8 elements, GR<NumElts> otherwise.  TableGen has no !lt, so
  // "NumElts < 8" is expressed as "(NumElts >> 3) == 0".
  RegisterClass MRC =
    !cast<RegisterClass>("GR" # !if(!eq(!srl(NumElts, 3), 0), 8, NumElts));

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // Name of the vector type, e.g. "v16i32".
  string VTName = "v" # NumElts # EltVT;

  // The vector type record looked up by that name.
  ValueType VT = !cast<ValueType>(VTName);

  // Name of the element type, e.g. "i32".
  string EltTypeName = !cast<string>(EltVT);

  // Element size in bits, e.g. 32 for v16i32.  EltSizeName is the same
  // number as a string, obtained by stripping "i"/"f" from the type name.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // What is left of the element type name once the size is removed:
  // "i" for integer types and "f" for floating-point types.
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of the vector type in bits, e.g. 512 for a VR512 vector.
  int Size = VT.Size;

  // Matching memory operands, e.g. i512mem for a 512-bit integer vector and
  // i32mem for its scalar element.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");

  // Load pattern fragments.
  // For 128/256-bit integer vectors loadv2i64/loadv4i64 are used because of
  // load promotion during legalization; everything else uses load<VTName>.
  PatFrag LdFrag = !cast<PatFrag>("load" #
                                  !if(!eq(TypeVariantName, "i"),
                                      !if(!eq(Size, 128), "v2i64",
                                          !if(!eq(Size, 256), "v4i64",
                                              VTName)),
                                      VTName));
  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);

  // Floating-point type of the same shape, e.g. v16f32 for v16i32.
  // For EltSize < 32 such a type is illegal and TableGen fails to compile,
  // so FloatVT falls back to VT in that case (again using !srl in place of
  // the missing !lt).
  ValueType FloatVT = !cast<ValueType>(
                        !if(!eq(!srl(EltSize, 5), 0),
                            VTName,
                            !if(!eq(TypeVariantName, "i"),
                                "v" # NumElts # "f" # EltSize,
                                VTName)));

  // Assembly string for an embedded broadcast, e.g. "{1to16}".
  string BroadcastStr = "{1to" # NumElts # "}";
}
68
// 512-bit integer vector types, all in VR512.
def v64i8_info   : X86VectorVTInfo<64, i8,  VR512, "b">;
def v32i16_info  : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info  : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info   : X86VectorVTInfo<8,  i64, VR512, "q">;

// The trailing "x" in a name such as v32i8x_info means the register class is
// the "X" variant (VR256X here, VR128X below).
def v32i8x_info  : X86VectorVTInfo<32, i8,  VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;

def v16i8x_info  : X86VectorVTInfo<16, i8,  VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
84
// Groups the three X86VectorVTInfo records (512-, 256- and 128-bit) that
// describe the same element type at each vector length.
class AVX512VLVectorVTInfo<X86VectorVTInfo i512,
                           X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}
91
// One AVX512VLVectorVTInfo per integer element type: <512-bit, 256-bit,
// 128-bit> info records.
def avx512vl_i8_info
  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info, v16i8x_info>;
def avx512vl_i16_info
  : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info, v8i16x_info>;
def avx512vl_i32_info
  : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info, v4i32x_info>;
def avx512vl_i64_info
  : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info, v2i64x_info>;
100
101
// Common base class of AVX512_masking and AVX512_masking_3src.
//
// Emits three instruction records:
//   NAME    unmasked form, using Ins and the pattern RHS
//   NAMEk   merge-masking form (EVEX_K), using MaskingIns and MaskingRHS
//   NAMEkz  zero-masking form (EVEX_KZ), using ZeroMaskingIns and a vselect
//           between RHS and an all-zeros vector bitcast to OpVT
multiclass AVX512_masking_common<bits<8> O, Format F, dag Outs, dag Ins,
                                 dag MaskingIns, dag ZeroMaskingIns,
                                 string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskingRHS, ValueType OpVT,
                                 RegisterClass RC, RegisterClass KRC,
                                 string MaskingConstraint = ""> {
  def NAME : AVX512<O, F, Outs, Ins,
                    !strconcat(OpcodeStr, " \t{", AttSrcAsm,
                               ", $dst|$dst, ", IntelSrcAsm, "}"),
                    [(set RC:$dst, RHS)]>;

  def NAME#k : AVX512<O, F, Outs, MaskingIns,
                      !strconcat(OpcodeStr, " \t{", AttSrcAsm,
                                 ", $dst {${mask}}|$dst {${mask}}, ",
                                 IntelSrcAsm, "}"),
                      [(set RC:$dst, MaskingRHS)]>,
               EVEX_K {
    // Prefer over VMOV*rrk Pat<>
    let AddedComplexity = 20;
    // In case of the 3src subclass this is overridden with a let.
    string Constraints = MaskingConstraint;
  }

  def NAME#kz : AVX512<O, F, Outs, ZeroMaskingIns,
                       !strconcat(OpcodeStr, " \t{", AttSrcAsm,
                                  ", $dst {${mask}} {z}|$dst {${mask}} {z}, ",
                                  IntelSrcAsm, "}"),
                       [(set RC:$dst,
                             (vselect KRC:$mask, RHS,
                                      (OpVT (bitconvert
                                              (v16i32 immAllZerosV)))))]>,
                EVEX_KZ {
    // Prefer over VMOV*rrkz Pat<>
    let AddedComplexity = 30;
  }
}
135
// Generates the unconditional (non-masking), the masking and the
// zero-masking variant of an instruction.  In the masking case the preserved
// vector elements come from a new dummy input operand, $src0, that is tied
// to $dst.
multiclass AVX512_masking<bits<8> O, Format F, dag Outs, dag Ins,
                          string OpcodeStr,
                          string AttSrcAsm, string IntelSrcAsm,
                          dag RHS, ValueType OpVT, RegisterClass RC,
                          RegisterClass KRC>
  : AVX512_masking_common<O, F, Outs, Ins,
                          // Masking form: pass-through operand, then mask.
                          !con((ins RC:$src0, KRC:$mask), Ins),
                          // Zero-masking form: mask only.
                          !con((ins KRC:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          RHS,
                          (vselect KRC:$mask, RHS, RC:$src0),
                          OpVT, RC, KRC,
                          "$src0 = $dst">;
151
// Like AVX512_masking, except that one of the source operands ($src1) is
// already tied to $dst, so it doubles as the source of the preserved vector
// elements.  NOTE: NonTiedIns (the ins dag) must NOT contain $src1; it is
// prepended here.
multiclass AVX512_masking_3src<bits<8> O, Format F, dag Outs, dag NonTiedIns,
                               string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, ValueType OpVT,
                               RegisterClass RC, RegisterClass KRC>
  : AVX512_masking_common<O, F, Outs,
                          !con((ins RC:$src1), NonTiedIns),
                          // Masking and zero-masking forms take the same
                          // operands: $src1, $mask, then the rest.
                          !con((ins RC:$src1, KRC:$mask), NonTiedIns),
                          !con((ins RC:$src1, KRC:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          RHS,
                          (vselect KRC:$mask, RHS, RC:$src1),
                          OpVT, RC, KRC>;
169
// Bitcasts between vector types of the same width.  Each pattern returns the
// source register retyped, since no instruction is needed for the
// conversion.  Every group lists all 30 ordered pairs of its six types
// exactly once.
let Predicates = [HasAVX512] in {
  // Bitcasts between 512-bit vector types.
  def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
  def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
  def : Pat<(v8f64 (bitconvert (v32i16 VR512:$src))), (v8f64 VR512:$src)>;
  def : Pat<(v8f64 (bitconvert (v64i8 VR512:$src))), (v8f64 VR512:$src)>;
  def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v64i8 VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
  def : Pat<(v8i64 (bitconvert (v32i16 VR512:$src))), (v8i64 VR512:$src)>;
  def : Pat<(v8i64 (bitconvert (v64i8 VR512:$src))), (v8i64 VR512:$src)>;
  def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
  def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v8i64 VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v64i8 VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v8f64 VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v64i8 (bitconvert (v8i64 VR512:$src))), (v64i8 VR512:$src)>;
  def : Pat<(v64i8 (bitconvert (v16i32 VR512:$src))), (v64i8 VR512:$src)>;
  def : Pat<(v64i8 (bitconvert (v32i16 VR512:$src))), (v64i8 VR512:$src)>;
  def : Pat<(v64i8 (bitconvert (v8f64 VR512:$src))), (v64i8 VR512:$src)>;
  def : Pat<(v64i8 (bitconvert (v16f32 VR512:$src))), (v64i8 VR512:$src)>;

  // Bitcasts between 128-bit vector types.
  def : Pat<(v2i64 (bitconvert (v4i32 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v2f64 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v4f32 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v4f32 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v2i64 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v4i32 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v2i64 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 VR128X:$src))), (v2f64 VR128X:$src)>;

  // Bitcasts between 256-bit vector types.
  def : Pat<(v4f64 (bitconvert (v8f32 VR256X:$src))), (v4f64 VR256X:$src)>;
  def : Pat<(v4f64 (bitconvert (v8i32 VR256X:$src))), (v4f64 VR256X:$src)>;
  def : Pat<(v4f64 (bitconvert (v4i64 VR256X:$src))), (v4f64 VR256X:$src)>;
  def : Pat<(v4f64 (bitconvert (v16i16 VR256X:$src))), (v4f64 VR256X:$src)>;
  def : Pat<(v4f64 (bitconvert (v32i8 VR256X:$src))), (v4f64 VR256X:$src)>;
  def : Pat<(v8f32 (bitconvert (v8i32 VR256X:$src))), (v8f32 VR256X:$src)>;
  def : Pat<(v8f32 (bitconvert (v4i64 VR256X:$src))), (v8f32 VR256X:$src)>;
  def : Pat<(v8f32 (bitconvert (v4f64 VR256X:$src))), (v8f32 VR256X:$src)>;
  def : Pat<(v8f32 (bitconvert (v32i8 VR256X:$src))), (v8f32 VR256X:$src)>;
  def : Pat<(v8f32 (bitconvert (v16i16 VR256X:$src))), (v8f32 VR256X:$src)>;
  def : Pat<(v4i64 (bitconvert (v8f32 VR256X:$src))), (v4i64 VR256X:$src)>;
  def : Pat<(v4i64 (bitconvert (v8i32 VR256X:$src))), (v4i64 VR256X:$src)>;
  def : Pat<(v4i64 (bitconvert (v4f64 VR256X:$src))), (v4i64 VR256X:$src)>;
  def : Pat<(v4i64 (bitconvert (v32i8 VR256X:$src))), (v4i64 VR256X:$src)>;
  def : Pat<(v4i64 (bitconvert (v16i16 VR256X:$src))), (v4i64 VR256X:$src)>;
  def : Pat<(v32i8 (bitconvert (v4f64 VR256X:$src))), (v32i8 VR256X:$src)>;
  def : Pat<(v32i8 (bitconvert (v4i64 VR256X:$src))), (v32i8 VR256X:$src)>;
  def : Pat<(v32i8 (bitconvert (v8f32 VR256X:$src))), (v32i8 VR256X:$src)>;
  def : Pat<(v32i8 (bitconvert (v8i32 VR256X:$src))), (v32i8 VR256X:$src)>;
  def : Pat<(v32i8 (bitconvert (v16i16 VR256X:$src))), (v32i8 VR256X:$src)>;
  def : Pat<(v8i32 (bitconvert (v32i8 VR256X:$src))), (v8i32 VR256X:$src)>;
  def : Pat<(v8i32 (bitconvert (v16i16 VR256X:$src))), (v8i32 VR256X:$src)>;
  def : Pat<(v8i32 (bitconvert (v8f32 VR256X:$src))), (v8i32 VR256X:$src)>;
  def : Pat<(v8i32 (bitconvert (v4i64 VR256X:$src))), (v8i32 VR256X:$src)>;
  def : Pat<(v8i32 (bitconvert (v4f64 VR256X:$src))), (v8i32 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v8f32 VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v8i32 VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v4i64 VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v4f64 VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v32i8 VR256X:$src))), (v16i16 VR256X:$src)>;
}
269
//
// AVX-512: the VPXOR instruction writes zero to its upper part, so it is
// safe to use it to build zeros.
//

// Pseudo instruction producing an all-zeros 512-bit register.
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                        [(set VR512:$dst, (v16f32 immAllZerosV))]> {
  let isReMaterializable = 1;
  let isAsCheapAsAMove = 1;
  let canFoldAsLoad = 1;
  let isPseudo = 1;
  let Predicates = [HasAVX512];
}

// The remaining 512-bit zero vectors map onto the same pseudo.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
  def : Pat<(v16i32 immAllZerosV), (AVX512_512_SET0)>;
  def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000285
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//
// -- 32x4 fp form --
// Register and memory forms of vinsertf32x4.  Neither has a selection
// pattern here; they are selected through separate Pat<> records.
def VINSERTF32x4rr : AVX512AIi8<0x18, MRMSrcReg, (outs VR512:$dst),
          (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
          "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, EVEX_4V, EVEX_V512 {
  let hasSideEffects = 0;
  let ExeDomain = SSEPackedSingle;
}
def VINSERTF32x4rm : AVX512AIi8<0x18, MRMSrcMem, (outs VR512:$dst),
          (ins VR512:$src1, f128mem:$src2, i8imm:$src3),
          "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4> {
  let hasSideEffects = 0;
  let ExeDomain = SSEPackedSingle;
  // No pattern to infer it from, so the load is declared explicitly.
  let mayLoad = 1;
}
301
// -- 64x4 fp form --
// Register and memory forms of vinsertf64x4.  Neither has a selection
// pattern here; they are selected through separate Pat<> records.
let hasSideEffects = 0, ExeDomain = SSEPackedDouble in {
def VINSERTF64x4rr : AVX512AIi8<0x1a, MRMSrcReg, (outs VR512:$dst),
          (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
          "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, EVEX_4V, EVEX_V512, VEX_W;
// The memory operand is f256mem (not i256mem) so that the FP forms are
// consistent: VINSERTF32x4rm uses f128mem and VEXTRACTF64x4mr uses f256mem.
let mayLoad = 1 in
def VINSERTF64x4rm : AVX512AIi8<0x1a, MRMSrcMem, (outs VR512:$dst),
          (ins VR512:$src1, f256mem:$src2, i8imm:$src3),
          "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
}
// Integer subvector inserts.  None of these has a selection pattern here;
// the memory forms therefore declare mayLoad explicitly.
let hasSideEffects = 0 in {
  // -- 32x4 integer form --
  def VINSERTI32x4rr : AVX512AIi8<0x38, MRMSrcReg, (outs VR512:$dst),
          (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
          "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, EVEX_4V, EVEX_V512;
  def VINSERTI32x4rm : AVX512AIi8<0x38, MRMSrcMem, (outs VR512:$dst),
          (ins VR512:$src1, i128mem:$src2, i8imm:$src3),
          "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4> {
    let mayLoad = 1;
  }

  // -- 64x4 integer form --
  def VINSERTI64x4rr : AVX512AIi8<0x3a, MRMSrcReg, (outs VR512:$dst),
          (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
          "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, EVEX_4V, EVEX_V512, VEX_W;
  def VINSERTI64x4rm : AVX512AIi8<0x3a, MRMSrcMem, (outs VR512:$dst),
          (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
          "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4> {
    let mayLoad = 1;
  }
}
339
// 128-bit subvector insert into a 512-bit vector, register source.
// FP element types select VINSERTF32x4rr, integer ones VINSERTI32x4rr; the
// immediate is derived from the matched node by INSERT_get_vinsert128_imm.
def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1),
                                  (v4f32 VR128X:$src2), (iPTR imm)),
          (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1),
                                  (v2f64 VR128X:$src2), (iPTR imm)),
          (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1),
                                  (v2i64 VR128X:$src2), (iPTR imm)),
          (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1),
                                  (v4i32 VR128X:$src2), (iPTR imm)),
          (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;

// Same inserts with the 128-bit value loaded from memory.
def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1),
                                  (loadv4f32 addr:$src2), (iPTR imm)),
          (VINSERTF32x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1),
                                  (bc_v4i32 (loadv2i64 addr:$src2)),
                                  (iPTR imm)),
          (VINSERTI32x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1),
                                  (loadv2f64 addr:$src2), (iPTR imm)),
          (VINSERTF32x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1),
                                  (loadv2i64 addr:$src2), (iPTR imm)),
          (VINSERTI32x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
366
// 256-bit subvector insert into a 512-bit vector, register source.
// FP element types select VINSERTF64x4rr, integer ones VINSERTI64x4rr; the
// immediate is derived from the matched node by INSERT_get_vinsert256_imm.
// All four patterns must match on vinsert256_insert: the inserted value is a
// 256-bit vector and the immediate transform is the 256-bit one.
def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (v8f32 VR256X:$src2),
           (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (v4f64 VR256X:$src2),
           (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (v4i64 VR256X:$src2),
           (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1), (v8i32 VR256X:$src2),
           (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
379
// 256-bit subvector insert into a 512-bit vector with the inserted value
// loaded from memory.  FP element types select VINSERTF64x4rm, integer ones
// VINSERTI64x4rm.
def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1),
                                  (loadv8f32 addr:$src2), (iPTR imm)),
          (VINSERTF64x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1),
                                  (loadv4f64 addr:$src2), (iPTR imm)),
          (VINSERTF64x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1),
                                  (loadv4i64 addr:$src2), (iPTR imm)),
          (VINSERTI64x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1),
                                  (bc_v8i32 (loadv4i64 addr:$src2)),
                                  (iPTR imm)),
          (VINSERTI64x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
393
// vinsertps - insert f32 to XMM
// Register form: both sources are 128-bit registers.
def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
        (ins VR128X:$src1, VR128X:$src2, i8imm:$src3),
        "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set VR128X:$dst,
              (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
        EVEX_4V;
// Memory form: the second source is a scalar f32 load turned into a vector.
def VINSERTPSzrm : AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
        (ins VR128X:$src1, f32mem:$src2, i8imm:$src3),
        "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set VR128X:$dst,
              (X86insertps VR128X:$src1,
                           (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                           imm:$src3))]>,
        EVEX_4V, EVEX_CD8<32, CD8VT1>;
406
//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---
// FP subvector extracts.  None of these has a selection pattern, so nothing
// can be inferred about memory behaviour: both memory-destination forms must
// declare mayStore explicitly, otherwise hasSideEffects = 0 would describe
// them as instructions that do not touch memory at all.
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
// -- 32x4 form --
def VEXTRACTF32x4rr : AVX512AIi8<0x19, MRMDestReg, (outs VR128X:$dst),
          (ins VR512:$src1, i8imm:$src2),
          "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, EVEX, EVEX_V512;
let mayStore = 1 in
def VEXTRACTF32x4mr : AVX512AIi8<0x19, MRMDestMem, (outs),
          (ins f128mem:$dst, VR512:$src1, i8imm:$src2),
          "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;

// -- 64x4 form --
def VEXTRACTF64x4rr : AVX512AIi8<0x1b, MRMDestReg, (outs VR256X:$dst),
          (ins VR512:$src1, i8imm:$src2),
          "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, EVEX, EVEX_V512, VEX_W;
let mayStore = 1 in
def VEXTRACTF64x4mr : AVX512AIi8<0x1b, MRMDestMem, (outs),
          (ins f256mem:$dst, VR512:$src1, i8imm:$src2),
          "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
}
432
// Integer subvector extracts.  As with the FP forms, there are no selection
// patterns here, so both memory-destination forms declare mayStore
// explicitly.
let hasSideEffects = 0 in {
// -- 32x4 form --
def VEXTRACTI32x4rr : AVX512AIi8<0x39, MRMDestReg, (outs VR128X:$dst),
          (ins VR512:$src1, i8imm:$src2),
          "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, EVEX, EVEX_V512;
let mayStore = 1 in
def VEXTRACTI32x4mr : AVX512AIi8<0x39, MRMDestMem, (outs),
          (ins i128mem:$dst, VR512:$src1, i8imm:$src2),
          "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;

// -- 64x4 form --
def VEXTRACTI64x4rr : AVX512AIi8<0x3b, MRMDestReg, (outs VR256X:$dst),
          (ins VR512:$src1, i8imm:$src2),
          "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, EVEX, EVEX_V512, VEX_W;
let mayStore = 1 in
def VEXTRACTI64x4mr : AVX512AIi8<0x3b, MRMDestMem, (outs),
          (ins i256mem:$dst, VR512:$src1, i8imm:$src2),
          "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
}
455
// 128-bit subvector extract from a 512-bit vector.  FP element types select
// VEXTRACTF32x4rr and integer element types select VEXTRACTI32x4rr; the
// immediate is derived from the matched node by EXTRACT_get_vextract128_imm.
def : Pat<(vextract128_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
          (v4f32 (VEXTRACTF32x4rr VR512:$src1,
                  (EXTRACT_get_vextract128_imm VR128X:$ext)))>;

// The source type is spelled out (v16i32) like in the sibling patterns, and
// the integer instruction is used so that v4i32 agrees with the v2i64 case.
def : Pat<(vextract128_extract:$ext (v16i32 VR512:$src1), (iPTR imm)),
          (v4i32 (VEXTRACTI32x4rr VR512:$src1,
                  (EXTRACT_get_vextract128_imm VR128X:$ext)))>;

def : Pat<(vextract128_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
          (v2f64 (VEXTRACTF32x4rr VR512:$src1,
                  (EXTRACT_get_vextract128_imm VR128X:$ext)))>;

def : Pat<(vextract128_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
          (v2i64 (VEXTRACTI32x4rr VR512:$src1,
                  (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
471
472
// 256-bit subvector extract from a 512-bit vector.  FP element types select
// VEXTRACTF64x4rr, integer ones VEXTRACTI64x4rr; the immediate is derived
// from the matched node by EXTRACT_get_vextract256_imm.
def : Pat<(vextract256_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
          (v8f32 (VEXTRACTF64x4rr
                   VR512:$src1,
                   (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
def : Pat<(vextract256_extract:$ext (v16i32 VR512:$src1), (iPTR imm)),
          (v8i32 (VEXTRACTI64x4rr
                   VR512:$src1,
                   (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
def : Pat<(vextract256_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
          (v4f64 (VEXTRACTF64x4rr
                   VR512:$src1,
                   (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
def : Pat<(vextract256_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
          (v4i64 (VEXTRACTI64x4rr
                   VR512:$src1,
                   (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
488
// Extracting the subvector at index 0 of a 512-bit vector is just a
// subregister copy and needs no instruction.

// zmm -> ymm
def : Pat<(v8i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
          (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm))>;
def : Pat<(v8f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
          (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm))>;
def : Pat<(v4i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
          (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm))>;
def : Pat<(v4f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
          (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm))>;

// zmm -> xmm
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
          (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
          (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
509
510
// Inserting a subvector at index 0 of an undef 512-bit vector is a
// subregister copy that needs no instruction.

// xmm -> zmm: goes through an intermediate 256-bit IMPLICIT_DEF, inserting
// first at sub_xmm and then at sub_ymm.
def : Pat<(insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG
            (v8i64 (IMPLICIT_DEF)),
            (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
            sub_ymm)>;
def : Pat<(insert_subvector undef, (v2f64 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG
            (v8f64 (IMPLICIT_DEF)),
            (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
            sub_ymm)>;
def : Pat<(insert_subvector undef, (v4i32 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG
            (v16i32 (IMPLICIT_DEF)),
            (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
            sub_ymm)>;
def : Pat<(insert_subvector undef, (v4f32 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG
            (v16f32 (IMPLICIT_DEF)),
            (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
            sub_ymm)>;

// ymm -> zmm: a single insert at sub_ymm.
def : Pat<(insert_subvector undef, (v4i64 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v4f64 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
538
// vextractps - extract 32 bits from XMM
// Register form: the selected element, viewed as i32, lands in a GR32.
def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
        (ins VR128X:$src1, i32i8imm:$src2),
        "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
        [(set GR32:$dst,
              (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
        EVEX;

// Memory form: the selected element is stored to $dst.
def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
        (ins f32mem:$dst, VR128X:$src1, i32i8imm:$src2),
        "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
        [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                addr:$dst)]>,
        EVEX, EVEX_CD8<32, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000551
//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
// Defines the register-source (rr) and memory-source (rm) forms of an FP
// broadcast.  Neither form carries a selection pattern.
multiclass avx512_fp_broadcast<bits<8> opc, string OpcodeStr,
                               RegisterClass DestRC,
                               RegisterClass SrcRC, X86MemOperand x86memop> {
  def rr : AVX5128I<opc, MRMSrcReg, (outs DestRC:$dst), (ins SrcRC:$src),
                    OpcodeStr # " \t{$src, $dst|$dst, $src}",
                    []>, EVEX;
  def rm : AVX5128I<opc, MRMSrcMem, (outs DestRC:$dst), (ins x86memop:$src),
                    OpcodeStr # " \t{$src, $dst|$dst, $src}",
                    []>, EVEX;
}
// 512-bit scalar FP broadcasts: single precision, then double precision.
let ExeDomain = SSEPackedSingle in
defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss", VR512,
                                         VR128X, f32mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;

let ExeDomain = SSEPackedDouble in
defm VBROADCASTSDZ : avx512_fp_broadcast<0x19, "vbroadcastsd", VR512,
                                         VR128X, f64mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Broadcast of a loaded scalar selects the memory form.
def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))),
          (VBROADCASTSSZrm addr:$src)>;
def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))),
          (VBROADCASTSDZrm addr:$src)>;

// The broadcast intrinsics select the same memory forms.
def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
          (VBROADCASTSSZrm addr:$src)>;
def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
          (VBROADCASTSDZrm addr:$src)>;
585
// Integer broadcast from a general-purpose register into a VR512.
//   SrcRC - register class of the scalar source.
//   KRC   - mask register class used by the zero-masked form.
// Zrr is the unmasked form; Zkrr takes a mask and is tagged EVEX_KZ.
// Neither carries a pattern; selection is done by separate Pat<> records.
multiclass avx512_int_broadcast_reg<bits<8> opc, string OpcodeStr,
                                    RegisterClass SrcRC, RegisterClass KRC> {
  def Zrr  : AVX5128I<opc, MRMSrcReg,
                      (outs VR512:$dst), (ins SrcRC:$src),
                      !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                      []>,
             EVEX, EVEX_V512;

  def Zkrr : AVX5128I<opc, MRMSrcReg,
                      (outs VR512:$dst), (ins KRC:$mask, SrcRC:$src),
                      !strconcat(OpcodeStr,
                        " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
                      []>,
             EVEX, EVEX_V512, EVEX_KZ;
}
597
// GPR-source integer broadcasts: dword from GR32, qword from GR64.
defm VPBROADCASTDr : avx512_int_broadcast_reg<0x7C, "vpbroadcastd",
                                              GR32, VK16WM>;
defm VPBROADCASTQr : avx512_int_broadcast_reg<0x7C, "vpbroadcastq",
                                              GR64, VK8WM>,
                     VEX_W;

// Zero-extending a mask to a vector: zero-masked broadcast of the constant 1.
def : Pat<(v16i32 (X86vzext VK16WM:$mask)),
          (VPBROADCASTDrZkrr VK16WM:$mask, (i32 (MOV32ri 0x1)))>;

def : Pat<(v8i64 (X86vzext VK8WM:$mask)),
          (VPBROADCASTQrZkrr VK8WM:$mask, (i64 (MOV64ri 0x1)))>;

// Plain and masked broadcast nodes with a GPR source.
def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))),
          (VPBROADCASTDrZrr GR32:$src)>;
def : Pat<(v16i32 (X86VBroadcastm VK16WM:$mask, (i32 GR32:$src))),
          (VPBROADCASTDrZkrr VK16WM:$mask, GR32:$src)>;
def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))),
          (VPBROADCASTQrZrr GR64:$src)>;
def : Pat<(v8i64 (X86VBroadcastm VK8WM:$mask, (i64 GR64:$src))),
          (VPBROADCASTQrZkrr VK8WM:$mask, GR64:$src)>;

// Unmasked GPR broadcast intrinsics.
def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_i32_512 (i32 GR32:$src))),
          (VPBROADCASTDrZrr GR32:$src)>;
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_i64_512 (i64 GR64:$src))),
          (VPBROADCASTQrZrr GR64:$src)>;

// Masked GPR broadcast intrinsics with an all-zeros pass-through operand:
// the GPR mask is copied into the write-mask class and the zero-masked
// instruction is used.
def : Pat<(v16i32 (int_x86_avx512_mask_pbroadcast_d_gpr_512
                     (i32 GR32:$src), (v16i32 immAllZerosV),
                     (i16 GR16:$mask))),
          (VPBROADCASTDrZkrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
                             GR32:$src)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pbroadcast_q_gpr_512
                    (i64 GR64:$src), (bc_v8i64 (v16i32 immAllZerosV)),
                    (i8 GR8:$mask))),
          (VPBROADCASTQrZkrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
                             GR64:$src)>;
628
// Integer element broadcast from the low element of an XMM register or from
// memory, each in an unmasked and a zero-masked (EVEX_KZ) variant.
//   x86memop - memory operand type of the load forms.
//   ld_frag  - load fragment matched by the load forms.
//   DstRC    - destination register class.
//   OpVT     - value type of the result.
//   SrcVT    - value type of the VR128X source in the register forms.
//   KRC      - mask register class of the masked forms.
multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
                                   X86MemOperand x86memop, PatFrag ld_frag,
                                   RegisterClass DstRC, ValueType OpVT,
                                   ValueType SrcVT, RegisterClass KRC> {
  def rr  : AVX5128I<opc, MRMSrcReg,
                     (outs DstRC:$dst), (ins VR128X:$src),
                     !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                     [(set DstRC:$dst,
                           (OpVT (X86VBroadcast (SrcVT VR128X:$src))))]>,
            EVEX;

  def krr : AVX5128I<opc, MRMSrcReg,
                     (outs DstRC:$dst), (ins KRC:$mask, VR128X:$src),
                     !strconcat(OpcodeStr,
                       " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
                     [(set DstRC:$dst,
                           (OpVT (X86VBroadcastm KRC:$mask,
                                                 (SrcVT VR128X:$src))))]>,
            EVEX, EVEX_KZ;

  let mayLoad = 1 in {
    def rm  : AVX5128I<opc, MRMSrcMem,
                       (outs DstRC:$dst), (ins x86memop:$src),
                       !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                       [(set DstRC:$dst,
                             (OpVT (X86VBroadcast (ld_frag addr:$src))))]>,
              EVEX;

    def krm : AVX5128I<opc, MRMSrcMem,
                       (outs DstRC:$dst), (ins KRC:$mask, x86memop:$src),
                       !strconcat(OpcodeStr,
                         " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
                       [(set DstRC:$dst,
                             (OpVT (X86VBroadcastm KRC:$mask,
                                                   (ld_frag addr:$src))))]>,
              EVEX, EVEX_KZ;
  }
}
657
// 512-bit vpbroadcastd: v4i32 / i32 load source, 16-bit write mask.
defm VPBROADCASTDZ : avx512_int_broadcast_rm<0x58, "vpbroadcastd",
                                             i32mem, loadi32, VR512,
                                             v16i32, v4i32, VK16WM>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;

// 512-bit vpbroadcastq: v2i64 / i64 load source, 8-bit write mask.
defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq",
                                             i64mem, loadi64, VR512,
                                             v8i64, v2i64, VK8WM>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
664
// Sub-vector broadcast from memory into a VR512: an unmasked form ("rm") and
// a zero-masked form ("krm"). Both have empty pattern lists, so mayLoad is
// set explicitly. Note that ld_frag is accepted but not referenced by either
// definition.
multiclass avx512_int_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                          X86MemOperand x86memop,
                                          PatFrag ld_frag,
                                          RegisterClass KRC> {
  let mayLoad = 1 in {
    def rm  : AVX5128I<opc, MRMSrcMem,
                       (outs VR512:$dst), (ins x86memop:$src),
                       !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                       []>,
              EVEX;

    def krm : AVX5128I<opc, MRMSrcMem,
                       (outs VR512:$dst), (ins KRC:$mask, x86memop:$src),
                       !strconcat(OpcodeStr,
                         " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
                       []>,
              EVEX, EVEX_KZ;
  }
}
679
// vbroadcasti32x4: 128-bit memory source, 32-bit elements, so the result has
// 16 elements and the write mask is VK16WM.
defm VBROADCASTI32X4 : avx512_int_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                       i128mem, loadv2i64, VK16WM>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
// vbroadcasti64x4: 256-bit memory source, 64-bit elements, so the result has
// 8 elements. Use the 8-bit write-mask class, matching every other
// 64-bit-element instruction defined here (previously VK16WM).
defm VBROADCASTI64X4 : avx512_int_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                       i256mem, loadv4i64, VK8WM>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
686
// XMM-source integer broadcast intrinsics.
def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))),
          (VPBROADCASTDZrr VR128X:$src)>;
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
          (VPBROADCASTQZrr VR128X:$src)>;

// XMM-source floating-point broadcast nodes.
def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))),
          (VBROADCASTSSZrr VR128X:$src)>;
def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))),
          (VBROADCASTSDZrr VR128X:$src)>;

// XMM-source floating-point broadcast intrinsics.
def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))),
          (VBROADCASTSSZrr VR128X:$src)>;
def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
          (VBROADCASTSDZrr VR128X:$src)>;

// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
          (VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
          (VBROADCASTSDZrr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;


// Masked v8i32 broadcast of a loaded i32: run the 512-bit zero-masked load
// form with the mask copied into VK16WM, then take the sub_ymm half.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))),
            (EXTRACT_SUBREG
              (v16i32 (VPBROADCASTDZkrm
                        (COPY_TO_REGCLASS VK8WM:$mask, VK16WM), addr:$src)),
              sub_ymm)>;
}
716//===----------------------------------------------------------------------===//
717// AVX-512 BROADCAST MASK TO VECTOR REGISTER
718//---
719
// Broadcast of a mask register into a vector register.
//   DstRC - destination vector register class.
//   KRC   - source mask register class.
//   OpVT, SrcVT - accepted for the caller's benefit; not referenced here.
// The destination is the ModRM.reg operand and the mask source is the
// ModRM.r/m operand, so the form must be MRMSrcReg. MRMDestReg (used
// previously) swaps the two fields and produces the wrong encoding.
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                       RegisterClass DstRC, RegisterClass KRC,
                       ValueType OpVT, ValueType SrcVT> {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs DstRC:$dst), (ins KRC:$src),
                  !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                  []>, EVEX;
}
727
// Mask-to-vector broadcasts, gated on HasCDI.
let Predicates = [HasCDI] in {
  defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                               VR512, VK16, v16i32, v16i1>,
                         EVEX_V512;
  defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                               VR512, VK8, v8i64, v8i1>,
                         EVEX_V512, VEX_W;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000734
735//===----------------------------------------------------------------------===//
736// AVX-512 - VPERM
737//
738// -- immediate form --
// Permute controlled by an 8-bit immediate.
//   RC       - register class of source and destination.
//   OpNode   - DAG node matched as (OpNode src, imm8).
//   mem_frag - load fragment matched by the memory form.
//   x86memop - memory operand type of the memory form.
//   OpVT     - value type of the result.
multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                           SDNode OpNode, PatFrag mem_frag,
                           X86MemOperand x86memop, ValueType OpVT> {
  // Register source.
  def ri : AVX512AIi8<opc, MRMSrcReg,
                      (outs RC:$dst), (ins RC:$src1, i8imm:$src2),
                      !strconcat(OpcodeStr,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [(set RC:$dst,
                            (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
           EVEX;

  // Memory source.
  def mi : AVX512AIi8<opc, MRMSrcMem,
                      (outs RC:$dst), (ins x86memop:$src1, i8imm:$src2),
                      !strconcat(OpcodeStr,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [(set RC:$dst,
                            (OpVT (OpNode (mem_frag addr:$src1),
                                          (i8 imm:$src2))))]>,
           EVEX;
}
757
// Immediate-controlled 64-bit-element permutes on VR512.
defm VPERMQZ  : avx512_perm_imm<0x00, "vpermq", VR512, X86VPermi,
                                memopv8i64, i512mem, v8i64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

let ExeDomain = SSEPackedDouble in
defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", VR512, X86VPermi,
                                memopv8f64, f512mem, v8f64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
763
764// -- VPERM - register form --
// Two-source permute matched as (X86VPermv $src1, $src2), with the second
// source either a register ("rr") or a load through mem_frag ("rm").
multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                       PatFrag mem_frag, X86MemOperand x86memop,
                       ValueType OpVT> {
  def rr : AVX5128I<opc, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, RC:$src2),
                    !strconcat(OpcodeStr,
                      " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst,
                          (OpVT (X86VPermv RC:$src1, RC:$src2)))]>,
           EVEX_4V;

  def rm : AVX5128I<opc, MRMSrcMem,
                    (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                    !strconcat(OpcodeStr,
                      " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst,
                          (OpVT (X86VPermv RC:$src1,
                                           (mem_frag addr:$src2))))]>,
           EVEX_4V;
}
783
// Register-controlled permutes on VR512: integer forms first, then the
// floating-point forms in their execution domains.
defm VPERMDZ  : avx512_perm<0x36, "vpermd", VR512,
                            memopv16i32, i512mem, v16i32>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMQZ  : avx512_perm<0x36, "vpermq", VR512,
                            memopv8i64, i512mem, v8i64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

let ExeDomain = SSEPackedSingle in
defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512,
                            memopv16f32, f512mem, v16f32>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;

let ExeDomain = SSEPackedDouble in
defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512,
                            memopv8f64, f512mem, v8f64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
794
795// -- VPERM2I - 3 source operands form --
// Three-source permute whose first source is tied to the destination
// ("$src1 = $dst"), so $src1 never appears in the assembly string.
//   RC       - register class of all vector operands.
//   mem_frag - load fragment matched by the memory forms.
//   x86memop - memory operand type of the memory forms.
//   OpNode   - DAG node matched as (OpNode $src1, $src2, $src3).
//   OpVT     - value type of the result.
//   KRC      - mask register class of the masked forms.
// Forms: rr/rm (unmasked), rrk/rmk (merge-masked: unselected lanes keep
// $src1), rrkz/rmkz (zero-masked: unselected lanes are all-zeros).
multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC,
                          PatFrag mem_frag, X86MemOperand x86memop,
                          SDNode OpNode, ValueType OpVT, RegisterClass KRC> {
let Constraints = "$src1 = $dst" in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2, RC:$src3),
                   !strconcat(OpcodeStr,
                       " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>,
                    EVEX_4V;

  def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
                   !strconcat(OpcodeStr,
                       " \t{$src3, $src2, $dst {${mask}}|"
                       "$dst {${mask}}, $src2, $src3}"),
                   [(set RC:$dst, (OpVT (vselect KRC:$mask,
                                           (OpNode RC:$src1, RC:$src2,
                                              RC:$src3),
                                           RC:$src1)))]>,
                    EVEX_4V, EVEX_K;

  // The AT&T half of the string must end directly at '|'; a space before it
  // (as was previously present here) is emitted as trailing whitespace.
  let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
    def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
                   !strconcat(OpcodeStr,
                       " \t{$src3, $src2, $dst {${mask}} {z}|"
                       "$dst {${mask}} {z}, $src2, $src3}"),
                   [(set RC:$dst, (OpVT (vselect KRC:$mask,
                                           (OpNode RC:$src1, RC:$src2,
                                              RC:$src3),
                                           (OpVT (bitconvert
                                              (v16i32 immAllZerosV))))))]>,
                    EVEX_4V, EVEX_KZ;

  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2, x86memop:$src3),
                   !strconcat(OpcodeStr,
                    " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (OpNode RC:$src1, RC:$src2,
                      (mem_frag addr:$src3))))]>, EVEX_4V;

  def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
                   !strconcat(OpcodeStr,
                    " \t{$src3, $src2, $dst {${mask}}|"
                    "$dst {${mask}}, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (vselect KRC:$mask,
                                    (OpNode RC:$src1, RC:$src2,
                                       (mem_frag addr:$src3)),
                                    RC:$src1)))]>,
                    EVEX_4V, EVEX_K;

  let AddedComplexity = 10 in // Prefer over the rrkz variant
    def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
                   !strconcat(OpcodeStr,
                    " \t{$src3, $src2, $dst {${mask}} {z}|"
                    "$dst {${mask}} {z}, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (vselect KRC:$mask,
                                    (OpNode RC:$src1, RC:$src2,
                                            (mem_frag addr:$src3)),
                                    (OpVT (bitconvert
                                       (v16i32 immAllZerosV))))))]>,
                    EVEX_4V, EVEX_KZ;
  }
}
// vpermi2{d,q,ps,pd} on VR512, all matched through X86VPermiv3. The
// floating-point variants are also instantiated with i512mem.
defm VPERMI2D  : avx512_perm_3src<0x76, "vpermi2d", VR512,
                                  memopv16i32, i512mem,
                                  X86VPermiv3, v16i32, VK16WM>,
                 EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q  : avx512_perm_3src<0x76, "vpermi2q", VR512,
                                  memopv8i64, i512mem,
                                  X86VPermiv3, v8i64, VK8WM>,
                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512,
                                  memopv16f32, i512mem,
                                  X86VPermiv3, v16f32, VK16WM>,
                 EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512,
                                  memopv8f64, i512mem,
                                  X86VPermiv3, v8f64, VK8WM>,
                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000879
// vpermt2<Suffix>: inherits every instruction form from avx512_perm_3src and
// adds selection patterns for the int_x86_avx512_mask_vpermt_<Suffix>_512
// intrinsic.
//   MaskVT - value type given to the mask after it is copied into KRC.
//   MRC    - register class in which the intrinsic's mask operand arrives.
// The intrinsic takes the index first; the instruction takes $src1 (tied to
// the destination) first and the index second.
multiclass avx512_perm_table_3src<bits<8> opc, string Suffix,
                                  RegisterClass RC, PatFrag mem_frag,
                                  X86MemOperand x86memop, SDNode OpNode,
                                  ValueType OpVT, RegisterClass KRC,
                                  ValueType MaskVT, RegisterClass MRC> :
    avx512_perm_3src<opc, "vpermt2"##Suffix, RC, mem_frag, x86memop, OpNode,
                     OpVT, KRC> {
  // A mask operand of -1 selects the unmasked register form.
  def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512")
                     VR512:$idx, VR512:$src1, VR512:$src2, -1)),
            (!cast<Instruction>(NAME#rr)
               VR512:$src1, VR512:$idx, VR512:$src2)>;

  // Any other mask is copied into KRC and the merge-masked form is used.
  def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512")
                     VR512:$idx, VR512:$src1, VR512:$src2, MRC:$mask)),
            (!cast<Instruction>(NAME#rrk)
               VR512:$src1,
               (MaskVT (COPY_TO_REGCLASS MRC:$mask, KRC)),
               VR512:$idx, VR512:$src2)>;
}
895
// vpermt2{d,q,ps,pd} on VR512, all matched through X86VPermv3. The
// 32-bit-element forms take a GR16 intrinsic mask, the 64-bit-element
// forms a GR8 one.
defm VPERMT2D  : avx512_perm_table_3src<0x7E, "d", VR512,
                                        memopv16i32, i512mem, X86VPermv3,
                                        v16i32, VK16WM, v16i1, GR16>,
                 EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q  : avx512_perm_table_3src<0x7E, "q", VR512,
                                        memopv8i64, i512mem, X86VPermv3,
                                        v8i64, VK8WM, v8i1, GR8>,
                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2PS : avx512_perm_table_3src<0x7F, "ps", VR512,
                                        memopv16f32, i512mem, X86VPermv3,
                                        v16f32, VK16WM, v16i1, GR16>,
                 EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_table_3src<0x7F, "pd", VR512,
                                        memopv8f64, i512mem, X86VPermv3,
                                        v8f64, VK8WM, v8i1, GR8>,
                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky299cf5112014-04-29 09:09:15 +0000908
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000909//===----------------------------------------------------------------------===//
910// AVX-512 - BLEND using mask
911//
// Mask-controlled blend of two vectors.
//   KRC      - mask register class.
//   RC       - vector register class.
//   x86memop - memory operand type of the "rm" form.
//   mem_frag - accepted but not referenced (the "rm" form has no pattern).
//   OpNode   - DAG node matched as (OpNode mask, $src2, $src1).
//   vt       - value type of the vector operands.
multiclass avx512_blendmask<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, RegisterClass RC,
                            X86MemOperand x86memop, PatFrag mem_frag,
                            SDNode OpNode, ValueType vt> {
  // Note the operand order in the pattern: $src2 comes before $src1.
  def rr : AVX5128I<opc, MRMSrcReg,
                    (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
                    !strconcat(OpcodeStr,
                      " \t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
                    [(set RC:$dst,
                          (OpNode KRC:$mask, (vt RC:$src2), (vt RC:$src1)))]>,
           EVEX_4V, EVEX_K;

  // Pattern-less load form; mayLoad is therefore given explicitly.
  let mayLoad = 1 in
  def rm : AVX5128I<opc, MRMSrcMem,
                    (outs RC:$dst),
                    (ins KRC:$mask, RC:$src1, x86memop:$src2),
                    !strconcat(OpcodeStr,
                      " \t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
                    []>,
           EVEX_4V, EVEX_K;
}
929
// Floating-point mask blends on VR512, matched through vselect.
let ExeDomain = SSEPackedSingle in
defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps", VK16WM, VR512,
                                   f512mem, memopv16f32, vselect, v16f32>,
                  EVEX_CD8<32, CD8VF>, EVEX_V512;

let ExeDomain = SSEPackedDouble in
defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd", VK8WM, VR512,
                                   f512mem, memopv8f64, vselect, v8f64>,
                  VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
940
// Floating-point blend intrinsics: the GPR mask is copied into the
// write-mask register class and the register blend is selected.
def : Pat<(v16f32 (int_x86_avx512_mask_blend_ps_512
                     (v16f32 VR512:$src1), (v16f32 VR512:$src2),
                     (i16 GR16:$mask))),
          (VBLENDMPSZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
                        VR512:$src1, VR512:$src2)>;

def : Pat<(v8f64 (int_x86_avx512_mask_blend_pd_512
                    (v8f64 VR512:$src1), (v8f64 VR512:$src2),
                    (i8 GR8:$mask))),
          (VBLENDMPDZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
                        VR512:$src1, VR512:$src2)>;
950
// Integer mask blends on VR512, matched through vselect. The memory operand
// is the integer i512mem, as for the other integer instructions defined here
// (these were previously instantiated with the floating-point f512mem).
defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd",
                                VK16WM, VR512, i512mem,
                                memopv16i32, vselect, v16i32>,
                                EVEX_CD8<32, CD8VF>, EVEX_V512;

defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq",
                                VK8WM, VR512, i512mem,
                                memopv8i64, vselect, v8i64>,
                                VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000960
// Integer blend intrinsics: the GPR mask is copied into the write-mask
// register class and the register blend is selected. The copy targets
// VK16WM / VK8WM because that is the class of the instruction's $mask
// operand (VPBLENDMDZ / VPBLENDMQZ are instantiated with those classes) and
// it matches the floating-point blend patterns; the plain VK16 / VK8 classes
// used previously do not match the operand's class.
def : Pat<(v16i32 (int_x86_avx512_mask_blend_d_512 (v16i32 VR512:$src1),
                 (v16i32 VR512:$src2), (i16 GR16:$mask))),
        (VPBLENDMDZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
         VR512:$src1, VR512:$src2)>;

def : Pat<(v8i64 (int_x86_avx512_mask_blend_q_512 (v8i64 VR512:$src1),
                 (v8i64 VR512:$src2), (i8 GR8:$mask))),
        (VPBLENDMQZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
         VR512:$src1, VR512:$src2)>;
970
// 256-bit vselect via the 512-bit blends: each YMM operand is placed in the
// sub_ymm part of a 512-bit value with SUBREG_TO_REG, the v8i1 mask is copied
// into VK16WM, and the sub_ymm part of the result is extracted. $src2 is
// passed before $src1 to the blend.
let Predicates = [HasAVX512] in {
  def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask),
                            (v8f32 VR256X:$src1), (v8f32 VR256X:$src2))),
            (EXTRACT_SUBREG
              (v16f32 (VBLENDMPSZrr
                        (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
                        (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
                        (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask),
                            (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG
              (v16i32 (VPBLENDMDZrr
                        (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
                        (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
                        (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))),
              sub_ymm)>;
}
Elena Demikhovsky47fc44e2013-12-16 13:52:35 +0000986//===----------------------------------------------------------------------===//
987// Compare Instructions
988//===----------------------------------------------------------------------===//
989
990// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
// Scalar floating-point compare producing a VK1 mask.
//   RC       - register class of the scalar sources.
//   x86memop - memory operand type of the load forms.
//   CC       - operand type of the condition code in the rr/rm forms.
//   OpNode   - DAG node matched as (OpNode $src1, $src2, cc).
//   VT       - value type of the first source.
//   ld_frag  - load fragment matched by the "rm" form.
//   asm      - assembly string of the condition-code-mnemonic forms.
//   asm_alt  - assembly string of the explicit-immediate forms.
// The memory forms use the scalar itinerary IIC_SSE_ALU_F32S_RM so that they
// pair with the scalar IIC_SSE_ALU_F32S_RR of the register forms (the packed
// IIC_SSE_ALU_F32P_RM was used previously).
multiclass avx512_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
                            Operand CC, SDNode OpNode, ValueType VT,
                            PatFrag ld_frag, string asm, string asm_alt> {
  def rr : AVX512Ii8<0xC2, MRMSrcReg,
                (outs VK1:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
                [(set VK1:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
                IIC_SSE_ALU_F32S_RR>, EVEX_4V;
  def rm : AVX512Ii8<0xC2, MRMSrcMem,
                (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
                [(set VK1:$dst, (OpNode (VT RC:$src1),
                (ld_frag addr:$src2), imm:$cc))], IIC_SSE_ALU_F32S_RM>, EVEX_4V;
  // Assembler-only forms that take the comparison predicate as a plain imm8.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512Ii8<0xC2, MRMSrcReg,
               (outs VK1:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
               asm_alt, [], IIC_SSE_ALU_F32S_RR>, EVEX_4V;
    def rmi_alt : AVX512Ii8<0xC2, MRMSrcMem,
               (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
               asm_alt, [], IIC_SSE_ALU_F32S_RM>, EVEX_4V;
  }
}
1011
// Scalar compares into a mask register. Each instantiation carries an
// EVEX_CD8 tuple for its element size (32-bit for ss, 64-bit for sd), like
// the other EVEX instructions with scalar memory operands in this file, so
// that the memory forms get compressed-disp8 scaling. The tuple was
// previously missing.
let Predicates = [HasAVX512] in {
defm VCMPSSZ : avx512_cmp_scalar<FR32X, f32mem, AVXCC, X86cmpms, f32, loadf32,
                 "vcmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 "vcmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
                 XS, EVEX_CD8<32, CD8VT1>;
defm VCMPSDZ : avx512_cmp_scalar<FR64X, f64mem, AVXCC, X86cmpms, f64, loadf64,
                 "vcmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 "vcmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
                 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001022
// Packed integer compare writing a mask register, parameterised by an
// X86VectorVTInfo record ("_"). Four forms are produced: register and memory
// second source, each unmasked and masked. The masked forms match the
// compare result ANDed with the incoming write mask.
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86VectorVTInfo _> {
  def rr : AVX512BI<opc, MRMSrcReg,
                    (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
                    !strconcat(OpcodeStr,
                      "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set _.KRC:$dst,
                          (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))],
                    IIC_SSE_ALU_F32P_RR>,
           EVEX_4V;

  let mayLoad = 1 in
  def rm : AVX512BI<opc, MRMSrcMem,
                    (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
                    !strconcat(OpcodeStr,
                      "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set _.KRC:$dst,
                          (OpNode (_.VT _.RC:$src1),
                                  (_.VT (bitconvert
                                          (_.LdFrag addr:$src2)))))],
                    IIC_SSE_ALU_F32P_RM>,
           EVEX_4V;

  def rrk : AVX512BI<opc, MRMSrcReg,
                     (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                     !strconcat(OpcodeStr,
                       "\t{$src2, $src1, $dst {${mask}}|"
                       "$dst {${mask}}, $src1, $src2}"),
                     [(set _.KRC:$dst,
                           (and _.KRCWM:$mask,
                                (OpNode (_.VT _.RC:$src1),
                                        (_.VT _.RC:$src2))))],
                     IIC_SSE_ALU_F32P_RR>,
            EVEX_4V, EVEX_K;

  let mayLoad = 1 in
  def rmk : AVX512BI<opc, MRMSrcMem,
                     (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
                     !strconcat(OpcodeStr,
                       "\t{$src2, $src1, $dst {${mask}}|"
                       "$dst {${mask}}, $src1, $src2}"),
                     [(set _.KRC:$dst,
                           (and _.KRCWM:$mask,
                                (OpNode (_.VT _.RC:$src1),
                                        (_.VT (bitconvert
                                                (_.LdFrag addr:$src2))))))],
                     IIC_SSE_ALU_F32P_RM>,
            EVEX_4V, EVEX_K;
}
1055
// Broadcast-memory forms of the packed integer compare: the second source is
// a scalar load that is broadcast (X86VBroadcast), printed with the VT-info's
// BroadcastStr suffix and tagged EVEX_B. "rmb" is unmasked; "rmbk" ANDs the
// result with the incoming write mask.
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, X86VectorVTInfo _> {
  let mayLoad = 1 in {
    def rmb : AVX512BI<opc, MRMSrcMem,
                       (outs _.KRC:$dst),
                       (ins _.RC:$src1, _.ScalarMemOp:$src2),
                       !strconcat(OpcodeStr,
                         "\t{${src2}", _.BroadcastStr,
                         ", $src1, $dst|$dst, $src1, ${src2}",
                         _.BroadcastStr, "}"),
                       [(set _.KRC:$dst,
                             (OpNode (_.VT _.RC:$src1),
                                     (X86VBroadcast
                                       (_.ScalarLdFrag addr:$src2))))],
                       IIC_SSE_ALU_F32P_RM>,
              EVEX_4V, EVEX_B;

    def rmbk : AVX512BI<opc, MRMSrcMem,
                        (outs _.KRC:$dst),
                        (ins _.KRCWM:$mask, _.RC:$src1,
                             _.ScalarMemOp:$src2),
                        !strconcat(OpcodeStr,
                          "\t{${src2}", _.BroadcastStr,
                          ", $src1, $dst {${mask}}|"
                          "$dst {${mask}}, $src1, ${src2}",
                          _.BroadcastStr, "}"),
                        [(set _.KRC:$dst,
                              (and _.KRCWM:$mask,
                                   (OpNode (_.VT _.RC:$src1),
                                           (X86VBroadcast
                                             (_.ScalarLdFrag addr:$src2)))))],
                        IIC_SSE_ALU_F32P_RM>,
               EVEX_4V, EVEX_K, EVEX_B;
  }
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001079
// Instantiates avx512_icmp_packed for all three vector lengths described by
// VTInfo. The 512-bit form (Z) requires only `prd`; the 256- and 128-bit
// forms (Z256, Z128) additionally require HasVLX.
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
                                 SDNode OpNode,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode,
                                   VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode,
                                   VTInfo.info128>,
                EVEX_V128;
  }
}
1093
// Instantiates avx512_icmp_packed_rmb for all three vector lengths described
// by VTInfo. The 512-bit form (Z) requires only `prd`; the 256- and 128-bit
// forms (Z256, Z128) additionally require HasVLX.
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     SDNode OpNode,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode,
                                       VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode,
                                       VTInfo.info128>,
                EVEX_V128;
  }
}
1108
// Packed integer equality compares. The byte and word forms are gated on
// HasBWI; the dword and qword forms are gated on HasAVX512 and also get the
// broadcast-memory variants.
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
                                      avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>;

defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm,
                                      avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>;

defm VPCMPEQD : avx512_icmp_packed_vl<0x76, "vpcmpeqd", X86pcmpeqm,
                                      avx512vl_i32_info, HasAVX512>,
                avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm,
                                          avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ : avx512_icmp_packed_vl<0x29, "vpcmpeqq", X86pcmpeqm,
                                      avx512vl_i64_info, HasAVX512>,
                avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm,
                                          avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Packed integer greater-than compares, with the same gating as above.
defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
                                      avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>;

defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
                                      avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>;

defm VPCMPGTD : avx512_icmp_packed_vl<0x66, "vpcmpgtd", X86pcmpgtm,
                                      avx512vl_i32_info, HasAVX512>,
                avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
                                          avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ : avx512_icmp_packed_vl<0x37, "vpcmpgtq", X86pcmpgtm,
                                      avx512vl_i64_info, HasAVX512>,
                avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
                                          avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001148
// 256-bit dword compares via the 512-bit instructions: each YMM operand is
// placed in the sub_ymm part of a v16i32 with SUBREG_TO_REG and the resulting
// mask is copied into VK8.
def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
          (COPY_TO_REGCLASS
            (VPCMPGTDZrr
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))),
            VK8)>;

def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
          (COPY_TO_REGCLASS
            (VPCMPEQDZrr
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))),
            VK8)>;
1158
// Integer compare-into-mask instructions that carry a comparison-code operand.
//   opc    - opcode byte handed to AVX512AIi8.
//   Suffix - text appended to the "vpcmp" mnemonic (instantiated below with
//            e.g. "d" and "ud").
//   OpNode - SelectionDAG node matched by the non-_alt forms.
//   _      - X86VectorVTInfo supplying RC, KRC, KRCWM, VT, MemOp and LdFrag.
// Defines:
//   rri / rmi         - register and full-vector-memory forms.
//   rrik / rmik       - write-masked forms; they match the compare result
//                       ANDed with $mask.
//   *_alt             - assembler-only forms that take the comparison code
//                       as an explicit i8 immediate; they have no patterns.
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
                          X86VectorVTInfo _> {
  def rri : AVX512AIi8<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
             !strconcat("vpcmp${cc}", Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                       imm:$cc))],
             IIC_SSE_ALU_F32P_RR>, EVEX_4V;
  let mayLoad = 1 in
  def rmi : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
             !strconcat("vpcmp${cc}", Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                                       (_.VT (bitconvert (_.LdFrag addr:$src2))),
                                       imm:$cc))],
             IIC_SSE_ALU_F32P_RM>, EVEX_4V;
  def rrik : AVX512AIi8<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                      AVXCC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                         "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                             imm:$cc)))],
              IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
  let mayLoad = 1 in
  def rmik : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                      AVXCC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                         "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (OpNode (_.VT _.RC:$src1),
                                             (_.VT (bitconvert
                                                    (_.LdFrag addr:$src2))),
                                             imm:$cc)))],
              IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512AIi8<opc, MRMSrcReg,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, i8imm:$cc),
               !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
                          "$dst, $src1, $src2, $cc}"),
               [], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
    // The memory forms have no pattern to infer properties from, so the load
    // must be declared explicitly, matching rmi/rmik above.
    let mayLoad = 1 in
    def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, i8imm:$cc),
               !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
                          "$dst, $src1, $src2, $cc}"),
               [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
    def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                       i8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2, $cc}"),
               [], IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
    let mayLoad = 1 in
    def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                       i8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2, $cc}"),
               [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
  }
}
1228
// Broadcast-memory variants of the integer compare-into-mask instructions.
//   opc    - opcode byte handed to AVX512AIi8.
//   Suffix - text appended to the "vpcmp" mnemonic.
//   OpNode - SelectionDAG node matched by the non-_alt forms.
//   _      - X86VectorVTInfo supplying RC, KRC, KRCWM, VT, ScalarMemOp,
//            ScalarLdFrag and BroadcastStr.
// Defines:
//   rmib / rmibk         - second operand is X86VBroadcast of a scalar load;
//                          rmibk additionally ANDs the result with $mask.
//   rmib_alt / rmibk_alt - assembler-only forms taking the comparison code
//                          as an explicit i8 immediate; no patterns.
// Every form is tagged EVEX_B.
multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
                              X86VectorVTInfo _> {
  let mayLoad = 1 in {
  def rmib : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                     AVXCC:$cc),
             !strconcat("vpcmp${cc}", Suffix,
                        "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
                        "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                                       (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                                       imm:$cc))],
             IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
  def rmibk : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                      _.ScalarMemOp:$src2, AVXCC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                         "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (OpNode (_.VT _.RC:$src1),
                                             (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src2)),
                                             imm:$cc)))],
              IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
  }

  // Accept explicit immediate argument form instead of comparison code.
  // Both forms read memory but have no pattern to infer that from, so mayLoad
  // is set explicitly, matching rmib/rmibk above.
  let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
    def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                       i8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
                          "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
               [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
    def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2, i8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
               [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
  }
}
1273
// Instantiates avx512_icmp_cc for all three vector lengths described by
// VTInfo: Z (info512, EVEX_V512) is guarded by prd alone, while Z256 and Z128
// (info256/info128, EVEX_V256/EVEX_V128) additionally require HasVLX.
1274multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,
1275 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
1276 let Predicates = [prd] in
1277 defm Z : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info512>, EVEX_V512;
1278
1279 let Predicates = [prd, HasVLX] in {
1280 defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info256>, EVEX_V256;
1281 defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info128>, EVEX_V128;
1282 }
1283}
1284
// Instantiates the broadcast-memory compare forms (avx512_icmp_cc_rmb) for
// all three vector lengths described by VTInfo: Z is guarded by prd alone,
// Z256 and Z128 additionally require HasVLX.
1285multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, SDNode OpNode,
1286 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
1287 let Predicates = [prd] in
1288 defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info512>,
1289 EVEX_V512;
1290
1291 let Predicates = [prd, HasVLX] in {
1292 defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info256>,
1293 EVEX_V256;
1294 defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info128>,
1295 EVEX_V128;
1296 }
1297}
1298
// Instantiations of the integer compare-into-mask instructions.
// The plain mnemonics match X86cmpm and the "u"-suffixed ones match X86cmpmu.
// Byte and word element sizes are guarded by HasBWI and only get the
// register / full-vector-memory forms.
1299defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, avx512vl_i8_info,
1300 HasBWI>, EVEX_CD8<8, CD8VF>;
1301defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, avx512vl_i8_info,
1302 HasBWI>, EVEX_CD8<8, CD8VF>;
1303
1304defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, avx512vl_i16_info,
1305 HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>;
1306defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, avx512vl_i16_info,
1307 HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>;
1308
// Dword and qword element sizes are guarded by HasAVX512 and additionally get
// the broadcast-memory forms from avx512_icmp_cc_rmb_vl.
1309defm VPCMPD : avx512_icmp_cc_vl<0x1F, "d", X86cmpm, avx512vl_i32_info,
1310 HasAVX512>,
1311 avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, avx512vl_i32_info,
1312 HasAVX512>, EVEX_CD8<32, CD8VF>;
1313defm VPCMPUD : avx512_icmp_cc_vl<0x1E, "ud", X86cmpmu, avx512vl_i32_info,
1314 HasAVX512>,
1315 avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, avx512vl_i32_info,
1316 HasAVX512>, EVEX_CD8<32, CD8VF>;
1317
1318defm VPCMPQ : avx512_icmp_cc_vl<0x1F, "q", X86cmpm, avx512vl_i64_info,
1319 HasAVX512>,
1320 avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, avx512vl_i64_info,
1321 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
1322defm VPCMPUQ : avx512_icmp_cc_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info,
1323 HasAVX512>,
1324 avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info,
1325 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001326
// avx512_cmp_packed - compare packed instructions
//   KRC      - mask register class receiving the result.
//   RC       - vector register class of the sources.
//   x86memop - memory operand type for the memory forms.
//   vt       - vector value type matched by the patterns.
//   suffix   - text appended to the "vcmp" mnemonic (e.g. "ps", "pd").
//   d        - execution domain passed to AVX512PIi8.
// Defines:
//   rri      - register-register compare, matched from X86cmpm.
//   rrib     - register-register compare printed with {sae}; no pattern,
//              tagged EVEX_B.
//   rmi      - register-memory compare, matched from X86cmpm with a memop.
//   rri_alt / rmi_alt - assembler-only forms taking the comparison code as an
//              explicit i8 immediate.
multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
                           X86MemOperand x86memop, ValueType vt,
                           string suffix, Domain d> {
  def rri : AVX512PIi8<0xC2, MRMSrcReg,
             (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
             !strconcat("vcmp${cc}", suffix,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set KRC:$dst, (X86cmpm (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>;
  def rrib: AVX512PIi8<0xC2, MRMSrcReg,
             (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
     !strconcat("vcmp${cc}", suffix,
                " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
                [], d>, EVEX_B;
  // The comparison code is already part of the mnemonic (${cc}), so it must
  // not be repeated as a trailing operand in either syntax variant; this now
  // matches the rri form above.
  def rmi : AVX512PIi8<0xC2, MRMSrcMem,
             (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc),
              !strconcat("vcmp${cc}", suffix,
                         " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set KRC:$dst,
              (X86cmpm (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512PIi8<0xC2, MRMSrcReg,
               (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
              !strconcat("vcmp", suffix,
                        " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
    // No pattern to infer the load from, so declare it explicitly.
    let mayLoad = 1 in
    def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem,
               (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
              !strconcat("vcmp", suffix,
                        " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
  }
}
1360
// 512-bit packed floating-point compares into a mask register:
// VCMPPSZ compares v16f32 into VK16, VCMPPDZ compares v8f64 into VK8
// (the latter additionally sets VEX_W).
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00001361defm VCMPPSZ : avx512_cmp_packed<VK16, VR512, f512mem, v16f32,
Craig Topper5ccb6172014-02-18 00:21:49 +00001362 "ps", SSEPackedSingle>, PS, EVEX_4V, EVEX_V512,
Craig Topperda7160d2014-02-01 08:17:56 +00001363 EVEX_CD8<32, CD8VF>;
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00001364defm VCMPPDZ : avx512_cmp_packed<VK8, VR512, f512mem, v8f64,
Craig Topperae11aed2014-01-14 07:41:20 +00001365 "pd", SSEPackedDouble>, PD, EVEX_4V, VEX_W, EVEX_V512,
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001366 EVEX_CD8<64, CD8VF>;
1367
// 256-bit compares with a comparison code, selected through the 512-bit
// instructions: both VR256X operands are placed in 512-bit values with
// SUBREG_TO_REG at sub_ymm, the Z-form rri instruction is used, and the mask
// result is re-classed as VK8.
//   v8f32 X86cmpm  -> VCMPPSZrri
//   v8i32 X86cmpm  -> VPCMPDZrri
//   v8i32 X86cmpmu -> VPCMPUDZrri
1368def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
1369 (COPY_TO_REGCLASS (VCMPPSZrri
1370 (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1371 (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
1372 imm:$cc), VK8)>;
1373def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
1374 (COPY_TO_REGCLASS (VPCMPDZrri
1375 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1376 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
1377 imm:$cc), VK8)>;
1378def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
1379 (COPY_TO_REGCLASS (VPCMPUDZrri
1380 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1381 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
1382 imm:$cc), VK8)>;
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00001383
// Lowering of the 512-bit masked floating-point compare intrinsics.
// Only calls with an all-ones mask argument ((i16 -1) / (i8 -1)) are matched.
// The mask result is copied to a GPR class (GR16 for ps, GR8 for pd).
//
// FROUND_NO_EXC selects the rrib form, i.e. the one printed with {sae}.
1384def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
1385 (v16f32 VR512:$src2), imm:$cc, (i16 -1),
1386 FROUND_NO_EXC)),
1387 (COPY_TO_REGCLASS (VCMPPSZrrib VR512:$src1, VR512:$src2,
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00001388 (I8Imm imm:$cc)), GR16)>;
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00001389
1390def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
1391 (v8f64 VR512:$src2), imm:$cc, (i8 -1),
1392 FROUND_NO_EXC)),
1393 (COPY_TO_REGCLASS (VCMPPDZrrib VR512:$src1, VR512:$src2,
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00001394 (I8Imm imm:$cc)), GR8)>;
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00001395
// FROUND_CURRENT selects the plain rri form.
1396def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
1397 (v16f32 VR512:$src2), imm:$cc, (i16 -1),
1398 FROUND_CURRENT)),
1399 (COPY_TO_REGCLASS (VCMPPSZrri VR512:$src1, VR512:$src2,
1400 (I8Imm imm:$cc)), GR16)>;
1401
1402def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
1403 (v8f64 VR512:$src2), imm:$cc, (i8 -1),
1404 FROUND_CURRENT)),
1405 (COPY_TO_REGCLASS (VCMPPDZrri VR512:$src1, VR512:$src2,
1406 (I8Imm imm:$cc)), GR8)>;
1407
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001408// Mask register copy, including
1409// - copy between mask registers
1410// - load/store mask registers
1411// - copy from GPR to mask register and vice versa
1412//
// avx512_mask_mov defines the mask<->mask and mask<->memory moves:
//   kk (opc_kk) - mask register to mask register; no pattern.
//   km (opc_km) - load from x86memop; matches a load of ivt bitconverted
//                 to vvt.
//   mk (opc_mk) - store to x86memop; no pattern, marked mayStore.
1413multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
1414 string OpcodeStr, RegisterClass KRC,
Robert Khasanov74acbb72014-07-23 14:49:42 +00001415 ValueType vvt, ValueType ivt, X86MemOperand x86memop> {
Elena Demikhovskyf404e052014-01-05 14:21:07 +00001416 let hasSideEffects = 0 in {
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001417 def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00001418 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001419 let mayLoad = 1 in
1420 def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00001421 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
Robert Khasanov74acbb72014-07-23 14:49:42 +00001422 [(set KRC:$dst, (vvt (bitconvert (ivt (load addr:$src)))))]>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001423 let mayStore = 1 in
1424 def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00001425 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001426 }
1427}
1428
// Moves between a general-purpose register class (GRC) and a mask register
// class (KRC):
//   kr (opc_kr) - GRC source, KRC destination.
//   rk (opc_rk) - KRC source, GRC destination.
// Neither form carries a selection pattern; both are marked hasSideEffects = 0.
1429multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
1430 string OpcodeStr,
1431 RegisterClass KRC, RegisterClass GRC> {
Elena Demikhovskyf404e052014-01-05 14:21:07 +00001432 let hasSideEffects = 0 in {
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001433 def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00001434 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001435 def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00001436 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001437 }
1438}
1439
// KMOVB (8-bit masks) requires HasDQI. Its GPR forms use GR32.
Robert Khasanov74acbb72014-07-23 14:49:42 +00001440let Predicates = [HasDQI] in
1441 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8,
1442 i8mem>,
1443 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
1444 VEX, PD;
1445
// KMOVW (16-bit masks) requires HasAVX512. Its GPR forms use GR32.
1446let Predicates = [HasAVX512] in
1447 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16,
1448 i16mem>,
1449 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
Craig Topper5ccb6172014-02-18 00:21:49 +00001450 VEX, PS;
Robert Khasanov74acbb72014-07-23 14:49:42 +00001451
// KMOVD (32-bit masks) requires HasBWI. The mask/memory forms and the GPR
// forms are instantiated separately because they use different prefix
// classes (PD + VEX_W versus XD).
1452let Predicates = [HasBWI] in {
1453 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32,
1454 i32mem>, VEX, PD, VEX_W;
1455 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
1456 VEX, XD;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001457}
1458
// KMOVQ (64-bit masks) requires HasBWI. The GPR forms use GR64; as with
// KMOVD, the two groups differ in prefix class (PS versus XD).
Robert Khasanov74acbb72014-07-23 14:49:42 +00001459let Predicates = [HasBWI] in {
1460 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64,
1461 i64mem>, VEX, PS, VEX_W;
1462 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
1463 VEX, XD, VEX_W;
1464}
1465
1466// GR from/to mask register
// Bitconverts between an integer in a GPR and a mask vector of the same width.
// The 8- and 16-bit cases go through the 32-bit GPR operand of KMOVB/KMOVW:
// the narrow register is widened with SUBREG_TO_REG on the way in and
// narrowed with EXTRACT_SUBREG on the way out. The 32- and 64-bit cases map
// directly onto KMOVD/KMOVQ.
1467let Predicates = [HasDQI] in {
1468 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
1469 (KMOVBkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit))>;
1470 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
1471 (EXTRACT_SUBREG (KMOVBrk VK8:$src), sub_8bit)>;
1472}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001473let Predicates = [HasAVX512] in {
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001474 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
1475 (KMOVWkr (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit))>;
1476 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
1477 (EXTRACT_SUBREG (KMOVWrk VK16:$src), sub_16bit)>;
Robert Khasanov74acbb72014-07-23 14:49:42 +00001478}
1479let Predicates = [HasBWI] in {
1480 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), (KMOVDkr GR32:$src)>;
1481 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))), (KMOVDrk VK32:$src)>;
1482}
1483let Predicates = [HasBWI] in {
1484 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))), (KMOVQkr GR64:$src)>;
1485 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), (KMOVQrk VK64:$src)>;
1486}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001487
Robert Khasanov74acbb72014-07-23 14:49:42 +00001488// Load/store kreg
// Stores of a mask bitconverted to an integer map onto the mk form of the
// KMOV instruction of matching width.
1489let Predicates = [HasDQI] in {
1490 def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
1491 (KMOVBmk addr:$dst, VK8:$src)>;
1492}
1493let Predicates = [HasAVX512] in {
1494 def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001495 (KMOVWmk addr:$dst, VK16:$src)>;
 // Without KMOVB the 8-bit mask is re-classed as VK16 and moved with KMOVW.
 // The v8i1 store source pattern is the same one used under HasDQI above.
 // NOTE(review): KMOVW's memory forms are declared with i16mem, yet the next
 // three patterns use them for i8 and i1 accesses - confirm that the wider
 // memory access is intended.
Robert Khasanov74acbb72014-07-23 14:49:42 +00001496 def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
Elena Demikhovsky47fc44e2013-12-16 13:52:35 +00001497 (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK8:$src, VK16))>;
Elena Demikhovsky47fc44e2013-12-16 13:52:35 +00001498 def : Pat<(i1 (load addr:$src)),
1499 (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK1)>;
Robert Khasanov74acbb72014-07-23 14:49:42 +00001500 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
Elena Demikhovsky47fc44e2013-12-16 13:52:35 +00001501 (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK8)>;
Robert Khasanov74acbb72014-07-23 14:49:42 +00001502}
1503let Predicates = [HasBWI] in {
1504 def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst),
1505 (KMOVDmk addr:$dst, VK32:$src)>;
1506}
1507let Predicates = [HasBWI] in {
1508 def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst),
1509 (KMOVQmk addr:$dst, VK64:$src)>;
1510}
Elena Demikhovskyc5f67262013-12-17 08:33:15 +00001511
// Conversions between scalar integers and a single mask bit (VK1).
Robert Khasanov74acbb72014-07-23 14:49:42 +00001512let Predicates = [HasAVX512] in {
 // trunc to i1: bring the source to 32 bits (EXTRACT_SUBREG for i64,
 // SUBREG_TO_REG for i8/i16), AND it with 1, move it to a mask register with
 // KMOVWkr and re-class the result as VK1.
Elena Demikhovsky34d2d762014-08-18 11:59:06 +00001513 def : Pat<(i1 (trunc (i64 GR64:$src))),
1514 (COPY_TO_REGCLASS (KMOVWkr (AND32ri (EXTRACT_SUBREG $src, sub_32bit),
1515 (i32 1))), VK1)>;
1516
Elena Demikhovsky64c95482013-12-24 14:24:07 +00001517 def : Pat<(i1 (trunc (i32 GR32:$src))),
Elena Demikhovskyc9657012014-02-20 06:34:39 +00001518 (COPY_TO_REGCLASS (KMOVWkr (AND32ri $src, (i32 1))), VK1)>;
Elena Demikhovsky64c95482013-12-24 14:24:07 +00001519
1520 def : Pat<(i1 (trunc (i8 GR8:$src))),
Elena Demikhovskyc9657012014-02-20 06:34:39 +00001521 (COPY_TO_REGCLASS
1522 (KMOVWkr (AND32ri (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit), (i32 1))),
1523 VK1)>;
1524 def : Pat<(i1 (trunc (i16 GR16:$src))),
1525 (COPY_TO_REGCLASS
1526 (KMOVWkr (AND32ri (SUBREG_TO_REG (i32 0), $src, sub_16bit), (i32 1))),
1527 VK1)>;
Robert Khasanov74acbb72014-07-23 14:49:42 +00001528
 // zext from i1: re-class VK1 as VK16, move it to a 32-bit GPR with KMOVWrk
 // and AND with 1. The i8/i16 results take a sub-register of that value; the
 // i64 result widens it with SUBREG_TO_REG before the AND.
Elena Demikhovsky3ebfe112014-02-23 14:28:35 +00001529 def : Pat<(i32 (zext VK1:$src)),
1530 (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
Elena Demikhovsky64c95482013-12-24 14:24:07 +00001531 def : Pat<(i8 (zext VK1:$src)),
1532 (EXTRACT_SUBREG
Elena Demikhovsky3ebfe112014-02-23 14:28:35 +00001533 (AND32ri (KMOVWrk
1534 (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_8bit)>;
Elena Demikhovskyb19c9dc2014-01-13 12:55:03 +00001535 def : Pat<(i64 (zext VK1:$src)),
Elena Demikhovsky3ebfe112014-02-23 14:28:35 +00001536 (AND64ri8 (SUBREG_TO_REG (i64 0),
1537 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit), (i64 1))>;
Elena Demikhovsky750498c2014-02-17 07:29:33 +00001538 def : Pat<(i16 (zext VK1:$src)),
1539 (EXTRACT_SUBREG
Elena Demikhovsky3ebfe112014-02-23 14:28:35 +00001540 (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
1541 sub_16bit)>;
 // scalar_to_vector from i1 is only a register-class change.
Elena Demikhovskycf0b9ba2014-04-09 12:37:50 +00001542 def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
1543 (COPY_TO_REGCLASS VK1:$src, VK16)>;
1544 def : Pat<(v8i1 (scalar_to_vector VK1:$src)),
1545 (COPY_TO_REGCLASS VK1:$src, VK8)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001546}
// The 32- and 64-element mask types are only handled under HasBWI.
Robert Khasanov74acbb72014-07-23 14:49:42 +00001547let Predicates = [HasBWI] in {
1548 def : Pat<(v32i1 (scalar_to_vector VK1:$src)),
1549 (COPY_TO_REGCLASS VK1:$src, VK32)>;
1550 def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
1551 (COPY_TO_REGCLASS VK1:$src, VK64)>;
1552}
1553
1554
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001555// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
1556let Predicates = [HasAVX512] in {
1557 // GR from/to 8-bit mask without native support
 // Both directions go through KMOVW: the GR8 value is widened to 32 bits
 // before KMOVWkr, and the VK8 value is re-classed as VK16 before KMOVWrk.
1558 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
1559 (COPY_TO_REGCLASS
1560 (KMOVWkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
1561 VK8)>;
1562 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
1563 (EXTRACT_SUBREG
1564 (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
1565 sub_8bit)>;
Elena Demikhovsky47fc44e2013-12-16 13:52:35 +00001566
 // Extracting element 0 of a mask vector is only a register-class change.
Elena Demikhovsky9f423d62014-02-10 07:02:39 +00001567 def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))),
Elena Demikhovsky47fc44e2013-12-16 13:52:35 +00001568 (COPY_TO_REGCLASS VK16:$src, VK1)>;
Elena Demikhovsky9f423d62014-02-10 07:02:39 +00001569 def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))),
Elena Demikhovsky47fc44e2013-12-16 13:52:35 +00001570 (COPY_TO_REGCLASS VK8:$src, VK1)>;
Robert Khasanov74acbb72014-07-23 14:49:42 +00001571}
// Same element-0 extraction for the 32- and 64-element masks (HasBWI).
1572let Predicates = [HasBWI] in {
1573 def : Pat<(i1 (X86Vextract VK32:$src, (iPTR 0))),
1574 (COPY_TO_REGCLASS VK32:$src, VK1)>;
1575 def : Pat<(i1 (X86Vextract VK64:$src, (iPTR 0))),
1576 (COPY_TO_REGCLASS VK64:$src, VK1)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001577}
1578
1579// Mask unary operation
1580// - KNOT
// Defines a single register-to-register instruction "rr" on mask class KRC
// that matches (OpNode KRC:$src) and is guarded by predicate prd.
1581multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
Robert Khasanov74acbb72014-07-23 14:49:42 +00001582 RegisterClass KRC, SDPatternOperator OpNode,
1583 Predicate prd> {
1584 let Predicates = [prd] in
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001585 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00001586 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001587 [(set KRC:$dst, (OpNode KRC:$src))]>;
1588}
1589
// Instantiates avx512_mask_unop for every mask width, appending the width
// letter to OpcodeStr:
//   B - VK8,  HasDQI      W - VK16, HasAVX512
//   D - VK32, HasBWI      Q - VK64, HasBWI
// B and D use the PD prefix class, W and Q use PS; D and Q also set VEX_W.
Robert Khasanov74acbb72014-07-23 14:49:42 +00001590multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
1591 SDPatternOperator OpNode> {
1592 defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
1593 HasDQI>, VEX, PD;
1594 defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
1595 HasAVX512>, VEX, PS;
1596 defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
1597 HasBWI>, VEX, PD, VEX_W;
1598 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
1599 HasBWI>, VEX, PS, VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001600}
1601
// KNOT{B,W,D,Q}rr: mask complement, matched from the "not" operator.
Robert Khasanov74acbb72014-07-23 14:49:42 +00001602defm KNOT : avx512_mask_unop_all<0x44, "knot", not>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001603
// Maps the 16-bit unary mask intrinsic int_x86_avx512_<IntName>_w onto the
// <InstName>Wrr instruction: the GR16 argument is re-classed as VK16 for the
// instruction and the result is re-classed back to GR16.
Elena Demikhovskye382c3f2013-12-10 13:53:10 +00001604multiclass avx512_mask_unop_int<string IntName, string InstName> {
1605 let Predicates = [HasAVX512] in
1606 def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
1607 (i16 GR16:$src)),
1608 (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"Wrr")
1609 (v16i1 (COPY_TO_REGCLASS GR16:$src, VK16))), GR16)>;
1610}
1611defm : avx512_mask_unop_int<"knot", "KNOT">;
1612
// XOR of a mask with an all-ones vector is selected as the KNOT instruction
// of the matching width.
Robert Khasanov74acbb72014-07-23 14:49:42 +00001613let Predicates = [HasDQI] in
1614def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)), (KNOTBrr VK8:$src1)>;
1615let Predicates = [HasAVX512] in
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001616def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)), (KNOTWrr VK16:$src1)>;
Robert Khasanov74acbb72014-07-23 14:49:42 +00001617let Predicates = [HasBWI] in
1618def : Pat<(xor VK32:$src1, (v32i1 immAllOnesV)), (KNOTDrr VK32:$src1)>;
1619let Predicates = [HasBWI] in
1620def : Pat<(xor VK64:$src1, (v64i1 immAllOnesV)), (KNOTQrr VK64:$src1)>;
1621
1622// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
// Both VK8 patterns below re-class the operand as VK16, apply KNOTWrr and
// re-class the result back to VK8.
1623let Predicates = [HasAVX512] in {
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001624def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
1625 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>;
1626
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001627def : Pat<(not VK8:$src),
1628 (COPY_TO_REGCLASS
1629 (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
Robert Khasanov74acbb72014-07-23 14:49:42 +00001630}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001631
1632// Mask binary operation
Elena Demikhovskye382c3f2013-12-10 13:53:10 +00001633// - KAND, KANDN, KOR, KXNOR, KXOR
// Defines a single three-operand register instruction "rr" on mask class KRC
// that matches (OpNode KRC:$src1, KRC:$src2) and is guarded by predicate prd.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001634multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
Robert Khasanov595683d2014-07-28 13:46:45 +00001635 RegisterClass KRC, SDPatternOperator OpNode,
1636 Predicate prd> {
1637 let Predicates = [prd] in
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001638 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
1639 !strconcat(OpcodeStr,
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00001640 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001641 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>;
1642}
1643
// Instantiates avx512_mask_binop for every mask width, appending the width
// letter to OpcodeStr:
//   B - VK8,  HasDQI      W - VK16, HasAVX512
//   D - VK32, HasBWI      Q - VK64, HasBWI
// All forms are VEX_4V and VEX_L; B and D use PD, W and Q use PS; D and Q
// also set VEX_W.
Robert Khasanov595683d2014-07-28 13:46:45 +00001644multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
1645 SDPatternOperator OpNode> {
1646 defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
1647 HasDQI>, VEX_4V, VEX_L, PD;
1648 defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
1649 HasAVX512>, VEX_4V, VEX_L, PS;
1650 defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
1651 HasBWI>, VEX_4V, VEX_L, VEX_W, PD;
1652 defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
1653 HasBWI>, VEX_4V, VEX_L, VEX_W, PS;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001654}
1655
// Pattern fragments for the composite mask operations:
//   andn(i0, i1) = (not i0) and i1   - the FIRST operand is the inverted one.
//   xnor(i0, i1) = not (i0 xor i1)
1656def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
1657def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
1658
// Binary mask instructions for all widths. KAND/KOR/KXNOR/KXOR are marked
// commutable; KANDN is not, because andn inverts only its first operand.
1659let isCommutable = 1 in {
Robert Khasanov595683d2014-07-28 13:46:45 +00001660 defm KAND : avx512_mask_binop_all<0x41, "kand", and>;
1661 defm KOR : avx512_mask_binop_all<0x45, "kor", or>;
1662 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor>;
1663 defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001664}
Robert Khasanov595683d2014-07-28 13:46:45 +00001665let isCommutable = 0 in
1666 defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001667
// Logic on single mask bits (VK1): both operands are re-classed as VK16, the
// 16-bit instruction (KXORWrr / KORWrr / KANDWrr) is used, and the result is
// re-classed back to VK1. These patterns are not wrapped in a Predicates
// block.
Elena Demikhovsky47fc44e2013-12-16 13:52:35 +00001668def : Pat<(xor VK1:$src1, VK1:$src2),
1669 (COPY_TO_REGCLASS (KXORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
1670 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
1671
1672def : Pat<(or VK1:$src1, VK1:$src2),
1673 (COPY_TO_REGCLASS (KORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
1674 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
1675
Elena Demikhovskyb64d7e82013-12-25 10:06:40 +00001676def : Pat<(and VK1:$src1, VK1:$src2),
1677 (COPY_TO_REGCLASS (KANDWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
1678 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
1679
// Maps the 16-bit binary mask intrinsic int_x86_avx512_<IntName>_w onto the
// <InstName>Wrr instruction: both GR16 arguments are re-classed as VK16 and
// the result is re-classed back to GR16.
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001680multiclass avx512_mask_binop_int<string IntName, string InstName> {
1681 let Predicates = [HasAVX512] in
Elena Demikhovskye382c3f2013-12-10 13:53:10 +00001682 def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
1683 (i16 GR16:$src1), (i16 GR16:$src2)),
1684 (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"Wrr")
1685 (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
1686 (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001687}
1688
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001689defm : avx512_mask_binop_int<"kand", "KAND">;
1690defm : avx512_mask_binop_int<"kandn", "KANDN">;
1691defm : avx512_mask_binop_int<"kor", "KOR">;
1692defm : avx512_mask_binop_int<"kxnor", "KXNOR">;
1693defm : avx512_mask_binop_int<"kxor", "KXOR">;
Elena Demikhovskye382c3f2013-12-10 13:53:10 +00001694
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001695// With AVX-512, 8-bit mask is promoted to 16-bit mask.
// avx512_binop_pat selects a binary OpNode on VK8 operands through the given
// 16-bit instruction: both operands are re-classed as VK16 and the result is
// re-classed back to VK8.
1696multiclass avx512_binop_pat<SDPatternOperator OpNode, Instruction Inst> {
1697 let Predicates = [HasAVX512] in
1698 def : Pat<(OpNode VK8:$src1, VK8:$src2),
1699 (COPY_TO_REGCLASS
1700 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
1701 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
1702}
1703
1704defm : avx512_binop_pat<and, KANDWrr>;
1705defm : avx512_binop_pat<andn, KANDNWrr>;
1706defm : avx512_binop_pat<or, KORWrr>;
1707defm : avx512_binop_pat<xnor, KXNORWrr>;
1708defm : avx512_binop_pat<xor, KXORWrr>;
1709
1710// Mask unpacking
// Defines a three-operand register instruction "rr" on mask class KRC,
// guarded by HasAVX512. It carries no selection pattern.
1711multiclass avx512_mask_unpck<bits<8> opc, string OpcodeStr,
Elena Demikhovskye382c3f2013-12-10 13:53:10 +00001712 RegisterClass KRC> {
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001713 let Predicates = [HasAVX512] in
Elena Demikhovskye382c3f2013-12-10 13:53:10 +00001714 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001715 !strconcat(OpcodeStr,
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00001716 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001717}
1718
// Instantiates the "bw" unpack variant on VK16 (VEX_4V, VEX_L, PD).
1719multiclass avx512_mask_unpck_bw<bits<8> opc, string OpcodeStr> {
Elena Demikhovskye382c3f2013-12-10 13:53:10 +00001720 defm BW : avx512_mask_unpck<opc, !strconcat(OpcodeStr, "bw"), VK16>,
Craig Topperae11aed2014-01-14 07:41:20 +00001721 VEX_4V, VEX_L, PD;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001722}
1723
// KUNPCKBWrr, plus selection of a v8i1 + v8i1 concatenation into v16i1.
// The instruction receives the operands in swapped order: $src2 first, then
// $src1, each re-classed from VK8 to VK16.
1724defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">;
Elena Demikhovskyc5f67262013-12-17 08:33:15 +00001725def : Pat<(v16i1 (concat_vectors (v8i1 VK8:$src1), (v8i1 VK8:$src2))),
1726 (KUNPCKBWrr (COPY_TO_REGCLASS VK8:$src2, VK16),
1727 (COPY_TO_REGCLASS VK8:$src1, VK16))>;
1728
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001729
// Maps the intrinsic int_x86_avx512_<IntName>_bw onto <InstName>BWrr: both
// GR16 arguments are re-classed as VK16 (in the same order as the intrinsic
// arguments) and the result is re-classed back to GR16.
1730multiclass avx512_mask_unpck_int<string IntName, string InstName> {
1731 let Predicates = [HasAVX512] in
Elena Demikhovskye382c3f2013-12-10 13:53:10 +00001732 def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_bw")
1733 (i16 GR16:$src1), (i16 GR16:$src2)),
1734 (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"BWrr")
1735 (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
1736 (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001737}
Elena Demikhovskye382c3f2013-12-10 13:53:10 +00001738defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001739
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001740// Mask bit testing
// Defines "rr": takes two KRC operands, has no register output, and defines
// EFLAGS from (OpNode KRC:$src1, KRC:$src2). Guarded by HasAVX512.
1741multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
1742 SDNode OpNode> {
1743 let Predicates = [HasAVX512], Defs = [EFLAGS] in
1744 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
Elena Demikhovskya5d38a32014-01-23 14:27:26 +00001745 !strconcat(OpcodeStr, " \t{$src2, $src1|$src1, $src2}"),
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001746 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>;
1747}
1748
// Instantiates the 16-bit ("w", VK16) mask test instruction (VEX, PS).
1749multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode> {
1750 defm W : avx512_mask_testop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
Craig Topper5ccb6172014-02-18 00:21:49 +00001751 VEX, PS;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001752}
1753
// KORTESTWrr, matched from X86kortest.
1754defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
Elena Demikhovsky47fc44e2013-12-16 13:52:35 +00001755
// Comparing a single mask bit against zero is selected as KORTESTW of the
// (VK16 re-classed) value with itself.
Elena Demikhovsky64c95482013-12-24 14:24:07 +00001756def : Pat<(X86cmp VK1:$src1, (i1 0)),
Elena Demikhovsky47fc44e2013-12-16 13:52:35 +00001757 (KORTESTWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
Elena Demikhovsky64c95482013-12-24 14:24:07 +00001758 (COPY_TO_REGCLASS VK1:$src1, VK16))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001759
// Mask shift
//
// Emits <NAME>ri: shifts the KRC mask $src by the 8-bit immediate $imm and
// writes the result to $dst; selected for (OpNode $src, (i8 imm)).
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode> {
  let Predicates = [HasAVX512] in {
    def ri : Ii8<opc, MRMSrcReg,
                 (outs KRC:$dst),
                 (ins KRC:$src, i8imm:$imm),
                 !strconcat(OpcodeStr,
                            " \t{$imm, $src, $dst|$dst, $src, $imm}"),
                 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>;
  }
}
1769
// "w" flavour of the mask shift, instantiated on VK16 with the VEX, TAPD and
// VEX_W encoding classes. Only opc1 is used here; opc2 is accepted but not
// referenced in this body.
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode> {
  defm W : avx512_mask_shiftop<opc1, OpcodeStr # "w", VK16, OpNode>,
           VEX, TAPD, VEX_W;
}

defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86vsrli>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001778
// Mask setting all 0s or 1s
//
// Emits a pseudo instruction (no operands, empty asm string) that produces
// the constant (VT Val) in a KRC register. It is marked rematerializable and
// as cheap as a move.
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
  let Predicates = [HasAVX512],
      isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1 in {
    def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                   [(set KRC:$dst, (VT Val))]>;
  }
}
1786
// Instantiates the mask-constant pseudo for the v8i1/VK8 ("B") and
// v16i1/VK16 ("W") cases.
multiclass avx512_mask_setop_w<PatFrag Val> {
  defm B : avx512_mask_setop<VK8,  v8i1,  Val>;
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
}

// All-zeros and all-ones mask constants.
defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
1794
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// The v8i1 and i1 constants below are therefore built from the 16-bit
// KSET0W/KSET1W pseudos and copied into the narrower register class.
def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>,
      Requires<[HasAVX512]>;
def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>,
      Requires<[HasAVX512]>;
def : Pat<(i1 0),  (COPY_TO_REGCLASS (KSET0W), VK1)>,
      Requires<[HasAVX512]>;
def : Pat<(i1 1),  (COPY_TO_REGCLASS (KSET1W), VK1)>,
      Requires<[HasAVX512]>;
def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSET1W), VK1)>,
      Requires<[HasAVX512]>;
// Low half of a v16i1: a plain register-class copy from VK16 to VK8.
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0))),
          (v8i1 (COPY_TO_REGCLASS VK16:$src, VK8))>;

// Inserting a v8i1 at index 0 of an undef v16i1: a copy from VK8 to VK16.
def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))),
          (v16i1 (COPY_TO_REGCLASS VK8:$src, VK16))>;

// High half of a v16i1: shift right by 8 with KSHIFTRWri, then copy to VK8.
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
          (v8i1 (COPY_TO_REGCLASS
                   (KSHIFTRWri VK16:$src, (i8 8)),
                   VK8))>;
1811
// v8i1 shifts by immediate are performed with the 16-bit shift instructions:
// copy the operand to VK16, shift, and copy the result back to VK8.
def : Pat<(v8i1 (X86vshli VK8:$src, (i8 imm:$imm))),
          (v8i1 (COPY_TO_REGCLASS
                   (KSHIFTLWri (COPY_TO_REGCLASS VK8:$src, VK16),
                               (I8Imm $imm)),
                   VK8))>;

def : Pat<(v8i1 (X86vsrli VK8:$src, (i8 imm:$imm))),
          (v8i1 (COPY_TO_REGCLASS
                   (KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16),
                               (I8Imm $imm)),
                   VK8))>;
//===----------------------------------------------------------------------===//
// AVX-512 - Aligned and unaligned load and store
//
1820
// Defines the register-source and memory-source forms of a vector move that
// share opcode `opc`:
//   rr   - unmasked register move (no pattern)
//   rrkz - zero-masked register move (no pattern)
//   rm   - unmasked load, selected for (vt (bitconvert (ld_frag addr)))
//   rrk  - merge-masked register move, selected for
//          (vselect mask, $src1, $src0)
//   rmk  - merge-masked load, selected for (vselect mask, load, $src0)
//   rmkz - zero-masked load, selected for (vselect mask, load, zero), where
//          zero is matched as (vt (bitconvert (zvt immAllZerosV)))
//
// KRC is the register class of the mask operand, RC the vector register
// class, memop the memory operand and d the execution domain.
// IsReMaterializable is forwarded to isReMaterializable on `rm` only.
multiclass avx512_load<bits<8> opc, string OpcodeStr, PatFrag ld_frag,
                       RegisterClass KRC, RegisterClass RC,
                       ValueType vt, ValueType zvt, X86MemOperand memop,
                       Domain d, bit IsReMaterializable = 1> {
  // These two forms have no pattern, so hasSideEffects is set explicitly.
  let hasSideEffects = 0 in {
    def rr : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
                      d>, EVEX;
    def rrkz : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
                        (ins KRC:$mask, RC:$src),
                        !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                   "${dst} {${mask}} {z}, $src}"), [], d>,
                        EVEX, EVEX_KZ;
  }

  let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable,
      SchedRW = [WriteLoad] in
  def rm : AVX512PI<opc, MRMSrcMem, (outs RC:$dst), (ins memop:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set RC:$dst, (vt (bitconvert (ld_frag addr:$src))))],
                    d>, EVEX;

  let AddedComplexity = 20 in {
    // Merge-masking forms: $src0 is the value selected where the mask is
    // clear and is tied to $dst. hasSideEffects = 0 is set once here for both
    // defs (the original repeated it in a nested let around rrk).
    let Constraints = "$src0 = $dst", hasSideEffects = 0 in {
      def rrk : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
                         (ins RC:$src0, KRC:$mask, RC:$src1),
                         !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                    "${dst} {${mask}}, $src1}"),
                         [(set RC:$dst, (vt (vselect KRC:$mask,
                                                     (vt RC:$src1),
                                                     (vt RC:$src0))))],
                         d>, EVEX, EVEX_K;
      let mayLoad = 1, SchedRW = [WriteLoad] in
      def rmk : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
                         (ins RC:$src0, KRC:$mask, memop:$src1),
                         !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                    "${dst} {${mask}}, $src1}"),
                         [(set RC:$dst, (vt
                             (vselect KRC:$mask,
                                      (vt (bitconvert (ld_frag addr:$src1))),
                                      (vt RC:$src0))))],
                         d>, EVEX, EVEX_K;
    }

    // Zero-masking load: no tied operand.
    let mayLoad = 1, SchedRW = [WriteLoad] in
    def rmkz : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
                        (ins KRC:$mask, memop:$src),
                        !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                   "${dst} {${mask}} {z}, $src}"),
                        [(set RC:$dst, (vt
                            (vselect KRC:$mask,
                                     (vt (bitconvert (ld_frag addr:$src))),
                                     (vt (bitconvert (zvt immAllZerosV))))))],
                        d>, EVEX, EVEX_KZ;
  }
}
1874
// Instantiates avx512_load for the 512-bit (Z), 256-bit (Z256) and 128-bit
// (Z128) vector lengths. Record names are assembled from strings:
//   ld_pat  - load PatFrag name prefix (e.g. "load", "alignedload")
//   elty    - element kind, "f" or "i"
//   elsz    - element size in bits, as a string
//   vsz*    - element count for each vector length, as a string
// Z is guarded by `prd`; Z256 and Z128 additionally require HasVLX.
// For the 256/128-bit forms, when elty is not "f" the load fragment is
// always the v4i64 / v2i64 one, regardless of elsz.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, string ld_pat,
                          string elty, string elsz, string vsz512,
                          string vsz256, string vsz128, Domain d,
                          Predicate prd, bit IsReMaterializable = 1> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr,
               !cast<PatFrag>(ld_pat # "v" # vsz512 # elty # elsz),
               !cast<RegisterClass>("VK" # vsz512 # "WM"), VR512,
               !cast<ValueType>("v" # vsz512 # elty # elsz), v16i32,
               !cast<X86MemOperand>(elty # "512mem"), d,
               IsReMaterializable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in
  defm Z256 : avx512_load<opc, OpcodeStr,
               !cast<PatFrag>(ld_pat # !if(!eq(elty, "f"),
                                           "v" # vsz256 # elty # elsz,
                                           "v4i64")),
               !cast<RegisterClass>("VK" # vsz256 # "WM"), VR256X,
               !cast<ValueType>("v" # vsz256 # elty # elsz), v8i32,
               !cast<X86MemOperand>(elty # "256mem"), d,
               IsReMaterializable>, EVEX_V256;

  let Predicates = [prd, HasVLX] in
  defm Z128 : avx512_load<opc, OpcodeStr,
               !cast<PatFrag>(ld_pat # !if(!eq(elty, "f"),
                                           "v" # vsz128 # elty # elsz,
                                           "v2i64")),
               !cast<RegisterClass>("VK" # vsz128 # "WM"), VR128X,
               !cast<ValueType>("v" # vsz128 # elty # elsz), v4i32,
               !cast<X86MemOperand>(elty # "128mem"), d,
               IsReMaterializable>, EVEX_V128;
}
1905
1906
// Defines the store-direction forms of a vector move that share opcode `opc`:
//   rr_alt, rrk_alt, rrkz_alt - register-to-register encodings in the
//       MRMDestReg form; assembler-parser only, no patterns
//   mr  - unmasked store, selected for (st_frag (OpVT src), addr)
//   mrk - masked store, no pattern
multiclass avx512_store<bits<8> opc, string OpcodeStr, PatFrag st_frag,
                        ValueType OpVT, RegisterClass KRC, RegisterClass RC,
                        X86MemOperand memop, Domain d> {
  let isAsmParserOnly = 1, hasSideEffects = 0 in
  def rr_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst), (ins RC:$src),
                        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                        [], d>,
               EVEX;

  // Merge-masking form: $src1 is tied to $dst.
  let isAsmParserOnly = 1, hasSideEffects = 0,
      Constraints = "$src1 = $dst" in
  def rrk_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
                         (ins RC:$src1, KRC:$mask, RC:$src2),
                         !strconcat(OpcodeStr,
                           "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
                         [], d>,
                EVEX, EVEX_K;

  let isAsmParserOnly = 1, hasSideEffects = 0 in
  def rrkz_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
                          (ins KRC:$mask, RC:$src),
                          !strconcat(OpcodeStr,
                            "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
                          [], d>,
                 EVEX, EVEX_KZ;

  let mayStore = 1 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (OpVT RC:$src), addr:$dst)], d>,
           EVEX;

  let mayStore = 1 in
  def mrk : AVX512PI<opc, MRMDestMem, (outs),
                     (ins memop:$dst, KRC:$mask, RC:$src),
                     !strconcat(OpcodeStr,
                       "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
                     [], d>,
            EVEX, EVEX_K;
}
1937
Robert Khasanov7ca7df02014-08-04 14:35:15 +00001938
// Instantiates avx512_store for the 512-bit (Z), 256-bit (Z256) and 128-bit
// (Z128) vector lengths. The store PatFrag for each length is looked up as
// st_pat followed by st_suff_512 / st_suff_256 / st_suff_128; value type,
// mask register class and memory operand names are assembled from
// elty / elsz / vsz* as in avx512_load_vl.
// Z is guarded by `prd`; Z256 and Z128 additionally require HasVLX.
multiclass avx512_store_vl<bits<8> opc, string OpcodeStr, string st_pat,
                           string st_suff_512, string st_suff_256,
                           string st_suff_128, string elty, string elsz,
                           string vsz512, string vsz256, string vsz128,
                           Domain d, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr,
               !cast<PatFrag>(st_pat # st_suff_512),
               !cast<ValueType>("v" # vsz512 # elty # elsz),
               !cast<RegisterClass>("VK" # vsz512 # "WM"), VR512,
               !cast<X86MemOperand>(elty # "512mem"), d>, EVEX_V512;

  let Predicates = [prd, HasVLX] in
  defm Z256 : avx512_store<opc, OpcodeStr,
               !cast<PatFrag>(st_pat # st_suff_256),
               !cast<ValueType>("v" # vsz256 # elty # elsz),
               !cast<RegisterClass>("VK" # vsz256 # "WM"), VR256X,
               !cast<X86MemOperand>(elty # "256mem"), d>, EVEX_V256;

  let Predicates = [prd, HasVLX] in
  defm Z128 : avx512_store<opc, OpcodeStr,
               !cast<PatFrag>(st_pat # st_suff_128),
               !cast<ValueType>("v" # vsz128 # elty # elsz),
               !cast<RegisterClass>("VK" # vsz128 # "WM"), VR128X,
               !cast<X86MemOperand>(elty # "128mem"), d>, EVEX_V128;
}
1962
// Packed floating-point moves. Each defm combines the load multiclass with
// the store multiclass. The aligned variants use the "alignedload" fragment
// and the "alignedstore" fragment with suffixes "512", "256" and "" for the
// three vector lengths; the unaligned variants use plain "load" / "store".

defm VMOVAPS :
  avx512_load_vl<0x28, "vmovaps", "alignedload",
                 "f", "32", "16", "8", "4",
                 SSEPackedSingle, HasAVX512>,
  avx512_store_vl<0x29, "vmovaps", "alignedstore", "512", "256", "",
                  "f", "32", "16", "8", "4",
                  SSEPackedSingle, HasAVX512>,
  PS, EVEX_CD8<32, CD8VF>;

defm VMOVAPD :
  avx512_load_vl<0x28, "vmovapd", "alignedload",
                 "f", "64", "8", "4", "2",
                 SSEPackedDouble, HasAVX512>,
  avx512_store_vl<0x29, "vmovapd", "alignedstore", "512", "256", "",
                  "f", "64", "8", "4", "2",
                  SSEPackedDouble, HasAVX512>,
  PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVUPS :
  avx512_load_vl<0x10, "vmovups", "load",
                 "f", "32", "16", "8", "4",
                 SSEPackedSingle, HasAVX512>,
  avx512_store_vl<0x11, "vmovups", "store", "", "", "",
                  "f", "32", "16", "8", "4",
                  SSEPackedSingle, HasAVX512>,
  PS, EVEX_CD8<32, CD8VF>;

// The trailing 0 passes IsReMaterializable = 0 for the VMOVUPD loads.
defm VMOVUPD :
  avx512_load_vl<0x10, "vmovupd", "load",
                 "f", "64", "8", "4", "2",
                 SSEPackedDouble, HasAVX512, 0>,
  avx512_store_vl<0x11, "vmovupd", "store", "", "", "",
                  "f", "64", "8", "4", "2",
                  SSEPackedDouble, HasAVX512>,
  PD, VEX_W, EVEX_CD8<64, CD8VF>;
1988
// Masked unaligned FP load intrinsics whose pass-through operand is all
// zeros: selected as the zero-masking load. The GPR mask is copied into the
// write-mask register class first.
def : Pat<(v8f64 (int_x86_avx512_mask_loadu_pd_512
                    addr:$ptr, (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
          (VMOVUPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                        addr:$ptr)>;

def : Pat<(v16f32 (int_x86_avx512_mask_loadu_ps_512
                     addr:$ptr, (bc_v16f32 (v16i32 immAllZerosV)), GR16:$mask)),
          (VMOVUPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                        addr:$ptr)>;

// Masked unaligned FP store intrinsics: selected as the masked store form.
def : Pat<(int_x86_avx512_mask_storeu_ps_512
             addr:$ptr, (v16f32 VR512:$src), GR16:$mask),
          (VMOVUPSZmrk addr:$ptr,
                       (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                       VR512:$src)>;

def : Pat<(int_x86_avx512_mask_storeu_pd_512
             addr:$ptr, (v8f64 VR512:$src), GR8:$mask),
          (VMOVUPDZmrk addr:$ptr,
                       (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                       VR512:$src)>;
Elena Demikhovsky1f3ed412013-10-22 09:19:28 +00002005
// Packed integer moves. The 32- and 64-bit element variants are guarded by
// HasAVX512; the 8- and 16-bit element variants by HasBWI.

defm VMOVDQA32 :
  avx512_load_vl<0x6F, "vmovdqa32", "alignedload",
                 "i", "32", "16", "8", "4",
                 SSEPackedInt, HasAVX512>,
  avx512_store_vl<0x7F, "vmovdqa32", "alignedstore", "512", "256", "",
                  "i", "32", "16", "8", "4",
                  SSEPackedInt, HasAVX512>,
  PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 :
  avx512_load_vl<0x6F, "vmovdqa64", "alignedload",
                 "i", "64", "8", "4", "2",
                 SSEPackedInt, HasAVX512>,
  avx512_store_vl<0x7F, "vmovdqa64", "alignedstore", "512", "256", "",
                  "i", "64", "8", "4", "2",
                  SSEPackedInt, HasAVX512>,
  PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVDQU8 :
  avx512_load_vl<0x6F, "vmovdqu8", "load",
                 "i", "8", "64", "32", "16",
                 SSEPackedInt, HasBWI>,
  avx512_store_vl<0x7F, "vmovdqu8", "store", "", "", "",
                  "i", "8", "64", "32", "16",
                  SSEPackedInt, HasBWI>,
  XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 :
  avx512_load_vl<0x6F, "vmovdqu16", "load",
                 "i", "16", "32", "16", "8",
                 SSEPackedInt, HasBWI>,
  avx512_store_vl<0x7F, "vmovdqu16", "store", "", "", "",
                  "i", "16", "32", "16", "8",
                  SSEPackedInt, HasBWI>,
  XD, VEX_W, EVEX_CD8<16, CD8VF>;

defm VMOVDQU32 :
  avx512_load_vl<0x6F, "vmovdqu32", "load",
                 "i", "32", "16", "8", "4",
                 SSEPackedInt, HasAVX512>,
  avx512_store_vl<0x7F, "vmovdqu32", "store", "", "", "",
                  "i", "32", "16", "8", "4",
                  SSEPackedInt, HasAVX512>,
  XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 :
  avx512_load_vl<0x6F, "vmovdqu64", "load",
                 "i", "64", "8", "4", "2",
                 SSEPackedInt, HasAVX512>,
  avx512_store_vl<0x7F, "vmovdqu64", "store", "", "", "",
                  "i", "64", "8", "4", "2",
                  SSEPackedInt, HasAVX512>,
  XS, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky1f3ed412013-10-22 09:19:28 +00002043
// Masked unaligned integer load intrinsics whose pass-through operand is all
// zeros: selected as the zero-masking load, with the GPR mask copied into
// the write-mask register class.
def : Pat<(v16i32 (int_x86_avx512_mask_loadu_d_512
                     addr:$ptr, (v16i32 immAllZerosV), GR16:$mask)),
          (VMOVDQU32Zrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                          addr:$ptr)>;

def : Pat<(v8i64 (int_x86_avx512_mask_loadu_q_512
                    addr:$ptr, (bc_v8i64 (v16i32 immAllZerosV)), GR8:$mask)),
          (VMOVDQU64Zrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                          addr:$ptr)>;

// Masked unaligned integer store intrinsics: selected as the masked store.
def : Pat<(int_x86_avx512_mask_storeu_d_512
             addr:$ptr, (v16i32 VR512:$src), GR16:$mask),
          (VMOVDQU32Zmrk addr:$ptr,
                         (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                         VR512:$src)>;

def : Pat<(int_x86_avx512_mask_storeu_q_512
             addr:$ptr, (v8i64 VR512:$src), GR8:$mask),
          (VMOVDQU64Zmrk addr:$ptr,
                         (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                         VR512:$src)>;
2060
// vselect between a 512-bit integer vector and all-zeros, selected as a
// zero-masking register move. When the zero vector is the first (true)
// operand, the mask is inverted with KNOTWrr first; for the v8i64 case the
// mask is copied to VK16 for the KNOT and the result copied back to VK8.
let AddedComplexity = 20 in {
  def : Pat<(v8i64 (vselect VK8WM:$mask,
                            (v8i64 VR512:$src),
                            (bc_v8i64 (v16i32 immAllZerosV)))),
            (VMOVDQU64Zrrkz VK8WM:$mask, VR512:$src)>;

  def : Pat<(v8i64 (vselect VK8WM:$mask,
                            (bc_v8i64 (v16i32 immAllZerosV)),
                            (v8i64 VR512:$src))),
            (VMOVDQU64Zrrkz
               (COPY_TO_REGCLASS
                  (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
                  VK8),
               VR512:$src)>;

  def : Pat<(v16i32 (vselect VK16WM:$mask,
                             (v16i32 VR512:$src),
                             (v16i32 immAllZerosV))),
            (VMOVDQU32Zrrkz VK16WM:$mask, VR512:$src)>;

  def : Pat<(v16i32 (vselect VK16WM:$mask,
                             (v16i32 immAllZerosV),
                             (v16i32 VR512:$src))),
            (VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
}
Robert Khasanov7ca7df02014-08-04 14:35:15 +00002079
// Move Int Doubleword to Packed Double Int
//
// Each def is selected for scalar_to_vector of a GPR (or an i32 load) into a
// VR128X register.
def VMOVDI2PDIZrr :
  AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
           "vmovd\t{$src, $dst|$dst, $src}",
           [(set VR128X:$dst, (v4i32 (scalar_to_vector GR32:$src)))],
           IIC_SSE_MOVDQ>,
  EVEX, VEX_LIG;

def VMOVDI2PDIZrm :
  AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
           "vmovd\t{$src, $dst|$dst, $src}",
           [(set VR128X:$dst,
                 (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
           IIC_SSE_MOVDQ>,
  EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;

def VMOV64toPQIZrr :
  AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
           "vmovq\t{$src, $dst|$dst, $src}",
           [(set VR128X:$dst, (v2i64 (scalar_to_vector GR64:$src)))],
           IIC_SSE_MOVDQ>,
  EVEX, VEX_W, VEX_LIG;
// Bit-for-bit moves between GR64 and FR64, selected for bitconvert. The two
// register forms are code-generation only; the store form is not.
let isCodeGenOnly = 1 in
def VMOV64toSDZrr :
  AVX512BI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
           "vmovq\t{$src, $dst|$dst, $src}",
           [(set FR64:$dst, (bitconvert GR64:$src))],
           IIC_SSE_MOVDQ>,
  EVEX, VEX_W, Sched<[WriteMove]>;

let isCodeGenOnly = 1 in
def VMOVSDto64Zrr :
  AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
           "vmovq\t{$src, $dst|$dst, $src}",
           [(set GR64:$dst, (bitconvert FR64:$src))],
           IIC_SSE_MOVDQ>,
  EVEX, VEX_W, Sched<[WriteMove]>;

def VMOVSDto64Zmr :
  AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
           "vmovq\t{$src, $dst|$dst, $src}",
           [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
           IIC_SSE_MOVDQ>,
  EVEX, VEX_W, Sched<[WriteStore]>, EVEX_CD8<64, CD8VT1>;
2112
// Move Int Doubleword to Single Scalar
//
// Code-generation-only forms selected for a bitconvert of a GR32 value (or
// of an i32 load) into FR32X.
let isCodeGenOnly = 1 in
def VMOVDI2SSZrr :
  AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
           "vmovd\t{$src, $dst|$dst, $src}",
           [(set FR32X:$dst, (bitconvert GR32:$src))],
           IIC_SSE_MOVDQ>,
  EVEX, VEX_LIG;

let isCodeGenOnly = 1 in
def VMOVDI2SSZrm :
  AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
           "vmovd\t{$src, $dst|$dst, $src}",
           [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
           IIC_SSE_MOVDQ>,
  EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002126
// Move doubleword from xmm register to r/m32
//
// Selected for extracting element 0 of a v4i32 into GR32, or storing it.
def VMOVPDI2DIZrr :
  AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
           "vmovd\t{$src, $dst|$dst, $src}",
           [(set GR32:$dst,
                 (vector_extract (v4i32 VR128X:$src), (iPTR 0)))],
           IIC_SSE_MOVD_ToGP>,
  EVEX, VEX_LIG;

def VMOVPDI2DIZmr :
  AVX512BI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128X:$src),
           "vmovd\t{$src, $dst|$dst, $src}",
           [(store (i32 (vector_extract (v4i32 VR128X:$src), (iPTR 0))),
                   addr:$dst)],
           IIC_SSE_MOVDQ>,
  EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
2140
// Move quadword from xmm1 register to r/m64
//
// Selected for extracting element 0 of a v2i64 into GR64, or storing it.
// Both forms require HasAVX512 and In64BitMode. The register form uses
// opcode 0x7E and the store form uses opcode 0xD6.
def VMOVPQIto64Zrr :
  I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
    "vmovq\t{$src, $dst|$dst, $src}",
    [(set GR64:$dst, (extractelt (v2i64 VR128X:$src), (iPTR 0)))],
    IIC_SSE_MOVD_ToGP>,
  PD, EVEX, VEX_LIG, VEX_W,
  Requires<[HasAVX512, In64BitMode]>;

def VMOVPQIto64Zmr :
  I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
    "vmovq\t{$src, $dst|$dst, $src}",
    [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)), addr:$dst)],
    IIC_SSE_MOVDQ>,
  EVEX, PD, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>,
  Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
2157
// Move Scalar Single to Double Int
//
// Code-generation-only forms selected for a bitconvert of an FR32X value
// into GR32, or for storing that bitconverted value.
let isCodeGenOnly = 1 in
def VMOVSS2DIZrr :
  AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32X:$src),
           "vmovd\t{$src, $dst|$dst, $src}",
           [(set GR32:$dst, (bitconvert FR32X:$src))],
           IIC_SSE_MOVD_ToGP>,
  EVEX, VEX_LIG;

let isCodeGenOnly = 1 in
def VMOVSS2DIZmr :
  AVX512BI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32X:$src),
           "vmovd\t{$src, $dst|$dst, $src}",
           [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
           IIC_SSE_MOVDQ>,
  EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002172
// Move Quadword Int to Packed Quadword Int
//
// Selected for scalar_to_vector of an i64 load into a v2i64 in VR128X.
def VMOVQI2PQIZrm :
  AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i64mem:$src),
           "vmovq\t{$src, $dst|$dst, $src}",
           [(set VR128X:$dst,
                 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
  EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
2181
//===----------------------------------------------------------------------===//
// AVX-512 MOVSS, MOVSD
//===----------------------------------------------------------------------===//
2185
// Scalar FP move forms, all under hasSideEffects = 0:
//   rr  - selected for (OpNode $src1, (scalar_to_vector $src2)) on VR128X
//   rrk - masked register form with $src1 tied to $dst; no pattern
//   rm  - scalar load into RC, selected for (mem_pat addr)
//   mr  - scalar store of RC
//   mrk - masked scalar store; no pattern
// NOTE(review): the instantiations in this file pass `asm` without a leading
// "v"; presumably the SI format class adds it for EVEX encodings. Confirm
// against the SI definition.
multiclass avx512_move_scalar <string asm, RegisterClass RC,
                               SDNode OpNode, ValueType vt,
                               X86MemOperand x86memop, PatFrag mem_pat> {
  let hasSideEffects = 0 in {
    def rr : SI<0x10, MRMSrcReg,
                (outs VR128X:$dst),
                (ins VR128X:$src1, RC:$src2),
                !strconcat(asm, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set VR128X:$dst,
                      (vt (OpNode VR128X:$src1,
                                  (scalar_to_vector RC:$src2))))],
                IIC_SSE_MOV_S_RR>,
             EVEX_4V, VEX_LIG;

    let Constraints = "$src1 = $dst" in
    def rrk : SI<0x10, MRMSrcReg,
                 (outs VR128X:$dst),
                 (ins VR128X:$src1, VK1WM:$mask, RC:$src2, RC:$src3),
                 !strconcat(asm,
                   " \t{$src3, $src2, $dst {${mask}}|$dst {${mask}}, $src2, $src3}"),
                 [], IIC_SSE_MOV_S_RR>,
              EVEX_4V, VEX_LIG, EVEX_K;

    def rm : SI<0x10, MRMSrcMem,
                (outs RC:$dst),
                (ins x86memop:$src),
                !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
                [(set RC:$dst, (mem_pat addr:$src))],
                IIC_SSE_MOV_S_RM>,
             EVEX, VEX_LIG;

    let mayStore = 1 in {
      def mr : SI<0x11, MRMDestMem,
                  (outs),
                  (ins x86memop:$dst, RC:$src),
                  !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
                  [(store RC:$src, addr:$dst)],
                  IIC_SSE_MOV_S_MR>,
               EVEX, VEX_LIG;

      def mrk : SI<0x11, MRMDestMem,
                   (outs),
                   (ins x86memop:$dst, VK1WM:$mask, RC:$src),
                   !strconcat(asm,
                     " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
                   [], IIC_SSE_MOV_S_MR>,
                EVEX, VEX_LIG, EVEX_K;
    } // mayStore = 1
  } // hasSideEffects = 0
}
2217
// Single-precision scalar move, in the packed-single execution domain.
let ExeDomain = SSEPackedSingle in {
  defm VMOVSSZ : avx512_move_scalar<"movss", FR32X, X86Movss, v4f32,
                                    f32mem, loadf32>,
                 XS, EVEX_CD8<32, CD8VT1>;
}

// Double-precision scalar move, in the packed-double execution domain.
let ExeDomain = SSEPackedDouble in {
  defm VMOVSDZ : avx512_move_scalar<"movsd", FR64X, X86Movsd, v2f64,
                                    f64mem, loadf64>,
                 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
}
2225
// Scalar select under a VK1WM mask, selected as the masked register move:
// $src2 (copied to VR128X) is the tied operand, $src1 is the last source,
// and the middle source is IMPLICIT_DEF. The result is copied back to the
// scalar register class.
def : Pat<(f32 (X86select VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS
             (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
                         VK1WM:$mask,
                         (f32 (IMPLICIT_DEF)),
                         FR32X:$src1),
             FR32X)>;

def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS
             (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
                         VK1WM:$mask,
                         (f64 (IMPLICIT_DEF)),
                         FR64X:$src1),
             FR64X)>;

// Masked scalar store intrinsic: the GR8 mask is copied to VK1WM and the
// vector source to FR32X.
def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
          (VMOVSSZmrk addr:$dst,
                      (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)),
                      (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
// For the disassembler
//
// Register-to-register encodings using opcode 0x11 in the MRMDestReg form.
// They carry no pattern and are marked isCodeGenOnly with ForceDisassemble.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
  def VMOVSSZrr_REV :
    SI<0x11, MRMDestReg,
       (outs VR128X:$dst),
       (ins VR128X:$src1, FR32X:$src2),
       "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
       [], IIC_SSE_MOV_S_RR>,
    XS, EVEX_4V, VEX_LIG;

  def VMOVSDZrr_REV :
    SI<0x11, MRMDestReg,
       (outs VR128X:$dst),
       (ins VR128X:$src1, FR64X:$src2),
       "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
       [], IIC_SSE_MOV_S_RR>,
    XD, EVEX_4V, VEX_LIG, VEX_W;
}
2251
2252let Predicates = [HasAVX512] in {
2253 let AddedComplexity = 15 in {
2254 // Move scalar to XMM zero-extended, zeroing a VR128X then do a
2255 // MOVS{S,D} to the lower bits.
2256 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))),
2257 (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)>;
2258 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
2259 (VMOVSSZrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
2260 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
2261 (VMOVSSZrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
2262 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
2263 (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)>;
2264
2265 // Move low f32 and clear high bits.
2266 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
2267 (SUBREG_TO_REG (i32 0),
2268 (VMOVSSZrr (v4f32 (V_SET0)),
2269 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
2270 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
2271 (SUBREG_TO_REG (i32 0),
2272 (VMOVSSZrr (v4i32 (V_SET0)),
2273 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
2274 }
2275
let AddedComplexity = 20 in {
  // Scalar f32 loads that end up in the low lane of a v4f32 are selected to
  // VMOVSSZrm; the result is retyped to VR128X with COPY_TO_REGCLASS.
  def : Pat<(v4f32 (X86vzmovl
                     (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;

  // Same idea for f64: every form below is selected to VMOVSDZrm.
  def : Pat<(v2f64 (X86vzmovl
                     (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzload addr:$src)),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;

  // 256-bit shapes of the patterns above: the scalar load is inserted at
  // index 0 of an undef vector and zero-extended.  The 128-bit load
  // instruction is used and the value is widened with SUBREG_TO_REG.
  def : Pat<(v8i32 (X86vzmovl
                     (insert_subvector
                       undef,
                       (v4i32 (scalar_to_vector (loadi32 addr:$src))),
                       (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f32 (X86vzmovl
                     (insert_subvector
                       undef,
                       (v4f32 (scalar_to_vector (loadf32 addr:$src))),
                       (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzmovl
                     (insert_subvector
                       undef,
                       (v2f64 (scalar_to_vector (loadf64 addr:$src))),
                       (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}
// 256-bit zero-extending inserts of a scalar into lane 0.
// Register sources are merged into a V_SET0 xmm with VMOVSS/VMOVSD; the
// i64 memory source uses VMOVQI2PQIZrm.  All results are widened with
// SUBREG_TO_REG.
def : Pat<(v8f32 (X86vzmovl
                   (insert_subvector
                     undef,
                     (v4f32 (scalar_to_vector FR32X:$src)),
                     (iPTR 0)))),
          (SUBREG_TO_REG
            (i32 0),
            (v4f32 (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)),
            sub_xmm)>;
def : Pat<(v4f64 (X86vzmovl
                   (insert_subvector
                     undef,
                     (v2f64 (scalar_to_vector FR64X:$src)),
                     (iPTR 0)))),
          (SUBREG_TO_REG
            (i64 0),
            (v2f64 (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)),
            sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl
                   (insert_subvector
                     undef,
                     (v2i64 (scalar_to_vector (loadi64 addr:$src))),
                     (iPTR 0)))),
          (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002322
// Keep the low 64-bit element of a 256-bit vector and clear everything
// above it.  Both the f64 and the i64 typed forms go through VMOVSDZrr with
// a V_SET0 first operand, operating on the low xmm sub-register.
def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
          (SUBREG_TO_REG
            (i32 0),
            (VMOVSDZrr (v2f64 (V_SET0)),
                       (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)),
            sub_xmm)>;

def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
          (SUBREG_TO_REG
            (i32 0),
            (VMOVSDZrr (v2i64 (V_SET0)),
                       (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)),
            sub_xmm)>;
2332
// Storing element 0 of a 128-bit FP vector: retype the vector register to
// the scalar class and use the scalar store instruction.
def : Pat<(store (f32 (vector_extract (v4f32 VR128X:$src), (iPTR 0))),
                 addr:$dst),
          (VMOVSSZmr addr:$dst,
                     (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
def : Pat<(store (f64 (vector_extract (v2f64 VR128X:$src), (iPTR 0))),
                 addr:$dst),
          (VMOVSDZmr addr:$dst,
                     (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X))>;
2340
// X86Movss shuffles are selected to VMOVSSZrr.
// 128-bit forms: $src2 is retyped to FR32X for the scalar operand.
def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
          (VMOVSSZrr (v4i32 VR128X:$src1),
                     (COPY_TO_REGCLASS (v4i32 VR128X:$src2), FR32X))>;
def : Pat<(v4f32 (X86Movss VR128X:$src1, VR128X:$src2)),
          (VMOVSSZrr (v4f32 VR128X:$src1),
                     (COPY_TO_REGCLASS (v4f32 VR128X:$src2), FR32X))>;

// 256-bit forms: operate on the low xmm sub-registers of both sources and
// widen the result with SUBREG_TO_REG.
def : Pat<(v8i32 (X86Movss VR256X:$src1, VR256X:$src2)),
          (SUBREG_TO_REG
            (i32 0),
            (VMOVSSZrr (EXTRACT_SUBREG (v8i32 VR256X:$src1), sub_xmm),
                       (EXTRACT_SUBREG (v8i32 VR256X:$src2), sub_xmm)),
            sub_xmm)>;
def : Pat<(v8f32 (X86Movss VR256X:$src1, VR256X:$src2)),
          (SUBREG_TO_REG
            (i32 0),
            (VMOVSSZrr (EXTRACT_SUBREG (v8f32 VR256X:$src1), sub_xmm),
                       (EXTRACT_SUBREG (v8f32 VR256X:$src2), sub_xmm)),
            sub_xmm)>;
2360
// X86Movsd shuffles are selected to VMOVSDZrr.
// 128-bit forms, one per vector type; $src2 is retyped to FR64X.
def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
          (VMOVSDZrr VR128X:$src1,
                     (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)),
          (VMOVSDZrr VR128X:$src1,
                     (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
def : Pat<(v4f32 (X86Movsd VR128X:$src1, VR128X:$src2)),
          (VMOVSDZrr VR128X:$src1,
                     (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
def : Pat<(v4i32 (X86Movsd VR128X:$src1, VR128X:$src2)),
          (VMOVSDZrr VR128X:$src1,
                     (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;

// 256-bit forms: operate on the low xmm sub-registers of both sources and
// widen the result with SUBREG_TO_REG.
def : Pat<(v4i64 (X86Movsd VR256X:$src1, VR256X:$src2)),
          (SUBREG_TO_REG
            (i32 0),
            (VMOVSDZrr (EXTRACT_SUBREG (v4i64 VR256X:$src1), sub_xmm),
                       (EXTRACT_SUBREG (v4i64 VR256X:$src2), sub_xmm)),
            sub_xmm)>;
def : Pat<(v4f64 (X86Movsd VR256X:$src1, VR256X:$src2)),
          (SUBREG_TO_REG
            (i32 0),
            (VMOVSDZrr (EXTRACT_SUBREG (v4f64 VR256X:$src1), sub_xmm),
                       (EXTRACT_SUBREG (v4f64 VR256X:$src2), sub_xmm)),
            sub_xmm)>;
2382
// Register-register X86Movlpd / X86Movlps are selected to the same
// VMOVSDZrr form, with $src2 retyped to FR64X.
def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
          (VMOVSDZrr VR128X:$src1,
                     (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
def : Pat<(v2i64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
          (VMOVSDZrr VR128X:$src1,
                     (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
          (VMOVSDZrr VR128X:$src1,
                     (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
def : Pat<(v4i32 (X86Movlps VR128X:$src1, VR128X:$src2)),
          (VMOVSDZrr VR128X:$src1,
                     (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2391}
2392
// Register form of vmovq used for (v2i64 (X86vzmovl reg)).
let AddedComplexity = 15 in {
  def VMOVZPQILo2PQIZrr
      : AVX512XSI<0x7E, MRMSrcReg,
                  (outs VR128X:$dst), (ins VR128X:$src),
                  "vmovq\t{$src, $dst|$dst, $src}",
                  [(set VR128X:$dst,
                        (v2i64 (X86vzmovl (v2i64 VR128X:$src))))],
                  IIC_SSE_MOVQ_RR>,
        EVEX, VEX_W;
}
2400
// Memory form of vmovq used for (v2i64 (X86vzmovl (loadv2i64 addr))).
let AddedComplexity = 20 in {
  def VMOVZPQILo2PQIZrm
      : AVX512XSI<0x7E, MRMSrcMem,
                  (outs VR128X:$dst), (ins i128mem:$src),
                  "vmovq\t{$src, $dst|$dst, $src}",
                  [(set VR128X:$dst,
                        (v2i64 (X86vzmovl (loadv2i64 addr:$src))))],
                  IIC_SSE_MOVDQ>,
        EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
}
2409
let Predicates = [HasAVX512] in {
  // Zero-extending movd/movq selections for 128-bit integer vectors.
  let AddedComplexity = 20 in {
    // Scalar sources (memory, then GPR).
    def : Pat<(v4i32 (X86vzmovl
                       (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
              (VMOV64toPQIZrr GR64:$src)>;
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
              (VMOVDI2PDIZrr GR32:$src)>;

    // Whole-vector sources where only the low element survives.
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
              (VMOVZPQILo2PQIZrm addr:$src)>;
    def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
              (VMOVZPQILo2PQIZrr VR128X:$src)>;
    def : Pat<(v2i64 (X86vzload addr:$src)),
              (VMOVZPQILo2PQIZrm addr:$src)>;
  }

  // 256-bit scalar_to_vector + zero-extend: reuse the 128-bit instructions
  // and widen the result with SUBREG_TO_REG.
  def : Pat<(v8i32 (X86vzmovl
                     (insert_subvector
                       undef,
                       (v4i32 (scalar_to_vector GR32:$src)),
                       (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl
                     (insert_subvector
                       undef,
                       (v2i64 (scalar_to_vector GR64:$src)),
                       (iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
}
2440
// Inserting a GPR at element 0 of a 512-bit integer vector whose other
// elements are either all-zero or undef: move the GPR into an xmm register
// and widen with SUBREG_TO_REG.

// Base vector is all zeros.
def : Pat<(v16i32 (X86Vinsert (v16i32 immAllZerosV), GR32:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;

def : Pat<(v8i64 (X86Vinsert (bc_v8i64 (v16i32 immAllZerosV)),
                             GR64:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;

// Base vector is undef.
def : Pat<(v16i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;

def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
2452
2453//===----------------------------------------------------------------------===//
Adam Nemet7f62b232014-06-10 16:39:53 +00002454// AVX-512 - Non-temporals
2455//===----------------------------------------------------------------------===//
// vmovntdqa loads.  Only the 512-bit form carries a selection pattern (the
// int_x86_avx512_movntdqa intrinsic); the 256/128-bit forms have empty
// patterns and additionally require HasVLX.
let SchedRW = [WriteLoad] in {
  def VMOVNTDQAZrm
      : AVX512PI<0x2A, MRMSrcMem,
                 (outs VR512:$dst), (ins i512mem:$src),
                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                 [(set VR512:$dst, (int_x86_avx512_movntdqa addr:$src))],
                 SSEPackedInt>,
        EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;

  let Predicates = [HasAVX512, HasVLX] in
  def VMOVNTDQAZ256rm
      : AVX512PI<0x2A, MRMSrcMem,
                 (outs VR256X:$dst), (ins i256mem:$src),
                 "vmovntdqa\t{$src, $dst|$dst, $src}", [],
                 SSEPackedInt>,
        EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

  let Predicates = [HasAVX512, HasVLX] in
  def VMOVNTDQAZ128rm
      : AVX512PI<0x2A, MRMSrcMem,
                 (outs VR128X:$dst), (ins i128mem:$src),
                 "vmovntdqa\t{$src, $dst|$dst, $src}", [],
                 SSEPackedInt>,
        EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}
2477
// One non-temporal store instruction ("mr" form).
//   opc / OpcodeStr - opcode byte and mnemonic.
//   st_frag         - store fragment matched by the selection pattern.
//   OpVT / RC       - value type and register class of the stored vector.
//   memop           - memory operand used for $dst.
//   d / itin        - execution domain and itinerary forwarded to AVX512PI.
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, PatFrag st_frag,
                        ValueType OpVT, RegisterClass RC,
                        X86MemOperand memop, Domain d,
                        InstrItinClass itin = IIC_SSE_MOVNT> {
  let SchedRW = [WriteStore], mayStore = 1, AddedComplexity = 400 in {
    def mr : AVX512PI<opc, MRMDestMem,
                      (outs), (ins memop:$dst, RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(st_frag (OpVT RC:$src), addr:$dst)],
                      d, itin>,
             EVEX;
  }
}
2487
// Instantiates avx512_movnt for the 512-, 256- and 128-bit vector lengths.
// The value type is looked up by name as "v" + <count> + elty + elsz and
// the memory operand as elty + "<bits>mem".  The Z form requires 'prd';
// the Z256 and Z128 forms require 'prd' and HasVLX.
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr, PatFrag st_frag,
                           string elty, string elsz, string vsz512,
                           string vsz256, string vsz128, Domain d,
                           Predicate prd,
                           InstrItinClass itin = IIC_SSE_MOVNT> {
  let Predicates = [prd] in {
    defm Z : avx512_movnt<opc, OpcodeStr, st_frag,
                          !cast<ValueType>("v"##vsz512##elty##elsz), VR512,
                          !cast<X86MemOperand>(elty##"512mem"), d, itin>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in
  defm Z256 : avx512_movnt<opc, OpcodeStr, st_frag,
                           !cast<ValueType>("v"##vsz256##elty##elsz), VR256X,
                           !cast<X86MemOperand>(elty##"256mem"), d, itin>,
              EVEX_V256;

  let Predicates = [prd, HasVLX] in
  defm Z128 : avx512_movnt<opc, OpcodeStr, st_frag,
                           !cast<ValueType>("v"##vsz128##elty##elsz), VR128X,
                           !cast<X86MemOperand>(elty##"128mem"), d, itin>,
              EVEX_V128;
}
2510
// Non-temporal stores, all matched from alignednontemporalstore.
// Integer data is stored as vNi64.
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", alignednontemporalstore,
                                "i", "64", "8", "4", "2",
                                SSEPackedInt, HasAVX512>,
                PD, EVEX_CD8<64, CD8VF>;

// Packed double.
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", alignednontemporalstore,
                                "f", "64", "8", "4", "2",
                                SSEPackedDouble, HasAVX512>,
                PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Packed single.
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", alignednontemporalstore,
                                "f", "32", "16", "8", "4",
                                SSEPackedSingle, HasAVX512>,
                PS, EVEX_CD8<32, CD8VF>;
2522
Adam Nemet7f62b232014-06-10 16:39:53 +00002523//===----------------------------------------------------------------------===//
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002524// AVX-512 - Integer arithmetic
2525//
// Packed-integer binary operation with selection patterns for every form:
//   rr,  rm,  rmb    - unmasked; second source is a register, a full-vector
//                      memory operand, or a broadcast scalar memory operand.
//   rrk, rmk, rmbk   - vselect on $mask between the result and $src0, with
//                      $src0 tied to $dst.
//   rrkz, rmkz, rmbkz - vselect on $mask between the result and
//                      immAllZerosV.
// isCommutable is applied to the unmasked rr form only.  All masked forms
// use AddedComplexity = 30; all memory forms set mayLoad.
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           ValueType OpVT, RegisterClass KRC,
                           RegisterClass RC, PatFrag memop_frag,
                           X86MemOperand x86memop, PatFrag scalar_mfrag,
                           X86MemOperand x86scalar_mop, string BrdcstStr,
                           OpndItins itins, bit IsCommutable = 0> {
  //--- register / register -------------------------------------------------
  let isCommutable = IsCommutable in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs RC:$dst), (ins RC:$src1, RC:$src2),
             !strconcat(OpcodeStr,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst,
                   (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
             itins.rr>, EVEX_4V;

  let AddedComplexity = 30, Constraints = "$src0 = $dst" in
  def rrk : AVX512BI<opc, MRMSrcReg,
              (outs RC:$dst),
              (ins RC:$src0, KRC:$mask, RC:$src1, RC:$src2),
              !strconcat(OpcodeStr,
                " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
              [(set RC:$dst,
                    (OpVT (vselect KRC:$mask,
                                   (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
                                   RC:$src0)))],
              itins.rr>, EVEX_4V, EVEX_K;

  let AddedComplexity = 30 in
  def rrkz : AVX512BI<opc, MRMSrcReg,
               (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
               !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
                          "|$dst {${mask}} {z}, $src1, $src2}"),
               [(set RC:$dst,
                     (OpVT (vselect KRC:$mask,
                                    (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
                                    (OpVT immAllZerosV))))],
               itins.rr>, EVEX_4V, EVEX_KZ;

  //--- register / full-vector memory ---------------------------------------
  let mayLoad = 1 in
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
             !strconcat(OpcodeStr,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst,
                   (OpVT (OpNode (OpVT RC:$src1),
                                 (memop_frag addr:$src2))))],
             itins.rm>, EVEX_4V;

  let mayLoad = 1, AddedComplexity = 30, Constraints = "$src0 = $dst" in
  def rmk : AVX512BI<opc, MRMSrcMem,
              (outs RC:$dst),
              (ins RC:$src0, KRC:$mask, RC:$src1, x86memop:$src2),
              !strconcat(OpcodeStr,
                " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
              [(set RC:$dst,
                    (OpVT (vselect KRC:$mask,
                                   (OpNode (OpVT RC:$src1),
                                           (memop_frag addr:$src2)),
                                   RC:$src0)))],
              itins.rm>, EVEX_4V, EVEX_K;

  let mayLoad = 1, AddedComplexity = 30 in
  def rmkz : AVX512BI<opc, MRMSrcMem,
               (outs RC:$dst), (ins KRC:$mask, RC:$src1, x86memop:$src2),
               !strconcat(OpcodeStr,
                 " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
               [(set RC:$dst,
                     (OpVT (vselect KRC:$mask,
                                    (OpNode (OpVT RC:$src1),
                                            (memop_frag addr:$src2)),
                                    (OpVT immAllZerosV))))],
               itins.rm>, EVEX_4V, EVEX_KZ;

  //--- register / broadcast scalar memory ----------------------------------
  let mayLoad = 1 in
  def rmb : AVX512BI<opc, MRMSrcMem,
              (outs RC:$dst), (ins RC:$src1, x86scalar_mop:$src2),
              !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                         ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
              [(set RC:$dst,
                    (OpNode RC:$src1,
                            (OpVT (X86VBroadcast
                                    (scalar_mfrag addr:$src2)))))],
              itins.rm>, EVEX_4V, EVEX_B;

  let mayLoad = 1, AddedComplexity = 30, Constraints = "$src0 = $dst" in
  def rmbk : AVX512BI<opc, MRMSrcMem,
               (outs RC:$dst),
               (ins RC:$src0, KRC:$mask, RC:$src1, x86scalar_mop:$src2),
               !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                          ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
                          BrdcstStr, "}"),
               [(set RC:$dst,
                     (OpVT (vselect KRC:$mask,
                                    (OpNode (OpVT RC:$src1),
                                            (OpVT (X86VBroadcast
                                                    (scalar_mfrag addr:$src2)))),
                                    RC:$src0)))],
               itins.rm>, EVEX_4V, EVEX_B, EVEX_K;

  let mayLoad = 1, AddedComplexity = 30 in
  def rmbkz : AVX512BI<opc, MRMSrcMem,
                (outs RC:$dst),
                (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
                !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                           ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
                           BrdcstStr, "}"),
                [(set RC:$dst,
                      (OpVT (vselect KRC:$mask,
                                     (OpNode (OpVT RC:$src1),
                                             (OpVT (X86VBroadcast
                                                     (scalar_mfrag addr:$src2)))),
                                     (OpVT immAllZerosV))))],
                itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
}
Elena Demikhovskybb2f6b72014-03-27 09:45:08 +00002615
// Pattern-less counterpart of avx512_binop_rm: defines the same nine forms
// (rr/rrk/rrkz, rm/rmk/rmkz, rmb/rmbk/rmbkz) with empty selection patterns;
// instructions built from it are selected through separate Pat records.
// DstVT, SrcVT, memop_frag and scalar_mfrag are accepted so that call sites
// mirror avx512_binop_rm, but nothing in this body reads them.
//
// The unmasked rr and rm forms now pass itins.rr / itins.rm, matching the
// masked and broadcast forms; previously they fell back to the class
// default itinerary.
//
// NOTE(review): unlike avx512_binop_rm, the rrk/rmk/rmbk forms here have no
// $src0 pass-through operand tied to $dst.  Left unchanged because adding
// one would alter the operand list seen by other code - confirm intent.
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, ValueType DstVT,
                            ValueType SrcVT, RegisterClass KRC,
                            RegisterClass RC, PatFrag memop_frag,
                            X86MemOperand x86memop, PatFrag scalar_mfrag,
                            X86MemOperand x86scalar_mop, string BrdcstStr,
                            OpndItins itins, bit IsCommutable = 0> {
  let isCommutable = IsCommutable in
  {
    def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
               (ins RC:$src1, RC:$src2),
               !strconcat(OpcodeStr,
                          " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [], itins.rr>, EVEX_4V;
    def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                (ins KRC:$mask, RC:$src1, RC:$src2),
                !strconcat(OpcodeStr,
                  " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
                [], itins.rr>, EVEX_4V, EVEX_K;
    def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                 (ins KRC:$mask, RC:$src1, RC:$src2),
                 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
                            "|$dst {${mask}} {z}, $src1, $src2}"),
                 [], itins.rr>, EVEX_4V, EVEX_KZ;
  }
  let mayLoad = 1 in {
    def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86memop:$src2),
               !strconcat(OpcodeStr,
                          " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [], itins.rm>, EVEX_4V;
    def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                (ins KRC:$mask, RC:$src1, x86memop:$src2),
                !strconcat(OpcodeStr,
                  " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
                [], itins.rm>, EVEX_4V, EVEX_K;
    def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                 (ins KRC:$mask, RC:$src1, x86memop:$src2),
                 !strconcat(OpcodeStr,
                   " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
                 [], itins.rm>, EVEX_4V, EVEX_KZ;
    def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                (ins RC:$src1, x86scalar_mop:$src2),
                !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                           ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
                [], itins.rm>, EVEX_4V, EVEX_B;
    def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
                 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                            ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
                            BrdcstStr, "}"),
                 [], itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
    def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                  (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
                  !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                             ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
                             BrdcstStr, "}"),
                  [], itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
  }
}
2672
// 512-bit integer add / subtract / multiply.  The final template argument
// is IsCommutable: 1 for add and the multiplies, 0 for subtract.

// 32-bit elements.
defm VPADDDZ  : avx512_binop_rm<0xFE, "vpaddd", add, v16i32, VK16WM, VR512,
                                memopv16i32, i512mem, loadi32, i32mem,
                                "{1to16}", SSE_INTALU_ITINS_P, 1>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPSUBDZ  : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VK16WM, VR512,
                                memopv16i32, i512mem, loadi32, i32mem,
                                "{1to16}", SSE_INTALU_ITINS_P, 0>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VK16WM, VR512,
                                memopv16i32, i512mem, loadi32, i32mem,
                                "{1to16}", SSE_INTALU_ITINS_P, 1>,
                T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;

// 64-bit elements.
defm VPADDQZ  : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VK8WM, VR512,
                                memopv8i64, i512mem, loadi64, i64mem,
                                "{1to8}", SSE_INTALU_ITINS_P, 1>,
                EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W;

defm VPSUBQZ  : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VK8WM, VR512,
                                memopv8i64, i512mem, loadi64, i64mem,
                                "{1to8}", SSE_INTALU_ITINS_P, 0>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// 32x32 -> 64 multiplies.  These come from the pattern-less multiclass and
// are selected by separate Pat records.
defm VPMULDQZ  : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM,
                                  VR512, memopv8i64, i512mem, loadi64,
                                  i64mem, "{1to8}", SSE_INTALU_ITINS_P, 1>,
                 T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;

defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32, VK8WM,
                                  VR512, memopv8i64, i512mem, loadi64,
                                  i64mem, "{1to8}", SSE_INTMUL_ITINS_P, 1>,
                 EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002701
// Selection for the pattern-less VPMULUDQZ / VPMULDQZ register forms.
// From the X86pmuludq node:
def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
          (VPMULUDQZrr VR512:$src1, VR512:$src2)>;

// From the masked intrinsics, when the pass-through operand is all zeros
// and the mask operand is (i8 -1):
def : Pat<(v8i64 (int_x86_avx512_mask_pmulu_dq_512
                   (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                   (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512
                   (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                   (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMULDQZrr VR512:$src1, VR512:$src2)>;
2711
// 512-bit integer max / min, signed and unsigned, 32- and 64-bit elements.
//
// Every record passes IsCommutable = 1.  The 64-bit variants previously
// passed 0 even though they use the same X86umax/X86smax/X86umin/X86smin
// nodes as their 32-bit siblings and min/max do not depend on operand
// order.  The flag only sets isCommutable on the unmasked rr instruction
// (see avx512_binop_rm), so this lets the rr forms be commuted like the
// 32-bit ones.

// Unsigned max.
defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VK16WM, VR512,
                 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                 SSE_INTALU_ITINS_P, 1>,
                 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VK8WM, VR512,
                 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                 SSE_INTALU_ITINS_P, 1>,
                 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Signed max.
defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VK16WM, VR512,
                 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                 SSE_INTALU_ITINS_P, 1>,
                 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VK8WM, VR512,
                 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                 SSE_INTALU_ITINS_P, 1>,
                 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Unsigned min.
defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VK16WM, VR512,
                 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                 SSE_INTALU_ITINS_P, 1>,
                 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VK8WM, VR512,
                 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                 SSE_INTALU_ITINS_P, 1>,
                 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Signed min.
defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VK16WM, VR512,
                 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                 SSE_INTALU_ITINS_P, 1>,
                 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VK8WM, VR512,
                 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                 SSE_INTALU_ITINS_P, 1>,
                 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky199c8232013-10-27 08:18:37 +00002747
// Masked min/max intrinsics whose pass-through operand is all zeros and
// whose mask operand is -1 are selected to the plain rr instruction.

// Max, 32-bit elements (i16 mask).
def : Pat<(v16i32 (int_x86_avx512_mask_pmaxs_d_512
                    (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                    (v16i32 immAllZerosV), (i16 -1))),
          (VPMAXSDZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v16i32 (int_x86_avx512_mask_pmaxu_d_512
                    (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                    (v16i32 immAllZerosV), (i16 -1))),
          (VPMAXUDZrr VR512:$src1, VR512:$src2)>;

// Max, 64-bit elements (i8 mask).
def : Pat<(v8i64 (int_x86_avx512_mask_pmaxs_q_512
                   (v8i64 VR512:$src1), (v8i64 VR512:$src2),
                   (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMAXSQZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pmaxu_q_512
                   (v8i64 VR512:$src1), (v8i64 VR512:$src2),
                   (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMAXUQZrr VR512:$src1, VR512:$src2)>;

// Min, 32-bit elements (i16 mask).
def : Pat<(v16i32 (int_x86_avx512_mask_pmins_d_512
                    (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                    (v16i32 immAllZerosV), (i16 -1))),
          (VPMINSDZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v16i32 (int_x86_avx512_mask_pminu_d_512
                    (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                    (v16i32 immAllZerosV), (i16 -1))),
          (VPMINUDZrr VR512:$src1, VR512:$src2)>;

// Min, 64-bit elements (i8 mask).
def : Pat<(v8i64 (int_x86_avx512_mask_pmins_q_512
                   (v8i64 VR512:$src1), (v8i64 VR512:$src2),
                   (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMINSQZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pminu_q_512
                   (v8i64 VR512:$src1), (v8i64 VR512:$src2),
                   (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMINUQZrr VR512:$src1, VR512:$src2)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002772//===----------------------------------------------------------------------===//
2773// AVX-512 - Unpack Instructions
2774//===----------------------------------------------------------------------===//
2775
// Floating-point unpack: a register form (rr) and a memory form (rm).
//   OpNode   - unpack node to match.
//   vt       - result value type.
//   mem_frag - load fragment; its result is wrapped in a bitconvert in the
//              rm pattern.
//   asm      - complete assembly string (mnemonic and operands).
//   d        - execution domain.
multiclass avx512_unpack_fp<bits<8> opc, SDNode OpNode, ValueType vt,
                            PatFrag mem_frag, RegisterClass RC,
                            X86MemOperand x86memop, string asm,
                            Domain d> {
  def rr : AVX512PI<opc, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, RC:$src2), asm,
                    [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))],
                    d>,
           EVEX_4V;

  def rm : AVX512PI<opc, MRMSrcMem,
                    (outs RC:$dst), (ins RC:$src1, x86memop:$src2), asm,
                    [(set RC:$dst,
                          (vt (OpNode RC:$src1,
                                      (bitconvert (mem_frag addr:$src2)))))],
                    d>,
           EVEX_4V;
}
2792
// 512-bit FP unpack high / low.
// NOTE(review): the PS records pass memopv8f64 as the load fragment although
// their result type is v16f32; the multiclass wraps the load in a
// bitconvert.  Confirm this is intended rather than memopv16f32.
defm VUNPCKHPSZ : avx512_unpack_fp<0x15, X86Unpckh, v16f32, memopv8f64,
                    VR512, f512mem,
                    "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    SSEPackedSingle>,
                  PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VUNPCKHPDZ : avx512_unpack_fp<0x15, X86Unpckh, v8f64, memopv8f64,
                    VR512, f512mem,
                    "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    SSEPackedDouble>,
                  PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VUNPCKLPSZ : avx512_unpack_fp<0x14, X86Unpckl, v16f32, memopv8f64,
                    VR512, f512mem,
                    "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    SSEPackedSingle>,
                  PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VUNPCKLPDZ : avx512_unpack_fp<0x14, X86Unpckl, v8f64, memopv8f64,
                    VR512, f512mem,
                    "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    SSEPackedDouble>,
                  PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002805
// Integer unpack: a register form (rr) and a memory form (rm), both using
// the IIC_SSE_UNPCK itinerary.  In the rm pattern the loaded value is
// wrapped in a bitconvert before being handed to OpNode.
multiclass avx512_unpack_int<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             ValueType OpVT, RegisterClass RC,
                             PatFrag memop_frag, X86MemOperand x86memop> {
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs RC:$dst), (ins RC:$src1, RC:$src2),
             !strconcat(OpcodeStr,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst,
                   (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
             IIC_SSE_UNPCK>,
           EVEX_4V;

  def rm : AVX512BI<opc, MRMSrcMem,
             (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
             !strconcat(OpcodeStr,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst,
                   (OpVT (OpNode (OpVT RC:$src1),
                                 (bitconvert (memop_frag addr:$src2)))))],
             IIC_SSE_UNPCK>,
           EVEX_4V;
}
// 512-bit integer unpack low / high for 32-bit (DQ) and 64-bit (QDQ)
// elements; the 64-bit records add VEX_W.
defm VPUNPCKLDQZ  : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl,
                                      v16i32, VR512, memopv16i32, i512mem>,
                    EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl,
                                      v8i64, VR512, memopv8i64, i512mem>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPUNPCKHDQZ  : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh,
                                      v16i32, VR512, memopv16i32, i512mem>,
                    EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh,
                                      v8i64, VR512, memopv8i64, i512mem>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2833//===----------------------------------------------------------------------===//
2834// AVX-512 - PSHUFD
2835//
2836
// Shuffle controlled by an 8-bit immediate.
//   ri - vector source in a register.
//   mi - vector source loaded through mem_frag.
// In both forms $src2 is the i8 immediate passed to OpNode.
multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                            SDNode OpNode, PatFrag mem_frag,
                            X86MemOperand x86memop, ValueType OpVT> {
  def ri : AVX512Ii8<opc, MRMSrcReg,
             (outs RC:$dst), (ins RC:$src1, i8imm:$src2),
             !strconcat(OpcodeStr,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst,
                   (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
           EVEX;

  def mi : AVX512Ii8<opc, MRMSrcMem,
             (outs RC:$dst), (ins x86memop:$src1, i8imm:$src2),
             !strconcat(OpcodeStr,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst,
                   (OpVT (OpNode (mem_frag addr:$src1),
                                 (i8 imm:$src2))))]>,
           EVEX;
}
2855
// Immediate-controlled shuffle of 32-bit integer elements.
defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd,
                                 memopv16i32, i512mem, v16i32>,
                PD, EVEX_V512, EVEX_CD8<32, CD8VF>;

// Immediate-controlled permute of packed single-precision elements.
let ExeDomain = SSEPackedSingle in {
  defm VPERMILPSZ : avx512_pshuf_imm<0x04, "vpermilps", VR512, X86VPermilp,
                                     memopv16f32, i512mem, v16f32>,
                    TAPD, EVEX_V512, EVEX_CD8<32, CD8VF>;
}
// Immediate-controlled permute of packed double-precision elements.
// The compressed-displacement element size is 64, matching the 64-bit
// element type and the VEX_W encoding.  It was previously declared as 32,
// apparently copied from VPERMILPSZ; every other VEX_W record in this
// section uses EVEX_CD8<64, CD8VF>.
let ExeDomain = SSEPackedDouble in
defm VPERMILPDZ : avx512_pshuf_imm<0x05, "vpermilpd", VR512, X86VPermilp,
                      memopv8f64, i512mem, v8f64>, TAPD, EVEX_V512,
                      VEX_W, EVEX_CD8<64, CD8VF>;
2867
// Integer-typed X86VPermilp nodes reuse the FP permute instructions:
// v16i32 -> VPERMILPSZri, v8i64 -> VPERMILPDZri.
def : Pat<(v16i32 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
          (VPERMILPSZri VR512:$src1, imm:$imm)>;
def : Pat<(v8i64 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
          (VPERMILPDZri VR512:$src1, imm:$imm)>;
2872
2873//===----------------------------------------------------------------------===//
2874// AVX-512 Logical Instructions
2875//===----------------------------------------------------------------------===//
2876
// 512-bit bitwise operations, each in a 32-bit ("d") and a 64-bit ("q")
// element flavour.  and / or / xor pass IsCommutable = 1; and-not passes 0.

// AND
defm VPANDDZ  : avx512_binop_rm<0xDB, "vpandd", and, v16i32, VK16WM, VR512,
                                memopv16i32, i512mem, loadi32, i32mem,
                                "{1to16}", SSE_BIT_ITINS_P, 1>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPANDQZ  : avx512_binop_rm<0xDB, "vpandq", and, v8i64, VK8WM, VR512,
                                memopv8i64, i512mem, loadi64, i64mem,
                                "{1to8}", SSE_BIT_ITINS_P, 1>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// OR
defm VPORDZ   : avx512_binop_rm<0xEB, "vpord", or, v16i32, VK16WM, VR512,
                                memopv16i32, i512mem, loadi32, i32mem,
                                "{1to16}", SSE_BIT_ITINS_P, 1>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPORQZ   : avx512_binop_rm<0xEB, "vporq", or, v8i64, VK8WM, VR512,
                                memopv8i64, i512mem, loadi64, i64mem,
                                "{1to8}", SSE_BIT_ITINS_P, 1>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// XOR
defm VPXORDZ  : avx512_binop_rm<0xEF, "vpxord", xor, v16i32, VK16WM, VR512,
                                memopv16i32, i512mem, loadi32, i32mem,
                                "{1to16}", SSE_BIT_ITINS_P, 1>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPXORQZ  : avx512_binop_rm<0xEF, "vpxorq", xor, v8i64, VK8WM, VR512,
                                memopv8i64, i512mem, loadi64, i64mem,
                                "{1to8}", SSE_BIT_ITINS_P, 1>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// AND-NOT
defm VPANDNDZ : avx512_binop_rm<0xDF, "vpandnd", X86andnp, v16i32, VK16WM,
                                VR512, memopv16i32, i512mem, loadi32, i32mem,
                                "{1to16}", SSE_BIT_ITINS_P, 0>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VK8WM,
                                VR512, memopv8i64, i512mem, loadi64, i64mem,
                                "{1to8}", SSE_BIT_ITINS_P, 0>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002901
2902//===----------------------------------------------------------------------===//
2903// AVX-512 FP arithmetic
2904//===----------------------------------------------------------------------===//
2905
// Scalar FP binary operation, instantiated once per precision.
//   opc       - opcode byte shared by both precisions.
//   OpcodeStr - mnemonic stem; "ss" / "sd" is appended here.
//   OpNode    - DAG node forwarded to sse12_fp_scalar.
//   itins     - itinerary pair; .s feeds the single form, .d the double form.
// SSZ: FR32X / f32mem, XS prefix, 32-bit CD8 tuple element.
// SDZ: FR64X / f64mem, XD prefix plus VEX_W, 64-bit CD8 tuple element.
multiclass avx512_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          SizeItins itins> {
  defm SSZ
    : sse12_fp_scalar<opc, OpcodeStr # "ss", OpNode, FR32X, f32mem,
                      itins.s, 0>,
      XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ
    : sse12_fp_scalar<opc, OpcodeStr # "sd", OpNode, FR64X, f64mem,
                      itins.d, 0>,
      XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
2915
// Scalar FP arithmetic (SS and SD forms of each operation).
//
// Only add and mul may have their source operands swapped.
let isCommutable = 1 in {
defm VADD : avx512_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>;
defm VMUL : avx512_binop_s<0x59, "mul", fmul, SSE_ALU_ITINS_S>;
}
// x86 min/max are NOT commutative: when either input is a NaN, or both inputs
// are zeros of opposite sign, the hardware returns the second source operand.
// Marking them commutable would let machine-level passes swap the operands
// and silently change the result, so they belong with sub and div.
let isCommutable = 0 in {
defm VMIN : avx512_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>;
defm VMAX : avx512_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>;
defm VSUB : avx512_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>;
defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>;
}
2926
// Packed FP binary operation on a full vector register.
//
// Template arguments:
//   opc           - opcode byte.
//   OpcodeStr     - full mnemonic.
//   OpNode        - DAG node matched by the unmasked forms.
//   KRC           - mask register class used by the masked forms.
//   RC, vt        - vector register class and its value type.
//   x86memop      - full-vector memory operand; mem_frag loads it.
//   x86scalar_mop - scalar memory operand for the broadcast forms;
//                   scalar_mfrag loads it.
//   BrdcstStr     - broadcast decoration appended to the memory operand in
//                   the assembly string (e.g. "{1to16}").
//   d, itins      - execution domain and itinerary pair (.rr / .rm).
//   commutable    - value assigned to isCommutable on the register forms.
//
// Defs produced:
//   rr, rrk, rrkz    - register/register: plain, masked, zero-masked.
//   rm, rmk, rmkz    - full-vector memory source: plain, masked, zero-masked.
//   rmb, rmbk, rmbkz - broadcast scalar memory source (EVEX_B).
// Only rr, rm and rmb carry selection patterns; the masked variants have
// empty patterns, which is why mayLoad is set explicitly on the memory forms.
//
// All assembly strings use the same "<mnemonic> \t" separator and none of
// them leaves trailing whitespace after the last operand.
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass KRC,
                           RegisterClass RC, ValueType vt,
                           X86MemOperand x86memop, PatFrag mem_frag,
                           X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
                           string BrdcstStr,
                           Domain d, OpndItins itins, bit commutable> {
  let isCommutable = commutable in {
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
       EVEX_4V;

    def rrk: PI<opc, MRMSrcReg, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, RC:$src2),
       !strconcat(OpcodeStr,
          " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
       [], itins.rr, d>, EVEX_4V, EVEX_K;

    def rrkz: PI<opc, MRMSrcReg, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, RC:$src2),
       !strconcat(OpcodeStr,
          " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
       [], itins.rr, d>, EVEX_4V, EVEX_KZ;
  }

  let mayLoad = 1 in {
    def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
       itins.rm, d>, EVEX_4V;

    def rmb : PI<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86scalar_mop:$src2),
       !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
           ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
       [(set RC:$dst, (OpNode RC:$src1,
                       (vt (X86VBroadcast (scalar_mfrag addr:$src2)))))],
       itins.rm, d>, EVEX_4V, EVEX_B;

    def rmk : PI<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, x86memop:$src2),
       !strconcat(OpcodeStr,
           " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
       [], itins.rm, d>, EVEX_4V, EVEX_K;

    def rmkz : PI<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, x86memop:$src2),
       !strconcat(OpcodeStr,
           " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
       [], itins.rm, d>, EVEX_4V, EVEX_KZ;

    def rmbk : PI<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
       !strconcat(OpcodeStr,
           " \t{${src2}", BrdcstStr,
           ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
           BrdcstStr, "}"),
       [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_K;

    def rmbkz : PI<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
       !strconcat(OpcodeStr,
           " \t{${src2}", BrdcstStr,
           ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
           BrdcstStr, "}"),
       [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_KZ;
  }
}
2989
// Packed add and multiply, 512-bit. Both are instantiated with
// commutable = 1. PS forms: v16f32, 16-bit mask, PS prefix. PD forms: v8f64,
// 8-bit mask, PD prefix and VEX_W.
defm VADDPSZ
  : avx512_fp_packed<0x58, "addps", fadd, VK16WM, VR512, v16f32,
                     f512mem, memopv16f32, f32mem, loadf32, "{1to16}",
                     SSEPackedSingle, SSE_ALU_ITINS_P.s, 1>,
    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

defm VADDPDZ
  : avx512_fp_packed<0x58, "addpd", fadd, VK8WM, VR512, v8f64,
                     f512mem, memopv8f64, f64mem, loadf64, "{1to8}",
                     SSEPackedDouble, SSE_ALU_ITINS_P.d, 1>,
    EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMULPSZ
  : avx512_fp_packed<0x59, "mulps", fmul, VK16WM, VR512, v16f32,
                     f512mem, memopv16f32, f32mem, loadf32, "{1to16}",
                     SSEPackedSingle, SSE_ALU_ITINS_P.s, 1>,
    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm VMULPDZ
  : avx512_fp_packed<0x59, "mulpd", fmul, VK8WM, VR512, v8f64,
                     f512mem, memopv8f64, f64mem, loadf64, "{1to8}",
                     SSEPackedDouble, SSE_ALU_ITINS_P.d, 1>,
    EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003006
// Packed min and max, 512-bit.
//
// These are instantiated with commutable = 0. x86 min/max return the second
// source operand when either input is a NaN or when both inputs are zeros of
// opposite sign, so swapping the operands can change the result. X86fmin and
// X86fmax model exactly that operand-order-dependent behaviour and must not
// be commuted at the machine-instruction level.
defm VMINPSZ : avx512_fp_packed<0x5D, "minps", X86fmin, VK16WM, VR512, v16f32, f512mem,
                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
                   SSE_ALU_ITINS_P.s, 0>,
                   EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm VMAXPSZ : avx512_fp_packed<0x5F, "maxps", X86fmax, VK16WM, VR512, v16f32, f512mem,
                   memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
                   SSE_ALU_ITINS_P.s, 0>,
                   EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

defm VMINPDZ : avx512_fp_packed<0x5D, "minpd", X86fmin, VK8WM, VR512, v8f64, f512mem,
                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
                   SSE_ALU_ITINS_P.d, 0>,
                   EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VMAXPDZ : avx512_fp_packed<0x5F, "maxpd", X86fmax, VK8WM, VR512, v8f64, f512mem,
                   memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
                   SSE_ALU_ITINS_P.d, 0>,
                   EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003024
// Packed subtract and divide, 512-bit. Both are instantiated with
// commutable = 0.
defm VSUBPSZ
  : avx512_fp_packed<0x5C, "subps", fsub, VK16WM, VR512, v16f32,
                     f512mem, memopv16f32, f32mem, loadf32, "{1to16}",
                     SSEPackedSingle, SSE_ALU_ITINS_P.s, 0>,
    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm VDIVPSZ
  : avx512_fp_packed<0x5E, "divps", fdiv, VK16WM, VR512, v16f32,
                     f512mem, memopv16f32, f32mem, loadf32, "{1to16}",
                     SSEPackedSingle, SSE_ALU_ITINS_P.s, 0>,
    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

defm VSUBPDZ
  : avx512_fp_packed<0x5C, "subpd", fsub, VK8WM, VR512, v8f64,
                     f512mem, memopv8f64, f64mem, loadf64, "{1to8}",
                     SSEPackedDouble, SSE_ALU_ITINS_P.d, 0>,
    EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VDIVPDZ
  : avx512_fp_packed<0x5E, "divpd", fdiv, VK8WM, VR512, v8f64,
                     f512mem, memopv8f64, f64mem, loadf64, "{1to8}",
                     SSEPackedDouble, SSE_ALU_ITINS_P.d, 0>,
    EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003040
// Masked max/min intrinsics select to the plain register/register instruction
// only in the form matched here: an all-zeros third operand, an all-ones mask
// (-1) and FROUND_CURRENT as the last operand.

// max, single precision
def : Pat<
  (v16f32 (int_x86_avx512_mask_max_ps_512
             (v16f32 VR512:$src1), (v16f32 VR512:$src2),
             (bc_v16f32 (v16i32 immAllZerosV)),
             (i16 -1), FROUND_CURRENT)),
  (VMAXPSZrr VR512:$src1, VR512:$src2)>;

// max, double precision
def : Pat<
  (v8f64 (int_x86_avx512_mask_max_pd_512
            (v8f64 VR512:$src1), (v8f64 VR512:$src2),
            (bc_v8f64 (v16i32 immAllZerosV)),
            (i8 -1), FROUND_CURRENT)),
  (VMAXPDZrr VR512:$src1, VR512:$src2)>;

// min, single precision
def : Pat<
  (v16f32 (int_x86_avx512_mask_min_ps_512
             (v16f32 VR512:$src1), (v16f32 VR512:$src2),
             (bc_v16f32 (v16i32 immAllZerosV)),
             (i16 -1), FROUND_CURRENT)),
  (VMINPSZrr VR512:$src1, VR512:$src2)>;

// min, double precision
def : Pat<
  (v8f64 (int_x86_avx512_mask_min_pd_512
            (v8f64 VR512:$src1), (v8f64 VR512:$src2),
            (bc_v8f64 (v16i32 immAllZerosV)),
            (i8 -1), FROUND_CURRENT)),
  (VMINPDZrr VR512:$src1, VR512:$src2)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003060//===----------------------------------------------------------------------===//
3061// AVX-512 VPTESTM instructions
3062//===----------------------------------------------------------------------===//
3063
// Vector test producing a mask-register result.
//   KRC        - mask register class of the destination.
//   RC, vt     - source vector register class and the value type both
//                sources are matched as.
//   x86memop   - memory operand of the rm form; memop_frag loads it and the
//                loaded value is bitconverted before being passed to OpNode.
//   OpNode     - DAG node matched by both forms.
// Both forms are in the SSEPackedInt domain and use EVEX_4V.
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                         RegisterClass RC, X86MemOperand x86memop,
                         PatFrag memop_frag, SDNode OpNode, ValueType vt> {
  // Register/register form.
  def rr
    : AVX512PI<opc, MRMSrcReg, (outs KRC:$dst), (ins RC:$src1, RC:$src2),
               OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))],
               SSEPackedInt>,
      EVEX_4V;
  // Register/memory form.
  def rm
    : AVX512PI<opc, MRMSrcMem, (outs KRC:$dst),
               (ins RC:$src1, x86memop:$src2),
               OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set KRC:$dst,
                     (OpNode (vt RC:$src1),
                             (bitconvert (memop_frag addr:$src2))))],
               SSEPackedInt>,
      EVEX_4V;
}
3078
// VPTESTM: dword form writes VK16, qword form writes VK8 and sets VEX_W.
defm VPTESTMDZ
  : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem,
                  memopv16i32, X86testm, v16i32>,
    T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPTESTMQZ
  : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem,
                  memopv8i64, X86testm, v8i64>,
    T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// VPTESTNM: same opcode byte, T8XS prefix class, gated on HasCDI.
let Predicates = [HasCDI] in {
defm VPTESTNMDZ
  : avx512_vptest<0x27, "vptestnmd", VK16, VR512, f512mem,
                  memopv16i32, X86testnm, v16i32>,
    T8XS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPTESTNMQZ
  : avx512_vptest<0x27, "vptestnmq", VK8, VR512, f512mem,
                  memopv8i64, X86testnm, v8i64>,
    T8XS, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
3094
// ptestm intrinsics with an all-ones mask (-1): emit the register/register
// VPTESTM instruction and copy its result into a GPR class of the intrinsic's
// integer width (GR16 for the dword form, GR8 for the qword form).
def : Pat<
  (i16 (int_x86_avx512_mask_ptestm_d_512
          (v16i32 VR512:$src1), (v16i32 VR512:$src2), (i16 -1))),
  (COPY_TO_REGCLASS (VPTESTMDZrr VR512:$src1, VR512:$src2), GR16)>;

def : Pat<
  (i8 (int_x86_avx512_mask_ptestm_q_512
         (v8i64 VR512:$src1), (v8i64 VR512:$src2), (i8 -1))),
  (COPY_TO_REGCLASS (VPTESTMQZrr VR512:$src1, VR512:$src2), GR8)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003102//===----------------------------------------------------------------------===//
3103// AVX-512 Shift instructions
3104//===----------------------------------------------------------------------===//
// Vector shift by an 8-bit immediate.
//   ImmFormR / ImmFormM - instruction formats of the register-source and
//                         memory-source forms.
//   OpNode              - DAG node matched by the unmasked forms.
//   RC, vt              - vector register class and value type.
//   x86memop, mem_frag  - memory operand and the fragment that loads it.
//   KRC                 - mask register class of the masked forms.
//
// Defs produced:
//   ri  / rik - register source, plain and masked (EVEX_K).
//   mi  / mik - memory source, plain and masked (EVEX_K).
// The masked forms carry no selection pattern.
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                         string OpcodeStr, SDNode OpNode, RegisterClass RC,
                         ValueType vt, X86MemOperand x86memop, PatFrag mem_frag,
                         RegisterClass KRC> {
  def ri : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
       (ins RC:$src1, i8imm:$src2),
           !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set RC:$dst, (vt (OpNode RC:$src1, (i8 imm:$src2))))],
        SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
  def rik : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, i8imm:$src2),
           !strconcat(OpcodeStr,
                " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
       [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;

  // mik has an empty pattern, so TableGen has nothing to infer mayLoad from;
  // state it explicitly for both memory-source forms.
  let mayLoad = 1 in {
  def mi: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
       (ins x86memop:$src1, i8imm:$src2),
           !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set RC:$dst, (OpNode (mem_frag addr:$src1),
                     (i8 imm:$src2)))], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
  def mik: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
       (ins KRC:$mask, x86memop:$src1, i8imm:$src2),
           !strconcat(OpcodeStr,
                " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
       [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
  }
}
3130
// Vector shift whose count comes from a 128-bit register or memory operand.
//   OpNode   - DAG node matched by the unmasked forms.
//   RC, vt   - register class and value type of the shifted vector.
//   SrcVT    - value type the 128-bit register count is matched as.
//   bc_frag  - fragment applied to the memopv2i64 load of the count in the
//              rm pattern.
//   KRC      - mask register class of the masked forms.
//
// Defs produced:
//   rr / rrk - count in VR128X, plain and masked (EVEX_K).
//   rm / rmk - count in i128mem, plain and masked (EVEX_K).
// The masked forms carry no selection pattern.
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          RegisterClass RC, ValueType vt, ValueType SrcVT,
                          PatFrag bc_frag, RegisterClass KRC> {
  // src2 is always 128-bit
  def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, VR128X:$src2),
           !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set RC:$dst, (vt (OpNode RC:$src1, (SrcVT VR128X:$src2))))],
        SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
  def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, VR128X:$src2),
           !strconcat(OpcodeStr,
                " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
       [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;

  // rmk has an empty pattern, so TableGen has nothing to infer mayLoad from;
  // state it explicitly for both memory-source forms.
  let mayLoad = 1 in {
  def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, i128mem:$src2),
           !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set RC:$dst, (vt (OpNode RC:$src1,
                       (bc_frag (memopv2i64 addr:$src2)))))],
                        SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
  def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, i128mem:$src2),
           !strconcat(OpcodeStr,
                " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
       [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
  }
}
3157
// 512-bit shifts. Every mnemonic is instantiated twice under the same defm
// name: once from avx512_shift_rmi (immediate count, CD8VF tuple) and once
// from avx512_shift_rrm (128-bit count operand, CD8VQ tuple). Qword forms
// additionally set VEX_W.

// Logical right shift, dwords.
defm VPSRLDZ
  : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
                     VR512, v16i32, i512mem, memopv16i32, VK16WM>,
    EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSRLDZ
  : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl,
                     VR512, v16i32, v4i32, bc_v4i32, VK16WM>,
    EVEX_V512, EVEX_CD8<32, CD8VQ>;

// Logical right shift, qwords.
defm VPSRLQZ
  : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
                     VR512, v8i64, i512mem, memopv8i64, VK8WM>,
    EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPSRLQZ
  : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl,
                     VR512, v8i64, v2i64, bc_v2i64, VK8WM>,
    EVEX_V512, EVEX_CD8<64, CD8VQ>, VEX_W;

// Left shift, dwords.
defm VPSLLDZ
  : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
                     VR512, v16i32, i512mem, memopv16i32, VK16WM>,
    EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSLLDZ
  : avx512_shift_rrm<0xF2, "vpslld", X86vshl,
                     VR512, v16i32, v4i32, bc_v4i32, VK16WM>,
    EVEX_V512, EVEX_CD8<32, CD8VQ>;

// Left shift, qwords.
defm VPSLLQZ
  : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
                     VR512, v8i64, i512mem, memopv8i64, VK8WM>,
    EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPSLLQZ
  : avx512_shift_rrm<0xF3, "vpsllq", X86vshl,
                     VR512, v8i64, v2i64, bc_v2i64, VK8WM>,
    EVEX_V512, EVEX_CD8<64, CD8VQ>, VEX_W;

// Arithmetic right shift, dwords.
defm VPSRADZ
  : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
                     VR512, v16i32, i512mem, memopv16i32, VK16WM>,
    EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSRADZ
  : avx512_shift_rrm<0xE2, "vpsrad", X86vsra,
                     VR512, v16i32, v4i32, bc_v4i32, VK16WM>,
    EVEX_V512, EVEX_CD8<32, CD8VQ>;

// Arithmetic right shift, qwords (same opcode bytes as the dword forms,
// distinguished by VEX_W).
defm VPSRAQZ
  : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
                     VR512, v8i64, i512mem, memopv8i64, VK8WM>,
    EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPSRAQZ
  : avx512_shift_rrm<0xE2, "vpsraq", X86vsra,
                     VR512, v8i64, v2i64, bc_v2i64, VK8WM>,
    EVEX_V512, EVEX_CD8<64, CD8VQ>, VEX_W;
3199
3200//===-------------------------------------------------------------------===//
3201// Variable Bit Shifts
3202//===-------------------------------------------------------------------===//
// Per-element variable shift: the second source supplies a full vector of
// shift counts.
//   OpNode             - DAG node matched by both forms.
//   RC, vt             - vector register class and value type.
//   x86memop, mem_frag - memory operand of the rm form and the fragment that
//                        loads it.
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            RegisterClass RC, ValueType vt,
                            X86MemOperand x86memop, PatFrag mem_frag> {
  // Counts in a register.
  def rr
    : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
               OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set RC:$dst, (vt (OpNode RC:$src1, (vt RC:$src2))))]>,
      EVEX_4V;
  // Counts loaded from memory.
  def rm
    : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86memop:$src2),
               OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set RC:$dst,
                     (vt (OpNode RC:$src1, (mem_frag addr:$src2))))]>,
      EVEX_4V;
}
3219
// 512-bit variable shifts, matched from the generic shl / srl / sra nodes.
// Qword forms add VEX_W and use a 64-bit CD8 element size.

// Left shift.
defm VPSLLVDZ
  : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32,
                     i512mem, memopv16i32>,
    EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSLLVQZ
  : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64,
                     i512mem, memopv8i64>,
    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Logical right shift.
defm VPSRLVDZ
  : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32,
                     i512mem, memopv16i32>,
    EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSRLVQZ
  : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64,
                     i512mem, memopv8i64>,
    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Arithmetic right shift.
defm VPSRAVDZ
  : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32,
                     i512mem, memopv16i32>,
    EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSRAVQZ
  : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64,
                     i512mem, memopv8i64>,
    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
3238
3239//===----------------------------------------------------------------------===//
3240// AVX-512 - MOVDDUP
3241//===----------------------------------------------------------------------===//
3242
// MOVDDUP (opcode 0x12) with a register or memory source.
//   RC, VT                 - vector register class and value type.
//   x86memop, memop_frag   - memory operand of the rm form and the fragment
//                            that loads it.
// Both forms match X86Movddup applied to the source.
multiclass avx512_movddup<string OpcodeStr, RegisterClass RC, ValueType VT,
                          X86MemOperand x86memop, PatFrag memop_frag> {
  def rr
    : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                OpcodeStr # " \t{$src, $dst|$dst, $src}",
                [(set RC:$dst, (VT (X86Movddup RC:$src)))]>,
      EVEX;
  def rm
    : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                OpcodeStr # " \t{$src, $dst|$dst, $src}",
                [(set RC:$dst,
                      (VT (X86Movddup (memop_frag addr:$src))))]>,
      EVEX;
}
3253
// 512-bit MOVDDUP on v8f64.
defm VMOVDDUPZ
  : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, memopv8f64>,
    VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// X86Movddup of a scalar f64 load placed in a vector is selected to the
// memory form.
def : Pat<
  (X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))),
  (VMOVDDUPZrm addr:$src)>;
3258
Elena Demikhovsky0a74b7d2013-11-14 11:29:27 +00003259//===---------------------------------------------------------------------===//
3260// Replicate Single FP - MOVSHDUP and MOVSLDUP
3261//===---------------------------------------------------------------------===//
// Single-precision replicate (MOVSHDUP / MOVSLDUP) with a register or memory
// source.
//   op                  - opcode byte.
//   OpNode              - DAG node matched by both forms.
//   vt, RC              - value type and vector register class.
//   mem_frag, x86memop  - load fragment and memory operand of the rm form.
multiclass avx512_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
                                ValueType vt, RegisterClass RC,
                                PatFrag mem_frag, X86MemOperand x86memop> {
  def rr
    : AVX512XSI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                OpcodeStr # " \t{$src, $dst|$dst, $src}",
                [(set RC:$dst, (vt (OpNode RC:$src)))]>,
      EVEX;
  // mayLoad is stated explicitly on the memory form.
  let mayLoad = 1 in
  def rm
    : AVX512XSI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                OpcodeStr # " \t{$src, $dst|$dst, $src}",
                [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>,
      EVEX;
}
3273
// 512-bit MOVSHDUP / MOVSLDUP on v16f32.
defm VMOVSHDUPZ
  : avx512_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
                         v16f32, VR512, memopv16f32, f512mem>,
    EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VMOVSLDUPZ
  : avx512_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
                         v16f32, VR512, memopv16f32, f512mem>,
    EVEX_V512, EVEX_CD8<32, CD8VF>;

// The same nodes on v16i32 reuse the FP instructions, for both register and
// memory sources.
def : Pat<
  (v16i32 (X86Movshdup VR512:$src)),
  (VMOVSHDUPZrr VR512:$src)>;
def : Pat<
  (v16i32 (X86Movshdup (memopv16i32 addr:$src))),
  (VMOVSHDUPZrm addr:$src)>;
def : Pat<
  (v16i32 (X86Movsldup VR512:$src)),
  (VMOVSLDUPZrr VR512:$src)>;
def : Pat<
  (v16i32 (X86Movsldup (memopv16i32 addr:$src))),
  (VMOVSLDUPZrm addr:$src)>;
3287
3288//===----------------------------------------------------------------------===//
3289// Move Low to High and High to Low packed FP Instructions
3290//===----------------------------------------------------------------------===//
// EVEX-encoded MOVLHPS: three-operand register form on VR128X, matched as
// v4f32 X86Movlhps.
def VMOVLHPSZrr
  : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
              (ins VR128X:$src1, VR128X:$src2),
              "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set VR128X:$dst,
                    (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))],
              IIC_SSE_MOV_LH>,
    EVEX_4V;

// EVEX-encoded MOVHLPS: three-operand register form on VR128X, matched as
// v4f32 X86Movhlps.
def VMOVHLPSZrr
  : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
              (ins VR128X:$src1, VR128X:$src2),
              "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set VR128X:$dst,
                    (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))],
              IIC_SSE_MOV_LH>,
    EVEX_4V;
3301
// Integer-typed X86Movlhps / X86Movhlps nodes are selected to the FP
// instructions when AVX-512 is available.
let Predicates = [HasAVX512] in {
  // MOVLHPS: v4i32 and v2i64 results.
  def : Pat<
    (v4i32 (X86Movlhps VR128X:$src1, VR128X:$src2)),
    (VMOVLHPSZrr VR128X:$src1, VR128X:$src2)>;
  def : Pat<
    (v2i64 (X86Movlhps VR128X:$src1, VR128X:$src2)),
    (VMOVLHPSZrr (v2i64 VR128X:$src1), VR128X:$src2)>;

  // MOVHLPS: v4i32 result.
  def : Pat<
    (v4i32 (X86Movhlps VR128X:$src1, VR128X:$src2)),
    (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003313
3314//===----------------------------------------------------------------------===//
3315// FMA - Fused Multiply Operations
3316//
// Packed FMA whose third source may be a register, a full-vector memory
// operand or a broadcast scalar memory operand. $src1 is tied to $dst.
//   RC, OpVT                    - vector register class and value type.
//   x86memop, mem_frag          - full-vector memory operand and its load.
//   x86scalar_mop, scalar_mfrag - scalar memory operand and its load, used by
//                                 the broadcast form.
//   BrdcstStr                   - broadcast decoration appended to the memory
//                                 operand in the assembly string.
//   OpNode                      - three-operand DAG node that is matched.
//   KRC                         - mask register class handed to
//                                 AVX512_masking_3src for the register form.
let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr,
                           RegisterClass RC, X86MemOperand x86memop,
                           PatFrag mem_frag, X86MemOperand x86scalar_mop,
                           PatFrag scalar_mfrag, string BrdcstStr,
                           SDNode OpNode, ValueType OpVT,
                           RegisterClass KRC> {
  // Register form, expanded through AVX512_masking_3src.
  defm r
    : AVX512_masking_3src<opc, MRMSrcReg, (outs RC:$dst),
                          (ins RC:$src2, RC:$src3),
                          OpcodeStr, "$src3, $src2", "$src2, $src3",
                          (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)),
                          OpVT, RC, KRC>,
      AVX512FMA3Base;

  // Full-vector memory form. This brace-less `let` covers only `m`; `mb`
  // below is outside its scope.
  let mayLoad = 1 in
  def m
    : AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
                 (ins RC:$src1, RC:$src2, x86memop:$src3),
                 OpcodeStr # " \t{$src3, $src2, $dst|$dst, $src2, $src3}",
                 [(set RC:$dst,
                       (OpVT (OpNode RC:$src1, RC:$src2,
                                     (mem_frag addr:$src3))))]>;

  // Broadcast scalar memory form (EVEX_B).
  def mb
    : AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
                 (ins RC:$src1, RC:$src2, x86scalar_mop:$src3),
                 OpcodeStr # " \t{${src3}" # BrdcstStr #
                   ", $src2, $dst|$dst, $src2, ${src3}" # BrdcstStr # "}",
                 [(set RC:$dst,
                       (OpNode RC:$src1, RC:$src2,
                               (OpVT (X86VBroadcast
                                        (scalar_mfrag addr:$src3)))))]>,
      EVEX_B;
}
} // Constraints = "$src1 = $dst"
3343
// 213-form packed FMA, single precision: v16f32 with a 16-bit write mask and
// a "{1to16}" broadcast decoration.
let ExeDomain = SSEPackedSingle in {
  defm VFMADD213PSZ
    : avx512_fma3p_rm<0xA8, "vfmadd213ps", VR512, f512mem,
                      memopv16f32, f32mem, loadf32, "{1to16}",
                      X86Fmadd, v16f32, VK16WM>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFMSUB213PSZ
    : avx512_fma3p_rm<0xAA, "vfmsub213ps", VR512, f512mem,
                      memopv16f32, f32mem, loadf32, "{1to16}",
                      X86Fmsub, v16f32, VK16WM>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFMADDSUB213PSZ
    : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", VR512, f512mem,
                      memopv16f32, f32mem, loadf32, "{1to16}",
                      X86Fmaddsub, v16f32, VK16WM>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFMSUBADD213PSZ
    : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", VR512, f512mem,
                      memopv16f32, f32mem, loadf32, "{1to16}",
                      X86Fmsubadd, v16f32, VK16WM>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFNMADD213PSZ
    : avx512_fma3p_rm<0xAC, "vfnmadd213ps", VR512, f512mem,
                      memopv16f32, f32mem, loadf32, "{1to16}",
                      X86Fnmadd, v16f32, VK16WM>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFNMSUB213PSZ
    : avx512_fma3p_rm<0xAE, "vfnmsub213ps", VR512, f512mem,
                      memopv16f32, f32mem, loadf32, "{1to16}",
                      X86Fnmsub, v16f32, VK16WM>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;
}
// 213-form packed FMA, double precision: v8f64 with an 8-bit write mask, a
// "{1to8}" broadcast decoration and VEX_W set.
let ExeDomain = SSEPackedDouble in {
  defm VFMADD213PDZ
    : avx512_fma3p_rm<0xA8, "vfmadd213pd", VR512, f512mem,
                      memopv8f64, f64mem, loadf64, "{1to8}",
                      X86Fmadd, v8f64, VK8WM>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFMSUB213PDZ
    : avx512_fma3p_rm<0xAA, "vfmsub213pd", VR512, f512mem,
                      memopv8f64, f64mem, loadf64, "{1to8}",
                      X86Fmsub, v8f64, VK8WM>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFMADDSUB213PDZ
    : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", VR512, f512mem,
                      memopv8f64, f64mem, loadf64, "{1to8}",
                      X86Fmaddsub, v8f64, VK8WM>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFMSUBADD213PDZ
    : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", VR512, f512mem,
                      memopv8f64, f64mem, loadf64, "{1to8}",
                      X86Fmsubadd, v8f64, VK8WM>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFNMADD213PDZ
    : avx512_fma3p_rm<0xAC, "vfnmadd213pd", VR512, f512mem,
                      memopv8f64, f64mem, loadf64, "{1to8}",
                      X86Fnmadd, v8f64, VK8WM>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFNMSUB213PDZ
    : avx512_fma3p_rm<0xAE, "vfnmsub213pd", VR512, f512mem,
                      memopv8f64, f64mem, loadf64, "{1to8}",
                      X86Fnmsub, v8f64, VK8WM>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
3396
let Constraints = "$src1 = $dst" in {
// Memory forms of the packed "132" FMA instructions. $src1 is tied to $dst.
//   m  - full-vector memory operand (x86memop / mem_frag) in $src2.
//   mb - scalar memory operand (x86scalar_mop / scalar_mfrag) in $src2 that
//        is matched through X86VBroadcast; BrdcstStr is appended to $src2 in
//        the assembly string and the instruction is tagged EVEX_B.
multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr,
                             RegisterClass RC, X86MemOperand x86memop,
                             PatFrag mem_frag, X86MemOperand x86scalar_mop,
                             PatFrag scalar_mfrag, string BrdcstStr,
                             SDNode OpNode, ValueType OpVT> {
  let mayLoad = 1 in
  def m : AVX512FMA3<
              opc, MRMSrcMem,
              (outs RC:$dst),
              (ins RC:$src1, RC:$src3, x86memop:$src2),
              !strconcat(OpcodeStr,
                         " \t{$src2, $src3, $dst|$dst, $src3, $src2}"),
              [(set RC:$dst,
                    (OpVT (OpNode RC:$src1,
                                  (mem_frag addr:$src2),
                                  RC:$src3)))]>;

  def mb : AVX512FMA3<
               opc, MRMSrcMem,
               (outs RC:$dst),
               (ins RC:$src1, RC:$src3, x86scalar_mop:$src2),
               !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                          ", $src3, $dst|$dst, $src3, ${src2}", BrdcstStr,
                          "}"),
               [(set RC:$dst,
                     (OpNode RC:$src1,
                             (OpVT (X86VBroadcast
                                       (scalar_mfrag addr:$src2))),
                             RC:$src3))]>,
           EVEX_B;
}
} // Constraints = "$src1 = $dst"
3415
3416
// 512-bit packed single-precision "132" FMA memory forms
// (avx512_fma3p_m132 instantiated for v16f32 with a "{1to16}" string).
let ExeDomain = SSEPackedSingle in {
  defm VFMADD132PSZ :
      avx512_fma3p_m132<0x98, "vfmadd132ps", VR512, f512mem, memopv16f32,
                        f32mem, loadf32, "{1to16}", X86Fmadd, v16f32>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;

  defm VFMSUB132PSZ :
      avx512_fma3p_m132<0x9A, "vfmsub132ps", VR512, f512mem, memopv16f32,
                        f32mem, loadf32, "{1to16}", X86Fmsub, v16f32>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;

  defm VFMADDSUB132PSZ :
      avx512_fma3p_m132<0x96, "vfmaddsub132ps", VR512, f512mem, memopv16f32,
                        f32mem, loadf32, "{1to16}", X86Fmaddsub, v16f32>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;

  defm VFMSUBADD132PSZ :
      avx512_fma3p_m132<0x97, "vfmsubadd132ps", VR512, f512mem, memopv16f32,
                        f32mem, loadf32, "{1to16}", X86Fmsubadd, v16f32>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;

  defm VFNMADD132PSZ :
      avx512_fma3p_m132<0x9C, "vfnmadd132ps", VR512, f512mem, memopv16f32,
                        f32mem, loadf32, "{1to16}", X86Fnmadd, v16f32>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;

  defm VFNMSUB132PSZ :
      avx512_fma3p_m132<0x9E, "vfnmsub132ps", VR512, f512mem, memopv16f32,
                        f32mem, loadf32, "{1to16}", X86Fnmsub, v16f32>,
      EVEX_V512, EVEX_CD8<32, CD8VF>;
}
// 512-bit packed double-precision "132" FMA memory forms
// (avx512_fma3p_m132 instantiated for v8f64 with a "{1to8}" string).
let ExeDomain = SSEPackedDouble in {
  defm VFMADD132PDZ :
      avx512_fma3p_m132<0x98, "vfmadd132pd", VR512, f512mem, memopv8f64,
                        f64mem, loadf64, "{1to8}", X86Fmadd, v8f64>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  defm VFMSUB132PDZ :
      avx512_fma3p_m132<0x9A, "vfmsub132pd", VR512, f512mem, memopv8f64,
                        f64mem, loadf64, "{1to8}", X86Fmsub, v8f64>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  defm VFMADDSUB132PDZ :
      avx512_fma3p_m132<0x96, "vfmaddsub132pd", VR512, f512mem, memopv8f64,
                        f64mem, loadf64, "{1to8}", X86Fmaddsub, v8f64>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  defm VFMSUBADD132PDZ :
      avx512_fma3p_m132<0x97, "vfmsubadd132pd", VR512, f512mem, memopv8f64,
                        f64mem, loadf64, "{1to8}", X86Fmsubadd, v8f64>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  defm VFNMADD132PDZ :
      avx512_fma3p_m132<0x9C, "vfnmadd132pd", VR512, f512mem, memopv8f64,
                        f64mem, loadf64, "{1to8}", X86Fnmadd, v8f64>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  defm VFNMSUB132PDZ :
      avx512_fma3p_m132<0x9E, "vfnmsub132pd", VR512, f512mem, memopv8f64,
                        f64mem, loadf64, "{1to8}", X86Fnmsub, v8f64>,
      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
3469
3470// Scalar FMA
let Constraints = "$src1 = $dst" in {
// Scalar FMA, register (r) and memory (m) forms. $src1 is tied to $dst.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - SDNode selected by both forms.
//   RC        - register class of the destination and register sources.
//   OpVT      - value type of the result.
//   x86memop  - memory operand class used for $src3 of the memory form.
//   memop     - not referenced by this multiclass; kept so that existing
//               instantiations remain valid.
//   mem_frag  - load fragment wrapping addr:$src3 in the memory pattern.
multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                 RegisterClass RC, ValueType OpVT,
                 X86MemOperand x86memop, Operand memop,
                 PatFrag mem_frag> {
  let isCommutable = 1 in
  def r     : AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2, RC:$src3),
                   !strconcat(OpcodeStr,
                              " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
  // Use the caller-supplied memory operand (f32mem / f64mem at the visible
  // instantiations) rather than a hard-coded f128mem, so the operand size
  // matches the scalar load performed by mem_frag.
  let mayLoad = 1 in
  def m     : AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2, x86memop:$src3),
                   !strconcat(OpcodeStr,
                              " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (OpNode RC:$src2, RC:$src1,
                            (mem_frag addr:$src3))))]>;
}

} // Constraints = "$src1 = $dst"
3494
// Scalar FMA instantiations ("213" mnemonics). Each opcode is instantiated
// twice: FR32X/f32 for the "ss" form, and FR64X/f64 with VEX_W for "sd".
defm VFMADDSSZ :
    avx512_fma3s_rm<0xA9, "vfmadd213ss", X86Fmadd, FR32X, f32, f32mem, ssmem,
                    loadf32>,
    EVEX_CD8<32, CD8VT1>;
defm VFMADDSDZ :
    avx512_fma3s_rm<0xA9, "vfmadd213sd", X86Fmadd, FR64X, f64, f64mem, sdmem,
                    loadf64>,
    VEX_W, EVEX_CD8<64, CD8VT1>;

defm VFMSUBSSZ :
    avx512_fma3s_rm<0xAB, "vfmsub213ss", X86Fmsub, FR32X, f32, f32mem, ssmem,
                    loadf32>,
    EVEX_CD8<32, CD8VT1>;
defm VFMSUBSDZ :
    avx512_fma3s_rm<0xAB, "vfmsub213sd", X86Fmsub, FR64X, f64, f64mem, sdmem,
                    loadf64>,
    VEX_W, EVEX_CD8<64, CD8VT1>;

defm VFNMADDSSZ :
    avx512_fma3s_rm<0xAD, "vfnmadd213ss", X86Fnmadd, FR32X, f32, f32mem,
                    ssmem, loadf32>,
    EVEX_CD8<32, CD8VT1>;
defm VFNMADDSDZ :
    avx512_fma3s_rm<0xAD, "vfnmadd213sd", X86Fnmadd, FR64X, f64, f64mem,
                    sdmem, loadf64>,
    VEX_W, EVEX_CD8<64, CD8VT1>;

defm VFNMSUBSSZ :
    avx512_fma3s_rm<0xAF, "vfnmsub213ss", X86Fnmsub, FR32X, f32, f32mem,
                    ssmem, loadf32>,
    EVEX_CD8<32, CD8VT1>;
defm VFNMSUBSDZ :
    avx512_fma3s_rm<0xAF, "vfnmsub213sd", X86Fnmsub, FR64X, f64, f64mem,
                    sdmem, loadf64>,
    VEX_W, EVEX_CD8<64, CD8VT1>;
3511
3512//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from signed/unsigned integer to float/double
3514//===----------------------------------------------------------------------===//
3515
// Scalar integer -> floating-point conversion, three-operand EVEX_4V form.
//   rr - integer source in a register of class SrcRC.
//   rm - integer source in memory (x86memop).
// Both forms take an extra DstRC:$src1 input and carry no selection pattern.
multiclass avx512_vcvtsi<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                         X86MemOperand x86memop, string asm> {
  let hasSideEffects = 0 in
  def rr : SI<opc, MRMSrcReg,
              (outs DstRC:$dst),
              (ins DstRC:$src1, SrcRC:$src),
              !strconcat(asm," \t{$src, $src1, $dst|$dst, $src1, $src}"),
              []>,
           EVEX_4V;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : SI<opc, MRMSrcMem,
              (outs DstRC:$dst),
              (ins DstRC:$src1, x86memop:$src),
              !strconcat(asm," \t{$src, $src1, $dst|$dst, $src1, $src}"),
              []>,
           EVEX_4V;
}
let Predicates = [HasAVX512] in {
// Signed integer -> scalar float/double (opcode 0x2A).
defm VCVTSI2SSZ :
    avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}">,
    XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ :
    avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}">,
    XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ :
    avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}">,
    XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ :
    avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}">,
    XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;

// sint_to_fp of a loaded integer: select the rm form, passing IMPLICIT_DEF
// as the first source operand.
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

// sint_to_fp of a GPR: select the rr form.
def : Pat<(f32 (sint_to_fp GR32:$src)),
          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (sint_to_fp GR64:$src)),
          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (sint_to_fp GR32:$src)),
          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (sint_to_fp GR64:$src)),
          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

// Unsigned integer -> scalar float/double (opcode 0x7B).
defm VCVTUSI2SSZ :
    avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}">,
    XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ :
    avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}">,
    XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SDZ :
    avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}">,
    XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ :
    avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}">,
    XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;

// uint_to_fp of a loaded integer: select the rm form.
def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

// uint_to_fp of a GPR: select the rr form.
def : Pat<(f32 (uint_to_fp GR32:$src)),
          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (uint_to_fp GR64:$src)),
          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (uint_to_fp GR32:$src)),
          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (uint_to_fp GR64:$src)),
          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003584
3585//===----------------------------------------------------------------------===//
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003586// AVX-512 Scalar convert from float/double to integer
3587//===----------------------------------------------------------------------===//
// Scalar floating-point -> integer conversion driven by an intrinsic.
//   rr - register source; selects (Int SrcRC:$src).
//   rm - memory source (memop); defined without a selection pattern.
// mem_cpat is accepted but not referenced by either definition.
multiclass avx512_cvt_s_int<bits<8> opc, RegisterClass SrcRC,
                            RegisterClass DstRC, Intrinsic Int,
                            Operand memop, ComplexPattern mem_cpat,
                            string asm> {
  let hasSideEffects = 0 in
  def rr : SI<opc, MRMSrcReg,
              (outs DstRC:$dst),
              (ins SrcRC:$src),
              !strconcat(asm," \t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (Int SrcRC:$src))]>,
           EVEX, VEX_LIG, Requires<[HasAVX512]>;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : SI<opc, MRMSrcMem,
              (outs DstRC:$dst),
              (ins memop:$src),
              !strconcat(asm," \t{$src, $dst|$dst, $src}"),
              []>,
           EVEX, VEX_LIG, Requires<[HasAVX512]>;
}
3602let Predicates = [HasAVX512] in {
// Convert float/double to signed/unsigned int 32/64.
// Signed forms use opcode 0x2D, unsigned forms 0x79; the 64-bit destination
// variants add VEX_W.
defm VCVTSS2SIZ :
    avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse_cvtss2si,
                     ssmem, sse_load_f32, "cvtss2si">,
    XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z :
    avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse_cvtss2si64,
                     ssmem, sse_load_f32, "cvtss2si">,
    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ :
    avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtss2usi,
                     ssmem, sse_load_f32, "cvtss2usi">,
    XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z :
    avx512_cvt_s_int<0x79, VR128X, GR64, int_x86_avx512_cvtss2usi64,
                     ssmem, sse_load_f32, "cvtss2usi">,
    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ :
    avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si,
                     sdmem, sse_load_f64, "cvtsd2si">,
    XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z :
    avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse2_cvtsd2si64,
                     sdmem, sse_load_f64, "cvtsd2si">,
    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ :
    avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtsd2usi,
                     sdmem, sse_load_f64, "cvtsd2usi">,
    XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z :
    avx512_cvt_s_int<0x79, VR128X, GR64, int_x86_avx512_cvtsd2usi64,
                     sdmem, sse_load_f64, "cvtsd2usi">,
    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
3630
// Codegen-only, intrinsic-based integer -> scalar fp conversions operating on
// VR128X. Signed forms use opcode 0x2A. Unsigned forms use opcode 0x7B, the
// same opcode used by the VCVTUSI2SSZ / VCVTUSI2SDZ definitions in this file;
// encoding them as 0x2A would emit the signed conversion instead.
let isCodeGenOnly = 1 in {
  defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
            int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
            SSE_CVT_Scalar, 0>, XS, EVEX_4V;
  defm Int_VCVTSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
            int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}",
            SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
  defm Int_VCVTSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
            int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}",
            SSE_CVT_Scalar, 0>, XD, EVEX_4V;
  defm Int_VCVTSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
            int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
            SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;

  defm Int_VCVTUSI2SSZ : sse12_cvt_sint_3addr<0x7B, GR32, VR128X,
            int_x86_avx512_cvtusi2ss, i32mem, loadi32, "cvtusi2ss{l}",
            SSE_CVT_Scalar, 0>, XS, EVEX_4V;
  defm Int_VCVTUSI2SS64Z : sse12_cvt_sint_3addr<0x7B, GR64, VR128X,
            int_x86_avx512_cvtusi642ss, i64mem, loadi64, "cvtusi2ss{q}",
            SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
  defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x7B, GR32, VR128X,
            int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}",
            SSE_CVT_Scalar, 0>, XD, EVEX_4V;
  defm Int_VCVTUSI2SD64Z : sse12_cvt_sint_3addr<0x7B, GR64, VR128X,
            int_x86_avx512_cvtusi642sd, i64mem, loadi64, "cvtusi2sd{q}",
            SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
} // isCodeGenOnly = 1
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003658
// Convert float/double to signed/unsigned int 32/64 with truncation.
// Codegen-only, intrinsic-based forms: signed use opcode 0x2C, unsigned 0x78.
let isCodeGenOnly = 1 in {
  defm Int_VCVTTSS2SIZ :
      avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse_cvttss2si,
                       ssmem, sse_load_f32, "cvttss2si">,
      XS, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSS2SI64Z :
      avx512_cvt_s_int<0x2C, VR128X, GR64, int_x86_sse_cvttss2si64,
                       ssmem, sse_load_f32, "cvttss2si">,
      XS, VEX_W, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSD2SIZ :
      avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse2_cvttsd2si,
                       sdmem, sse_load_f64, "cvttsd2si">,
      XD, EVEX_CD8<64, CD8VT1>;
  defm Int_VCVTTSD2SI64Z :
      avx512_cvt_s_int<0x2C, VR128X, GR64, int_x86_sse2_cvttsd2si64,
                       sdmem, sse_load_f64, "cvttsd2si">,
      XD, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm Int_VCVTTSS2USIZ :
      avx512_cvt_s_int<0x78, VR128X, GR32, int_x86_avx512_cvttss2usi,
                       ssmem, sse_load_f32, "cvttss2usi">,
      XS, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSS2USI64Z :
      avx512_cvt_s_int<0x78, VR128X, GR64, int_x86_avx512_cvttss2usi64,
                       ssmem, sse_load_f32, "cvttss2usi">,
      XS, VEX_W, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSD2USIZ :
      avx512_cvt_s_int<0x78, VR128X, GR32, int_x86_avx512_cvttsd2usi,
                       sdmem, sse_load_f64, "cvttsd2usi">,
      XD, EVEX_CD8<64, CD8VT1>;
  defm Int_VCVTTSD2USI64Z :
      avx512_cvt_s_int<0x78, VR128X, GR64, int_x86_avx512_cvttsd2usi64,
                       sdmem, sse_load_f64, "cvttsd2usi">,
      XD, VEX_W, EVEX_CD8<64, CD8VT1>;
} // isCodeGenOnly = 1
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003691
// Scalar conversion selected through a generic SDNode.
//   rr - register source; matches (OpNode SrcRC:$src).
//   rm - memory source; matches (OpNode (ld_frag addr:$src)).
multiclass avx512_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                        SDNode OpNode, X86MemOperand x86memop,
                        PatFrag ld_frag, string asm> {
  def rr : SI<opc, MRMSrcReg,
              (outs DstRC:$dst),
              (ins SrcRC:$src),
              !strconcat(asm," \t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (OpNode SrcRC:$src))]>,
           EVEX;

  def rm : SI<opc, MRMSrcMem,
              (outs DstRC:$dst),
              (ins x86memop:$src),
              !strconcat(asm," \t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>,
           EVEX;
}
3702
// Truncating scalar fp -> int conversions selected from fp_to_sint (0x2C)
// and fp_to_uint (0x78). GR64 destinations add VEX_W.
defm VCVTTSS2SIZ :
    avx512_cvt_s<0x2C, FR32X, GR32, fp_to_sint, f32mem, loadf32, "cvttss2si">,
    XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USIZ :
    avx512_cvt_s<0x78, FR32X, GR32, fp_to_uint, f32mem, loadf32, "cvttss2usi">,
    XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z :
    avx512_cvt_s<0x2C, FR32X, GR64, fp_to_sint, f32mem, loadf32, "cvttss2si">,
    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z :
    avx512_cvt_s<0x78, FR32X, GR64, fp_to_uint, f32mem, loadf32, "cvttss2usi">,
    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ :
    avx512_cvt_s<0x2C, FR64X, GR32, fp_to_sint, f64mem, loadf64, "cvttsd2si">,
    XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USIZ :
    avx512_cvt_s<0x78, FR64X, GR32, fp_to_uint, f64mem, loadf64, "cvttsd2usi">,
    XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z :
    avx512_cvt_s<0x2C, FR64X, GR64, fp_to_sint, f64mem, loadf64, "cvttsd2si">,
    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z :
    avx512_cvt_s<0x78, FR64X, GR64, fp_to_uint, f64mem, loadf64, "cvttsd2usi">,
    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskycf088092013-12-11 14:31:04 +00003727} // HasAVX512
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003728//===----------------------------------------------------------------------===//
// AVX-512 Convert from float to double and back
3730//===----------------------------------------------------------------------===//
// Convert scalar single to scalar double (pattern-less; selected through the
// explicit Pat<> definitions elsewhere in this file).
let hasSideEffects = 0 in
def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg,
                             (outs FR64X:$dst),
                             (ins FR32X:$src1, FR32X:$src2),
                             "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                             []>,
                   EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;

let hasSideEffects = 0, mayLoad = 1 in
def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem,
                             (outs FR64X:$dst),
                             (ins FR32X:$src1, f32mem:$src2),
                             "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                             []>,
                   EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
                   EVEX_CD8<32, CD8VT1>;

// Convert scalar double to scalar single
let hasSideEffects = 0 in
def VCVTSD2SSZrr : AVX512XDI<0x5A, MRMSrcReg,
                             (outs FR32X:$dst),
                             (ins FR64X:$src1, FR64X:$src2),
                             "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                             []>,
                   EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>;

let hasSideEffects = 0, mayLoad = 1 in
def VCVTSD2SSZrm : AVX512XDI<0x5A, MRMSrcMem,
                             (outs FR32X:$dst),
                             (ins FR64X:$src1, f64mem:$src2),
                             "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                             []>,
                   EVEX_4V, VEX_LIG, VEX_W,
                   Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>;
3755
// f32 -> f64 extension from a register: the source is passed as both operands.
let Predicates = [HasAVX512] in
def : Pat<(f64 (fextend FR32X:$src)),
          (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>;

// f32 -> f64 extension of a loaded value: fold the load into the rm form.
let Predicates = [HasAVX512] in
def : Pat<(fextend (loadf32 addr:$src)),
          (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>;

// Extending load: folded into the rm form under OptForSize ...
def : Pat<(extloadf32 addr:$src),
          (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
      Requires<[HasAVX512, OptForSize]>;

// ... and split into VMOVSSZrm followed by the rr form under OptForSpeed.
def : Pat<(extloadf32 addr:$src),
          (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
      Requires<[HasAVX512, OptForSpeed]>;

// f64 -> f32 rounding from a register.
let Predicates = [HasAVX512] in
def : Pat<(f32 (fround FR64X:$src)),
          (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>;
3771
// Packed conversion with an additional form that takes an AVX512RC operand.
//   rr  - register source; matches (OpVT (OpNode (InVT SrcRC:$src))).
//   rrb - register source plus AVX512RC:$rc; no pattern, tagged
//         EVEX_B and EVEX_RC.
//   rm  - memory source; the loaded value is bitconverted to InVT first.
multiclass avx512_vcvt_fp_with_rc<bits<8> opc, string asm,
                                  RegisterClass SrcRC, RegisterClass DstRC,
                                  SDNode OpNode, PatFrag mem_frag,
                                  X86MemOperand x86memop, ValueType OpVT,
                                  ValueType InVT, Domain d> {
  let hasSideEffects = 0 in
  def rr : AVX512PI<opc, MRMSrcReg,
                    (outs DstRC:$dst),
                    (ins SrcRC:$src),
                    !strconcat(asm," \t{$src, $dst|$dst, $src}"),
                    [(set DstRC:$dst,
                          (OpVT (OpNode (InVT SrcRC:$src))))],
                    d>,
           EVEX;

  let hasSideEffects = 0 in
  def rrb : AVX512PI<opc, MRMSrcReg,
                     (outs DstRC:$dst),
                     (ins SrcRC:$src, AVX512RC:$rc),
                     !strconcat(asm," \t{$rc, $src, $dst|$dst, $src, $rc}"),
                     [], d>,
            EVEX, EVEX_B, EVEX_RC;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : AVX512PI<opc, MRMSrcMem,
                    (outs DstRC:$dst),
                    (ins x86memop:$src),
                    !strconcat(asm," \t{$src, $dst|$dst, $src}"),
                    [(set DstRC:$dst,
                          (OpVT (OpNode
                                 (InVT (bitconvert (mem_frag addr:$src))))))],
                    d>,
           EVEX;
}
3791
// Packed conversion without a rounding-control form.
//   rr - register source; matches (OpVT (OpNode (InVT SrcRC:$src))).
//   rm - memory source; the loaded value is bitconverted to InVT first.
multiclass avx512_vcvt_fp<bits<8> opc, string asm, RegisterClass SrcRC,
                          RegisterClass DstRC, SDNode OpNode,
                          PatFrag mem_frag, X86MemOperand x86memop,
                          ValueType OpVT, ValueType InVT, Domain d> {
  let hasSideEffects = 0 in
  def rr : AVX512PI<opc, MRMSrcReg,
                    (outs DstRC:$dst),
                    (ins SrcRC:$src),
                    !strconcat(asm," \t{$src, $dst|$dst, $src}"),
                    [(set DstRC:$dst,
                          (OpVT (OpNode (InVT SrcRC:$src))))],
                    d>,
           EVEX;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : AVX512PI<opc, MRMSrcMem,
                    (outs DstRC:$dst),
                    (ins x86memop:$src),
                    !strconcat(asm," \t{$src, $dst|$dst, $src}"),
                    [(set DstRC:$dst,
                          (OpVT (OpNode
                                 (InVT (bitconvert (mem_frag addr:$src))))))],
                    d>,
           EVEX;
}
3808
// v8f64 -> v8f32 (fround), with a rounding-control form.
defm VCVTPD2PSZ :
    avx512_vcvt_fp_with_rc<0x5A, "vcvtpd2ps", VR512, VR256X, fround,
                           memopv8f64, f512mem, v8f32, v8f64,
                           SSEPackedSingle>,
    EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;

// v8f32 -> v8f64 (fextend).
defm VCVTPS2PDZ :
    avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend,
                   memopv4f64, f256mem, v8f64, v8f32,
                   SSEPackedDouble>,
    EVEX_V512, PS, EVEX_CD8<32, CD8VH>;

// Extending vector load folds into the memory form.
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
          (VCVTPS2PDZrm addr:$src)>;

// Masked cvtpd2ps intrinsic with zero pass-through and an all-ones mask:
// FROUND_CURRENT selects the plain rr form ...
def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512
                      (v8f64 VR512:$src),
                      (bc_v8f32(v8i32 immAllZerosV)),
                      (i8 -1),
                      (i32 FROUND_CURRENT))),
          (VCVTPD2PSZrr VR512:$src)>;

// ... any other immediate is forwarded to the rrb form as $rc.
def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512
                      (v8f64 VR512:$src),
                      (bc_v8f32(v8i32 immAllZerosV)),
                      (i8 -1),
                      imm:$rc)),
          (VCVTPD2PSZrrb VR512:$src, imm:$rc)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003828
3829//===----------------------------------------------------------------------===//
// AVX-512 Vector convert between signed/unsigned integer and float/double
3831//===----------------------------------------------------------------------===//
3832
// v16i32 -> v16f32 (sint_to_fp), with a rounding-control form.
defm VCVTDQ2PSZ :
    avx512_vcvt_fp_with_rc<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp,
                           memopv8i64, i512mem, v16f32, v16i32,
                           SSEPackedSingle>,
    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

// v8i32 -> v8f64 (sint_to_fp).
defm VCVTDQ2PDZ :
    avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp,
                   memopv4i64, i256mem, v8f64, v8i32,
                   SSEPackedDouble>,
    EVEX_V512, XS, EVEX_CD8<32, CD8VH>;

// v16f32 -> v16i32 (fp_to_sint).
defm VCVTTPS2DQZ :
    avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint,
                   memopv16f32, f512mem, v16i32, v16f32,
                   SSEPackedSingle>,
    EVEX_V512, XS, EVEX_CD8<32, CD8VF>;

// v8f64 -> v8i32 (fp_to_sint).
defm VCVTTPD2DQZ :
    avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint,
                   memopv8f64, f512mem, v8i32, v8f64,
                   SSEPackedDouble>,
    EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;

// v16f32 -> v16i32 (fp_to_uint).
defm VCVTTPS2UDQZ :
    avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint,
                   memopv16f32, f512mem, v16i32, v16f32,
                   SSEPackedSingle>,
    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
3857
// cvttps2udq (src, 0, mask-all-ones, sae-current)
def : Pat<(v16i32 (int_x86_avx512_mask_cvttps2udq_512
                       (v16f32 VR512:$src),
                       (v16i32 immAllZerosV),
                       (i16 -1),
                       FROUND_CURRENT)),
          (VCVTTPS2UDQZrr VR512:$src)>;

// v8f64 -> v8i32 (fp_to_uint).
defm VCVTTPD2UDQZ :
    avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint,
                   memopv8f64, f512mem, v8i32, v8f64,
                   SSEPackedDouble>,
    EVEX_V512, PS, VEX_W, EVEX_CD8<64, CD8VF>;

// cvttpd2udq (src, 0, mask-all-ones, sae-current)
def : Pat<(v8i32 (int_x86_avx512_mask_cvttpd2udq_512
                      (v8f64 VR512:$src),
                      (v8i32 immAllZerosV),
                      (i8 -1),
                      FROUND_CURRENT)),
          (VCVTTPD2UDQZrr VR512:$src)>;
3872
// Unsigned integer vector -> fp vector (uint_to_fp). The source is an integer
// vector, so the memory operand uses the integer operand classes
// (i256mem / i512mem), matching the signed VCVTDQ2PDZ / VCVTDQ2PSZ
// definitions in this file.
defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp,
                                 memopv4i64, i256mem, v8f64, v8i32,
                                 SSEPackedDouble>, EVEX_V512, XS,
                                 EVEX_CD8<32, CD8VH>;

defm VCVTUDQ2PSZ : avx512_vcvt_fp_with_rc<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp,
                                 memopv16i32, i512mem, v16f32, v16i32,
                                 SSEPackedSingle>, EVEX_V512, XD,
                                 EVEX_CD8<32, CD8VF>;
3882
// 128/256-bit unsigned conversions implemented with the 512-bit instruction:
// the source is placed in a wider register with SUBREG_TO_REG, converted,
// and the result is taken back out with EXTRACT_SUBREG.

// v8f32 -> v8i32 (fp_to_uint)
def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
          (EXTRACT_SUBREG
              (v16i32 (VCVTTPS2UDQZrr
                  (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))),
              sub_ymm)>;

// v4f32 -> v4i32 (fp_to_uint)
def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
          (EXTRACT_SUBREG
              (v16i32 (VCVTTPS2UDQZrr
                  (v16f32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))),
              sub_xmm)>;

// v8i32 -> v8f32 (uint_to_fp)
def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
          (EXTRACT_SUBREG
              (v16f32 (VCVTUDQ2PSZrr
                  (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))),
              sub_ymm)>;

// v4i32 -> v4f32 (uint_to_fp)
def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG
              (v16f32 (VCVTUDQ2PSZrr
                  (v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))),
              sub_xmm)>;

// v4i32 -> v4f64 (uint_to_fp): xmm source, ymm result.
def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG
              (v8f64 (VCVTUDQ2PDZrr
                  (v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))),
              sub_ymm)>;
3902
// Unmasked forms of the (u)dq -> ps/pd conversion intrinsics (zero
// pass-through, all-ones mask). The ps variants forward the rounding-control
// immediate to the "rrb" instruction; the pd variants take no rounding operand.
def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512
                     (v16i32 VR512:$src),
                     (bc_v16f32 (v16i32 immAllZerosV)),
                     (i16 -1),
                     imm:$rc)),
          (VCVTDQ2PSZrrb VR512:$src, imm:$rc)>;
def : Pat<(v8f64 (int_x86_avx512_mask_cvtdq2pd_512
                    (v8i32 VR256X:$src),
                    (bc_v8f64 (v16i32 immAllZerosV)),
                    (i8 -1))),
          (VCVTDQ2PDZrr VR256X:$src)>;
def : Pat<(v16f32 (int_x86_avx512_mask_cvtudq2ps_512
                     (v16i32 VR512:$src),
                     (bc_v16f32 (v16i32 immAllZerosV)),
                     (i16 -1),
                     imm:$rc)),
          (VCVTUDQ2PSZrrb VR512:$src, imm:$rc)>;
def : Pat<(v8f64 (int_x86_avx512_mask_cvtudq2pd_512
                    (v8i32 VR256X:$src),
                    (bc_v8f64 (v16i32 immAllZerosV)),
                    (i8 -1))),
          (VCVTUDQ2PDZrr VR256X:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003915
/// avx512_vcvt_fp2int - pattern-less conversion instructions in three forms:
///   rr  - register source
///   rrb - register source plus an explicit rounding-control operand
///   rm  - memory source
/// All three are marked as free of side effects; only rm may load.
/// NOTE(review): mem_frag is accepted but not referenced inside the body.
multiclass avx512_vcvt_fp2int<bits<8> opc, string asm, RegisterClass SrcRC,
                              RegisterClass DstRC, PatFrag mem_frag,
                              X86MemOperand x86memop, Domain d> {
  let hasSideEffects = 0 in {
    def rr  : AVX512PI<opc, MRMSrcReg,
                       (outs DstRC:$dst), (ins SrcRC:$src),
                       !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
                       [], d>,
              EVEX;
    def rrb : AVX512PI<opc, MRMSrcReg,
                       (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
                       !strconcat(asm, " \t{$rc, $src, $dst|$dst, $src, $rc}"),
                       [], d>,
              EVEX, EVEX_B, EVEX_RC;
    let mayLoad = 1 in {
      def rm : AVX512PI<opc, MRMSrcMem,
                        (outs DstRC:$dst), (ins x86memop:$src),
                        !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
                        [], d>,
               EVEX;
    } // mayLoad = 1
  } // hasSideEffects = 0
}
3932
// Signed fp -> dword conversions built from avx512_vcvt_fp2int.
defm VCVTPS2DQZ : avx512_vcvt_fp2int<0x5B, "vcvtps2dq", VR512, VR512,
                                     memopv16f32, f512mem, SSEPackedSingle>,
                  PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VCVTPD2DQZ : avx512_vcvt_fp2int<0xE6, "vcvtpd2dq", VR512, VR256X,
                                     memopv8f64, f512mem, SSEPackedDouble>,
                  XD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// Unmasked intrinsics (zero pass-through, all-ones mask) select the "rrb"
// form and forward the rounding-control immediate.
def : Pat<(v16i32 (int_x86_avx512_mask_cvtps2dq_512
                     (v16f32 VR512:$src),
                     (v16i32 immAllZerosV),
                     (i16 -1),
                     imm:$rc)),
          (VCVTPS2DQZrrb VR512:$src, imm:$rc)>;

def : Pat<(v8i32 (int_x86_avx512_mask_cvtpd2dq_512
                    (v8f64 VR512:$src),
                    (v8i32 immAllZerosV),
                    (i8 -1),
                    imm:$rc)),
          (VCVTPD2DQZrrb VR512:$src, imm:$rc)>;
3947
// Unsigned fp -> dword conversions built from avx512_vcvt_fp2int.
defm VCVTPS2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtps2udq", VR512, VR512,
                                      memopv16f32, f512mem, SSEPackedSingle>,
                   PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VCVTPD2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtpd2udq", VR512, VR256X,
                                      memopv8f64, f512mem, SSEPackedDouble>,
                   VEX_W, PS, EVEX_V512, EVEX_CD8<64, CD8VF>;

// Unmasked intrinsics (zero pass-through, all-ones mask) select the "rrb"
// form and forward the rounding-control immediate.
def : Pat<(v16i32 (int_x86_avx512_mask_cvtps2udq_512
                     (v16f32 VR512:$src),
                     (v16i32 immAllZerosV),
                     (i16 -1),
                     imm:$rc)),
          (VCVTPS2UDQZrrb VR512:$src, imm:$rc)>;

def : Pat<(v8i32 (int_x86_avx512_mask_cvtpd2udq_512
                    (v8f64 VR512:$src),
                    (v8i32 immAllZerosV),
                    (i8 -1),
                    imm:$rc)),
          (VCVTPD2UDQZrrb VR512:$src, imm:$rc)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003962
// Fold a vector load into the pd<->ps conversions when AVX-512 is available.
let Predicates = [HasAVX512] in {
  // fround of a loaded v8f64 -> memory form of vcvtpd2ps.
  def : Pat<(v8f32 (fround (loadv8f64 addr:$src))),
            (VCVTPD2PSZrm addr:$src)>;

  // Extending load of v8f32 -> memory form of vcvtps2pd.
  def : Pat<(v8f64 (extloadv8f32 addr:$src)),
            (VCVTPS2PDZrm addr:$src)>;
}
3969
Elena Demikhovskydd0794e2013-10-24 07:16:35 +00003970//===----------------------------------------------------------------------===//
3971// Half precision conversion instructions
3972//===----------------------------------------------------------------------===//
/// avx512_cvtph2ps - vcvtph2ps (opcode 0x13) in register (rr) and memory (rm)
/// source forms. Neither form carries a selection pattern; only the memory
/// form is explicitly flagged (no side effects, may load).
multiclass avx512_cvtph2ps<RegisterClass destRC, RegisterClass srcRC,
                           X86MemOperand x86memop> {
  def rr : AVX5128I<0x13, MRMSrcReg,
                    (outs destRC:$dst), (ins srcRC:$src),
                    "vcvtph2ps\t{$src, $dst|$dst, $src}",
                    []>,
           EVEX;

  let hasSideEffects = 0, mayLoad = 1 in {
    def rm : AVX5128I<0x13, MRMSrcMem,
                      (outs destRC:$dst), (ins x86memop:$src),
                      "vcvtph2ps\t{$src, $dst|$dst, $src}",
                      []>,
             EVEX;
  }
}
3982
/// avx512_cvtps2ph - vcvtps2ph (opcode 0x1D) with an 8-bit immediate, in
/// register-destination (rr) and memory-destination (mr) forms. Neither form
/// carries a selection pattern; only the store form is explicitly flagged
/// (no side effects, may store).
multiclass avx512_cvtps2ph<RegisterClass destRC, RegisterClass srcRC,
                           X86MemOperand x86memop> {
  def rr : AVX512AIi8<0x1D, MRMDestReg,
                      (outs destRC:$dst),
                      (ins srcRC:$src1, i32i8imm:$src2),
                      "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      []>,
           EVEX;

  let hasSideEffects = 0, mayStore = 1 in {
    def mr : AVX512AIi8<0x1D, MRMDestMem,
                        (outs),
                        (ins x86memop:$dst, srcRC:$src1, i32i8imm:$src2),
                        "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        []>,
             EVEX;
  }
}
3994
// 512-bit half-precision conversions: the half-precision side lives in a
// 256-bit register / f256mem operand, the single-precision side in VR512.
defm VCVTPH2PSZ : avx512_cvtph2ps<VR512, VR256X, f256mem>,
                  EVEX_V512, EVEX_CD8<32, CD8VH>;
defm VCVTPS2PHZ : avx512_cvtps2ph<VR256X, VR512, f256mem>,
                  EVEX_V512, EVEX_CD8<32, CD8VH>;

// Unmasked vcvtps2ph intrinsic: the immediate is forwarded to the instruction.
def : Pat<(v16i16 (int_x86_avx512_mask_vcvtps2ph_512
                     (v16f32 VR512:$src),
                     imm:$rc,
                     (bc_v16i16(v8i32 immAllZerosV)),
                     (i16 -1))),
          (VCVTPS2PHZrr VR512:$src, imm:$rc)>;

// Unmasked vcvtph2ps intrinsic with FROUND_CURRENT.
def : Pat<(v16f32 (int_x86_avx512_mask_vcvtph2ps_512
                     (v16i16 VR256X:$src),
                     (bc_v16f32(v16i32 immAllZerosV)),
                     (i16 -1),
                     (i32 FROUND_CURRENT))),
          (VCVTPH2PSZrr VR256X:$src)>;
4007
// Scalar ordered/unordered compares. Every instruction in this block defines
// EFLAGS and requires AVX-512.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  // Scalar-register forms selected from X86cmp.
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
                                 "ucomiss">,
                   PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
                                 "ucomisd">,
                   PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

  // comiss/comisd on VR128X with the selection pattern cleared.
  let Pattern = []<dag> in {
    defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load,
                                  "comiss">,
                    PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load,
                                  "comisd">,
                    PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }

  // Code-generation-only variants on VR128X selected from X86ucomi/X86comi.
  let isCodeGenOnly = 1 in {
    defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem,
                                       load, "ucomiss">,
                         PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem,
                                       load, "ucomisd">,
                         PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

    defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem,
                                      load, "comiss">,
                        PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm Int_VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem,
                                      load, "comisd">,
                        PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}
4039
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
/// Two-source scalar instructions without selection patterns:
///   rr - both sources in registers
///   rm - second source in memory (may load)
/// Both forms are marked as free of side effects.
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                         X86MemOperand x86memop> {
  let hasSideEffects = 0 in {
    def rr : AVX5128I<opc, MRMSrcReg,
                      (outs RC:$dst), (ins RC:$src1, RC:$src2),
                      !strconcat(OpcodeStr,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      []>,
             EVEX_4V;

    let mayLoad = 1 in
    def rm : AVX5128I<opc, MRMSrcMem,
                      (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                      !strconcat(OpcodeStr,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      []>,
             EVEX_4V;
  } // hasSideEffects = 0
}
4056
// Scalar 14-bit reciprocal / reciprocal-square-root instructions.
defm VRCP14SS   : avx512_fp14_s<0x4D, "vrcp14ss", FR32X, f32mem>,
                  EVEX_CD8<32, CD8VT1>;
defm VRCP14SD   : avx512_fp14_s<0x4D, "vrcp14sd", FR64X, f64mem>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;
defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", FR32X, f32mem>,
                  EVEX_CD8<32, CD8VT1>;
defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", FR64X, f64mem>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;

// Unmasked intrinsic forms (zero pass-through, all-ones mask). The vector
// operands are copied into the scalar register class expected by the
// instruction and the scalar result is copied back to VR128X.
def : Pat<(v4f32 (int_x86_avx512_rcp14_ss
                    (v4f32 VR128X:$src1),
                    (v4f32 VR128X:$src2),
                    (bc_v4f32 (v4i32 immAllZerosV)),
                    (i8 -1))),
          (COPY_TO_REGCLASS
            (VRCP14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
                        (COPY_TO_REGCLASS VR128X:$src2, FR32X)),
            VR128X)>;

def : Pat<(v2f64 (int_x86_avx512_rcp14_sd
                    (v2f64 VR128X:$src1),
                    (v2f64 VR128X:$src2),
                    (bc_v2f64 (v4i32 immAllZerosV)),
                    (i8 -1))),
          (COPY_TO_REGCLASS
            (VRCP14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
                        (COPY_TO_REGCLASS VR128X:$src2, FR64X)),
            VR128X)>;

def : Pat<(v4f32 (int_x86_avx512_rsqrt14_ss
                    (v4f32 VR128X:$src1),
                    (v4f32 VR128X:$src2),
                    (bc_v4f32 (v4i32 immAllZerosV)),
                    (i8 -1))),
          (COPY_TO_REGCLASS
            (VRSQRT14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
                          (COPY_TO_REGCLASS VR128X:$src2, FR32X)),
            VR128X)>;

def : Pat<(v2f64 (int_x86_avx512_rsqrt14_sd
                    (v2f64 VR128X:$src1),
                    (v2f64 VR128X:$src2),
                    (bc_v2f64 (v4i32 immAllZerosV)),
                    (i8 -1))),
          (COPY_TO_REGCLASS
            (VRSQRT14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
                          (COPY_TO_REGCLASS VR128X:$src2, FR64X)),
            VR128X)>;
Elena Demikhovskya3a71402013-10-09 08:16:14 +00004085
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
/// Single-source packed instructions selected from OpNode on type OpVt:
///   r - register source
///   m - memory source, loaded through mem_frag
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         RegisterClass RC, X86MemOperand x86memop,
                         PatFrag mem_frag, ValueType OpVt> {
  def r : AVX5128I<opc, MRMSrcReg,
                   (outs RC:$dst), (ins RC:$src),
                   !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                   [(set RC:$dst, (OpVt (OpNode RC:$src)))]>,
          EVEX;

  def m : AVX5128I<opc, MRMSrcMem,
                   (outs RC:$dst), (ins x86memop:$src),
                   !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                   [(set RC:$dst, (OpVt (OpNode (mem_frag addr:$src))))]>,
          EVEX;
}
// Packed 512-bit 14-bit reciprocal-square-root / reciprocal instructions,
// selected from X86frsqrt and X86frcp respectively.
defm VRSQRT14PSZ : avx512_fp14_p<0x4E, "vrsqrt14ps", X86frsqrt, VR512, f512mem,
                                 memopv16f32, v16f32>,
                   EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRSQRT14PDZ : avx512_fp14_p<0x4E, "vrsqrt14pd", X86frsqrt, VR512, f512mem,
                                 memopv8f64, v8f64>,
                   VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
defm VRCP14PSZ   : avx512_fp14_p<0x4C, "vrcp14ps", X86frcp, VR512, f512mem,
                                 memopv16f32, v16f32>,
                   EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRCP14PDZ   : avx512_fp14_p<0x4C, "vrcp14pd", X86frcp, VR512, f512mem,
                                 memopv8f64, v8f64>,
                   VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// Unmasked intrinsic forms (zero pass-through, all-ones mask).
def : Pat<(v16f32 (int_x86_avx512_rsqrt14_ps_512
                     (v16f32 VR512:$src),
                     (bc_v16f32 (v16i32 immAllZerosV)),
                     (i16 -1))),
          (VRSQRT14PSZr VR512:$src)>;
def : Pat<(v8f64 (int_x86_avx512_rsqrt14_pd_512
                    (v8f64 VR512:$src),
                    (bc_v8f64 (v16i32 immAllZerosV)),
                    (i8 -1))),
          (VRSQRT14PDZr VR512:$src)>;

def : Pat<(v16f32 (int_x86_avx512_rcp14_ps_512
                     (v16f32 VR512:$src),
                     (bc_v16f32 (v16i32 immAllZerosV)),
                     (i16 -1))),
          (VRCP14PSZr VR512:$src)>;
def : Pat<(v8f64 (int_x86_avx512_rcp14_pd_512
                    (v8f64 VR512:$src),
                    (bc_v8f64 (v16i32 immAllZerosV)),
                    (i8 -1))),
          (VRCP14PDZr VR512:$src)>;
4122
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
/// Two-source scalar instructions without selection patterns, gated on HasERI:
///   rr  - both sources in registers
///   rrb - register form printed with {sae} (EVEX_B set)
///   rm  - second source in memory (may load)
/// All forms are marked as free of side effects.
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                         X86MemOperand x86memop> {
  let hasSideEffects = 0, Predicates = [HasERI] in {
    def rr  : AVX5128I<opc, MRMSrcReg,
                       (outs RC:$dst), (ins RC:$src1, RC:$src2),
                       !strconcat(OpcodeStr,
                         " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       []>,
              EVEX_4V;

    def rrb : AVX5128I<opc, MRMSrcReg,
                       (outs RC:$dst), (ins RC:$src1, RC:$src2),
                       !strconcat(OpcodeStr,
                         " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
                       []>,
              EVEX_4V, EVEX_B;

    let mayLoad = 1 in
    def rm  : AVX5128I<opc, MRMSrcMem,
                       (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                       !strconcat(OpcodeStr,
                         " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       []>,
              EVEX_4V;
  } // hasSideEffects = 0, Predicates = [HasERI]
}
4144
// Scalar 28-bit reciprocal / reciprocal-square-root instructions.
defm VRCP28SS   : avx512_fp28_s<0xCB, "vrcp28ss", FR32X, f32mem>,
                  EVEX_CD8<32, CD8VT1>;
defm VRCP28SD   : avx512_fp28_s<0xCB, "vrcp28sd", FR64X, f64mem>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;
defm VRSQRT28SS : avx512_fp28_s<0xCD, "vrsqrt28ss", FR32X, f32mem>,
                  EVEX_CD8<32, CD8VT1>;
defm VRSQRT28SD : avx512_fp28_s<0xCD, "vrsqrt28sd", FR64X, f64mem>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;

// Unmasked intrinsic forms called with FROUND_NO_EXC select the {sae} "rrb"
// instruction. The vector operands are copied into the scalar register class
// and the scalar result is copied back to VR128X.
def : Pat<(v4f32 (int_x86_avx512_rcp28_ss
                    (v4f32 VR128X:$src1),
                    (v4f32 VR128X:$src2),
                    (bc_v4f32 (v4i32 immAllZerosV)),
                    (i8 -1),
                    FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
            (VRCP28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
                         (COPY_TO_REGCLASS VR128X:$src2, FR32X)),
            VR128X)>;

def : Pat<(v2f64 (int_x86_avx512_rcp28_sd
                    (v2f64 VR128X:$src1),
                    (v2f64 VR128X:$src2),
                    (bc_v2f64 (v4i32 immAllZerosV)),
                    (i8 -1),
                    FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
            (VRCP28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
                         (COPY_TO_REGCLASS VR128X:$src2, FR64X)),
            VR128X)>;

def : Pat<(v4f32 (int_x86_avx512_rsqrt28_ss
                    (v4f32 VR128X:$src1),
                    (v4f32 VR128X:$src2),
                    (bc_v4f32 (v4i32 immAllZerosV)),
                    (i8 -1),
                    FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
            (VRSQRT28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
                           (COPY_TO_REGCLASS VR128X:$src2, FR32X)),
            VR128X)>;

def : Pat<(v2f64 (int_x86_avx512_rsqrt28_sd
                    (v2f64 VR128X:$src1),
                    (v2f64 VR128X:$src2),
                    (bc_v2f64 (v4i32 immAllZerosV)),
                    (i8 -1),
                    FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
            (VRSQRT28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
                           (COPY_TO_REGCLASS VR128X:$src2, FR64X)),
            VR128X)>;
4177
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
/// Single-source packed instructions without selection patterns, gated on
/// HasERI:
///   r  - register source
///   rb - register source printed with {sae} (EVEX_B set)
///   m  - memory source
/// All forms are marked as free of side effects. Because none of them has a
/// pattern to infer flags from, the memory form must state mayLoad explicitly
/// (as the scalar avx512_fp28_s::rm form does); otherwise it would be
/// described as an instruction that neither loads nor has side effects.
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr,
                         RegisterClass RC, X86MemOperand x86memop> {
  let hasSideEffects = 0, Predicates = [HasERI] in {
  def r : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                        !strconcat(OpcodeStr,
                                   " \t{$src, $dst|$dst, $src}"),
                        []>, EVEX;
  def rb : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                        !strconcat(OpcodeStr,
                                   " \t{{sae}, $src, $dst|$dst, $src, {sae}}"),
                        []>, EVEX, EVEX_B;
  // Reads its operand from memory: flag it so it is not treated as a pure
  // register operation.
  let mayLoad = 1 in
  def m : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                        !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                        []>, EVEX;
  }
}
// Packed 512-bit 28-bit reciprocal-square-root / reciprocal instructions.
defm VRSQRT28PSZ : avx512_fp28_p<0xCC, "vrsqrt28ps", VR512, f512mem>,
                   EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRSQRT28PDZ : avx512_fp28_p<0xCC, "vrsqrt28pd", VR512, f512mem>,
                   VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
defm VRCP28PSZ   : avx512_fp28_p<0xCA, "vrcp28ps", VR512, f512mem>,
                   EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRCP28PDZ   : avx512_fp28_p<0xCA, "vrcp28pd", VR512, f512mem>,
                   VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// Unmasked intrinsic forms called with FROUND_NO_EXC select the {sae} "rb"
// instruction.
def : Pat<(v16f32 (int_x86_avx512_rsqrt28_ps
                     (v16f32 VR512:$src),
                     (bc_v16f32 (v16i32 immAllZerosV)),
                     (i16 -1),
                     FROUND_NO_EXC)),
          (VRSQRT28PSZrb VR512:$src)>;
def : Pat<(v8f64 (int_x86_avx512_rsqrt28_pd
                    (v8f64 VR512:$src),
                    (bc_v8f64 (v16i32 immAllZerosV)),
                    (i8 -1),
                    FROUND_NO_EXC)),
          (VRSQRT28PDZrb VR512:$src)>;

def : Pat<(v16f32 (int_x86_avx512_rcp28_ps
                     (v16f32 VR512:$src),
                     (bc_v16f32 (v16i32 immAllZerosV)),
                     (i16 -1),
                     FROUND_NO_EXC)),
          (VRCP28PSZrb VR512:$src)>;
def : Pat<(v8f64 (int_x86_avx512_rcp28_pd
                    (v8f64 VR512:$src),
                    (bc_v8f64 (v16i32 immAllZerosV)),
                    (i8 -1),
                    FROUND_NO_EXC)),
          (VRCP28PDZrb VR512:$src)>;
4217
/// avx512_sqrt_packed - 512-bit packed single ("ps") and double ("pd")
/// instructions selected from OpNode, each in register (rr) and memory (rm)
/// source form.
///   itins_s - itineraries (rr/rm) used by the PS forms
///   itins_d - itineraries (rr/rm) used by the PD forms
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              OpndItins itins_s, OpndItins itins_d> {
  def PSZrr :AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
             !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
             [(set VR512:$dst, (v16f32 (OpNode VR512:$src)))], itins_s.rr>,
             EVEX, EVEX_V512;

  let mayLoad = 1 in
  def PSZrm : AVX512PSI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
              !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
              [(set VR512:$dst,
                (OpNode (v16f32 (bitconvert (memopv16f32 addr:$src)))))],
              itins_s.rm>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;

  def PDZrr : AVX512PDI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
              !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
              [(set VR512:$dst, (v8f64 (OpNode VR512:$src)))], itins_d.rr>,
              EVEX, EVEX_V512;

  // The double-precision memory form loads through the v8f64 fragment so the
  // pattern matches a plain v8f64 load, mirroring the PS form above (which
  // loads through the fragment of its own element type).
  let mayLoad = 1 in
  def PDZrm : AVX512PDI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
              !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
              [(set VR512:$dst, (OpNode
                (v8f64 (bitconvert (memopv8f64 addr:$src)))))],
              itins_d.rm>, EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;

}
4245
/// avx512_sqrt_scalar - scalar single ("ss") and double ("sd") two-source
/// instructions. For each precision four records are produced:
///   *Zr      - FR32X/FR64X register form, no selection pattern
///   *Zr_Int  - VR128X register form selected from the intrinsic (codegen only)
///   *Zm      - FR32X/FR64X form with a memory second source, no pattern
///   *Zm_Int  - VR128X form with a memory second source, selected from the
///              intrinsic (codegen only)
///   F32Int / F64Int - intrinsics matched by the _Int forms
///   itins_s         - itineraries (rr/rm) attached to the SS forms
///   itins_d         - itineraries (rr/rm) attached to the SD forms
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,
                          Intrinsic F32Int, Intrinsic F64Int,
                          OpndItins itins_s, OpndItins itins_d> {
  // ---- single precision ----
  def SSZr : SI<opc, MRMSrcReg, (outs FR32X:$dst),
               (ins FR32X:$src1, FR32X:$src2),
               !strconcat(OpcodeStr,
                          "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [], itins_s.rr>, XS, EVEX_4V;
  let isCodeGenOnly = 1 in
  def SSZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
               (ins VR128X:$src1, VR128X:$src2),
               !strconcat(OpcodeStr,
                "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set VR128X:$dst,
                 (F32Int VR128X:$src1, VR128X:$src2))],
               itins_s.rr>, XS, EVEX_4V;
  let mayLoad = 1 in {
  def SSZm : SI<opc, MRMSrcMem, (outs FR32X:$dst),
               (ins FR32X:$src1, f32mem:$src2),
               !strconcat(OpcodeStr,
                          "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [], itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
  let isCodeGenOnly = 1 in
  def SSZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
                   (ins VR128X:$src1, ssmem:$src2),
                   !strconcat(OpcodeStr,
                 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set VR128X:$dst,
                     (F32Int VR128X:$src1, sse_load_f32:$src2))],
                   itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
  }

  // ---- double precision ----
  // The SD forms use the double-precision itineraries (itins_d), parallel to
  // the SS forms above using itins_s.
  def SDZr : SI<opc, MRMSrcReg, (outs FR64X:$dst),
               (ins FR64X:$src1, FR64X:$src2),
               !strconcat(OpcodeStr,
                          "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [], itins_d.rr>,
                      XD, EVEX_4V, VEX_W;
  let isCodeGenOnly = 1 in
  def SDZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
               (ins VR128X:$src1, VR128X:$src2),
               !strconcat(OpcodeStr,
                "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set VR128X:$dst,
                 (F64Int VR128X:$src1, VR128X:$src2))],
               itins_d.rr>, XD, EVEX_4V, VEX_W;
  let mayLoad = 1 in {
  def SDZm : SI<opc, MRMSrcMem, (outs FR64X:$dst),
               (ins FR64X:$src1, f64mem:$src2),
               !strconcat(OpcodeStr,
                  "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [], itins_d.rm>,
               XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
  let isCodeGenOnly = 1 in
  def SDZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
                  (ins VR128X:$src1, sdmem:$src2),
                   !strconcat(OpcodeStr,
                  "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR128X:$dst,
                    (F64Int VR128X:$src1, sse_load_f64:$src2))],
                  itins_d.rm>,
                  XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}
4306
4307
// Square root: one defm instantiates both the scalar (SS/SD) and the packed
// 512-bit (PS/PD) record families under the common VSQRT prefix.
defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
                                int_x86_avx512_sqrt_ss,
                                int_x86_avx512_sqrt_sd,
                                SSE_SQRTSS, SSE_SQRTSD>,
             avx512_sqrt_packed<0x51, "vsqrt", fsqrt,
                                SSE_SQRTPS, SSE_SQRTPD>;
4313
// Selection patterns for square root, reciprocal square root and reciprocal
// when AVX-512 is available.
let Predicates = [HasAVX512] in {
  // Unmasked packed sqrt intrinsics (zero pass-through, all-ones mask,
  // FROUND_CURRENT) -> plain register form.
  def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1),
                    (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_CURRENT)),
                   (VSQRTPSZrr VR512:$src1)>;
  def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1),
                    (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)),
                   (VSQRTPDZrr VR512:$src1)>;

  // Scalar fsqrt. The first source operand is left undefined (IMPLICIT_DEF).
  def : Pat<(f32 (fsqrt FR32X:$src)),
            (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
  def : Pat<(f32 (fsqrt (load addr:$src))),
            (VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
            Requires<[OptForSize]>;
  def : Pat<(f64 (fsqrt FR64X:$src)),
            (VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>;
  def : Pat<(f64 (fsqrt (load addr:$src))),
            (VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>,
            Requires<[OptForSize]>;

  // Scalar reciprocal square root -> vrsqrt14ss.
  def : Pat<(f32 (X86frsqrt FR32X:$src)),
            (VRSQRT14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
  def : Pat<(f32 (X86frsqrt (load addr:$src))),
            (VRSQRT14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
            Requires<[OptForSize]>;

  // Scalar reciprocal -> vrcp14ss.
  def : Pat<(f32 (X86frcp FR32X:$src)),
            (VRCP14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
  def : Pat<(f32 (X86frcp (load addr:$src))),
            (VRCP14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
            Requires<[OptForSize]>;

  // Legacy SSE scalar sqrt intrinsics. The operand is copied into the
  // extended scalar classes (FR32X/FR64X) that VSQRTSSZr/VSQRTSDZr take, the
  // same classes the other VR128X -> scalar copies in this file use, instead
  // of the narrower FR32/FR64 classes.
  def : Pat<(int_x86_sse_sqrt_ss VR128X:$src),
            (COPY_TO_REGCLASS (VSQRTSSZr (f32 (IMPLICIT_DEF)),
                                        (COPY_TO_REGCLASS VR128X:$src, FR32X)),
                              VR128X)>;
  def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
            (VSQRTSSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;

  def : Pat<(int_x86_sse2_sqrt_sd VR128X:$src),
            (COPY_TO_REGCLASS (VSQRTSDZr (f64 (IMPLICIT_DEF)),
                                        (COPY_TO_REGCLASS VR128X:$src, FR64X)),
                              VR128X)>;
  def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
            (VSQRTSDZm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
}
4359
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004360
/// avx512_fp_unop_rm - packed unary operations taking an 8-bit immediate,
/// selected from vector intrinsics. Four records are produced:
///   PSr / PSm - single precision (opcps), register / memory source
///   PDr / PDm - double precision (opcpd), register / memory source
/// The memory forms load through mem_frag32 / mem_frag64 and use VForm for
/// their EVEX_CD8 setting.
multiclass avx512_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
                             X86MemOperand x86memop, RegisterClass RC,
                             PatFrag mem_frag32, PatFrag mem_frag64,
                             Intrinsic V4F32Int, Intrinsic V2F64Int,
                             CD8VForm VForm> {
  let ExeDomain = SSEPackedSingle in {
    // Vector intrinsic operation, reg
    def PSr : AVX512AIi8<opcps, MRMSrcReg,
                         (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                         !strconcat(OpcodeStr,
                           "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                         [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>;

    // Vector intrinsic operation, mem
    def PSm : AVX512AIi8<opcps, MRMSrcMem,
                         (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                         !strconcat(OpcodeStr,
                           "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                         [(set RC:$dst,
                            (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
              EVEX_CD8<32, VForm>;
  } // ExeDomain = SSEPackedSingle

  let ExeDomain = SSEPackedDouble in {
    // Vector intrinsic operation, reg
    def PDr : AVX512AIi8<opcpd, MRMSrcReg,
                         (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                         !strconcat(OpcodeStr,
                           "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                         [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>;

    // Vector intrinsic operation, mem
    def PDm : AVX512AIi8<opcpd, MRMSrcMem,
                         (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                         !strconcat(OpcodeStr,
                           "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                         [(set RC:$dst,
                            (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
              EVEX_CD8<64, VForm>;
  } // ExeDomain = SSEPackedDouble
}
4403
/// avx512_fp_binop_rm - scalar two-source operations taking an 8-bit
/// immediate. For each precision (ss via opcss, sd via opcsd) three records
/// are produced:
///   *r     - FR32X/FR64X register form, no pattern, no side effects
///   *r_Int - VR128X register form selected from the intrinsic (codegen only)
///   *m     - VR128X form with a memory second source, selected from the
///            intrinsic
multiclass avx512_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
                              string OpcodeStr,
                              Intrinsic F32Int,
                              Intrinsic F64Int> {
  let ExeDomain = GenericDomain in {
    // ---- single precision ----

    // Operation, reg.
    let hasSideEffects = 0 in {
      def SSr : AVX512AIi8<opcss, MRMSrcReg,
                  (outs FR32X:$dst),
                  (ins FR32X:$src1, FR32X:$src2, i32i8imm:$src3),
                  !strconcat(OpcodeStr,
                    "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                  []>;
    }

    // Intrinsic operation, reg.
    let isCodeGenOnly = 1 in {
      def SSr_Int : AVX512AIi8<opcss, MRMSrcReg,
                      (outs VR128X:$dst),
                      (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
                      !strconcat(OpcodeStr,
                        "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                      [(set VR128X:$dst,
                         (F32Int VR128X:$src1, VR128X:$src2, imm:$src3))]>;
    }

    // Intrinsic operation, mem.
    def SSm : AVX512AIi8<opcss, MRMSrcMem,
                (outs VR128X:$dst),
                (ins VR128X:$src1, ssmem:$src2, i32i8imm:$src3),
                !strconcat(OpcodeStr,
                  "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                [(set VR128X:$dst,
                   (F32Int VR128X:$src1, sse_load_f32:$src2, imm:$src3))]>,
              EVEX_CD8<32, CD8VT1>;

    // ---- double precision ----

    // Operation, reg.
    let hasSideEffects = 0 in {
      def SDr : AVX512AIi8<opcsd, MRMSrcReg,
                  (outs FR64X:$dst),
                  (ins FR64X:$src1, FR64X:$src2, i32i8imm:$src3),
                  !strconcat(OpcodeStr,
                    "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                  []>,
                VEX_W;
    }

    // Intrinsic operation, reg.
    let isCodeGenOnly = 1 in {
      def SDr_Int : AVX512AIi8<opcsd, MRMSrcReg,
                      (outs VR128X:$dst),
                      (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
                      !strconcat(OpcodeStr,
                        "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                      [(set VR128X:$dst,
                         (F64Int VR128X:$src1, VR128X:$src2, imm:$src3))]>,
                    VEX_W;
    }

    // Intrinsic operation, mem.
    def SDm : AVX512AIi8<opcsd, MRMSrcMem,
                (outs VR128X:$dst),
                (ins VR128X:$src1, sdmem:$src2, i32i8imm:$src3),
                !strconcat(OpcodeStr,
                  "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                [(set VR128X:$dst,
                   (F64Int VR128X:$src1, sse_load_f64:$src2, imm:$src3))]>,
              VEX_W, EVEX_CD8<64, CD8VT1>;
  } // ExeDomain = GenericDomain
}
4461
// Packed VRNDSCALE: one vector source (register or memory) plus an 8-bit
// immediate. Both forms are defined without a selection pattern; mem_frag is
// accepted as a parameter but is not referenced inside this multiclass.
multiclass avx512_rndscale<bits<8> opc, string OpcodeStr,
                           X86MemOperand x86memop, RegisterClass RC,
                           PatFrag mem_frag, Domain d> {
  let ExeDomain = d in {
    // Register source.
    def r : AVX512AIi8<opc, MRMSrcReg,
                       (outs RC:$dst),
                       (ins RC:$src1, i32i8imm:$src2),
                       OpcodeStr #
                         " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       []>,
            EVEX;

    // Memory source.
    def m : AVX512AIi8<opc, MRMSrcMem,
                       (outs RC:$dst),
                       (ins x86memop:$src1, i32i8imm:$src2),
                       OpcodeStr #
                         " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       []>,
            EVEX;
  } // ExeDomain
}
4482
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00004483
// 512-bit single-precision VRNDSCALE and the lowering of its masked intrinsic
// when the mask is all-ones and the rounding operand is FROUND_CURRENT.
defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512,
                                    memopv16f32, SSEPackedSingle>,
                    EVEX_V512, EVEX_CD8<32, CD8VF>;

def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512
                     (v16f32 VR512:$src1), imm:$src2,
                     (v16f32 VR512:$src1), (i16 -1), FROUND_CURRENT)),
          (VRNDSCALEPSZr VR512:$src1, imm:$src2)>;

// 512-bit double-precision counterpart.
defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512,
                                    memopv8f64, SSEPackedDouble>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512
                    (v8f64 VR512:$src1), imm:$src2,
                    (v8f64 VR512:$src1), (i8 -1), FROUND_CURRENT)),
          (VRNDSCALEPDZr VR512:$src1, imm:$src2)>;
4502
// Scalar VRNDSCALE: two sources ($src1 register, $src2 register or memory)
// plus an 8-bit immediate ($src3). Neither form carries a selection pattern;
// lowering is done by separate Pat records.
//
// The assembly strings list all three source operands. Previously $src3 was
// missing, so the immediate was neither printed nor accepted by the assembler.
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                  Operand x86memop, RegisterClass RC,
                                  Domain d> {
let ExeDomain = d in {
  // Register second source.
  def r : AVX512AIi8<opc, MRMSrcReg,
                     (outs RC:$dst), (ins RC:$src1, RC:$src2, i32i8imm:$src3),
                     !strconcat(OpcodeStr,
                     " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                     []>, EVEX_4V;

  // Memory second source.
  def m : AVX512AIi8<opc, MRMSrcMem,
                     (outs RC:$dst),
                     (ins RC:$src1, x86memop:$src2, i32i8imm:$src3),
                     !strconcat(OpcodeStr,
                     " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                     []>, EVEX_4V;
} // ExeDomain
}
4519
// Scalar single-precision VRNDSCALE.
defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", ssmem, FR32X,
                                          SSEPackedSingle>,
                   EVEX_CD8<32, CD8VT1>;

// Scalar double-precision VRNDSCALE. VEX_W is set here because the
// instruction is encoded with EVEX.W1, as for the packed-double form
// VRNDSCALEPDZ; it was previously missing.
defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", sdmem, FR64X,
                                          SSEPackedDouble>,
                   VEX_W, EVEX_CD8<64, CD8VT1>;
4525
// Lower the generic scalar rounding nodes to VRNDSCALESS / VRNDSCALESD.
// The first source is left undefined (IMPLICIT_DEF); the immediate selects the
// rounding behaviour for each node. Every pattern states its result type
// explicitly (the f32 ffloor pattern used to omit it).
def : Pat<(f32 (ffloor FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>;
def : Pat<(f64 (ffloor FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x1))>;
def : Pat<(f32 (fnearbyint FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0xC))>;
def : Pat<(f64 (fnearbyint FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0xC))>;
def : Pat<(f32 (fceil FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x2))>;
def : Pat<(f64 (fceil FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x2))>;
def : Pat<(f32 (frint FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x4))>;
def : Pat<(f64 (frint FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x4))>;
def : Pat<(f32 (ftrunc FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x3))>;
def : Pat<(f64 (ftrunc FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>;
4546
// Lower v16f32 rounding nodes to VRNDSCALEPS with a fixed immediate.
def : Pat<(v16f32 (ffloor VR512:$src)),
          (VRNDSCALEPSZr VR512:$src, (i32 0x1))>;
def : Pat<(v16f32 (fnearbyint VR512:$src)),
          (VRNDSCALEPSZr VR512:$src, (i32 0xC))>;
def : Pat<(v16f32 (fceil VR512:$src)),
          (VRNDSCALEPSZr VR512:$src, (i32 0x2))>;
def : Pat<(v16f32 (frint VR512:$src)),
          (VRNDSCALEPSZr VR512:$src, (i32 0x4))>;
def : Pat<(v16f32 (ftrunc VR512:$src)),
          (VRNDSCALEPSZr VR512:$src, (i32 0x3))>;

// Lower v8f64 rounding nodes to VRNDSCALEPD with the same immediates.
def : Pat<(v8f64 (ffloor VR512:$src)),
          (VRNDSCALEPDZr VR512:$src, (i32 0x1))>;
def : Pat<(v8f64 (fnearbyint VR512:$src)),
          (VRNDSCALEPDZr VR512:$src, (i32 0xC))>;
def : Pat<(v8f64 (fceil VR512:$src)),
          (VRNDSCALEPDZr VR512:$src, (i32 0x2))>;
def : Pat<(v8f64 (frint VR512:$src)),
          (VRNDSCALEPDZr VR512:$src, (i32 0x4))>;
def : Pat<(v8f64 (ftrunc VR512:$src)),
          (VRNDSCALEPDZr VR512:$src, (i32 0x3))>;

Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004568
4569//-------------------------------------------------
4570// Integer truncate and extend operations
4571//-------------------------------------------------
4572
// Truncating moves (VPMOV*): source register srcRC, destination either a
// register of class dstRC or memory. Register destinations come in unmasked,
// masked (EVEX_K) and zero-masked (EVEX_KZ) forms; memory destinations in
// unmasked and masked forms. None of the defs carries a selection pattern.
multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr,
                            RegisterClass dstRC, RegisterClass srcRC,
                            RegisterClass KRC, X86MemOperand x86memop> {
  // Register destination.
  def rr   : AVX512XS8I<opc, MRMDestReg,
                        (outs dstRC:$dst),
                        (ins srcRC:$src),
                        OpcodeStr # " \t{$src, $dst|$dst, $src}",
                        []>,
             EVEX;

  def rrk  : AVX512XS8I<opc, MRMDestReg,
                        (outs dstRC:$dst),
                        (ins KRC:$mask, srcRC:$src),
                        OpcodeStr #
                          " \t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                        []>,
             EVEX, EVEX_K;

  def rrkz : AVX512XS8I<opc, MRMDestReg,
                        (outs dstRC:$dst),
                        (ins KRC:$mask, srcRC:$src),
                        OpcodeStr #
                          " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                        []>,
             EVEX, EVEX_KZ;

  // Memory destination.
  def mr   : AVX512XS8I<opc, MRMDestMem,
                        (outs),
                        (ins x86memop:$dst, srcRC:$src),
                        OpcodeStr # " \t{$src, $dst|$dst, $src}",
                        []>,
             EVEX;

  def mrk  : AVX512XS8I<opc, MRMDestMem,
                        (outs),
                        (ins x86memop:$dst, KRC:$mask, srcRC:$src),
                        OpcodeStr #
                          " \t{$src, $dst {${mask}}|${dst} {${mask}}, $src}",
                        []>,
             EVEX, EVEX_K;
}
// 64-bit elements -> 8-bit elements.
defm VPMOVQB   : avx512_trunc_sat<0x32, "vpmovqb",   VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVSQB  : avx512_trunc_sat<0x22, "vpmovsqb",  VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VO>;

// 64-bit elements -> 16-bit elements.
defm VPMOVQW   : avx512_trunc_sat<0x34, "vpmovqw",   VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVSQW  : avx512_trunc_sat<0x24, "vpmovsqw",  VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VQ>;

// 64-bit elements -> 32-bit elements.
defm VPMOVQD   : avx512_trunc_sat<0x35, "vpmovqd",   VR256X, VR512, VK8WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<32, CD8VH>;
defm VPMOVSQD  : avx512_trunc_sat<0x25, "vpmovsqd",  VR256X, VR512, VK8WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<32, CD8VH>;
defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<32, CD8VH>;

// 32-bit elements -> 16-bit elements.
defm VPMOVDW   : avx512_trunc_sat<0x33, "vpmovdw",   VR256X, VR512, VK16WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVSDW  : avx512_trunc_sat<0x23, "vpmovsdw",  VR256X, VR512, VK16WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VH>;

// 32-bit elements -> 8-bit elements.
defm VPMOVDB   : avx512_trunc_sat<0x31, "vpmovdb",   VR128X, VR512, VK16WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVSDB  : avx512_trunc_sat<0x21, "vpmovsdb",  VR128X, VR512, VK16WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VQ>;

4633
// Unmasked X86vtrunc selects the plain register-to-register form.
def : Pat<(v16i8 (X86vtrunc (v8i64 VR512:$src))),
          (VPMOVQBrr VR512:$src)>;
def : Pat<(v8i16 (X86vtrunc (v8i64 VR512:$src))),
          (VPMOVQWrr VR512:$src)>;
def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))),
          (VPMOVDWrr VR512:$src)>;
def : Pat<(v16i8 (X86vtrunc (v16i32 VR512:$src))),
          (VPMOVDBrr VR512:$src)>;
def : Pat<(v8i32 (X86vtrunc (v8i64 VR512:$src))),
          (VPMOVQDrr VR512:$src)>;

// Masked X86vtruncm selects the zero-masking register form.
def : Pat<(v16i8 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
          (VPMOVDBrrkz VK16WM:$mask, VR512:$src)>;
def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
          (VPMOVDWrrkz VK16WM:$mask, VR512:$src)>;
def : Pat<(v8i16 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
          (VPMOVQWrrkz VK8WM:$mask, VR512:$src)>;
def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
          (VPMOVQDrrkz VK8WM:$mask, VR512:$src)>;

Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004648
4649
// Zero/sign extending moves (VPMOVZX* / VPMOVSX*).
//   KRC       - write-mask register class for the masked forms.
//   DstRC     - destination register class.
//   SrcRC     - source register class for the register forms.
//   OpNode    - SDNode matched by the unmasked rr / rm forms.
//   mem_frag  - load fragment used by the rm pattern (bitconverted to InVT).
//   x86memop  - memory operand for the memory forms.
//   OpVT/InVT - result and source value types used in the patterns.
// Only the unmasked rr and rm forms have selection patterns.
//
// The masked AT&T assembly strings no longer carry a stray space before the
// '|' separator, so rrk / rmk match the spacing of every other variant.
multiclass avx512_extend<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                      RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode,
                      PatFrag mem_frag, X86MemOperand x86memop,
                      ValueType OpVT, ValueType InVT> {

  def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
              (ins SrcRC:$src),
              !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (OpVT (OpNode (InVT SrcRC:$src))))]>, EVEX;

  def rrk : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
              (ins KRC:$mask, SrcRC:$src),
              !strconcat(OpcodeStr,
                " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              []>, EVEX, EVEX_K;

  def rrkz : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
              (ins KRC:$mask, SrcRC:$src),
              !strconcat(OpcodeStr,
                " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
              []>, EVEX, EVEX_KZ;

  let mayLoad = 1 in {
    def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins x86memop:$src),
              !strconcat(OpcodeStr," \t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst,
                (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))]>,
              EVEX;

    def rmk : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins KRC:$mask, x86memop:$src),
              !strconcat(OpcodeStr,
                " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              []>,
              EVEX, EVEX_K;

    def rmkz : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins KRC:$mask, x86memop:$src),
              !strconcat(OpcodeStr,
                " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
              []>,
              EVEX, EVEX_KZ;
  }
}
4691
// Zero-extending moves.
defm VPMOVZXBDZ : avx512_extend<0x31, "vpmovzxbd", VK16WM, VR512, VR128X,
                                X86vzext, memopv2i64, i128mem, v16i32, v16i8>,
                  EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVZXBQZ : avx512_extend<0x32, "vpmovzxbq", VK8WM, VR512, VR128X,
                                X86vzext, memopv2i64, i128mem, v8i64, v16i8>,
                  EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVZXWDZ : avx512_extend<0x33, "vpmovzxwd", VK16WM, VR512, VR256X,
                                X86vzext, memopv4i64, i256mem, v16i32, v16i16>,
                  EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVZXWQZ : avx512_extend<0x34, "vpmovzxwq", VK8WM, VR512, VR128X,
                                X86vzext, memopv2i64, i128mem, v8i64, v8i16>,
                  EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVZXDQZ : avx512_extend<0x35, "vpmovzxdq", VK8WM, VR512, VR256X,
                                X86vzext, memopv4i64, i256mem, v8i64, v8i32>,
                  EVEX_V512, EVEX_CD8<32, CD8VH>;

// Sign-extending moves.
defm VPMOVSXBDZ : avx512_extend<0x21, "vpmovsxbd", VK16WM, VR512, VR128X,
                                X86vsext, memopv2i64, i128mem, v16i32, v16i8>,
                  EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVSXBQZ : avx512_extend<0x22, "vpmovsxbq", VK8WM, VR512, VR128X,
                                X86vsext, memopv2i64, i128mem, v8i64, v16i8>,
                  EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVSXWDZ : avx512_extend<0x23, "vpmovsxwd", VK16WM, VR512, VR256X,
                                X86vsext, memopv4i64, i256mem, v16i32, v16i16>,
                  EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVSXWQZ : avx512_extend<0x24, "vpmovsxwq", VK8WM, VR512, VR128X,
                                X86vsext, memopv2i64, i128mem, v8i64, v8i16>,
                  EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVSXDQZ : avx512_extend<0x25, "vpmovsxdq", VK8WM, VR512, VR256X,
                                X86vsext, memopv4i64, i256mem, v8i64, v8i32>,
                  EVEX_V512, EVEX_CD8<32, CD8VH>;
4723
4724//===----------------------------------------------------------------------===//
4725// GATHER - SCATTER Operations
4726
// Masked gather. The instruction produces both the gathered vector ($dst) and
// an updated mask ($mask_wb). $src1 is tied to $dst, $mask is tied to
// $mask_wb, and $dst is marked early-clobber. No selection pattern.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                         RegisterClass RC, X86MemOperand memop> {
  let mayLoad = 1,
      Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in {
    def rm : AVX5128I<opc, MRMSrcMem,
                      (outs RC:$dst, KRC:$mask_wb),
                      (ins RC:$src1, KRC:$mask, memop:$src2),
                      OpcodeStr #
                        " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}",
                      []>,
             EVEX, EVEX_K;
  }
}
Cameron McInally45325962014-03-26 13:50:50 +00004737
// Floating-point gathers, double precision.
let ExeDomain = SSEPackedDouble in {
  defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", VK8WM, VR512,
                                   vy64xmem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", VK8WM, VR512,
                                   vz64mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
}

// Floating-point gathers, single precision.
let ExeDomain = SSEPackedSingle in {
  defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512,
                                   vz32mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;
  defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X,
                                   vz64mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;
}

// Integer gathers.
defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512,
                                 vy64xmem>,
                   EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512,
                                 vz32mem>,
                   EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", VK8WM, VR512,
                                 vz64mem>,
                   EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", VK8WM, VR256X,
                                 vz64mem>,
                   EVEX_V512, EVEX_CD8<32, CD8VT1>;

4761
// Masked scatter. The only result is the updated mask ($mask_wb), which is
// tied to the incoming $mask. No selection pattern.
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                          RegisterClass RC, X86MemOperand memop> {
  let mayStore = 1, Constraints = "$mask = $mask_wb" in {
    def mr : AVX5128I<opc, MRMDestMem,
                      (outs KRC:$mask_wb),
                      (ins memop:$dst, KRC:$mask, RC:$src2),
                      OpcodeStr #
                        " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}",
                      []>,
             EVEX, EVEX_K;
  }
}
4771
// Floating-point scatters, double precision.
let ExeDomain = SSEPackedDouble in {
  defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", VK8WM, VR512,
                                     vy64xmem>,
                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", VK8WM, VR512,
                                     vz64mem>,
                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
}

// Floating-point scatters, single precision.
let ExeDomain = SSEPackedSingle in {
  defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", VK16WM, VR512,
                                     vz32mem>,
                      EVEX_V512, EVEX_CD8<32, CD8VT1>;
  defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", VK8WM, VR256X,
                                     vz64mem>,
                      EVEX_V512, EVEX_CD8<32, CD8VT1>;
}

// Integer scatters.
defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", VK8WM, VR512,
                                   vy64xmem>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", VK16WM, VR512,
                                   vz32mem>,
                    EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512,
                                   vz64mem>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", VK8WM, VR256X,
                                   vz64mem>,
                    EVEX_V512, EVEX_CD8<32, CD8VT1>;

4795
// Gather/scatter prefetch. A single masked memory form with no results; it is
// predicated on HasPFI and flagged as having side effects. The instruction
// format F is supplied by each instantiation.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F,
                                          string OpcodeStr,
                                          RegisterClass KRC,
                                          X86MemOperand memop> {
  let Predicates = [HasPFI], hasSideEffects = 1 in {
    def m : AVX5128I<opc, F,
                     (outs),
                     (ins KRC:$mask, memop:$src),
                     OpcodeStr # " \t{$src {${mask}}|{${mask}}, $src}",
                     []>,
            EVEX, EVEX_K;
  }
}
4804
// Gather prefetch, format MRM1m.
defm VGATHERPF0DPS : avx512_gather_scatter_prefetch<0xC6, MRM1m,
                       "vgatherpf0dps", VK16WM, vz32mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS : avx512_gather_scatter_prefetch<0xC7, MRM1m,
                       "vgatherpf0qps", VK8WM, vz64mem>,
                     EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD : avx512_gather_scatter_prefetch<0xC6, MRM1m,
                       "vgatherpf0dpd", VK8WM, vy32mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD : avx512_gather_scatter_prefetch<0xC7, MRM1m,
                       "vgatherpf0qpd", VK8WM, vz64mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Gather prefetch, format MRM2m.
defm VGATHERPF1DPS : avx512_gather_scatter_prefetch<0xC6, MRM2m,
                       "vgatherpf1dps", VK16WM, vz32mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS : avx512_gather_scatter_prefetch<0xC7, MRM2m,
                       "vgatherpf1qps", VK8WM, vz64mem>,
                     EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD : avx512_gather_scatter_prefetch<0xC6, MRM2m,
                       "vgatherpf1dpd", VK8WM, vy32mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD : avx512_gather_scatter_prefetch<0xC7, MRM2m,
                       "vgatherpf1qpd", VK8WM, vz64mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Scatter prefetch, format MRM5m.
defm VSCATTERPF0DPS : avx512_gather_scatter_prefetch<0xC6, MRM5m,
                        "vscatterpf0dps", VK16WM, vz32mem>,
                      EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS : avx512_gather_scatter_prefetch<0xC7, MRM5m,
                        "vscatterpf0qps", VK8WM, vz64mem>,
                      EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD : avx512_gather_scatter_prefetch<0xC6, MRM5m,
                        "vscatterpf0dpd", VK8WM, vy32mem>,
                      EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD : avx512_gather_scatter_prefetch<0xC7, MRM5m,
                        "vscatterpf0qpd", VK8WM, vz64mem>,
                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Scatter prefetch, format MRM6m.
defm VSCATTERPF1DPS : avx512_gather_scatter_prefetch<0xC6, MRM6m,
                        "vscatterpf1dps", VK16WM, vz32mem>,
                      EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS : avx512_gather_scatter_prefetch<0xC7, MRM6m,
                        "vscatterpf1qps", VK8WM, vz64mem>,
                      EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD : avx512_gather_scatter_prefetch<0xC6, MRM6m,
                        "vscatterpf1dpd", VK8WM, vy32mem>,
                      EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD : avx512_gather_scatter_prefetch<0xC7, MRM6m,
                        "vscatterpf1qpd", VK8WM, vz64mem>,
                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004852//===----------------------------------------------------------------------===//
4853// VSHUFPS - VSHUFPD Operations
4854
// VSHUFPS / VSHUFPD: two-source shuffle controlled by an 8-bit immediate.
// Both forms use opcode 0xC6 and match X86Shufp on value type vt; the memory
// form loads its second source through mem_frag.
multiclass avx512_shufp<RegisterClass RC, X86MemOperand x86memop,
                        ValueType vt, string OpcodeStr, PatFrag mem_frag,
                        Domain d> {
  // Second source from memory.
  def rmi : AVX512PIi8<0xC6, MRMSrcMem,
                       (outs RC:$dst),
                       (ins RC:$src1, x86memop:$src2, i8imm:$src3),
                       OpcodeStr #
                         " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                       [(set RC:$dst,
                          (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
                                        (i8 imm:$src3))))],
                       d, IIC_SSE_SHUFP>,
            EVEX_4V, Sched<[WriteShuffleLd, ReadAfterLd]>;

  // Second source from a register.
  def rri : AVX512PIi8<0xC6, MRMSrcReg,
                       (outs RC:$dst),
                       (ins RC:$src1, RC:$src2, i8imm:$src3),
                       OpcodeStr #
                         " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                       [(set RC:$dst,
                          (vt (X86Shufp RC:$src1, RC:$src2,
                                        (i8 imm:$src3))))],
                       d, IIC_SSE_SHUFP>,
            EVEX_4V, Sched<[WriteShuffle]>;
}
4873
// 512-bit VSHUFPS / VSHUFPD.
defm VSHUFPSZ : avx512_shufp<VR512, f512mem, v16f32, "vshufps", memopv16f32,
                             SSEPackedSingle>,
                PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VSHUFPDZ : avx512_shufp<VR512, f512mem, v8f64, "vshufpd", memopv8f64,
                             SSEPackedDouble>,
                PD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// Integer-typed X86Shufp nodes reuse the floating-point instructions:
// v16i32 selects VSHUFPS ...
def : Pat<(v16i32 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
          (VSHUFPSZrri VR512:$src1, VR512:$src2, imm:$imm)>;
def : Pat<(v16i32 (X86Shufp VR512:$src1,
                            (memopv16i32 addr:$src2), (i8 imm:$imm))),
          (VSHUFPSZrmi VR512:$src1, addr:$src2, imm:$imm)>;

// ... and v8i64 selects VSHUFPD.
def : Pat<(v8i64 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
          (VSHUFPDZrri VR512:$src1, VR512:$src2, imm:$imm)>;
def : Pat<(v8i64 (X86Shufp VR512:$src1,
                           (memopv8i64 addr:$src2), (i8 imm:$imm))),
          (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004890
// VALIGN, parameterized by an X86VectorVTInfo record (named '_').
// The register form is generated through AVX512_masking; note that its
// X86VAlign pattern takes the sources in swapped order ($src2, $src1).
// The memory form is defined without a selection pattern.
multiclass avx512_valign<X86VectorVTInfo _> {
  defm rri : AVX512_masking<0x03, MRMSrcReg,
                            (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2, i8imm:$src3),
                            "valign"##_.Suffix,
                            "$src3, $src2, $src1", "$src1, $src2, $src3",
                            (_.VT (X86VAlign _.RC:$src2, _.RC:$src1,
                                             (i8 imm:$src3))),
                            _.VT, _.RC, _.KRCWM>,
             AVX512AIi8Base, EVEX_4V;

  // The floating-point vector type of the same shape selects the same
  // register instruction, again with the two sources swapped.
  def : Pat<(_.FloatVT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
            (!cast<Instruction>(NAME##rri) _.RC:$src2, _.RC:$src1, imm:$imm)>;

  let mayLoad = 1 in {
    def rmi : AVX512AIi8<0x03, MRMSrcMem,
                         (outs _.RC:$dst),
                         (ins _.RC:$src1, _.MemOp:$src2, i8imm:$src3),
                         !strconcat("valign"##_.Suffix,
                                    " \t{$src3, $src2, $src1, $dst|"
                                    "$dst, $src1, $src2, $src3}"),
                         []>,
              EVEX_4V;
  }
}
// 512-bit VALIGN for 32-bit and 64-bit elements.
defm VALIGND : avx512_valign<v16i32_info>,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VALIGNQ : avx512_valign<v8i64_info>,
               VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004915
// Helper fragments to match sext vXi1 to vXiY: an X86vsrai of the 512-bit
// source by (element width - 1) bits.
def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
def v8i1sextv8i64   : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;

4919
// VPABS: one-source operation with register, full-vector memory and broadcast
// memory sources, each in unmasked, masked (EVEX_K) and zero-masked (EVEX_KZ)
// forms.
//   OpVT          - accepted but not referenced inside this multiclass.
//   KRC           - write-mask register class.
//   RC            - vector register class used for every register operand.
//   x86memop      - full-vector memory operand.
//   x86scalar_mop - scalar memory operand for the broadcast (EVEX_B) forms.
//   BrdcstStr     - broadcast decoration appended to the memory operand in
//                   the assembly string.
// None of the defs carries a selection pattern.
//
// The memory forms now use RC for the destination, like the register forms;
// they used to hard-code VR512. Both existing instantiations pass VR512, so
// the generated records are unchanged.
multiclass avx512_vpabs<bits<8> opc, string OpcodeStr, ValueType OpVT,
                        RegisterClass KRC, RegisterClass RC,
                        X86MemOperand x86memop, X86MemOperand x86scalar_mop,
                        string BrdcstStr> {
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
            !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
            []>, EVEX;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
             !strconcat(OpcodeStr, " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
             []>, EVEX, EVEX_K;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
              !strconcat(OpcodeStr,
                         " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
              []>, EVEX, EVEX_KZ;
  let mayLoad = 1 in {
    def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
              (ins x86memop:$src),
              !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
              []>, EVEX;
    def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
               (ins KRC:$mask, x86memop:$src),
               !strconcat(OpcodeStr,
                          " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
               []>, EVEX, EVEX_K;
    def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                (ins KRC:$mask, x86memop:$src),
                !strconcat(OpcodeStr,
                           " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
                []>, EVEX, EVEX_KZ;
    def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
               (ins x86scalar_mop:$src),
               !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                          ", $dst|$dst, ${src}", BrdcstStr, "}"),
               []>, EVEX, EVEX_B;
    def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                (ins KRC:$mask, x86scalar_mop:$src),
                !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                           ", $dst {${mask}}|$dst {${mask}}, ${src}", BrdcstStr, "}"),
                []>, EVEX, EVEX_B, EVEX_K;
    def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                 (ins KRC:$mask, x86scalar_mop:$src),
                 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                            ", $dst {${mask}} {z}|$dst {${mask}} {z}, ${src}",
                            BrdcstStr, "}"),
                 []>, EVEX, EVEX_B, EVEX_KZ;
  }
}
4967
// 512-bit VPABSD / VPABSQ.
defm VPABSDZ : avx512_vpabs<0x1E, "vpabsd", v16i32, VK16WM, VR512,
                            i512mem, i32mem, "{1to16}">,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPABSQZ : avx512_vpabs<0x1F, "vpabsq", v8i64, VK8WM, VR512,
                            i512mem, i64mem, "{1to8}">,
               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Match the expanded form xor(sext, add(src, sext)), where sext is the
// arithmetic-shift helper fragment for the element width.
def : Pat<(xor (bc_v16i32 (v16i1sextv16i32)),
               (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
          (VPABSDZrr VR512:$src)>;
def : Pat<(xor (bc_v8i64 (v8i1sextv8i64)),
               (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
          (VPABSQZrr VR512:$src)>;

// Masked intrinsics with an all-zeros second operand and an all-ones mask
// select the unmasked register form.
def : Pat<(v16i32 (int_x86_avx512_mask_pabs_d_512
                     (v16i32 VR512:$src),
                     (v16i32 immAllZerosV), (i16 -1))),
          (VPABSDZrr VR512:$src)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pabs_q_512
                    (v8i64 VR512:$src),
                    (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPABSQZrr VR512:$src)>;

Elena Demikhovsky172a27c2014-01-08 10:54:22 +00004990
// Instruction shapes shared by the one-source EVEX operations instantiated
// from this multiclass.
//   opc           - opcode byte.
//   OpcodeStr     - mnemonic text placed at the start of every asm string.
//   RC            - register class of $dst and of the register sources.
//   KRC           - register class of the $mask operand.
//   x86memop      - operand type of the memory source in the rm* forms.
//   x86scalar_mop - operand type of the memory source in the rmb* (EVEX_B)
//                   forms.
//   BrdcstStr     - text appended to the memory operand in the rmb* asm
//                   strings, e.g. "{1to16}".
// Nine records are produced: {rr, rm, rmb} x {unmasked, kz, k}. All of them
// carry an empty pattern list.
multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
                           RegisterClass RC, RegisterClass KRC,
                           X86MemOperand x86memop,
                           X86MemOperand x86scalar_mop, string BrdcstStr> {
  // Unmasked forms.
  // Note: no blank before '|'. The text on each side of '|' is used verbatim
  // as one asm-syntax alternative, so a blank there would become trailing
  // whitespace; this also keeps rr in line with the other eight forms.
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src),
       !strconcat(OpcodeStr, " \t{$src, ${dst}|${dst}, $src}"),
       []>, EVEX;
  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins x86memop:$src),
       !strconcat(OpcodeStr, " \t{$src, ${dst}|${dst}, $src}"),
       []>, EVEX;
  def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins x86scalar_mop:$src),
       !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                  ", ${dst}|${dst}, ${src}", BrdcstStr, "}"),
       []>, EVEX, EVEX_B;

  // EVEX_KZ forms: take a $mask operand and print "{z}" after it.
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
       (ins KRC:$mask, RC:$src),
       !strconcat(OpcodeStr,
                  " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
       []>, EVEX, EVEX_KZ;
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, x86memop:$src),
       !strconcat(OpcodeStr,
                  " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
       []>, EVEX, EVEX_KZ;
  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, x86scalar_mop:$src),
       !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                  ", ${dst} {${mask}} {z}|${dst} {${mask}} {z}, ${src}",
                  BrdcstStr, "}"),
       []>, EVEX, EVEX_KZ, EVEX_B;

  // EVEX_K forms: $src1 is an extra input tied to $dst; $src2 is the operand
  // that appears in the asm string.
  let Constraints = "$src1 = $dst" in {
  def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, KRC:$mask, RC:$src2),
       !strconcat(OpcodeStr,
                  " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
       []>, EVEX, EVEX_K;
  def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, KRC:$mask, x86memop:$src2),
       !strconcat(OpcodeStr,
                  " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
       []>, EVEX, EVEX_K;
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, KRC:$mask, x86scalar_mop:$src2),
       !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                  ", ${dst} {${mask}}|${dst} {${mask}}, ${src2}", BrdcstStr, "}"),
       []>, EVEX, EVEX_K, EVEX_B;
  }
}
5043
// 512-bit VPCONFLICTD / VPCONFLICTQ, available only with HasCDI. Both use
// opcode 0xC4; the Q variant adds VEX_W, a 64-bit scalar memory operand and
// the "{1to8}" broadcast suffix.
let Predicates = [HasCDI] in {
  defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd",
                                     VR512, VK16WM,
                                     i512mem, i32mem, "{1to16}">,
                     EVEX_V512, EVEX_CD8<32, CD8VF>;

  defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq",
                                     VR512, VK8WM,
                                     i512mem, i64mem, "{1to8}">,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
Elena Demikhovsky6270b382013-12-10 11:58:35 +00005055
// Masked VPCONFLICT intrinsics -> rrk instruction forms.
// The intrinsic's first operand becomes the instruction's $src2, its second
// operand becomes $src1 (the input tied to $dst in the rrk form), and the GPR
// mask is copied into the write-mask register class.
// The patterns are gated on HasCDI, like the instructions they select, so they
// cannot match on a subtarget without that feature.
let Predicates = [HasCDI] in {
def : Pat<(int_x86_avx512_mask_conflict_d_512 VR512:$src2, VR512:$src1,
                                              GR16:$mask),
          (VPCONFLICTDrrk VR512:$src1,
           (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;

def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1,
                                              GR8:$mask),
          (VPCONFLICTQrrk VR512:$src1,
           (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
}
Elena Demikhovskycf0b9ba2014-04-09 12:37:50 +00005065
// 512-bit VPLZCNTD / VPLZCNTQ, available only with HasCDI. They reuse the
// avx512_conflict instruction shapes with opcode 0x44; the Q variant adds
// VEX_W, a 64-bit scalar memory operand and the "{1to8}" broadcast suffix.
let Predicates = [HasCDI] in {
  defm VPLZCNTD : avx512_conflict<0x44, "vplzcntd",
                                  VR512, VK16WM,
                                  i512mem, i32mem, "{1to16}">,
                  EVEX_V512, EVEX_CD8<32, CD8VF>;

  defm VPLZCNTQ : avx512_conflict<0x44, "vplzcntq",
                                  VR512, VK8WM,
                                  i512mem, i64mem, "{1to8}">,
                  EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
5077
// Masked VPLZCNT intrinsics -> rrk instruction forms.
// The intrinsic's first operand becomes the instruction's $src2, its second
// operand becomes $src1 (the input tied to $dst in the rrk form), and the GPR
// mask is copied into the write-mask register class.
// The patterns are gated on HasCDI, like the instructions they select, so they
// cannot match on a subtarget without that feature.
let Predicates = [HasCDI] in {
def : Pat<(int_x86_avx512_mask_lzcnt_d_512 VR512:$src2, VR512:$src1,
                                           GR16:$mask),
          (VPLZCNTDrrk VR512:$src1,
           (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;

def : Pat<(int_x86_avx512_mask_lzcnt_q_512 VR512:$src2, VR512:$src1,
                                           GR8:$mask),
          (VPLZCNTQrrk VR512:$src1,
           (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
}
5087
// Generic ctlz on 512-bit integer vectors -> VPLZCNTD / VPLZCNTQ, with the
// memory-operand patterns selecting the rm forms.
// Gated on HasCDI, like the instructions being selected, so ctlz is never
// matched to them on a subtarget without that feature.
let Predicates = [HasCDI] in {
def : Pat<(v16i32 (ctlz (memopv16i32 addr:$src))),
          (VPLZCNTDrm addr:$src)>;
def : Pat<(v16i32 (ctlz (v16i32 VR512:$src))),
          (VPLZCNTDrr VR512:$src)>;
def : Pat<(v8i64 (ctlz (memopv8i64 addr:$src))),
          (VPLZCNTQrm addr:$src)>;
def : Pat<(v8i64 (ctlz (v8i64 VR512:$src))),
          (VPLZCNTQrr VR512:$src)>;
}
5096
// Stores of constant i1 values are emitted as byte-immediate stores: both
// spellings of "true" (-1 and 1) write the byte 1, and 0 writes the byte 0.
def : Pat<(store (i1 -1), addr:$dst),
          (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 1), addr:$dst),
          (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 0), addr:$dst),
          (MOV8mi addr:$dst, (i8 0))>;
Elena Demikhovskyacc5c9e2014-04-22 14:13:10 +00005100
// Store of a single mask bit held in VK1.
// A KMOVW store to memory writes a full 16-bit word, which would overwrite the
// byte following $dst. Instead, move the mask into a GPR with KMOVWrk and
// store only its low byte with MOV8mr, so exactly one byte is written.
// NOTE(review): relies on KMOVWrk and sub_8bit being defined as elsewhere in
// the X86 backend - confirm against this revision.
def : Pat<(store VK1:$src, addr:$dst),
          (MOV8mr addr:$dst,
           (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)),
            sub_8bit))>;
5103
// Pattern fragment matching a truncstore whose memory value type is i1.
def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
                           (truncstore node:$val, node:$ptr), [{
  // Accept the node only when the type actually written to memory is i1.
  const StoreSDNode *Store = cast<StoreSDNode>(N);
  return Store->getMemoryVT() == MVT::i1;
}]>;
5108
// A truncstorei1 of a GR8 value is emitted as an ordinary byte store of that
// register.
def : Pat<(truncstorei1 GR8:$src, addr:$dst), (MOV8mr addr:$dst, GR8:$src)>;
5111