blob: e264a3de4372feb480f6000a9d6f9efea5625676 [file] [log] [blame]
// Bundle of template arguments that can all be derived from a vector type
// (NumElts x EltVT): the write-mask register class, the memory operand, the
// load fragments, and so on.  Multiclasses take a single record of this class
// as a template argument instead of a long list of individual arguments.
class X86VectorVTInfo<int NumElts, ValueType EltVT, RegisterClass rc,
                      string suffix = ""> {
  // Register class of the vector itself.
  RegisterClass RC = rc;

  // Mask register class matching the element count (VK<NumElts>).
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Write-mask variant of the mask register class (VK<NumElts>WM).
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // GPR register class wide enough to hold the write mask.  Vectors with
  // fewer than 8 elements still use GR8.  TableGen has no !lt, so
  // "NumElts < 8" is spelled as "(NumElts >> 3) == 0".
  RegisterClass MRC =
    !cast<RegisterClass>("GR" #
                         !if (!eq (!srl(NumElts, 3), 0), 8, NumElts));

  // Suffix appended to the instruction mnemonic.
  string Suffix = suffix;

  // Name of the vector type as a string, e.g. "v16i32".
  string VTName = "v" # NumElts # EltVT;

  // The vector value type itself.
  ValueType VT = !cast<ValueType>(VTName);

  // Element type name, e.g. "i32".
  string EltTypeName = !cast<string>(EltVT);

  // Element size in bits, as a string ("32" for v16i32) and as an int.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // "i" for integer element types, "f" for floating-point ones.
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Total vector size in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // Matching full-vector memory operand (e.g. i512mem for VR512) and the
  // memory operand for a single element.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");

  // Load fragments.
  // Note: 128/256-bit integer vector types use loadv2i64/loadv4i64 because
  // of load promotion during legalization.
  PatFrag LdFrag = !cast<PatFrag>("load" #
                                  !if (!eq (TypeVariantName, "i"),
                                       !if (!eq (Size, 128), "v2i64",
                                       !if (!eq (Size, 256), "v4i64",
                                            VTName)), VTName));
  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);

  // Floating-point type of the same shape, e.g. v16f32 for v16i32.
  // Note: for EltSize < 32 the float type would be illegal and TableGen
  // would fail to compile, so FloatVT falls back to VT in that case.
  ValueType FloatVT = !cast<ValueType>(
                        !if (!eq (!srl(EltSize,5),0),
                             VTName,
                             !if (!eq(TypeVariantName, "i"),
                                  "v" # NumElts # "f" # EltSize,
                                  VTName)));

  // Assembly string that requests an embedded broadcast.
  string BroadcastStr = "{1to" # NumElts # "}";
}
68
// 512-bit integer vector types, held in VR512.
def v64i8_info  : X86VectorVTInfo<64, i8,  VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;

// 256-bit integer vector types.  The trailing "x" (as in v32i8x_info) means
// RC = VR256X.
def v32i8x_info  : X86VectorVTInfo<32, i8,  VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;

// 128-bit integer vector types, with RC = VR128X.
def v16i8x_info : X86VectorVTInfo<16, i8,  VR128X, "b">;
def v8i16x_info : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info : X86VectorVTInfo<2,  i64, VR128X, "q">;
84
// Groups the 512-, 256- and 128-bit X86VectorVTInfo records that share one
// element type, so that all three widths can be passed as one argument.
class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;   // 512-bit variant
  X86VectorVTInfo info256 = i256;   // 256-bit variant
  X86VectorVTInfo info128 = i128;   // 128-bit variant
}
91
// One width-triple per integer element type: (512-bit, 256-bit, 128-bit).
def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info,
                                             v32i8x_info,
                                             v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info,
                                             v16i16x_info,
                                             v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info,
                                             v8i32x_info,
                                             v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info,
                                             v4i64x_info,
                                             v2i64x_info>;
100
101
// Shared implementation behind AVX512_masking and AVX512_masking_3src.
//
// Emits three records per instantiation:
//   NAME    - the unmasked instruction, selecting RHS;
//   NAME#k  - the merge-masking form, selecting MaskingRHS;
//   NAME#kz - the zero-masking form, selecting RHS under the mask and an
//             all-zeros vector elsewhere.
// NOTE(review): the zero vector is spelled as a bitconvert of
// (v16i32 immAllZerosV), so OpVT is presumably expected to be a 512-bit
// type -- confirm before instantiating with narrower types.
multiclass AVX512_masking_common<bits<8> O, Format F, dag Outs, dag Ins,
                                 dag MaskingIns, dag ZeroMaskingIns,
                                 string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskingRHS, ValueType OpVT,
                                 RegisterClass RC, RegisterClass KRC,
                                 string MaskingConstraint = ""> {
  // Unmasked form.
  def NAME: AVX512<O, F, Outs, Ins,
                   OpcodeStr#" \t{"#AttSrcAsm#", $dst|"#
                   "$dst, "#IntelSrcAsm#"}",
                   [(set RC:$dst, RHS)]>;

  // Merge-masking form.  The added complexity makes it win over the
  // VMOV*rrk Pat<>.
  let AddedComplexity = 20 in
  def NAME#k: AVX512<O, F, Outs, MaskingIns,
                     OpcodeStr#" \t{"#AttSrcAsm#", $dst {${mask}}|"#
                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                     [(set RC:$dst, MaskingRHS)]>,
              EVEX_K {
    // The 3src subclass overrides this with a let.
    string Constraints = MaskingConstraint;
  }

  // Zero-masking form.  The added complexity makes it win over the
  // VMOV*rrkz Pat<>.
  let AddedComplexity = 30 in
  def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                      OpcodeStr#" \t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                      "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                      [(set RC:$dst,
                            (vselect KRC:$mask, RHS,
                                     (OpVT (bitconvert
                                            (v16i32 immAllZerosV)))))]>,
               EVEX_KZ;
}
135
// Generates the unconditional (non-masking), the masking and the zero-masking
// variants of an instruction.  In the masking variant the preserved vector
// elements come from a new dummy input operand, $src0, which is tied to $dst.
multiclass AVX512_masking<bits<8> O, Format F, dag Outs, dag Ins,
                          string OpcodeStr,
                          string AttSrcAsm, string IntelSrcAsm,
                          dag RHS, ValueType OpVT, RegisterClass RC,
                          RegisterClass KRC> :
  AVX512_masking_common<O, F, Outs,
                        // Unmasked inputs.
                        Ins,
                        // Masking inputs: pass-through value, mask, then Ins.
                        !con((ins RC:$src0, KRC:$mask), Ins),
                        // Zero-masking inputs: mask, then Ins.
                        !con((ins KRC:$mask), Ins),
                        OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                        (vselect KRC:$mask, RHS, RC:$src0), OpVT, RC, KRC,
                        "$src0 = $dst">;
151
// Like AVX512_masking, except that one source operand ($src1) is already tied
// to $dst, so it doubles as the source of the preserved vector elements.
// NOTE: NonTiedIns (the ins dag) must not include $src1.
multiclass AVX512_masking_3src<bits<8> O, Format F, dag Outs, dag NonTiedIns,
                               string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, ValueType OpVT,
                               RegisterClass RC, RegisterClass KRC> :
  AVX512_masking_common<O, F, Outs,
                        // Unmasked inputs: tied $src1, then the rest.
                        !con((ins RC:$src1), NonTiedIns),
                        // Masking inputs: tied $src1, mask, then the rest.
                        !con((ins RC:$src1), !con((ins KRC:$mask),
                                                  NonTiedIns)),
                        // Zero-masking inputs: same operand list as masking.
                        !con((ins RC:$src1), !con((ins KRC:$mask),
                                                  NonTiedIns)),
                        OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                        (vselect KRC:$mask, RHS, RC:$src1), OpVT, RC, KRC>;
169
// Bitcasts between vector types of the same width.  Each pattern returns the
// source register retyped, since no instruction is needed for the conversion.
// For every destination type there is exactly one pattern per other type of
// the same width.
let Predicates = [HasAVX512] in {
  // Bitcasts between 512-bit vector types.
  def : Pat<(v8f64  (bitconvert (v8i64  VR512:$src))), (v8f64  VR512:$src)>;
  def : Pat<(v8f64  (bitconvert (v16i32 VR512:$src))), (v8f64  VR512:$src)>;
  def : Pat<(v8f64  (bitconvert (v32i16 VR512:$src))), (v8f64  VR512:$src)>;
  def : Pat<(v8f64  (bitconvert (v64i8  VR512:$src))), (v8f64  VR512:$src)>;
  def : Pat<(v8f64  (bitconvert (v16f32 VR512:$src))), (v8f64  VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v8i64  VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v64i8  VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v16f32 (bitconvert (v8f64  VR512:$src))), (v16f32 VR512:$src)>;
  def : Pat<(v8i64  (bitconvert (v16i32 VR512:$src))), (v8i64  VR512:$src)>;
  def : Pat<(v8i64  (bitconvert (v32i16 VR512:$src))), (v8i64  VR512:$src)>;
  def : Pat<(v8i64  (bitconvert (v64i8  VR512:$src))), (v8i64  VR512:$src)>;
  def : Pat<(v8i64  (bitconvert (v8f64  VR512:$src))), (v8i64  VR512:$src)>;
  def : Pat<(v8i64  (bitconvert (v16f32 VR512:$src))), (v8i64  VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v8i64  VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v64i8  VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v16i32 (bitconvert (v8f64  VR512:$src))), (v16i32 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v8i64  VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v64i8  VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v32i16 (bitconvert (v8f64  VR512:$src))), (v32i16 VR512:$src)>;
  // The v16f32 -> v32i16 pattern used to be listed twice; one copy suffices.
  def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
  def : Pat<(v64i8  (bitconvert (v8i64  VR512:$src))), (v64i8  VR512:$src)>;
  def : Pat<(v64i8  (bitconvert (v16i32 VR512:$src))), (v64i8  VR512:$src)>;
  def : Pat<(v64i8  (bitconvert (v32i16 VR512:$src))), (v64i8  VR512:$src)>;
  def : Pat<(v64i8  (bitconvert (v8f64  VR512:$src))), (v64i8  VR512:$src)>;
  def : Pat<(v64i8  (bitconvert (v16f32 VR512:$src))), (v64i8  VR512:$src)>;

  // Bitcasts between 128-bit vector types.
  def : Pat<(v2i64 (bitconvert (v4i32 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v2f64 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v2i64 (bitconvert (v4f32 VR128X:$src))), (v2i64 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v4i32 (bitconvert (v4f32 VR128X:$src))), (v4i32 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v2i64 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 VR128X:$src))), (v8i16 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 VR128X:$src))), (v16i8 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v4i32 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 VR128X:$src))), (v4f32 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v2i64 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 VR128X:$src))), (v2f64 VR128X:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 VR128X:$src))), (v2f64 VR128X:$src)>;

  // Bitcasts between 256-bit vector types.
  def : Pat<(v4f64  (bitconvert (v8f32  VR256X:$src))), (v4f64  VR256X:$src)>;
  def : Pat<(v4f64  (bitconvert (v8i32  VR256X:$src))), (v4f64  VR256X:$src)>;
  def : Pat<(v4f64  (bitconvert (v4i64  VR256X:$src))), (v4f64  VR256X:$src)>;
  def : Pat<(v4f64  (bitconvert (v16i16 VR256X:$src))), (v4f64  VR256X:$src)>;
  def : Pat<(v4f64  (bitconvert (v32i8  VR256X:$src))), (v4f64  VR256X:$src)>;
  def : Pat<(v8f32  (bitconvert (v8i32  VR256X:$src))), (v8f32  VR256X:$src)>;
  def : Pat<(v8f32  (bitconvert (v4i64  VR256X:$src))), (v8f32  VR256X:$src)>;
  def : Pat<(v8f32  (bitconvert (v4f64  VR256X:$src))), (v8f32  VR256X:$src)>;
  def : Pat<(v8f32  (bitconvert (v32i8  VR256X:$src))), (v8f32  VR256X:$src)>;
  def : Pat<(v8f32  (bitconvert (v16i16 VR256X:$src))), (v8f32  VR256X:$src)>;
  def : Pat<(v4i64  (bitconvert (v8f32  VR256X:$src))), (v4i64  VR256X:$src)>;
  def : Pat<(v4i64  (bitconvert (v8i32  VR256X:$src))), (v4i64  VR256X:$src)>;
  def : Pat<(v4i64  (bitconvert (v4f64  VR256X:$src))), (v4i64  VR256X:$src)>;
  def : Pat<(v4i64  (bitconvert (v32i8  VR256X:$src))), (v4i64  VR256X:$src)>;
  def : Pat<(v4i64  (bitconvert (v16i16 VR256X:$src))), (v4i64  VR256X:$src)>;
  def : Pat<(v32i8  (bitconvert (v4f64  VR256X:$src))), (v32i8  VR256X:$src)>;
  def : Pat<(v32i8  (bitconvert (v4i64  VR256X:$src))), (v32i8  VR256X:$src)>;
  def : Pat<(v32i8  (bitconvert (v8f32  VR256X:$src))), (v32i8  VR256X:$src)>;
  def : Pat<(v32i8  (bitconvert (v8i32  VR256X:$src))), (v32i8  VR256X:$src)>;
  def : Pat<(v32i8  (bitconvert (v16i16 VR256X:$src))), (v32i8  VR256X:$src)>;
  def : Pat<(v8i32  (bitconvert (v32i8  VR256X:$src))), (v8i32  VR256X:$src)>;
  def : Pat<(v8i32  (bitconvert (v16i16 VR256X:$src))), (v8i32  VR256X:$src)>;
  def : Pat<(v8i32  (bitconvert (v8f32  VR256X:$src))), (v8i32  VR256X:$src)>;
  def : Pat<(v8i32  (bitconvert (v4i64  VR256X:$src))), (v8i32  VR256X:$src)>;
  def : Pat<(v8i32  (bitconvert (v4f64  VR256X:$src))), (v8i32  VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v8f32  VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v8i32  VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v4i64  VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v4f64  VR256X:$src))), (v16i16 VR256X:$src)>;
  def : Pat<(v16i16 (bitconvert (v32i8  VR256X:$src))), (v16i16 VR256X:$src)>;
}
269
//
// AVX-512: the VPXOR instruction writes zero to the upper part of its
// destination, so it is safe to use it to build all-zeros vectors.
//

// Pseudo instruction that materializes a 512-bit all-zeros vector.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512] in {
  def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                          [(set VR512:$dst, (v16f32 immAllZerosV))]>;
}

// All-zeros vectors of the other 512-bit types map to the same pseudo.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i64  immAllZerosV), (AVX512_512_SET0)>;
  def : Pat<(v16i32 immAllZerosV), (AVX512_512_SET0)>;
  def : Pat<(v8f64  immAllZerosV), (AVX512_512_SET0)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000285
286//===----------------------------------------------------------------------===//
287// AVX-512 - VECTOR INSERT
288//
// -- 32x4 fp form --
// Inserts a 128-bit lane (register or memory) into a 512-bit register.
// These records carry no selection pattern; separate Pat<> records select
// them.
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
  def VINSERTF32x4rr : AVX512AIi8<0x18, MRMSrcReg, (outs VR512:$dst),
      (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
      "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      []>, EVEX_4V, EVEX_V512;

  let mayLoad = 1 in
  def VINSERTF32x4rm : AVX512AIi8<0x18, MRMSrcMem, (outs VR512:$dst),
      (ins VR512:$src1, f128mem:$src2, i8imm:$src3),
      "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
}
301
// -- 64x4 fp form --
// Inserts a 256-bit half (register or memory) into a 512-bit register.
let hasSideEffects = 0, ExeDomain = SSEPackedDouble in {
  def VINSERTF64x4rr : AVX512AIi8<0x1a, MRMSrcReg, (outs VR512:$dst),
      (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
      "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      []>, EVEX_4V, EVEX_V512, VEX_W;

  let mayLoad = 1 in
  def VINSERTF64x4rm : AVX512AIi8<0x1a, MRMSrcMem, (outs VR512:$dst),
      (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
      "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
}
// -- 32x4 integer form --
// Integer counterpart of VINSERTF32x4: inserts a 128-bit lane.
let hasSideEffects = 0 in {
  def VINSERTI32x4rr : AVX512AIi8<0x38, MRMSrcReg, (outs VR512:$dst),
      (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
      "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      []>, EVEX_4V, EVEX_V512;

  let mayLoad = 1 in
  def VINSERTI32x4rm : AVX512AIi8<0x38, MRMSrcMem, (outs VR512:$dst),
      (ins VR512:$src1, i128mem:$src2, i8imm:$src3),
      "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
}
326
// -- 64x4 integer form --
// Integer counterpart of VINSERTF64x4: inserts a 256-bit half.
let hasSideEffects = 0 in {
  def VINSERTI64x4rr : AVX512AIi8<0x3a, MRMSrcReg, (outs VR512:$dst),
      (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
      "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      []>, EVEX_4V, EVEX_V512, VEX_W;

  let mayLoad = 1 in
  def VINSERTI64x4rm : AVX512AIi8<0x3a, MRMSrcMem, (outs VR512:$dst),
      (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
      "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
}
339
// 128-bit subvector insert, register source.  FP types select VINSERTF32x4rr
// and integer types select VINSERTI32x4rr.  The immediate is computed from
// the matched insert node ($ins).
def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1),
                                  (v4f32 VR128X:$src2), (iPTR imm)),
          (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1),
                                  (v2f64 VR128X:$src2), (iPTR imm)),
          (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1),
                                  (v2i64 VR128X:$src2), (iPTR imm)),
          (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1),
                                  (v4i32 VR128X:$src2), (iPTR imm)),
          (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
Robert Khasanoved0b2e92014-03-31 16:01:38 +0000352
// 128-bit subvector insert with the subvector loaded from memory; the load is
// folded into the rm form.  The v4i32 case matches a v2i64 load that is then
// bitcast to v4i32.
def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1),
                                  (loadv4f32 addr:$src2), (iPTR imm)),
          (VINSERTF32x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1),
                                  (bc_v4i32 (loadv2i64 addr:$src2)),
                                  (iPTR imm)),
          (VINSERTI32x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1),
                                  (loadv2f64 addr:$src2), (iPTR imm)),
          (VINSERTF32x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1),
                                  (loadv2i64 addr:$src2), (iPTR imm)),
          (VINSERTI32x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert128_imm VR512:$ins))>;
366
// 256-bit subvector insert, register source.  FP types select VINSERTF64x4rr
// and integer types select VINSERTI64x4rr.
//
// All four patterns must match through vinsert256_insert: the inserted
// operand is a 256-bit VR256X value and the immediate is computed by
// INSERT_get_vinsert256_imm.  The two integer patterns previously used the
// 128-bit fragment (vinsert128_insert) by mistake.
def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1),
                                  (v8f32 VR256X:$src2), (iPTR imm)),
          (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1),
                                  (v4f64 VR256X:$src2), (iPTR imm)),
          (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1),
                                  (v4i64 VR256X:$src2), (iPTR imm)),
          (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1),
                                  (v8i32 VR256X:$src2), (iPTR imm)),
          (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
379
// 256-bit subvector insert with the subvector loaded from memory; the load is
// folded into the rm form.  The v8i32 case matches a v4i64 load that is then
// bitcast to v8i32.
def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1),
                                  (loadv8f32 addr:$src2), (iPTR imm)),
          (VINSERTF64x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1),
                                  (loadv4f64 addr:$src2), (iPTR imm)),
          (VINSERTF64x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1),
                                  (loadv4i64 addr:$src2), (iPTR imm)),
          (VINSERTI64x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1),
                                  (bc_v8i32 (loadv4i64 addr:$src2)),
                                  (iPTR imm)),
          (VINSERTI64x4rm VR512:$src1, addr:$src2,
                          (INSERT_get_vinsert256_imm VR512:$ins))>;
393
// vinsertps - insert f32 to XMM (EVEX-encoded forms).
// Register form: both sources are XMM registers.
def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
    (ins VR128X:$src1, VR128X:$src2, u32u8imm:$src3),
    "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
    [(set VR128X:$dst,
          (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
    EVEX_4V;

// Memory form: the second source is a scalar f32 load, matched as a
// scalar_to_vector of that load.
def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
    (ins VR128X:$src1, f32mem:$src2, u32u8imm:$src3),
    "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
    [(set VR128X:$dst,
          (X86insertps VR128X:$src1,
                       (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                       imm:$src3))]>,
    EVEX_4V, EVEX_CD8<32, CD8VT1>;
406
407//===----------------------------------------------------------------------===//
408// AVX-512 VECTOR EXTRACT
409//---
// Floating-point subvector extracts from a 512-bit register.  These records
// have no selection pattern, so TableGen cannot infer memory behaviour: the
// store forms must be flagged mayStore explicitly.  hasSideEffects = 0
// otherwise models them as not touching memory at all.
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
  // -- 32x4 form --
  def VEXTRACTF32x4rr : AVX512AIi8<0x19, MRMDestReg, (outs VR128X:$dst),
      (ins VR512:$src1, i8imm:$src2),
      "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      []>, EVEX, EVEX_V512;
  // Previously missing mayStore; now matches VEXTRACTF64x4mr.
  let mayStore = 1 in
  def VEXTRACTF32x4mr : AVX512AIi8<0x19, MRMDestMem, (outs),
      (ins f128mem:$dst, VR512:$src1, i8imm:$src2),
      "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;

  // -- 64x4 form --
  def VEXTRACTF64x4rr : AVX512AIi8<0x1b, MRMDestReg, (outs VR256X:$dst),
      (ins VR512:$src1, i8imm:$src2),
      "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      []>, EVEX, EVEX_V512, VEX_W;
  let mayStore = 1 in
  def VEXTRACTF64x4mr : AVX512AIi8<0x1b, MRMDestMem, (outs),
      (ins f256mem:$dst, VR512:$src1, i8imm:$src2),
      "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
}
432
// Integer subvector extracts from a 512-bit register.  These records have no
// selection pattern, so the store forms must be flagged mayStore explicitly.
// hasSideEffects = 0 otherwise models them as not touching memory at all.
let hasSideEffects = 0 in {
  // -- 32x4 form --
  def VEXTRACTI32x4rr : AVX512AIi8<0x39, MRMDestReg, (outs VR128X:$dst),
      (ins VR512:$src1, i8imm:$src2),
      "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      []>, EVEX, EVEX_V512;
  // Previously missing mayStore; now matches VEXTRACTI64x4mr.
  let mayStore = 1 in
  def VEXTRACTI32x4mr : AVX512AIi8<0x39, MRMDestMem, (outs),
      (ins i128mem:$dst, VR512:$src1, i8imm:$src2),
      "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;

  // -- 64x4 form --
  def VEXTRACTI64x4rr : AVX512AIi8<0x3b, MRMDestReg, (outs VR256X:$dst),
      (ins VR512:$src1, i8imm:$src2),
      "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      []>, EVEX, EVEX_V512, VEX_W;
  let mayStore = 1 in
  def VEXTRACTI64x4mr : AVX512AIi8<0x3b, MRMDestMem, (outs),
      (ins i256mem:$dst, VR512:$src1, i8imm:$src2),
      "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
}
455
// 128-bit subvector extract from a 512-bit register.  FP types select
// VEXTRACTF32x4rr and integer types select VEXTRACTI32x4rr.  The immediate is
// computed from the matched extract node ($ext).
def : Pat<(vextract128_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
          (v4f32 (VEXTRACTF32x4rr VR512:$src1,
                  (EXTRACT_get_vextract128_imm VR128X:$ext)))>;

// The v4i32 case previously left the source type unspecified and selected the
// FP-domain instruction.  It now names v16i32 explicitly and uses the integer
// form, consistent with the v2i64 pattern below.
def : Pat<(vextract128_extract:$ext (v16i32 VR512:$src1), (iPTR imm)),
          (v4i32 (VEXTRACTI32x4rr VR512:$src1,
                  (EXTRACT_get_vextract128_imm VR128X:$ext)))>;

def : Pat<(vextract128_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
          (v2f64 (VEXTRACTF32x4rr VR512:$src1,
                  (EXTRACT_get_vextract128_imm VR128X:$ext)))>;

def : Pat<(vextract128_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
          (v2i64 (VEXTRACTI32x4rr VR512:$src1,
                  (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
471
472
// 256-bit subvector extract from a 512-bit register.  FP types select
// VEXTRACTF64x4rr and integer types select VEXTRACTI64x4rr.
def : Pat<(vextract256_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
          (v8f32 (VEXTRACTF64x4rr
                    VR512:$src1,
                    (EXTRACT_get_vextract256_imm VR256X:$ext)))>;

def : Pat<(vextract256_extract:$ext (v16i32 VR512:$src1), (iPTR imm)),
          (v8i32 (VEXTRACTI64x4rr
                    VR512:$src1,
                    (EXTRACT_get_vextract256_imm VR256X:$ext)))>;

def : Pat<(vextract256_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
          (v4f64 (VEXTRACTF64x4rr
                    VR512:$src1,
                    (EXTRACT_get_vextract256_imm VR256X:$ext)))>;

def : Pat<(vextract256_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
          (v4i64 (VEXTRACTI64x4rr
                    VR512:$src1,
                    (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
488
// Extracting the subvector at index 0 of a 512-bit vector is a plain
// subregister copy and needs no instruction.

// zmm -> ymm (256-bit subvector).
def : Pat<(v8i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
          (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm))>;
def : Pat<(v8f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
          (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm))>;
def : Pat<(v4i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
          (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm))>;
def : Pat<(v4f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
          (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm))>;

// zmm -> xmm (128-bit subvector).
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
          (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
          (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
509
510
// Inserting a subvector at index 0 of an undef 512-bit vector is a plain
// subregister copy and needs no instruction.

// xmm -> zmm: goes through an intermediate 256-bit IMPLICIT_DEF, inserting
// into sub_xmm first and then into sub_ymm.
def : Pat<(insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)),
            (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
            sub_ymm)>;
def : Pat<(insert_subvector undef, (v2f64 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)),
            (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
            sub_ymm)>;
def : Pat<(insert_subvector undef, (v4i32 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)),
            (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
            sub_ymm)>;
def : Pat<(insert_subvector undef, (v4f32 VR128X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
            (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
            sub_ymm)>;

// ymm -> zmm: a single insert into sub_ymm.
def : Pat<(insert_subvector undef, (v4i64 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v4f64 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
538
// vextractps - extract 32 bits from XMM (EVEX-encoded forms).
// Register form: the selected element lands in a 32-bit GPR.  The v4f32
// source is viewed as v4i32 for the extract.
def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
    (ins VR128X:$src1, u32u8imm:$src2),
    "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
    [(set GR32:$dst,
          (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
    EVEX;

// Memory form: the selected element is stored to $dst.
def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
    (ins f32mem:$dst, VR128X:$src1, u32u8imm:$src2),
    "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
    [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
            addr:$dst)]>,
    EVEX, EVEX_CD8<32, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000551
552//===---------------------------------------------------------------------===//
553// AVX-512 BROADCAST
554//---
// Floating-point broadcast: emits a register-source (rr) and a memory-source
// (rm) record.  Neither carries a selection pattern.
multiclass avx512_fp_broadcast<bits<8> opc, string OpcodeStr,
                               RegisterClass DestRC,
                               RegisterClass SrcRC, X86MemOperand x86memop> {
  // Broadcast from a register.
  def rr : AVX5128I<opc, MRMSrcReg, (outs DestRC:$dst), (ins SrcRC:$src),
                    OpcodeStr#" \t{$src, $dst|$dst, $src}",
                    []>, EVEX;
  // Broadcast from memory.
  def rm : AVX5128I<opc, MRMSrcMem, (outs DestRC:$dst), (ins x86memop:$src),
                    OpcodeStr#" \t{$src, $dst|$dst, $src}",
                    []>, EVEX;
}
// 512-bit single-precision broadcast.
let ExeDomain = SSEPackedSingle in {
  defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss",
                                           VR512, VR128X, f32mem>,
                       EVEX_V512, EVEX_CD8<32, CD8VT1>;
}

// 512-bit double-precision broadcast.
let ExeDomain = SSEPackedDouble in {
  defm VBROADCASTSDZ : avx512_fp_broadcast<0x19, "vbroadcastsd",
                                           VR512, VR128X, f64mem>,
                       EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
}
575
// Broadcast of a loaded scalar selects the memory form directly.
def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))),
          (VBROADCASTSSZrm addr:$src)>;
def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))),
          (VBROADCASTSDZrm addr:$src)>;

// The address-taking broadcast intrinsics map onto the same memory forms.
def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
          (VBROADCASTSSZrm addr:$src)>;
def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
          (VBROADCASTSDZrm addr:$src)>;
585
// Integer broadcast from a general-purpose register into VR512.
//   Zrr  - unmasked form.
//   Zkrr - zero-masking form (EVEX_KZ); takes the mask as an extra operand.
// Neither record carries a pattern; selection is done by separate Pat<>
// records.
multiclass avx512_int_broadcast_reg<bits<8> opc, string OpcodeStr,
                                    RegisterClass SrcRC, RegisterClass KRC> {
  def Zrr : AVX5128I<opc, MRMSrcReg,
                     (outs VR512:$dst), (ins SrcRC:$src),
                     OpcodeStr # " \t{$src, $dst|$dst, $src}",
                     []>, EVEX, EVEX_V512;
  def Zkrr : AVX5128I<opc, MRMSrcReg,
                      (outs VR512:$dst), (ins KRC:$mask, SrcRC:$src),
                      OpcodeStr #
                      " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                      []>, EVEX, EVEX_V512, EVEX_KZ;
}
597
// GPR-source vpbroadcastd / vpbroadcastq (512-bit destination).
defm VPBROADCASTDr : avx512_int_broadcast_reg<0x7C, "vpbroadcastd",
                                              GR32, VK16WM>;
defm VPBROADCASTQr : avx512_int_broadcast_reg<0x7C, "vpbroadcastq",
                                              GR64, VK8WM>,
                     VEX_W;

// Zero-extending a mask to a vector: zero-masked broadcast of the constant 1.
def : Pat<(v16i32 (X86vzext VK16WM:$mask)),
          (VPBROADCASTDrZkrr VK16WM:$mask, (i32 (MOV32ri 0x1)))>;

def : Pat<(v8i64 (X86vzext VK8WM:$mask)),
          (VPBROADCASTQrZkrr VK8WM:$mask, (i64 (MOV64ri 0x1)))>;

// Plain and masked broadcast nodes with a GPR source.
def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))),
          (VPBROADCASTDrZrr GR32:$src)>;
def : Pat<(v16i32 (X86VBroadcastm VK16WM:$mask, (i32 GR32:$src))),
          (VPBROADCASTDrZkrr VK16WM:$mask, GR32:$src)>;
def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))),
          (VPBROADCASTQrZrr GR64:$src)>;
def : Pat<(v8i64 (X86VBroadcastm VK8WM:$mask, (i64 GR64:$src))),
          (VPBROADCASTQrZkrr VK8WM:$mask, GR64:$src)>;

// Unmasked GPR broadcast intrinsics.
def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_i32_512 (i32 GR32:$src))),
          (VPBROADCASTDrZrr GR32:$src)>;
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_i64_512 (i64 GR64:$src))),
          (VPBROADCASTQrZrr GR64:$src)>;

// Masked GPR broadcast intrinsics with an all-zeros pass-through operand: the
// GPR mask is moved into the write-mask register class and the zero-masking
// form is used.
def : Pat<(v16i32 (int_x86_avx512_mask_pbroadcast_d_gpr_512 (i32 GR32:$src),
                     (v16i32 immAllZerosV), (i16 GR16:$mask))),
          (VPBROADCASTDrZkrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
                             GR32:$src)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pbroadcast_q_gpr_512 (i64 GR64:$src),
                    (bc_v8i64 (v16i32 immAllZerosV)), (i8 GR8:$mask))),
          (VPBROADCASTQrZkrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
                             GR64:$src)>;
628
// Integer broadcast from an XMM register or from memory, each with an
// unmasked and a zero-masking (EVEX_KZ) variant:
//   rr / krr - source is VR128X:$src, typed as SrcVT in the pattern
//   rm / krm - source is loaded through ld_frag from x86memop:$src
// The unmasked forms match X86VBroadcast; the masked forms match
// X86VBroadcastm with KRC:$mask as the first operand.
multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
                          X86MemOperand x86memop, PatFrag ld_frag,
                          RegisterClass DstRC, ValueType OpVT, ValueType SrcVT,
                          RegisterClass KRC> {
  def rr : AVX5128I<opc, MRMSrcReg,
             (outs DstRC:$dst), (ins VR128X:$src),
             OpcodeStr # " \t{$src, $dst|$dst, $src}",
             [(set DstRC:$dst,
                   (OpVT (X86VBroadcast (SrcVT VR128X:$src))))]>,
           EVEX;
  def krr : AVX5128I<opc, MRMSrcReg,
              (outs DstRC:$dst), (ins KRC:$mask, VR128X:$src),
              OpcodeStr #
              " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
              [(set DstRC:$dst,
                    (OpVT (X86VBroadcastm KRC:$mask,
                                          (SrcVT VR128X:$src))))]>,
            EVEX, EVEX_KZ;
  let mayLoad = 1 in {
    def rm : AVX5128I<opc, MRMSrcMem,
               (outs DstRC:$dst), (ins x86memop:$src),
               OpcodeStr # " \t{$src, $dst|$dst, $src}",
               [(set DstRC:$dst,
                     (OpVT (X86VBroadcast (ld_frag addr:$src))))]>,
             EVEX;
    def krm : AVX5128I<opc, MRMSrcMem,
                (outs DstRC:$dst), (ins KRC:$mask, x86memop:$src),
                OpcodeStr #
                " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                [(set DstRC:$dst,
                      (OpVT (X86VBroadcastm KRC:$mask,
                                            (ld_frag addr:$src))))]>,
              EVEX, EVEX_KZ;
  }
}
657
// XMM/memory-source vpbroadcastd and vpbroadcastq with VR512 destinations.
defm VPBROADCASTDZ : avx512_int_broadcast_rm<0x58, "vpbroadcastd",
                                             i32mem, loadi32, VR512,
                                             v16i32, v4i32, VK16WM>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq",
                                             i64mem, loadi64, VR512,
                                             v8i64, v2i64, VK8WM>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
664
// Sub-vector broadcast from memory into VR512: an unmasked form (rm) and a
// zero-masking form (krm). Both have empty pattern lists, and the ld_frag
// parameter is accepted but not referenced by either record.
multiclass avx512_int_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                          X86MemOperand x86memop, PatFrag ld_frag,
                          RegisterClass KRC> {
  let mayLoad = 1 in {
    def rm : AVX5128I<opc, MRMSrcMem,
               (outs VR512:$dst), (ins x86memop:$src),
               OpcodeStr # " \t{$src, $dst|$dst, $src}",
               []>, EVEX;
    def krm : AVX5128I<opc, MRMSrcMem,
                (outs VR512:$dst), (ins KRC:$mask, x86memop:$src),
                OpcodeStr #
                " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                []>, EVEX, EVEX_KZ;
  }
}
679
// Sub-vector integer broadcasts from memory into a 512-bit register.
//
// The write-mask class follows the number of lanes in the 512-bit result:
// sixteen 32-bit lanes for the 32x4 form (VK16WM), eight 64-bit lanes for the
// 64x4 form (VK8WM). The 64x4 form previously used VK16WM, which did not
// match its element count or the other quadword broadcast (VPBROADCASTQZ).
defm VBROADCASTI32X4 : avx512_int_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                                       i128mem, loadv2i64, VK16WM>,
                                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_int_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                                       i256mem, loadv4i64, VK8WM>, VEX_W,
                                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
686
// XMM-source integer broadcast intrinsics.
def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))),
          (VPBROADCASTDZrr VR128X:$src)>;
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
          (VPBROADCASTQZrr VR128X:$src)>;

// XMM-source floating-point broadcast nodes.
def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))),
          (VBROADCASTSSZrr VR128X:$src)>;
def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))),
          (VBROADCASTSDZrr VR128X:$src)>;

// XMM-source floating-point broadcast intrinsics.
def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))),
          (VBROADCASTSSZrr VR128X:$src)>;
def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
          (VBROADCASTSDZrr VR128X:$src)>;

// Fallback for when the load node used by the memory-form patterns has
// additional users, which prevents those patterns from being selected: the
// scalar register is copied into VR128X and the register form is used.
def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
          (VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
          (VBROADCASTSDZrr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;


// 256-bit masked broadcast of a loaded i32: performed with the 512-bit
// zero-masking memory form, then the low YMM half is extracted.
let Predicates = [HasAVX512] in
def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))),
          (EXTRACT_SUBREG
            (v16i32 (VPBROADCASTDZkrm
                       (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
                       addr:$src)),
            sub_ymm)>;
716//===----------------------------------------------------------------------===//
717// AVX-512 BROADCAST MASK TO VECTOR REGISTER
718//---
719
// Broadcast of a mask register into a vector register (vpbroadcastm*).
//
// The destination vector register is encoded in ModRM.reg and the source mask
// register in ModRM.r/m, so the record uses the MRMSrcReg form. (MRMDestReg
// would place the destination in ModRM.r/m and swap the two operands in the
// emitted encoding.)
//
// OpVT and SrcVT are accepted for interface compatibility but are not
// referenced: the record has an empty pattern list.
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                       RegisterClass DstRC, RegisterClass KRC,
                       ValueType OpVT, ValueType SrcVT> {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs DstRC:$dst), (ins KRC:$src),
                  !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                  []>, EVEX;
}
727
// Mask-to-vector broadcasts; only available when HasCDI holds.
let Predicates = [HasCDI] in {
  defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                               VR512, VK16, v16i32, v16i1>,
                         EVEX_V512;
  defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                               VR512, VK8, v8i64, v8i1>,
                         EVEX_V512, VEX_W;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000734
735//===----------------------------------------------------------------------===//
736// AVX-512 - VPERM
737//
738// -- immediate form --
// Permute controlled by an 8-bit immediate.
//   ri - register source:  $dst = OpNode($src1, imm8)
//   mi - memory source:    $dst = OpNode(mem_frag(addr), imm8)
multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                         SDNode OpNode, PatFrag mem_frag,
                         X86MemOperand x86memop, ValueType OpVT> {
  def ri : AVX512AIi8<opc, MRMSrcReg,
             (outs RC:$dst), (ins RC:$src1, i8imm:$src2),
             OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
             [(set RC:$dst,
                   (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
           EVEX;
  def mi : AVX512AIi8<opc, MRMSrcMem,
             (outs RC:$dst), (ins x86memop:$src1, i8imm:$src2),
             OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
             [(set RC:$dst,
                   (OpVT (OpNode (mem_frag addr:$src1), (i8 imm:$src2))))]>,
           EVEX;
}
757
// Immediate-controlled 64-bit element permutes on VR512 (ri / mi records).
defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", VR512, X86VPermi,
                               memopv8i64, i512mem, v8i64>,
               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

let ExeDomain = SSEPackedDouble in
defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", VR512, X86VPermi,
                                memopv8f64, f512mem, v8f64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
763
764// -- VPERM - register form --
// Two-operand permute matched against X86VPermv.
//   rr - both operands in registers
//   rm - second operand loaded through mem_frag
multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                       PatFrag mem_frag, X86MemOperand x86memop,
                       ValueType OpVT> {

  def rr : AVX5128I<opc, MRMSrcReg,
             (outs RC:$dst), (ins RC:$src1, RC:$src2),
             OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
             [(set RC:$dst,
                   (OpVT (X86VPermv RC:$src1, RC:$src2)))]>,
           EVEX_4V;

  def rm : AVX5128I<opc, MRMSrcMem,
             (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
             OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
             [(set RC:$dst,
                   (OpVT (X86VPermv RC:$src1, (mem_frag addr:$src2))))]>,
           EVEX_4V;
}
783
// Register-controlled permutes on VR512 (rr / rm records).
defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512,
                           memopv16i32, i512mem, v16i32>,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512,
                           memopv8i64, i512mem, v8i64>,
               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

let ExeDomain = SSEPackedSingle in
defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512,
                            memopv16f32, f512mem, v16f32>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;

let ExeDomain = SSEPackedDouble in
defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512,
                            memopv8f64, f512mem, v8f64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
794
// -- VPERMI2 / VPERMT2 - 3 source operands form --
// Three-source permute. $src1 is tied to $dst for every record.
//
//   rr   / rm   - unmasked:      $dst = OpNode($src1, $src2, $src3)
//   rrk  / rmk  - merge-masking: lanes not selected by $mask keep $src1
//   rrkz / rmkz - zero-masking:  lanes not selected by $mask become zero
//
// In the *m* records $src3 is loaded through mem_frag from x86memop.
//
// Every asm string has the same shape in both syntax variants. The rrkz
// string previously had a stray space before the '|' separator, which left a
// trailing blank in the first syntax variant only.
multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC,
                          PatFrag mem_frag, X86MemOperand x86memop,
                          SDNode OpNode, ValueType OpVT, RegisterClass KRC> {
let Constraints = "$src1 = $dst" in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2, RC:$src3),
                   !strconcat(OpcodeStr,
                       " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>,
                    EVEX_4V;

  def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
                   !strconcat(OpcodeStr,
                       " \t{$src3, $src2, $dst {${mask}}|"
                       "$dst {${mask}}, $src2, $src3}"),
                   [(set RC:$dst, (OpVT (vselect KRC:$mask,
                                           (OpNode RC:$src1, RC:$src2,
                                              RC:$src3),
                                           RC:$src1)))]>,
                    EVEX_4V, EVEX_K;

  let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
    def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
                   (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
                   !strconcat(OpcodeStr,
                       " \t{$src3, $src2, $dst {${mask}} {z}|"
                       "$dst {${mask}} {z}, $src2, $src3}"),
                   [(set RC:$dst, (OpVT (vselect KRC:$mask,
                                           (OpNode RC:$src1, RC:$src2,
                                              RC:$src3),
                                           (OpVT (bitconvert
                                              (v16i32 immAllZerosV))))))]>,
                    EVEX_4V, EVEX_KZ;

  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, RC:$src2, x86memop:$src3),
                   !strconcat(OpcodeStr,
                    " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (OpNode RC:$src1, RC:$src2,
                      (mem_frag addr:$src3))))]>, EVEX_4V;

  def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
                   !strconcat(OpcodeStr,
                    " \t{$src3, $src2, $dst {${mask}}|"
                    "$dst {${mask}}, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (vselect KRC:$mask,
                                    (OpNode RC:$src1, RC:$src2,
                                       (mem_frag addr:$src3)),
                                    RC:$src1)))]>,
                    EVEX_4V, EVEX_K;

  let AddedComplexity = 10 in // Prefer over the rrkz variant
    def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
                   !strconcat(OpcodeStr,
                    " \t{$src3, $src2, $dst {${mask}} {z}|"
                    "$dst {${mask}} {z}, $src2, $src3}"),
                   [(set RC:$dst,
                     (OpVT (vselect KRC:$mask,
                                    (OpNode RC:$src1, RC:$src2,
                                            (mem_frag addr:$src3)),
                                    (OpVT (bitconvert
                                       (v16i32 immAllZerosV))))))]>,
                    EVEX_4V, EVEX_KZ;
  }
}
// vpermi2{d,q,ps,pd}: three-source permutes matched against X86VPermiv3.
// The mask class tracks the lane count (VK16WM for 32-bit elements, VK8WM for
// 64-bit elements).
defm VPERMI2D  : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32,
                                  i512mem, X86VPermiv3, v16i32, VK16WM>,
                 EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q  : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64,
                                  i512mem, X86VPermiv3, v8i64, VK8WM>,
                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32,
                                  i512mem, X86VPermiv3, v16f32, VK16WM>,
                 EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64,
                                  i512mem, X86VPermiv3, v8f64, VK8WM>,
                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000879
// vpermt2* flavour of the three-source permute. Inherits all records from
// avx512_perm_3src (with the mnemonic "vpermt2" + Suffix) and adds patterns
// for the int_x86_avx512_mask_vpermt_<Suffix>_512 intrinsic:
//   * mask operand -1   -> unmasked rr record
//   * mask in MRC       -> merge-masking rrk record, with the GPR mask copied
//                          into KRC
// Note the operand order: the intrinsic takes ($idx, $src1, $src2) while the
// instruction takes ($src1, $idx, $src2).
multiclass avx512_perm_table_3src<bits<8> opc, string Suffix, RegisterClass RC,
                          PatFrag mem_frag, X86MemOperand x86memop,
                          SDNode OpNode, ValueType OpVT, RegisterClass KRC,
                          ValueType MaskVT, RegisterClass MRC> :
        avx512_perm_3src<opc, "vpermt2"##Suffix, RC, mem_frag, x86memop,
                         OpNode, OpVT, KRC> {
  def : Pat<(OpVT (!cast<Intrinsic>(
                      "int_x86_avx512_mask_vpermt_"##Suffix##"_512")
                     VR512:$idx, VR512:$src1, VR512:$src2, -1)),
            (!cast<Instruction>(NAME#rr)
               VR512:$src1, VR512:$idx, VR512:$src2)>;

  def : Pat<(OpVT (!cast<Intrinsic>(
                      "int_x86_avx512_mask_vpermt_"##Suffix##"_512")
                     VR512:$idx, VR512:$src1, VR512:$src2, MRC:$mask)),
            (!cast<Instruction>(NAME#rrk)
               VR512:$src1,
               (MaskVT (COPY_TO_REGCLASS MRC:$mask, KRC)),
               VR512:$idx, VR512:$src2)>;
}
895
// vpermt2{d,q,ps,pd}: three-source permutes matched against X86VPermv3, plus
// the masked-intrinsic patterns contributed by avx512_perm_table_3src.
defm VPERMT2D  : avx512_perm_table_3src<0x7E, "d", VR512, memopv16i32,
                                        i512mem, X86VPermv3, v16i32,
                                        VK16WM, v16i1, GR16>,
                 EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q  : avx512_perm_table_3src<0x7E, "q", VR512, memopv8i64,
                                        i512mem, X86VPermv3, v8i64,
                                        VK8WM, v8i1, GR8>,
                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2PS : avx512_perm_table_3src<0x7F, "ps", VR512, memopv16f32,
                                        i512mem, X86VPermv3, v16f32,
                                        VK16WM, v16i1, GR16>,
                 EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_table_3src<0x7F, "pd", VR512, memopv8f64,
                                        i512mem, X86VPermv3, v8f64,
                                        VK8WM, v8i1, GR8>,
                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky299cf5112014-04-29 09:09:15 +0000908
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +0000909//===----------------------------------------------------------------------===//
910// AVX-512 - BLEND using mask
911//
// Mask-controlled blend.
//   rr - pattern is (OpNode $mask, $src2, $src1): note that the instruction's
//        $src2 is OpNode's second operand and $src1 its third.
//   rm - memory form of $src2; has no pattern, and mem_frag is not referenced.
multiclass avx512_blendmask<bits<8> opc, string OpcodeStr,
                          RegisterClass KRC, RegisterClass RC,
                          X86MemOperand x86memop, PatFrag mem_frag,
                          SDNode OpNode, ValueType vt> {
  def rr : AVX5128I<opc, MRMSrcReg,
             (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
             OpcodeStr #
             " \t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}",
             [(set RC:$dst,
                   (OpNode KRC:$mask, (vt RC:$src2), (vt RC:$src1)))]>,
           EVEX_4V, EVEX_K;
  let mayLoad = 1 in
  def rm : AVX5128I<opc, MRMSrcMem,
             (outs RC:$dst), (ins KRC:$mask, RC:$src1, x86memop:$src2),
             OpcodeStr #
             " \t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}",
             []>,
           EVEX_4V, EVEX_K;
}
929
// 512-bit mask blends and the patterns for their masked-blend intrinsics.
//
// Two consistency fixes relative to the previous revision:
//  * The integer blends use the integer memory operand (i512mem), matching
//    their integer load fragments, instead of f512mem.
//  * The integer intrinsic patterns copy the GPR mask into the write-mask
//    classes (VK16WM / VK8WM), which is the class of the instructions' $mask
//    operand and what the floating-point patterns already do. Copying into
//    VK16 / VK8 produced a value in a different class than the operand.
let ExeDomain = SSEPackedSingle in
defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps",
                              VK16WM, VR512, f512mem,
                              memopv16f32, vselect, v16f32>,
                              EVEX_CD8<32, CD8VF>, EVEX_V512;
let ExeDomain = SSEPackedDouble in
defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd",
                              VK8WM, VR512, f512mem,
                              memopv8f64, vselect, v8f64>,
                              VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;

def : Pat<(v16f32 (int_x86_avx512_mask_blend_ps_512 (v16f32 VR512:$src1),
                 (v16f32 VR512:$src2), (i16 GR16:$mask))),
        (VBLENDMPSZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
         VR512:$src1, VR512:$src2)>;

def : Pat<(v8f64 (int_x86_avx512_mask_blend_pd_512 (v8f64 VR512:$src1),
                 (v8f64 VR512:$src2), (i8 GR8:$mask))),
        (VBLENDMPDZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
         VR512:$src1, VR512:$src2)>;

defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd",
                              VK16WM, VR512, i512mem,
                              memopv16i32, vselect, v16i32>,
                              EVEX_CD8<32, CD8VF>, EVEX_V512;

defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq",
                              VK8WM, VR512, i512mem,
                              memopv8i64, vselect, v8i64>,
                              VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;

def : Pat<(v16i32 (int_x86_avx512_mask_blend_d_512 (v16i32 VR512:$src1),
                 (v16i32 VR512:$src2), (i16 GR16:$mask))),
        (VPBLENDMDZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
         VR512:$src1, VR512:$src2)>;

def : Pat<(v8i64 (int_x86_avx512_mask_blend_q_512 (v8i64 VR512:$src1),
                 (v8i64 VR512:$src2), (i8 GR8:$mask))),
        (VPBLENDMQZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
         VR512:$src1, VR512:$src2)>;
970
// 256-bit vselect is implemented with the 512-bit blends: both YMM inputs are
// placed in the low half of a ZMM value, the 8-bit mask is moved to the
// 16-bit write-mask class, and the low YMM half of the result is extracted.
// The operands are swapped ($src2 first) because the blend's rr pattern is
// (vselect $mask, <instr $src2>, <instr $src1>).
let Predicates = [HasAVX512] in {
  def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask),
                            (v8f32 VR256X:$src1), (v8f32 VR256X:$src2))),
            (EXTRACT_SUBREG
              (v16f32 (VBLENDMPSZrr
                 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
                 (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
                 (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask),
                            (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG
              (v16i32 (VPBLENDMDZrr
                 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
                 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
                 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))),
              sub_ymm)>;
}
Elena Demikhovsky47fc44e2013-12-16 13:52:35 +0000986//===----------------------------------------------------------------------===//
987// Compare Instructions
988//===----------------------------------------------------------------------===//
989
990// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
// Scalar compare producing a VK1 mask result.
//   rr / rm           - condition code given as a CC operand; carry patterns
//   rri_alt / rmi_alt - assembler-only records taking a raw i8 immediate in
//                       place of the condition code; no patterns
// NOTE(review): the rm records use the itinerary IIC_SSE_ALU_F32P_RM while the
// rr records use IIC_SSE_ALU_F32S_RR; confirm whether the packed ("P") class
// on the memory forms is intentional.
multiclass avx512_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
                            Operand CC, SDNode OpNode, ValueType VT,
                            PatFrag ld_frag, string asm, string asm_alt> {
  def rr : AVX512Ii8<0xC2, MRMSrcReg,
             (outs VK1:$dst), (ins RC:$src1, RC:$src2, CC:$cc),
             asm,
             [(set VK1:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
             IIC_SSE_ALU_F32S_RR>, EVEX_4V;
  def rm : AVX512Ii8<0xC2, MRMSrcMem,
             (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc),
             asm,
             [(set VK1:$dst, (OpNode (VT RC:$src1),
                                     (ld_frag addr:$src2), imm:$cc))],
             IIC_SSE_ALU_F32P_RM>, EVEX_4V;
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512Ii8<0xC2, MRMSrcReg,
                    (outs VK1:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
                    asm_alt, [], IIC_SSE_ALU_F32S_RR>, EVEX_4V;
    def rmi_alt : AVX512Ii8<0xC2, MRMSrcMem,
                    (outs VK1:$dst),
                    (ins RC:$src1, x86memop:$src2, i8imm:$cc),
                    asm_alt, [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
  }
}
1011
// Scalar single/double compares. The first asm string embeds the condition
// code in the mnemonic; the second takes it as an explicit operand.
let Predicates = [HasAVX512] in {
  defm VCMPSSZ : avx512_cmp_scalar<FR32X, f32mem, AVXCC, X86cmpms, f32,
                   loadf32,
                   "vcmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   "vcmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
                 XS;
  defm VCMPSDZ : avx512_cmp_scalar<FR64X, f64mem, AVXCC, X86cmpms, f64,
                   loadf64,
                   "vcmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   "vcmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
                 XD, VEX_W;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001022
// Packed integer compare writing a mask register. All register classes,
// value types and operands come from the X86VectorVTInfo argument `_`.
//   rr  / rm  - unmasked compare of $src1 with a register / loaded $src2
//   rrk / rmk - same compare, ANDed with the write mask $mask
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86VectorVTInfo _> {
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst,
                   (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))],
             IIC_SSE_ALU_F32P_RR>, EVEX_4V;

  let mayLoad = 1 in
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst,
                   (OpNode (_.VT _.RC:$src1),
                           (_.VT (bitconvert (_.LdFrag addr:$src2)))))],
             IIC_SSE_ALU_F32P_RM>, EVEX_4V;

  def rrk : AVX512BI<opc, MRMSrcReg,
              (outs _.KRC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(OpcodeStr,
                         "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst,
                    (and _.KRCWM:$mask,
                         (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))],
              IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;

  let mayLoad = 1 in
  def rmk : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
              !strconcat(OpcodeStr,
                         "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst,
                    (and _.KRCWM:$mask,
                         (OpNode (_.VT _.RC:$src1),
                                 (_.VT (bitconvert
                                          (_.LdFrag addr:$src2))))))],
              IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
}
1055
// Broadcast-memory forms of the packed integer compare (EVEX_B): $src2 is a
// scalar memory operand whose loaded value is broadcast (X86VBroadcast) before
// the compare. The broadcast decoration _.BroadcastStr is appended to $src2
// in the asm string.
//   rmb  - unmasked
//   rmbk - result ANDed with the write mask $mask
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, X86VectorVTInfo _> {
  let mayLoad = 1 in {
    def rmb : AVX512BI<opc, MRMSrcMem,
                (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
                !strconcat(OpcodeStr,
                           "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
                           "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
                [(set _.KRC:$dst,
                      (OpNode (_.VT _.RC:$src1),
                              (X86VBroadcast (_.ScalarLdFrag addr:$src2))))],
                IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;

    def rmbk : AVX512BI<opc, MRMSrcMem,
                 (outs _.KRC:$dst),
                 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
                 !strconcat(OpcodeStr,
                            "\t{${src2}", _.BroadcastStr,
                            ", $src1, $dst {${mask}}|",
                            "$dst {${mask}}, $src1, ${src2}",
                            _.BroadcastStr, "}"),
                 [(set _.KRC:$dst,
                       (and _.KRCWM:$mask,
                            (OpNode (_.VT _.RC:$src1),
                                    (X86VBroadcast
                                       (_.ScalarLdFrag addr:$src2)))))],
                 IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
  }
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001079
// Instantiates avx512_icmp_packed at all three vector lengths. The 512-bit
// records (Z) require `prd`; the 256-bit (Z256) and 128-bit (Z128) records
// additionally require HasVLX.
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info128>,
                EVEX_V128;
  }
}
1093
// Instantiates the broadcast-memory compare forms (avx512_icmp_packed_rmb) at
// all three vector lengths, with the same predicate split as
// avx512_icmp_packed_vl: `prd` for 512-bit, `prd` plus HasVLX for 256/128-bit.
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     SDNode OpNode,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode,
                                       VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode,
                                       VTInfo.info128>,
                EVEX_V128;
  }
}
1108
// Packed integer equality compares. The byte/word forms are gated on HasBWI;
// the dword/qword forms are gated on HasAVX512 and also get the
// broadcast-memory records.
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
                                      avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>;

defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm,
                                      avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>;

defm VPCMPEQD : avx512_icmp_packed_vl<0x76, "vpcmpeqd", X86pcmpeqm,
                                      avx512vl_i32_info, HasAVX512>,
                avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm,
                                          avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ : avx512_icmp_packed_vl<0x29, "vpcmpeqq", X86pcmpeqm,
                                      avx512vl_i64_info, HasAVX512>,
                avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm,
                                          avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Packed integer greater-than compares, with the same predicate split.
defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
                                      avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>;

defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
                                      avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>;

defm VPCMPGTD : avx512_icmp_packed_vl<0x66, "vpcmpgtd", X86pcmpgtm,
                                      avx512vl_i32_info, HasAVX512>,
                avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
                                          avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ : avx512_icmp_packed_vl<0x37, "vpcmpgtq", X86pcmpgtm,
                                      avx512vl_i64_info, HasAVX512>,
                avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
                                          avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001148
// 256-bit dword compares are performed with the 512-bit instructions: both
// YMM inputs are placed in the low half of a ZMM value and the mask result is
// copied to the 8-bit mask class.
def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
          (COPY_TO_REGCLASS
            (VPCMPGTDZrr
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))),
            VK8)>;

def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
          (COPY_TO_REGCLASS
            (VPCMPEQDZrr
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))),
            VK8)>;
1158
// Packed integer compare with a condition-code operand ("vpcmp<cc><Suffix>").
//   rri / rmi  - condition code given as a CC operand; carry patterns
//   *_alt      - assembler-only records that accept an explicit i8 immediate
//                instead of the condition code; the *k_alt records also take
//                a write mask (WMRC:$mask). None of them carries a pattern.
multiclass avx512_icmp_cc<bits<8> opc, RegisterClass WMRC, RegisterClass KRC,
              RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
              SDNode OpNode, ValueType vt, Operand CC, string Suffix> {
  def rri : AVX512AIi8<opc, MRMSrcReg,
              (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [(set KRC:$dst,
                    (OpNode (vt RC:$src1), (vt RC:$src2), imm:$cc))],
              IIC_SSE_ALU_F32P_RR>, EVEX_4V;
  def rmi : AVX512AIi8<opc, MRMSrcMem,
              (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [(set KRC:$dst,
                    (OpNode (vt RC:$src1), (memop_frag addr:$src2),
                            imm:$cc))],
              IIC_SSE_ALU_F32P_RM>, EVEX_4V;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512AIi8<opc, MRMSrcReg,
                    (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
                    !strconcat("vpcmp", Suffix,
                      "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
                    [], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
    def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
                     (outs KRC:$dst),
                     (ins WMRC:$mask, RC:$src1, RC:$src2, i8imm:$cc),
                     !strconcat("vpcmp", Suffix,
                       "\t{$cc, $src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2, $cc}"),
                     [], IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
    def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
                    (outs KRC:$dst),
                    (ins RC:$src1, x86memop:$src2, i8imm:$cc),
                    !strconcat("vpcmp", Suffix,
                      "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
                    [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
    def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
                     (outs KRC:$dst),
                     (ins WMRC:$mask, RC:$src1, x86memop:$src2, i8imm:$cc),
                     !strconcat("vpcmp", Suffix,
                       "\t{$cc, $src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2, $cc}"),
                     [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
  }
}
1198
// 512-bit instantiations of avx512_icmp_cc.  The X86cmpm forms use opcode
// 0x1F and the X86cmpmu forms use opcode 0x1E; the 64-bit element variants
// additionally carry VEX_W.
defm VPCMPDZ  : avx512_icmp_cc<0x1F, VK16WM, VK16, VR512, i512mem,
                               memopv16i32, X86cmpm, v16i32, AVXCC, "d">,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPCMPUDZ : avx512_icmp_cc<0x1E, VK16WM, VK16, VR512, i512mem,
                               memopv16i32, X86cmpmu, v16i32, AVXCC, "ud">,
                EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPCMPQZ  : avx512_icmp_cc<0x1F, VK8WM, VK8, VR512, i512mem,
                               memopv8i64, X86cmpm, v8i64, AVXCC, "q">,
                VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
defm VPCMPUQZ : avx512_icmp_cc<0x1E, VK8WM, VK8, VR512, i512mem,
                               memopv8i64, X86cmpmu, v8i64, AVXCC, "uq">,
                VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001212
// avx512_cmp_packed - compare packed instructions
//
//   KRC      - mask register class that receives the result.
//   RC       - vector register class of the sources.
//   x86memop - memory operand used for $src2 in the rm* forms.
//   vt       - value type of the vector sources.
//   suffix   - text appended to "vcmp" to form the mnemonic.
//   d        - execution domain passed through to AVX512PIi8.
//
// rri/rmi carry selection patterns on X86cmpm; rrib is the EVEX_B form whose
// assembly string spells {sae}; the *_alt forms take the predicate as a plain
// i8 immediate and exist for the assembler only.
multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
                           X86MemOperand x86memop, ValueType vt,
                           string suffix, Domain d> {
  def rri : AVX512PIi8<0xC2, MRMSrcReg,
             (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
             !strconcat("vcmp${cc}", suffix,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set KRC:$dst, (X86cmpm (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>;
  def rrib: AVX512PIi8<0xC2, MRMSrcReg,
             (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
     !strconcat("vcmp${cc}", suffix,
                " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
                [], d>, EVEX_B;
  // The predicate is already printed as part of the mnemonic (${cc}), so it
  // must not appear again in the operand list of either syntax variant; this
  // keeps rmi in line with rri.
  def rmi : AVX512PIi8<0xC2, MRMSrcMem,
             (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc),
              !strconcat("vcmp${cc}", suffix,
                         " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set KRC:$dst,
              (X86cmpm (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512PIi8<0xC2, MRMSrcReg,
               (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
              !strconcat("vcmp", suffix,
                        " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
    // No pattern to infer mayLoad from, so state it explicitly.
    let mayLoad = 1 in
    def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem,
               (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
              !strconcat("vcmp", suffix,
                        " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
  }
}
1246
// 512-bit instantiations of avx512_cmp_packed: v16f32 into VK16 and v8f64
// into VK8 (the latter additionally carries VEX_W).
defm VCMPPSZ : avx512_cmp_packed<VK16, VR512, f512mem, v16f32, "ps",
                                 SSEPackedSingle>,
               PS, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VCMPPDZ : avx512_cmp_packed<VK8, VR512, f512mem, v8f64, "pd",
                                 SSEPackedDouble>,
               PD, EVEX_4V, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// 256-bit compares producing v8i1 are selected to the 512-bit instructions:
// each VR256X source is placed in the sub_ymm lane of a 512-bit value via
// SUBREG_TO_REG and the resulting mask is copied into VK8.
def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
          (COPY_TO_REGCLASS
            (VCMPPSZrri
              (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
              imm:$cc),
            VK8)>;
def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
          (COPY_TO_REGCLASS
            (VPCMPDZrri
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
              imm:$cc),
            VK8)>;
def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
          (COPY_TO_REGCLASS
            (VPCMPUDZrri
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
              imm:$cc),
            VK8)>;

// int_x86_avx512_mask_cmp_{ps,pd}_512 called with an all-ones mask (-1).
// FROUND_NO_EXC selects the rrib form, FROUND_CURRENT selects the plain rri
// form; in both cases the mask result is copied to a GPR class of the
// intrinsic's integer result width.
def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
                 (v16f32 VR512:$src2), imm:$cc, (i16 -1), FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
            (VCMPPSZrrib VR512:$src1, VR512:$src2, (I8Imm imm:$cc)),
            GR16)>;

def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
                (v8f64 VR512:$src2), imm:$cc, (i8 -1), FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
            (VCMPPDZrrib VR512:$src1, VR512:$src2, (I8Imm imm:$cc)),
            GR8)>;

def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
                 (v16f32 VR512:$src2), imm:$cc, (i16 -1), FROUND_CURRENT)),
          (COPY_TO_REGCLASS
            (VCMPPSZrri VR512:$src1, VR512:$src2, (I8Imm imm:$cc)),
            GR16)>;

def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
                (v8f64 VR512:$src2), imm:$cc, (i8 -1), FROUND_CURRENT)),
          (COPY_TO_REGCLASS
            (VCMPPDZrri VR512:$src1, VR512:$src2, (I8Imm imm:$cc)),
            GR8)>;

// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
// - copy from GPR to mask register and vice versa
//
// avx512_mask_mov provides the first two groups:
//   kk - mask register to mask register (opc_kk, no pattern)
//   km - load from x86memop into KRC (opc_km); matched when an ivt load is
//        bitconverted to vvt
//   mk - store KRC to x86memop (opc_mk, no pattern)
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                         string OpcodeStr, RegisterClass KRC,
                         ValueType vvt, ValueType ivt, X86MemOperand x86memop> {
  let hasSideEffects = 0 in {
    def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
               OpcodeStr # " \t{$src, $dst|$dst, $src}", []>;

    let mayLoad = 1 in {
      def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
                 OpcodeStr # " \t{$src, $dst|$dst, $src}",
                 [(set KRC:$dst, (vvt (bitconvert (ivt (load addr:$src)))))]>;
    }

    let mayStore = 1 in {
      def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
                 OpcodeStr # " \t{$src, $dst|$dst, $src}", []>;
    }
  }
}
1314
// avx512_mask_mov_gpr - Moves between a general-purpose register class (GRC)
// and a mask register class (KRC).  Neither form has a selection pattern.
//   kr - GRC to KRC (opc_kr)
//   rk - KRC to GRC (opc_rk)
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                             string OpcodeStr,
                             RegisterClass KRC, RegisterClass GRC> {
  let hasSideEffects = 0 in {
    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
               OpcodeStr # " \t{$src, $dst|$dst, $src}", []>;
    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
               OpcodeStr # " \t{$src, $dst|$dst, $src}", []>;
  }
}
1325
// KMOV{B,W,D,Q} instantiations.  The byte form is gated on HasDQI, the word
// form on HasAVX512 and the dword/qword forms on HasBWI.  For D and Q the
// mask/memory forms and the GPR forms use different prefix settings, so they
// are instantiated by two separate defms under the same name.
let Predicates = [HasDQI] in {
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8,
                               i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;
}

let Predicates = [HasAVX512] in {
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16,
                               i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;
}

let Predicates = [HasBWI] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32,
                               i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;

  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64,
                               i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, XD, VEX_W;
}

1351
// GR from/to mask register
//
// KMOVB/KMOVW take and produce GR32, so 8- and 16-bit values are widened
// with SUBREG_TO_REG on the way in and narrowed with EXTRACT_SUBREG on the
// way out.  KMOVD/KMOVQ use GR32/GR64 directly.
let Predicates = [HasDQI] in {
  def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
            (KMOVBkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit))>;
  def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
            (EXTRACT_SUBREG (KMOVBrk VK8:$src), sub_8bit)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
            (KMOVWkr (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit))>;
  def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
            (EXTRACT_SUBREG (KMOVWrk VK16:$src), sub_16bit)>;
}

let Predicates = [HasBWI] in {
  def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), (KMOVDkr GR32:$src)>;
  def : Pat<(i32 (bitconvert (v32i1 VK32:$src))), (KMOVDrk VK32:$src)>;

  def : Pat<(v64i1 (bitconvert (i64 GR64:$src))), (KMOVQkr GR64:$src)>;
  def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), (KMOVQrk VK64:$src)>;
}

Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001373
// Load/store kreg
let Predicates = [HasDQI] in {
  def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
            (KMOVBmk addr:$dst, VK8:$src)>;
}
let Predicates = [HasAVX512] in {
  def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
            (KMOVWmk addr:$dst, VK16:$src)>;

  // KMOVW's memory forms are declared with i16mem, i.e. they access two
  // bytes.  Using them for byte-sized accesses would write (or read) one byte
  // past the addressed location, so 8-bit and 1-bit accesses go through a
  // GPR and an ordinary byte-sized move instead.
  def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
            (MOV8mr addr:$dst,
             (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
              sub_8bit))>;
  def : Pat<(i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVWkr (MOVZX32rm8 addr:$src)), VK1)>;
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (KMOVWkr (MOVZX32rm8 addr:$src)), VK8)>;
}
let Predicates = [HasBWI] in {
  def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst),
            (KMOVDmk addr:$dst, VK32:$src)>;
}
let Predicates = [HasBWI] in {
  def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst),
            (KMOVQmk addr:$dst, VK64:$src)>;
}

Elena Demikhovskyc5f67262013-12-17 08:33:15 +00001397
// Conversions between scalar integers and i1 held in VK1.
//
// trunc: the source is brought to 32 bits, masked down to bit 0 with AND32ri,
//        moved into a mask register with KMOVWkr and retyped as VK1.
// zext:  VK1 is retyped as VK16, moved to a GPR with KMOVWrk and masked down
//        to bit 0; narrower/wider results take a subregister of, or wrap,
//        that 32-bit value.
// scalar_to_vector: a plain register-class copy from VK1.
let Predicates = [HasAVX512] in {
  def : Pat<(i1 (trunc (i64 GR64:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr (AND32ri (EXTRACT_SUBREG $src, sub_32bit), (i32 1))),
              VK1)>;

  def : Pat<(i1 (trunc (i32 GR32:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr (AND32ri $src, (i32 1))),
              VK1)>;

  def : Pat<(i1 (trunc (i8 GR8:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr
                (AND32ri (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit),
                         (i32 1))),
              VK1)>;
  def : Pat<(i1 (trunc (i16 GR16:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr
                (AND32ri (SUBREG_TO_REG (i32 0), $src, sub_16bit),
                         (i32 1))),
              VK1)>;

  def : Pat<(i32 (zext VK1:$src)),
            (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
  def : Pat<(i8 (zext VK1:$src)),
            (EXTRACT_SUBREG
              (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
              sub_8bit)>;
  def : Pat<(i64 (zext VK1:$src)),
            (AND64ri8
              (SUBREG_TO_REG (i64 0),
                (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit),
              (i64 1))>;
  def : Pat<(i16 (zext VK1:$src)),
            (EXTRACT_SUBREG
              (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
              sub_16bit)>;

  def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
            (COPY_TO_REGCLASS VK1:$src, VK16)>;
  def : Pat<(v8i1 (scalar_to_vector VK1:$src)),
            (COPY_TO_REGCLASS VK1:$src, VK8)>;
}
let Predicates = [HasBWI] in {
  def : Pat<(v32i1 (scalar_to_vector VK1:$src)),
            (COPY_TO_REGCLASS VK1:$src, VK32)>;
  def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
            (COPY_TO_REGCLASS VK1:$src, VK64)>;
}
1439
1440
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
let Predicates = [HasAVX512] in {
  // GR from/to 8-bit mask without native support: go through the 16-bit
  // KMOVW forms and retype the mask register as needed.
  def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
            (COPY_TO_REGCLASS
              (KMOVWkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)), VK8)>;
  def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
            (EXTRACT_SUBREG
              (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)), sub_8bit)>;

  // Extracting element 0 of a mask is just a register-class copy to VK1.
  def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK16:$src, VK1)>;
  def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK8:$src, VK1)>;
}
let Predicates = [HasBWI] in {
  def : Pat<(i1 (X86Vextract VK32:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK32:$src, VK1)>;
  def : Pat<(i1 (X86Vextract VK64:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK64:$src, VK1)>;
}
1464
// Mask unary operation
// - KNOT
//
// Defines a single "rr" instruction that applies OpNode to a KRC source and
// writes a KRC result; the instruction is gated on predicate prd.
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            Predicate prd> {
  let Predicates = [prd] in {
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
               OpcodeStr # " \t{$src, $dst|$dst, $src}",
               [(set KRC:$dst, (OpNode KRC:$src))]>;
  }
}
1475
// avx512_mask_unop_all - Instantiates avx512_mask_unop for all four mask
// widths, appending b/w/d/q to the mnemonic:
//   B - VK8,  HasDQI
//   W - VK16, HasAVX512
//   D - VK32, HasBWI
//   Q - VK64, HasBWI
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode> {
  defm B : avx512_mask_unop<opc, OpcodeStr # "b", VK8, OpNode, HasDQI>,
           VEX, PD;
  defm W : avx512_mask_unop<opc, OpcodeStr # "w", VK16, OpNode, HasAVX512>,
           VEX, PS;
  defm D : avx512_mask_unop<opc, OpcodeStr # "d", VK32, OpNode, HasBWI>,
           VEX, PD, VEX_W;
  defm Q : avx512_mask_unop<opc, OpcodeStr # "q", VK64, OpNode, HasBWI>,
           VEX, PS, VEX_W;
}

defm KNOT : avx512_mask_unop_all<0x44, "knot", not>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001489
// avx512_mask_unop_int - Selects the int_x86_avx512_<IntName>_w intrinsic,
// which takes and returns i16 in GR16, to the <InstName>Wrr instruction by
// copying the operand into VK16 and the result back into GR16.
multiclass avx512_mask_unop_int<string IntName, string InstName> {
  let Predicates = [HasAVX512] in {
    def : Pat<(!cast<Intrinsic>(!strconcat("int_x86_avx512_", IntName, "_w"))
                (i16 GR16:$src)),
              (COPY_TO_REGCLASS
                (!cast<Instruction>(!strconcat(InstName, "Wrr"))
                  (v16i1 (COPY_TO_REGCLASS GR16:$src, VK16))),
                GR16)>;
  }
}
defm : avx512_mask_unop_int<"knot", "KNOT">;
1498
// xor with an all-ones mask vector is selected to KNOT of the matching width.
let Predicates = [HasDQI] in {
  def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)), (KNOTBrr VK8:$src1)>;
}
let Predicates = [HasAVX512] in {
  def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)), (KNOTWrr VK16:$src1)>;
}
let Predicates = [HasBWI] in {
  def : Pat<(xor VK32:$src1, (v32i1 immAllOnesV)), (KNOTDrr VK32:$src1)>;
  def : Pat<(xor VK64:$src1, (v64i1 immAllOnesV)), (KNOTQrr VK64:$src1)>;
}

// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
let Predicates = [HasAVX512] in {
  def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
            (COPY_TO_REGCLASS
              (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>;

  def : Pat<(not VK8:$src),
            (COPY_TO_REGCLASS
              (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001517
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
//
// Defines a single "rr" instruction that applies OpNode to two KRC sources
// and writes a KRC result; the instruction is gated on predicate prd.
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC, SDPatternOperator OpNode,
                             Predicate prd> {
  let Predicates = [prd] in {
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
               OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>;
  }
}
1529
// avx512_mask_binop_all - Instantiates avx512_mask_binop for all four mask
// widths, appending b/w/d/q to the mnemonic:
//   B - VK8,  HasDQI
//   W - VK16, HasAVX512
//   D - VK32, HasBWI
//   Q - VK64, HasBWI
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode> {
  defm B : avx512_mask_binop<opc, OpcodeStr # "b", VK8, OpNode, HasDQI>,
           VEX_4V, VEX_L, PD;
  defm W : avx512_mask_binop<opc, OpcodeStr # "w", VK16, OpNode, HasAVX512>,
           VEX_4V, VEX_L, PS;
  defm D : avx512_mask_binop<opc, OpcodeStr # "d", VK32, OpNode, HasBWI>,
           VEX_4V, VEX_L, VEX_W, PD;
  defm Q : avx512_mask_binop<opc, OpcodeStr # "q", VK64, OpNode, HasBWI>,
           VEX_4V, VEX_L, VEX_W, PS;
}
1541
// Helper fragments for the two mask operations that have no DAG node of
// their own: andn(a, b) = (not a) and b;  xnor(a, b) = not (a xor b).
def andn : PatFrag<(ops node:$i0, node:$i1),
                   (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1),
                   (not (xor node:$i0, node:$i1))>;

// KANDN negates only its first operand, so it is the one binary mask
// operation that is not marked commutable.
let isCommutable = 1 in {
  defm KAND  : avx512_mask_binop_all<0x41, "kand",  and>;
  defm KOR   : avx512_mask_binop_all<0x45, "kor",   or>;
  defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor>;
  defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor>;
}
let isCommutable = 0 in {
  defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001553
// Logic on VK1 operands is performed with the 16-bit mask instructions: both
// operands are retyped as VK16 and the result is retyped back to VK1.
def : Pat<(xor VK1:$src1, VK1:$src2),
          (COPY_TO_REGCLASS
            (KXORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
                     (COPY_TO_REGCLASS VK1:$src2, VK16)),
            VK1)>;

def : Pat<(or VK1:$src1, VK1:$src2),
          (COPY_TO_REGCLASS
            (KORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
                    (COPY_TO_REGCLASS VK1:$src2, VK16)),
            VK1)>;

def : Pat<(and VK1:$src1, VK1:$src2),
          (COPY_TO_REGCLASS
            (KANDWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
                     (COPY_TO_REGCLASS VK1:$src2, VK16)),
            VK1)>;

1565
// avx512_mask_binop_int - Selects the int_x86_avx512_<IntName>_w intrinsic,
// which takes two i16 operands in GR16 and returns i16, to <InstName>Wrr by
// copying both operands into VK16 and the result back into GR16.
multiclass avx512_mask_binop_int<string IntName, string InstName> {
  let Predicates = [HasAVX512] in {
    def : Pat<(!cast<Intrinsic>(!strconcat("int_x86_avx512_", IntName, "_w"))
                (i16 GR16:$src1), (i16 GR16:$src2)),
              (COPY_TO_REGCLASS
                (!cast<Instruction>(!strconcat(InstName, "Wrr"))
                  (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
                  (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))),
                GR16)>;
  }
}

defm : avx512_mask_binop_int<"kand",  "KAND">;
defm : avx512_mask_binop_int<"kandn", "KANDN">;
defm : avx512_mask_binop_int<"kor",   "KOR">;
defm : avx512_mask_binop_int<"kxnor", "KXNOR">;
defm : avx512_mask_binop_int<"kxor",  "KXOR">;

Elena Demikhovskye382c3f2013-12-10 13:53:10 +00001580
// With AVX-512, 8-bit mask is promoted to 16-bit mask.
//
// avx512_binop_pat - Selects OpNode on two VK8 operands to Inst by retyping
// both operands as VK16 and retyping the result back to VK8.
multiclass avx512_binop_pat<SDPatternOperator OpNode, Instruction Inst> {
  let Predicates = [HasAVX512] in {
    def : Pat<(OpNode VK8:$src1, VK8:$src2),
              (COPY_TO_REGCLASS
                (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                      (COPY_TO_REGCLASS VK8:$src2, VK16)),
                VK8)>;
  }
}

defm : avx512_binop_pat<and,  KANDWrr>;
defm : avx512_binop_pat<andn, KANDNWrr>;
defm : avx512_binop_pat<or,   KORWrr>;
defm : avx512_binop_pat<xnor, KXNORWrr>;
defm : avx512_binop_pat<xor,  KXORWrr>;
1595
// Mask unpacking
//
// avx512_mask_unpck defines a pattern-less two-source "rr" instruction on
// KRC, gated on HasAVX512.
multiclass avx512_mask_unpck<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC> {
  let Predicates = [HasAVX512] in {
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
               OpcodeStr # " \t{$src2, $src1, $dst|$dst, $src1, $src2}", []>;
  }
}

// The "bw" variant operates on VK16.
multiclass avx512_mask_unpck_bw<bits<8> opc, string OpcodeStr> {
  defm BW : avx512_mask_unpck<opc, OpcodeStr # "bw", VK16>,
            VEX_4V, VEX_L, PD;
}

defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">;

// Concatenating two v8i1 values: note that the operands are passed to
// KUNPCKBWrr in swapped order ($src2 first, then $src1).
def : Pat<(v16i1 (concat_vectors (v8i1 VK8:$src1), (v8i1 VK8:$src2))),
          (KUNPCKBWrr (COPY_TO_REGCLASS VK8:$src2, VK16),
                      (COPY_TO_REGCLASS VK8:$src1, VK16))>;
1614
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001615
// avx512_mask_unpck_int - Selects the int_x86_avx512_<IntName>_bw intrinsic,
// which takes two i16 operands in GR16 and returns i16, to <InstName>BWrr by
// copying both operands into VK16 and the result back into GR16.
multiclass avx512_mask_unpck_int<string IntName, string InstName> {
  let Predicates = [HasAVX512] in {
    def : Pat<(!cast<Intrinsic>(!strconcat("int_x86_avx512_", IntName, "_bw"))
                (i16 GR16:$src1), (i16 GR16:$src2)),
              (COPY_TO_REGCLASS
                (!cast<Instruction>(!strconcat(InstName, "BWrr"))
                  (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
                  (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))),
                GR16)>;
  }
}
defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001625
// Mask bit testing
//
// avx512_mask_testop defines an "rr" instruction with no register outputs
// that applies OpNode to two KRC sources and defines EFLAGS.
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode> {
  let Predicates = [HasAVX512], Defs = [EFLAGS] in {
    def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
               OpcodeStr # " \t{$src2, $src1|$src1, $src2}",
               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>;
  }
}

// Only the 16-bit ("w") form is instantiated here.
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode> {
  defm W : avx512_mask_testop<opc, OpcodeStr # "w", VK16, OpNode>,
           VEX, PS;
}

defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;

// Comparing a VK1 value against zero: KORTESTW of the value with itself.
def : Pat<(X86cmp VK1:$src1, (i1 0)),
          (KORTESTWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
                      (COPY_TO_REGCLASS VK1:$src1, VK16))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001645
// Mask shift
//
// avx512_mask_shiftop defines an "ri" instruction that applies OpNode to a
// KRC source and an 8-bit immediate shift count.
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode> {
  let Predicates = [HasAVX512] in {
    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, i8imm:$imm),
                 OpcodeStr # " \t{$imm, $src, $dst|$dst, $src, $imm}",
                 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>;
  }
}

// Only the 16-bit ("w") form is instantiated, using opc1; opc2 is accepted
// but not referenced by this multiclass.
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode> {
  defm W : avx512_mask_shiftop<opc1, OpcodeStr # "w", VK16, OpNode>,
           VEX, TAPD, VEX_W;
}

defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86vsrli>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001664
// Mask setting all 0s or 1s
//
// avx512_mask_setop defines a pseudo instruction with no inputs whose result
// is the constant Val of type VT in KRC.  It is marked rematerializable and
// as cheap as a move.
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
  let Predicates = [HasAVX512], isReMaterializable = 1, isAsCheapAsAMove = 1,
      isPseudo = 1 in {
    def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                   [(set KRC:$dst, (VT Val))]>;
  }
}

// Instantiates the set pseudo for the 8-bit (B) and 16-bit (W) mask types.
multiclass avx512_mask_setop_w<PatFrag Val> {
  defm B : avx512_mask_setop<VK8,  v8i1,  Val>;
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
}

defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
1680
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// Constant v8i1 and i1 values are produced from the 16-bit KSET pseudos and
// retyped to the narrower mask class.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(i1 0),  (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(i1 1),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
  def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSET1W), VK1)>;
}

// Low half of a v16i1 and insertion of a v8i1 at index 0 of an undef v16i1
// are plain register-class copies.
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0))),
          (v8i1 (COPY_TO_REGCLASS VK16:$src, VK8))>;

def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))),
          (v16i1 (COPY_TO_REGCLASS VK8:$src, VK16))>;

// High half of a v16i1: shift right by 8, then retype as VK8.
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
          (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;

// Shifts of v8i1 use the 16-bit shift instructions on a retyped operand.
def : Pat<(v8i1 (X86vshli VK8:$src, (i8 imm:$imm))),
          (v8i1 (COPY_TO_REGCLASS
                  (KSHIFTLWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)),
                  VK8))>;

def : Pat<(v8i1 (X86vsrli VK8:$src, (i8 imm:$imm))),
          (v8i1 (COPY_TO_REGCLASS
                  (KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)),
                  VK8))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00001703//===----------------------------------------------------------------------===//
1704// AVX-512 - Aligned and unaligned load and store
1705//
1706
// avx512_load - Register/memory move forms for one vector type.
//
//   opc              - opcode byte shared by every form.
//   OpcodeStr        - mnemonic.
//   ld_frag          - load fragment matched by the memory forms.
//   KRC              - mask register class of the $mask operand.
//   RC               - vector register class.
//   vt               - value type used in the selection patterns.
//   zvt              - value type of the all-zeros vector in the rmkz pattern.
//   memop            - memory operand.
//   d                - execution domain passed through to AVX512PI.
//   IsReMaterializable - value given to isReMaterializable on the rm form.
//
// Forms:
//   rr   - register move (no pattern)
//   rrkz - zero-masked register move (no pattern)
//   rm   - load
//   rrk  - merge-masked register move; $src0 is tied to $dst
//   rmk  - merge-masked load; $src0 is tied to $dst
//   rmkz - zero-masked load
multiclass avx512_load<bits<8> opc, string OpcodeStr, PatFrag ld_frag,
                       RegisterClass KRC, RegisterClass RC,
                       ValueType vt, ValueType zvt, X86MemOperand memop,
                       Domain d, bit IsReMaterializable = 1> {
  let hasSideEffects = 0 in {
    def rr : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}", [], d>,
             EVEX;
    def rrkz : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
                        (ins KRC:$mask, RC:$src),
                        OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                                    "${dst} {${mask}} {z}, $src}",
                        [], d>,
               EVEX, EVEX_KZ;
  }

  let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable,
      SchedRW = [WriteLoad] in {
    def rm : AVX512PI<opc, MRMSrcMem, (outs RC:$dst), (ins memop:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      [(set RC:$dst, (vt (bitconvert (ld_frag addr:$src))))],
                      d>,
             EVEX;
  }

  let AddedComplexity = 20 in {
    // Merge-masked forms: the pass-through value $src0 shares a register
    // with the destination.
    let Constraints = "$src0 = $dst", hasSideEffects = 0 in {
      def rrk : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
                         (ins RC:$src0, KRC:$mask, RC:$src1),
                         OpcodeStr # "\t{$src1, ${dst} {${mask}}|" #
                                     "${dst} {${mask}}, $src1}",
                         [(set RC:$dst, (vt (vselect KRC:$mask,
                                                     (vt RC:$src1),
                                                     (vt RC:$src0))))],
                         d>,
                EVEX, EVEX_K;

      let mayLoad = 1, SchedRW = [WriteLoad] in {
        def rmk : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
                           (ins RC:$src0, KRC:$mask, memop:$src1),
                           OpcodeStr # "\t{$src1, ${dst} {${mask}}|" #
                                       "${dst} {${mask}}, $src1}",
                           [(set RC:$dst, (vt
                               (vselect KRC:$mask,
                                        (vt (bitconvert (ld_frag addr:$src1))),
                                        (vt RC:$src0))))],
                           d>,
                  EVEX, EVEX_K;
      }
    }

    // Zero-masked load: unselected elements come from an all-zeros vector.
    let mayLoad = 1, SchedRW = [WriteLoad] in {
      def rmkz : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
                          (ins KRC:$mask, memop:$src),
                          OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                                      "${dst} {${mask}} {z}, $src}",
                          [(set RC:$dst, (vt
                              (vselect KRC:$mask,
                                       (vt (bitconvert (ld_frag addr:$src))),
                                       (vt (bitconvert (zvt immAllZerosV))))))],
                          d>,
                 EVEX, EVEX_KZ;
    }
  }
}
1760
// Instantiates avx512_load for the 512-bit (Z), 256-bit (Z256) and 128-bit
// (Z128) vector lengths. Record names are assembled from strings:
//   elty / elsz            - element kind ("f" or "i") and element size.
//   vsz512 / vsz256 / vsz128 - element count at each vector length.
//   ld_pat                 - prefix of the load PatFrag name.
// Z is guarded by `prd`; Z256 and Z128 additionally require HasVLX.
// For the 256/128-bit forms the load fragment is the per-type one only when
// elty is "f"; otherwise the v4i64 / v2i64 fragment is used.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, string ld_pat,
                          string elty, string elsz, string vsz512,
                          string vsz256, string vsz128, Domain d,
                          Predicate prd, bit IsReMaterializable = 1> {
  let Predicates = [prd] in
  defm Z : avx512_load<
               opc, OpcodeStr,
               !cast<PatFrag>(ld_pat # "v" # vsz512 # elty # elsz),
               !cast<RegisterClass>("VK" # vsz512 # "WM"),
               VR512,
               !cast<ValueType>("v" # vsz512 # elty # elsz),
               v16i32,
               !cast<X86MemOperand>(elty # "512mem"),
               d, IsReMaterializable>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_load<
                    opc, OpcodeStr,
                    !cast<PatFrag>(ld_pat # !if(!eq(elty, "f"),
                                                "v" # vsz256 # elty # elsz,
                                                "v4i64")),
                    !cast<RegisterClass>("VK" # vsz256 # "WM"),
                    VR256X,
                    !cast<ValueType>("v" # vsz256 # elty # elsz),
                    v8i32,
                    !cast<X86MemOperand>(elty # "256mem"),
                    d, IsReMaterializable>,
                EVEX_V256;

    defm Z128 : avx512_load<
                    opc, OpcodeStr,
                    !cast<PatFrag>(ld_pat # !if(!eq(elty, "f"),
                                                "v" # vsz128 # elty # elsz,
                                                "v2i64")),
                    !cast<RegisterClass>("VK" # vsz128 # "WM"),
                    VR128X,
                    !cast<ValueType>("v" # vsz128 # elty # elsz),
                    v4i32,
                    !cast<X86MemOperand>(elty # "128mem"),
                    d, IsReMaterializable>,
                EVEX_V128;
  }
}
1791
1792
// Store-side forms of an AVX-512 packed move with opcode `opc`:
//
//   rr_alt / rrk_alt / rrkz_alt - MRMDestReg register forms, asm-parser only,
//                                 no selection patterns.
//   mr                          - unmasked store; selects
//                                 (st_frag (OpVT RC:$src), addr:$dst).
//   mrk                         - masked store, no selection pattern.
//
// KRC and RC are the mask and vector register classes, memop the memory
// operand and d the execution domain handed to AVX512PI.
multiclass avx512_store<bits<8> opc, string OpcodeStr, PatFrag st_frag,
                        ValueType OpVT, RegisterClass KRC, RegisterClass RC,
                        X86MemOperand memop, Domain d> {
  let hasSideEffects = 0, isAsmParserOnly = 1 in {
    def rr_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst), (ins RC:$src),
                          !strconcat(OpcodeStr,
                                     "\t{$src, $dst|$dst, $src}"),
                          [], d>,
                 EVEX;

    // Only the merge-masked form ties its first input to the destination.
    let Constraints = "$src1 = $dst" in
    def rrk_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
                           (ins RC:$src1, KRC:$mask, RC:$src2),
                           !strconcat(OpcodeStr,
                                      "\t{$src2, ${dst} {${mask}}|",
                                      "${dst} {${mask}}, $src2}"),
                           [], d>,
                  EVEX, EVEX_K;

    def rrkz_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
                            (ins KRC:$mask, RC:$src),
                            !strconcat(OpcodeStr,
                                       "\t{$src, ${dst} {${mask}} {z}|",
                                       "${dst} {${mask}} {z}, $src}"),
                            [], d>,
                   EVEX, EVEX_KZ;
  }

  let mayStore = 1 in {
    def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(st_frag (OpVT RC:$src), addr:$dst)], d>,
             EVEX;

    def mrk : AVX512PI<opc, MRMDestMem, (outs),
                       (ins memop:$dst, KRC:$mask, RC:$src),
                       !strconcat(OpcodeStr,
                                  "\t{$src, ${dst} {${mask}}|",
                                  "${dst} {${mask}}, $src}"),
                       [], d>,
              EVEX, EVEX_K;
  }
}
1823
Robert Khasanov7ca7df02014-08-04 14:35:15 +00001824
// Instantiates avx512_store for the 512-bit (Z), 256-bit (Z256) and 128-bit
// (Z128) vector lengths. The store PatFrag name is st_pat followed by the
// per-length suffix (st_suff_512 / st_suff_256 / st_suff_128); value type,
// mask register class and memory operand names are assembled from elty,
// elsz and the per-length element counts. Z is guarded by `prd`; Z256 and
// Z128 additionally require HasVLX.
multiclass avx512_store_vl<bits<8> opc, string OpcodeStr, string st_pat,
                           string st_suff_512, string st_suff_256,
                           string st_suff_128, string elty, string elsz,
                           string vsz512, string vsz256, string vsz128,
                           Domain d, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_store<
               opc, OpcodeStr,
               !cast<PatFrag>(st_pat # st_suff_512),
               !cast<ValueType>("v" # vsz512 # elty # elsz),
               !cast<RegisterClass>("VK" # vsz512 # "WM"),
               VR512,
               !cast<X86MemOperand>(elty # "512mem"),
               d>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<
                    opc, OpcodeStr,
                    !cast<PatFrag>(st_pat # st_suff_256),
                    !cast<ValueType>("v" # vsz256 # elty # elsz),
                    !cast<RegisterClass>("VK" # vsz256 # "WM"),
                    VR256X,
                    !cast<X86MemOperand>(elty # "256mem"),
                    d>,
                EVEX_V256;

    defm Z128 : avx512_store<
                    opc, OpcodeStr,
                    !cast<PatFrag>(st_pat # st_suff_128),
                    !cast<ValueType>("v" # vsz128 # elty # elsz),
                    !cast<RegisterClass>("VK" # vsz128 # "WM"),
                    VR128X,
                    !cast<X86MemOperand>(elty # "128mem"),
                    d>,
                EVEX_V128;
  }
}
1848
// Aligned packed single-precision move: load opcode 0x28, store opcode 0x29.
// The aligned store fragments are alignedstore512 / alignedstore256 /
// alignedstore for the three vector lengths.
defm VMOVAPS :
    avx512_load_vl<0x28, "vmovaps", "alignedload", "f", "32",
                   "16", "8", "4", SSEPackedSingle, HasAVX512>,
    avx512_store_vl<0x29, "vmovaps", "alignedstore",
                    "512", "256", "", "f", "32", "16", "8", "4",
                    SSEPackedSingle, HasAVX512>,
    PS, EVEX_CD8<32, CD8VF>;

// Aligned packed double-precision move.
defm VMOVAPD :
    avx512_load_vl<0x28, "vmovapd", "alignedload", "f", "64",
                   "8", "4", "2", SSEPackedDouble, HasAVX512>,
    avx512_store_vl<0x29, "vmovapd", "alignedstore",
                    "512", "256", "", "f", "64", "8", "4", "2",
                    SSEPackedDouble, HasAVX512>,
    PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned packed single-precision move: load opcode 0x10, store opcode
// 0x11, using the plain load / store fragments.
defm VMOVUPS :
    avx512_load_vl<0x10, "vmovups", "load", "f", "32",
                   "16", "8", "4", SSEPackedSingle, HasAVX512>,
    avx512_store_vl<0x11, "vmovups", "store", "", "", "", "f", "32",
                    "16", "8", "4", SSEPackedSingle, HasAVX512>,
    PS, EVEX_CD8<32, CD8VF>;

// Unaligned packed double-precision move. This is the only instantiation in
// this group that passes IsReMaterializable = 0 to the load multiclass.
defm VMOVUPD :
    avx512_load_vl<0x10, "vmovupd", "load", "f", "64",
                   "8", "4", "2", SSEPackedDouble, HasAVX512, 0>,
    avx512_store_vl<0x11, "vmovupd", "store", "", "", "", "f", "64",
                    "8", "4", "2", SSEPackedDouble, HasAVX512>,
    PD, VEX_W, EVEX_CD8<64, CD8VF>;
1874
// Masked unaligned FP load intrinsics whose pass-through operand is all
// zeros select the zero-masking load. The GPR mask is re-classed into the
// matching write-mask register class.
def : Pat<(v8f64 (int_x86_avx512_mask_loadu_pd_512
                    addr:$ptr, (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
          (VMOVUPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                        addr:$ptr)>;

def : Pat<(v16f32 (int_x86_avx512_mask_loadu_ps_512
                     addr:$ptr, (bc_v16f32 (v16i32 immAllZerosV)),
                     GR16:$mask)),
          (VMOVUPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                        addr:$ptr)>;

// Masked unaligned FP store intrinsics select the masked store form.
def : Pat<(int_x86_avx512_mask_storeu_ps_512
             addr:$ptr, (v16f32 VR512:$src), GR16:$mask),
          (VMOVUPSZmrk addr:$ptr,
                       (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                       VR512:$src)>;
def : Pat<(int_x86_avx512_mask_storeu_pd_512
             addr:$ptr, (v8f64 VR512:$src), GR8:$mask),
          (VMOVUPDZmrk addr:$ptr,
                       (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                       VR512:$src)>;
Elena Demikhovsky1f3ed412013-10-22 09:19:28 +00001891
// Aligned integer moves (load opcode 0x6F, store opcode 0x7F), guarded by
// HasAVX512.
defm VMOVDQA32 :
    avx512_load_vl<0x6F, "vmovdqa32", "alignedload", "i", "32",
                   "16", "8", "4", SSEPackedInt, HasAVX512>,
    avx512_store_vl<0x7F, "vmovdqa32", "alignedstore",
                    "512", "256", "", "i", "32", "16", "8", "4",
                    SSEPackedInt, HasAVX512>,
    PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 :
    avx512_load_vl<0x6F, "vmovdqa64", "alignedload", "i", "64",
                   "8", "4", "2", SSEPackedInt, HasAVX512>,
    avx512_store_vl<0x7F, "vmovdqa64", "alignedstore",
                    "512", "256", "", "i", "64", "8", "4", "2",
                    SSEPackedInt, HasAVX512>,
    PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned byte/word moves, guarded by HasBWI.
defm VMOVDQU8 :
    avx512_load_vl<0x6F, "vmovdqu8", "load", "i", "8",
                   "64", "32", "16", SSEPackedInt, HasBWI>,
    avx512_store_vl<0x7F, "vmovdqu8", "store", "", "", "",
                    "i", "8", "64", "32", "16", SSEPackedInt, HasBWI>,
    XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 :
    avx512_load_vl<0x6F, "vmovdqu16", "load", "i", "16",
                   "32", "16", "8", SSEPackedInt, HasBWI>,
    avx512_store_vl<0x7F, "vmovdqu16", "store", "", "", "",
                    "i", "16", "32", "16", "8", SSEPackedInt, HasBWI>,
    XD, VEX_W, EVEX_CD8<16, CD8VF>;

// Unaligned dword/qword moves, guarded by HasAVX512.
defm VMOVDQU32 :
    avx512_load_vl<0x6F, "vmovdqu32", "load", "i", "32",
                   "16", "8", "4", SSEPackedInt, HasAVX512>,
    avx512_store_vl<0x7F, "vmovdqu32", "store", "", "", "",
                    "i", "32", "16", "8", "4", SSEPackedInt, HasAVX512>,
    XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 :
    avx512_load_vl<0x6F, "vmovdqu64", "load", "i", "64",
                   "8", "4", "2", SSEPackedInt, HasAVX512>,
    avx512_store_vl<0x7F, "vmovdqu64", "store", "", "", "",
                    "i", "64", "8", "4", "2", SSEPackedInt, HasAVX512>,
    XS, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky1f3ed412013-10-22 09:19:28 +00001929
// Masked unaligned integer load intrinsics with an all-zeros pass-through
// operand select the zero-masking VMOVDQU load.
def : Pat<(v16i32 (int_x86_avx512_mask_loadu_d_512
                     addr:$ptr, (v16i32 immAllZerosV), GR16:$mask)),
          (VMOVDQU32Zrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                          addr:$ptr)>;

def : Pat<(v8i64 (int_x86_avx512_mask_loadu_q_512
                    addr:$ptr, (bc_v8i64 (v16i32 immAllZerosV)), GR8:$mask)),
          (VMOVDQU64Zrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                          addr:$ptr)>;

// Masked unaligned integer store intrinsics select the masked VMOVDQU store.
def : Pat<(int_x86_avx512_mask_storeu_d_512
             addr:$ptr, (v16i32 VR512:$src), GR16:$mask),
          (VMOVDQU32Zmrk addr:$ptr,
                         (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
                         VR512:$src)>;
def : Pat<(int_x86_avx512_mask_storeu_q_512
             addr:$ptr, (v8i64 VR512:$src), GR8:$mask),
          (VMOVDQU64Zmrk addr:$ptr,
                         (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
                         VR512:$src)>;
1946
// A vselect between a 512-bit integer vector and all-zeros is selected to a
// zero-masking register move. When the zero vector is the "true" operand the
// mask is inverted with KNOTWrr first.
let AddedComplexity = 20 in {
  def : Pat<(v8i64 (vselect VK8WM:$mask,
                            (v8i64 VR512:$src),
                            (bc_v8i64 (v16i32 immAllZerosV)))),
            (VMOVDQU64Zrrkz VK8WM:$mask, VR512:$src)>;

  // The 8-bit mask is re-classed to VK16 for KNOTWrr and back afterwards.
  // NOTE(review): the result names the operand as VK8:$mask and copies back
  // to VK8, while the source pattern uses VK8WM:$mask - confirm intended.
  def : Pat<(v8i64 (vselect VK8WM:$mask,
                            (bc_v8i64 (v16i32 immAllZerosV)),
                            (v8i64 VR512:$src))),
            (VMOVDQU64Zrrkz
               (COPY_TO_REGCLASS
                  (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
                  VK8),
               VR512:$src)>;

  def : Pat<(v16i32 (vselect VK16WM:$mask,
                             (v16i32 VR512:$src),
                             (v16i32 immAllZerosV))),
            (VMOVDQU32Zrrkz VK16WM:$mask, VR512:$src)>;

  def : Pat<(v16i32 (vselect VK16WM:$mask,
                             (v16i32 immAllZerosV),
                             (v16i32 VR512:$src))),
            (VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
}
Robert Khasanov7ca7df02014-08-04 14:35:15 +00001965
// Move Int Doubleword to Packed Double Int
//
// GR32 -> low element of an XMM register.
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg,
                             (outs VR128X:$dst), (ins GR32:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set VR128X:$dst,
                                   (v4i32 (scalar_to_vector GR32:$src)))],
                             IIC_SSE_MOVDQ>,
                    EVEX, VEX_LIG;

// 32-bit memory -> low element of an XMM register.
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem,
                             (outs VR128X:$dst), (ins i32mem:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set VR128X:$dst,
                                   (v4i32 (scalar_to_vector
                                             (loadi32 addr:$src))))],
                             IIC_SSE_MOVDQ>,
                    EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;

// GR64 -> low element of an XMM register.
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg,
                              (outs VR128X:$dst), (ins GR64:$src),
                              "vmovq\t{$src, $dst|$dst, $src}",
                              [(set VR128X:$dst,
                                    (v2i64 (scalar_to_vector GR64:$src)))],
                              IIC_SSE_MOVDQ>,
                     EVEX, VEX_W, VEX_LIG;
// Codegen-only bitcasts between a 64-bit GPR and a scalar f64 register.
let isCodeGenOnly = 1 in {
  def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg,
                               (outs FR64:$dst), (ins GR64:$src),
                               "vmovq\t{$src, $dst|$dst, $src}",
                               [(set FR64:$dst, (bitconvert GR64:$src))],
                               IIC_SSE_MOVDQ>,
                      EVEX, VEX_W, Sched<[WriteMove]>;

  def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg,
                               (outs GR64:$dst), (ins FR64:$src),
                               "vmovq\t{$src, $dst|$dst, $src}",
                               [(set GR64:$dst, (bitconvert FR64:$src))],
                               IIC_SSE_MOVDQ>,
                      EVEX, VEX_W, Sched<[WriteMove]>;
}
// Store a scalar f64 register to 64-bit memory as an i64 bit pattern.
def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem,
                             (outs), (ins i64mem:$dst, FR64:$src),
                             "vmovq\t{$src, $dst|$dst, $src}",
                             [(store (i64 (bitconvert FR64:$src)),
                                     addr:$dst)],
                             IIC_SSE_MOVDQ>,
                    EVEX, VEX_W, Sched<[WriteStore]>,
                    EVEX_CD8<64, CD8VT1>;
1998
// Move Int Doubleword to Single Scalar
//
// Codegen-only bitcasts of a 32-bit integer (register or memory) into an
// FR32X scalar register.
let isCodeGenOnly = 1 in {
  def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg,
                              (outs FR32X:$dst), (ins GR32:$src),
                              "vmovd\t{$src, $dst|$dst, $src}",
                              [(set FR32X:$dst, (bitconvert GR32:$src))],
                              IIC_SSE_MOVDQ>,
                     EVEX, VEX_LIG;

  def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem,
                              (outs FR32X:$dst), (ins i32mem:$src),
                              "vmovd\t{$src, $dst|$dst, $src}",
                              [(set FR32X:$dst,
                                    (bitconvert (loadi32 addr:$src)))],
                              IIC_SSE_MOVDQ>,
                     EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002012
// Move doubleword from xmm register to r/m32
//
// Both forms take element 0 of the source viewed as v4i32.
def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg,
                             (outs GR32:$dst), (ins VR128X:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set GR32:$dst,
                                   (vector_extract (v4i32 VR128X:$src),
                                                   (iPTR 0)))],
                             IIC_SSE_MOVD_ToGP>,
                    EVEX, VEX_LIG;

def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem,
                             (outs), (ins i32mem:$dst, VR128X:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(store (i32 (vector_extract
                                             (v4i32 VR128X:$src), (iPTR 0))),
                                     addr:$dst)],
                             IIC_SSE_MOVDQ>,
                    EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
2026
// Move quadword from xmm1 register to r/m64
//
// Both forms take element 0 of the source viewed as v2i64 and require
// HasAVX512 together with In64BitMode.
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg,
                       (outs GR64:$dst), (ins VR128X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set GR64:$dst,
                             (extractelt (v2i64 VR128X:$src), (iPTR 0)))],
                       IIC_SSE_MOVD_ToGP>,
                     PD, EVEX, VEX_LIG, VEX_W,
                     Requires<[HasAVX512, In64BitMode]>;

def VMOVPQIto64Zmr : I<0xD6, MRMDestMem,
                       (outs), (ins i64mem:$dst, VR128X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
                               addr:$dst)],
                       IIC_SSE_MOVDQ>,
                     EVEX, PD, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>,
                     Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
2043
// Move Scalar Single to Double Int
//
// Codegen-only bitcasts of an FR32X scalar into a 32-bit integer, either in
// a GPR or stored to memory.
let isCodeGenOnly = 1 in {
  def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg,
                              (outs GR32:$dst), (ins FR32X:$src),
                              "vmovd\t{$src, $dst|$dst, $src}",
                              [(set GR32:$dst, (bitconvert FR32X:$src))],
                              IIC_SSE_MOVD_ToGP>,
                     EVEX, VEX_LIG;

  def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem,
                              (outs), (ins i32mem:$dst, FR32X:$src),
                              "vmovd\t{$src, $dst|$dst, $src}",
                              [(store (i32 (bitconvert FR32X:$src)),
                                      addr:$dst)],
                              IIC_SSE_MOVDQ>,
                     EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002058
// Move Quadword Int to Packed Quadword Int
//
// 64-bit memory -> low element of an XMM register.
def VMOVQI2PQIZrm : AVX512BI<0x6E, MRMSrcMem,
                             (outs VR128X:$dst), (ins i64mem:$src),
                             "vmovq\t{$src, $dst|$dst, $src}",
                             [(set VR128X:$dst,
                                   (v2i64 (scalar_to_vector
                                             (loadi64 addr:$src))))]>,
                    EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
2067
2068//===----------------------------------------------------------------------===//
2069// AVX-512 MOVSS, MOVSD
2070//===----------------------------------------------------------------------===//
2071
// Scalar move forms (opcode 0x10 for the register/load forms, 0x11 for the
// store forms):
//
//   rr  - selects (vt (OpNode $src1, (scalar_to_vector $src2))).
//   rrk - masked register form with $src1 tied to $dst; no pattern.
//   rm  - scalar load through mem_pat into RC.
//   mr  - scalar store of RC.
//   mrk - masked scalar store; no pattern.
//
// RC is the scalar register class, x86memop the scalar memory operand and
// `asm` the mnemonic text placed in front of each operand string.
multiclass avx512_move_scalar <string asm, RegisterClass RC,
                               SDNode OpNode, ValueType vt,
                               X86MemOperand x86memop, PatFrag mem_pat> {
  let hasSideEffects = 0 in {
    def rr : SI<0x10, MRMSrcReg,
                (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2),
                !strconcat(asm, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set VR128X:$dst,
                      (vt (OpNode VR128X:$src1,
                                  (scalar_to_vector RC:$src2))))],
                IIC_SSE_MOV_S_RR>,
             EVEX_4V, VEX_LIG;

    // Only the masked register form ties $src1 to $dst.
    let Constraints = "$src1 = $dst" in
    def rrk : SI<0x10, MRMSrcReg,
                 (outs VR128X:$dst),
                 (ins VR128X:$src1, VK1WM:$mask, RC:$src2, RC:$src3),
                 !strconcat(asm,
                   " \t{$src3, $src2, $dst {${mask}}|$dst {${mask}}, $src2, $src3}"),
                 [], IIC_SSE_MOV_S_RR>,
              EVEX_4V, VEX_LIG, EVEX_K;

    def rm : SI<0x10, MRMSrcMem,
                (outs RC:$dst), (ins x86memop:$src),
                !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
                [(set RC:$dst, (mem_pat addr:$src))],
                IIC_SSE_MOV_S_RM>,
             EVEX, VEX_LIG;

    let mayStore = 1 in {
      def mr : SI<0x11, MRMDestMem,
                  (outs), (ins x86memop:$dst, RC:$src),
                  !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
                  [(store RC:$src, addr:$dst)],
                  IIC_SSE_MOV_S_MR>,
               EVEX, VEX_LIG;

      def mrk : SI<0x11, MRMDestMem,
                   (outs), (ins x86memop:$dst, VK1WM:$mask, RC:$src),
                   !strconcat(asm,
                     " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
                   [], IIC_SSE_MOV_S_MR>,
                EVEX, VEX_LIG, EVEX_K;
    } // mayStore = 1
  } // hasSideEffects = 0
}
2103
// Scalar single-precision move, instantiated over FR32X / v4f32 / f32mem.
let ExeDomain = SSEPackedSingle in
defm VMOVSSZ : avx512_move_scalar<"movss", FR32X, X86Movss, v4f32,
                                  f32mem, loadf32>,
               XS, EVEX_CD8<32, CD8VT1>;

// Scalar double-precision move, instantiated over FR64X / v2f64 / f64mem.
let ExeDomain = SSEPackedDouble in
defm VMOVSDZ : avx512_move_scalar<"movsd", FR64X, X86Movsd, v2f64,
                                  f64mem, loadf64>,
               XD, VEX_W, EVEX_CD8<64, CD8VT1>;
2111
// Scalar X86select is selected to the masked scalar move: $src2 is
// re-classed to VR128X as the tied operand, the middle source is left
// IMPLICIT_DEF, $src1 is the last source, and the result is re-classed back
// to the scalar register class.
def : Pat<(f32 (X86select VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS
             (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
                         VK1WM:$mask,
                         (f32 (IMPLICIT_DEF)),
                         FR32X:$src1),
             FR32X)>;

def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS
             (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
                         VK1WM:$mask,
                         (f64 (IMPLICIT_DEF)),
                         FR64X:$src1),
             FR64X)>;

// Masked scalar single-precision store intrinsic: the GR8 mask is re-classed
// to VK1WM and the XMM source to FR32X.
def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
          (VMOVSSZmrk addr:$dst,
                      (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)),
                      (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
2123
// For the disassembler
// Opcode 0x11 / MRMDestReg encodings of the register-register scalar moves.
// They are codegen-only, pattern-less and flagged ForceDisassemble.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
  def VMOVSSZrr_REV : SI<0x11, MRMDestReg,
                         (outs VR128X:$dst),
                         (ins VR128X:$src1, FR32X:$src2),
                         "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [], IIC_SSE_MOV_S_RR>,
                      XS, EVEX_4V, VEX_LIG;

  def VMOVSDZrr_REV : SI<0x11, MRMDestReg,
                         (outs VR128X:$dst),
                         (ins VR128X:$src1, FR64X:$src2),
                         "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [], IIC_SSE_MOV_S_RR>,
                      XD, EVEX_4V, VEX_LIG, VEX_W;
}
2137
2138let Predicates = [HasAVX512] in {
2139 let AddedComplexity = 15 in {
2140 // Move scalar to XMM zero-extended, zeroing a VR128X then do a
2141 // MOVS{S,D} to the lower bits.
2142 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))),
2143 (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)>;
2144 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
2145 (VMOVSSZrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
2146 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
2147 (VMOVSSZrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
2148 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
2149 (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)>;
2150
2151 // Move low f32 and clear high bits.
2152 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
2153 (SUBREG_TO_REG (i32 0),
2154 (VMOVSSZrr (v4f32 (V_SET0)),
2155 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
2156 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
2157 (SUBREG_TO_REG (i32 0),
2158 (VMOVSSZrr (v4i32 (V_SET0)),
2159 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
2160 }
2161
2162 let AddedComplexity = 20 in {
2163 // MOVSSrm zeros the high parts of the register; represent this
2164 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
2165 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
2166 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
2167 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
2168 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
2169 def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
2170 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
2171
2172 // MOVSDrm zeros the high parts of the register; represent this
2173 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
2174 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
2175 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2176 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
2177 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2178 def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
2179 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2180 def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
2181 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2182 def : Pat<(v2f64 (X86vzload addr:$src)),
2183 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2184
2185 // Represent the same patterns above but in the form they appear for
2186 // 256-bit types
2187 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
2188 (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
Elena Demikhovsky34586e72013-10-02 12:20:42 +00002189 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002190 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
2191 (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
2192 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
2193 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
2194 (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
2195 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
2196 }
2197 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
2198 (v4f32 (scalar_to_vector FR32X:$src)), (iPTR 0)))),
2199 (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSZrr (v4f32 (V_SET0)),
2200 FR32X:$src)), sub_xmm)>;
2201 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
2202 (v2f64 (scalar_to_vector FR64X:$src)), (iPTR 0)))),
2203 (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDZrr (v2f64 (V_SET0)),
2204 FR64X:$src)), sub_xmm)>;
2205 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
2206 (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
Elena Demikhovsky34586e72013-10-02 12:20:42 +00002207 (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002208
2209 // Move low f64 and clear high bits.
2210 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
2211 (SUBREG_TO_REG (i32 0),
2212 (VMOVSDZrr (v2f64 (V_SET0)),
2213 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
2214
2215 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
2216 (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (V_SET0)),
2217 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
2218
2219 // Extract and store.
2220 def : Pat<(store (f32 (vector_extract (v4f32 VR128X:$src), (iPTR 0))),
2221 addr:$dst),
2222 (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
2223 def : Pat<(store (f64 (vector_extract (v2f64 VR128X:$src), (iPTR 0))),
2224 addr:$dst),
2225 (VMOVSDZmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X))>;
2226
2227 // Shuffle with VMOVSS
2228 def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
2229 (VMOVSSZrr (v4i32 VR128X:$src1),
2230 (COPY_TO_REGCLASS (v4i32 VR128X:$src2), FR32X))>;
2231 def : Pat<(v4f32 (X86Movss VR128X:$src1, VR128X:$src2)),
2232 (VMOVSSZrr (v4f32 VR128X:$src1),
2233 (COPY_TO_REGCLASS (v4f32 VR128X:$src2), FR32X))>;
2234
2235 // 256-bit variants
2236 def : Pat<(v8i32 (X86Movss VR256X:$src1, VR256X:$src2)),
2237 (SUBREG_TO_REG (i32 0),
2238 (VMOVSSZrr (EXTRACT_SUBREG (v8i32 VR256X:$src1), sub_xmm),
2239 (EXTRACT_SUBREG (v8i32 VR256X:$src2), sub_xmm)),
2240 sub_xmm)>;
2241 def : Pat<(v8f32 (X86Movss VR256X:$src1, VR256X:$src2)),
2242 (SUBREG_TO_REG (i32 0),
2243 (VMOVSSZrr (EXTRACT_SUBREG (v8f32 VR256X:$src1), sub_xmm),
2244 (EXTRACT_SUBREG (v8f32 VR256X:$src2), sub_xmm)),
2245 sub_xmm)>;
2246
2247 // Shuffle with VMOVSD
2248 def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
2249 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2250 def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)),
2251 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2252 def : Pat<(v4f32 (X86Movsd VR128X:$src1, VR128X:$src2)),
2253 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2254 def : Pat<(v4i32 (X86Movsd VR128X:$src1, VR128X:$src2)),
2255 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2256
2257 // 256-bit variants
2258 def : Pat<(v4i64 (X86Movsd VR256X:$src1, VR256X:$src2)),
2259 (SUBREG_TO_REG (i32 0),
2260 (VMOVSDZrr (EXTRACT_SUBREG (v4i64 VR256X:$src1), sub_xmm),
2261 (EXTRACT_SUBREG (v4i64 VR256X:$src2), sub_xmm)),
2262 sub_xmm)>;
2263 def : Pat<(v4f64 (X86Movsd VR256X:$src1, VR256X:$src2)),
2264 (SUBREG_TO_REG (i32 0),
2265 (VMOVSDZrr (EXTRACT_SUBREG (v4f64 VR256X:$src1), sub_xmm),
2266 (EXTRACT_SUBREG (v4f64 VR256X:$src2), sub_xmm)),
2267 sub_xmm)>;
2268
2269 def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
2270 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2271 def : Pat<(v2i64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
2272 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2273 def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
2274 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2275 def : Pat<(v4i32 (X86Movlps VR128X:$src1, VR128X:$src2)),
2276 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2277}
2278
// Register form of the zero-extending low-quadword move; selects
// (v2i64 (X86vzmovl (v2i64 src))).
let AddedComplexity = 15 in
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg,
                                  (outs VR128X:$dst), (ins VR128X:$src),
                                  "vmovq\t{$src, $dst|$dst, $src}",
                                  [(set VR128X:$dst,
                                        (v2i64 (X86vzmovl
                                                  (v2i64 VR128X:$src))))],
                                  IIC_SSE_MOVQ_RR>,
                        EVEX, VEX_W;
2286
// Memory-source counterpart of VMOVZPQILo2PQIZrr: "vmovq" from an i128mem
// operand, selected for X86vzmovl of a loaded v2i64. The higher
// AddedComplexity (20 vs. 15) makes the load-folding form preferred.
let AddedComplexity = 20 in
def VMOVZPQILo2PQIZrm :
  AVX512XSI<0x7E, MRMSrcMem,
            (outs VR128X:$dst), (ins i128mem:$src),
            "vmovq\t{$src, $dst|$dst, $src}",
            [(set VR128X:$dst,
                  (v2i64 (X86vzmovl (loadv2i64 addr:$src))))],
            IIC_SSE_MOVDQ>,
  EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
2295
let Predicates = [HasAVX512] in {
  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part,
  // so they can be used directly for X86vzmovl / X86vzload.
  let AddedComplexity = 20 in {
    // Scalar sources (memory, GR64, GR32) moved into the low element.
    def : Pat<(v4i32 (X86vzmovl
                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
              (VMOV64toPQIZrr GR64:$src)>;
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
              (VMOVDI2PDIZrr GR32:$src)>;

    // Full-vector loads of which only the low element is kept.
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
              (VMOVZPQILo2PQIZrm addr:$src)>;

    // v2f64 register source reuses the integer vmovq form.
    def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
              (VMOVZPQILo2PQIZrr VR128X:$src)>;
    def : Pat<(v2i64 (X86vzload addr:$src)),
              (VMOVZPQILo2PQIZrm addr:$src)>;
  }

  // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
  def : Pat<(v8i32 (X86vzmovl
                      (insert_subvector undef,
                         (v4i32 (scalar_to_vector GR32:$src)), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl
                      (insert_subvector undef,
                         (v2i64 (scalar_to_vector GR64:$src)), (iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
}
2326
// Inserting a GPR into element 0 of an all-zeros 512-bit vector: move the
// scalar into an XMM register and widen it with SUBREG_TO_REG.
def : Pat<(v16i32 (X86Vinsert (v16i32 immAllZerosV), GR32:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;

def : Pat<(v8i64 (X86Vinsert (bc_v8i64 (v16i32 immAllZerosV)),
                             GR64:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;

// Same selection when the vector being inserted into is undef.
def : Pat<(v16i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;

def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
2338
2339//===----------------------------------------------------------------------===//
Adam Nemet7f62b232014-06-10 16:39:53 +00002340// AVX-512 - Non-temporals
2341//===----------------------------------------------------------------------===//
// Non-temporal aligned loads (vmovntdqa, opcode 0x2A, T8PD). Only the 512-bit
// form has a selection pattern (the int_x86_avx512_movntdqa intrinsic); the
// 256/128-bit forms are pattern-less and additionally require HasVLX.
let SchedRW = [WriteLoad] in {
  def VMOVNTDQAZrm :
    AVX512PI<0x2A, MRMSrcMem,
             (outs VR512:$dst), (ins i512mem:$src),
             "vmovntdqa\t{$src, $dst|$dst, $src}",
             [(set VR512:$dst, (int_x86_avx512_movntdqa addr:$src))],
             SSEPackedInt>,
    EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;

  let Predicates = [HasAVX512, HasVLX] in {
    def VMOVNTDQAZ256rm :
      AVX512PI<0x2A, MRMSrcMem,
               (outs VR256X:$dst), (ins i256mem:$src),
               "vmovntdqa\t{$src, $dst|$dst, $src}",
               [],
               SSEPackedInt>,
      EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

    def VMOVNTDQAZ128rm :
      AVX512PI<0x2A, MRMSrcMem,
               (outs VR128X:$dst), (ins i128mem:$src),
               "vmovntdqa\t{$src, $dst|$dst, $src}",
               [],
               SSEPackedInt>,
      EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
  }
}
2363
// One non-temporal store instruction ("mr" form) for a single vector width.
//   opc       - opcode byte
//   OpcodeStr - mnemonic
//   st_frag   - store fragment matched by the pattern
//   OpVT/RC   - value type and register class of the stored vector
//   memop     - memory operand kind of the destination
//   d, itin   - execution domain and itinerary (defaults to IIC_SSE_MOVNT)
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, PatFrag st_frag,
                        ValueType OpVT, RegisterClass RC,
                        X86MemOperand memop, Domain d,
                        InstrItinClass itin = IIC_SSE_MOVNT> {
  let SchedRW = [WriteStore], mayStore = 1, AddedComplexity = 400 in
  def mr :
    AVX512PI<opc, MRMDestMem,
             (outs), (ins memop:$dst, RC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(st_frag (OpVT RC:$src), addr:$dst)],
             d, itin>,
    EVEX;
}
2373
// Instantiates avx512_movnt for the 512-, 256- and 128-bit vector lengths.
// The value type of each width is looked up by name as
// "v" # <element count> # elty # elsz (e.g. "v" "8" "i" "64" -> v8i64) and the
// memory operand as elty # "<width>mem" (e.g. i512mem / f512mem).
//   vsz512/vsz256/vsz128 - element counts for the three widths
//   prd                  - predicate for the 512-bit form; the narrower forms
//                          additionally require HasVLX
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr, PatFrag st_frag,
                           string elty, string elsz, string vsz512,
                           string vsz256, string vsz128, Domain d,
                           Predicate prd,
                           InstrItinClass itin = IIC_SSE_MOVNT> {
  let Predicates = [prd] in
  defm Z :
    avx512_movnt<opc, OpcodeStr, st_frag,
                 !cast<ValueType>("v"##vsz512##elty##elsz),
                 VR512,
                 !cast<X86MemOperand>(elty##"512mem"),
                 d, itin>,
    EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 :
      avx512_movnt<opc, OpcodeStr, st_frag,
                   !cast<ValueType>("v"##vsz256##elty##elsz),
                   VR256X,
                   !cast<X86MemOperand>(elty##"256mem"),
                   d, itin>,
      EVEX_V256;

    defm Z128 :
      avx512_movnt<opc, OpcodeStr, st_frag,
                   !cast<ValueType>("v"##vsz128##elty##elsz),
                   VR128X,
                   !cast<X86MemOperand>(elty##"128mem"),
                   d, itin>,
      EVEX_V128;
  }
}
2396
// Non-temporal stores, all matched through alignednontemporalstore.
// Integer quadwords: v8i64 / v4i64 / v2i64.
defm VMOVNTDQ :
  avx512_movnt_vl<0xE7, "vmovntdq", alignednontemporalstore,
                  "i", "64", "8", "4", "2", SSEPackedInt, HasAVX512>,
  PD, EVEX_CD8<64, CD8VF>;

// Packed double: v8f64 / v4f64 / v2f64.
defm VMOVNTPD :
  avx512_movnt_vl<0x2B, "vmovntpd", alignednontemporalstore,
                  "f", "64", "8", "4", "2", SSEPackedDouble, HasAVX512>,
  PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Packed single: v16f32 / v8f32 / v4f32.
defm VMOVNTPS :
  avx512_movnt_vl<0x2B, "vmovntps", alignednontemporalstore,
                  "f", "32", "16", "8", "4", SSEPackedSingle, HasAVX512>,
  PS, EVEX_CD8<32, CD8VF>;
2408
Adam Nemet7f62b232014-06-10 16:39:53 +00002409//===----------------------------------------------------------------------===//
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002410// AVX-512 - Integer arithmetic
2411//
// Integer binary operation with selection patterns for every operand form:
//   rr / rm / rmb          - unmasked register, full-memory and broadcast
//   rrk / rmk / rmbk       - merge-masked ($src0 is tied to $dst and provides
//                            the lanes not selected by $mask)
//   rrkz / rmkz / rmbkz    - zero-masked (unselected lanes are immAllZerosV)
// Parameters:
//   OpNode, OpVT           - DAG node and vector type being matched
//   KRC, RC                - mask and vector register classes
//   memop_frag, x86memop   - load fragment / operand for the full-vector form
//   scalar_mfrag,
//   x86scalar_mop          - load fragment / operand for the broadcast form
//   BrdcstStr              - broadcast decoration appended to the memory
//                            operand in the asm string (e.g. "{1to16}")
//   IsCommutable           - applied to the rr form only
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           ValueType OpVT, RegisterClass KRC,
                           RegisterClass RC, PatFrag memop_frag,
                           X86MemOperand x86memop, PatFrag scalar_mfrag,
                           X86MemOperand x86scalar_mop, string BrdcstStr,
                           OpndItins itins, bit IsCommutable = 0> {
  //--- Register sources ---------------------------------------------------
  let isCommutable = IsCommutable in
  def rr :
    AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
             (ins RC:$src1, RC:$src2),
             !strconcat(OpcodeStr,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst,
                   (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
             itins.rr>,
    EVEX_4V;

  let AddedComplexity = 30, Constraints = "$src0 = $dst" in
  def rrk :
    AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
             (ins RC:$src0, KRC:$mask, RC:$src1, RC:$src2),
             !strconcat(OpcodeStr,
               " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
             [(set RC:$dst,
                   (OpVT (vselect KRC:$mask,
                            (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
                            RC:$src0)))],
             itins.rr>,
    EVEX_4V, EVEX_K;

  let AddedComplexity = 30 in
  def rrkz :
    AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
             (ins KRC:$mask, RC:$src1, RC:$src2),
             !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
                        "|$dst {${mask}} {z}, $src1, $src2}"),
             [(set RC:$dst,
                   (OpVT (vselect KRC:$mask,
                            (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
                            (OpVT immAllZerosV))))],
             itins.rr>,
    EVEX_4V, EVEX_KZ;

  //--- Memory sources -----------------------------------------------------
  let mayLoad = 1 in {
    // Full-vector memory operand.
    def rm :
      AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86memop:$src2),
               !strconcat(OpcodeStr,
                          " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set RC:$dst,
                     (OpVT (OpNode (OpVT RC:$src1),
                                   (memop_frag addr:$src2))))],
               itins.rm>,
      EVEX_4V;

    let AddedComplexity = 30, Constraints = "$src0 = $dst" in
    def rmk :
      AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src0, KRC:$mask, RC:$src1, x86memop:$src2),
               !strconcat(OpcodeStr,
                 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
               [(set RC:$dst,
                     (OpVT (vselect KRC:$mask,
                              (OpNode (OpVT RC:$src1),
                                      (memop_frag addr:$src2)),
                              RC:$src0)))],
               itins.rm>,
      EVEX_4V, EVEX_K;

    let AddedComplexity = 30 in
    def rmkz :
      AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
               (ins KRC:$mask, RC:$src1, x86memop:$src2),
               !strconcat(OpcodeStr,
                 " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
               [(set RC:$dst,
                     (OpVT (vselect KRC:$mask,
                              (OpNode (OpVT RC:$src1),
                                      (memop_frag addr:$src2)),
                              (OpVT immAllZerosV))))],
               itins.rm>,
      EVEX_4V, EVEX_KZ;

    // Scalar memory operand broadcast to all lanes (EVEX_B).
    def rmb :
      AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86scalar_mop:$src2),
               !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                          ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
               [(set RC:$dst,
                     (OpNode RC:$src1,
                             (OpVT (X86VBroadcast
                                      (scalar_mfrag addr:$src2)))))],
               itins.rm>,
      EVEX_4V, EVEX_B;

    let AddedComplexity = 30, Constraints = "$src0 = $dst" in
    def rmbk :
      AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src0, KRC:$mask, RC:$src1, x86scalar_mop:$src2),
               !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                          ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
                          BrdcstStr, "}"),
               [(set RC:$dst,
                     (OpVT (vselect KRC:$mask,
                              (OpNode (OpVT RC:$src1),
                                      (OpVT (X86VBroadcast
                                               (scalar_mfrag addr:$src2)))),
                              RC:$src0)))],
               itins.rm>,
      EVEX_4V, EVEX_B, EVEX_K;

    let AddedComplexity = 30 in
    def rmbkz :
      AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
               (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
               !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                          ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
                          BrdcstStr, "}"),
               [(set RC:$dst,
                     (OpVT (vselect KRC:$mask,
                              (OpNode (OpVT RC:$src1),
                                      (OpVT (X86VBroadcast
                                               (scalar_mfrag addr:$src2)))),
                              (OpVT immAllZerosV))))],
               itins.rm>,
      EVEX_4V, EVEX_B, EVEX_KZ;
  }
}
Elena Demikhovskybb2f6b72014-03-27 09:45:08 +00002501
// Pattern-less variant of avx512_binop_rm for operations whose result type
// differs from the source type (DstVT vs. SrcVT). No selection patterns are
// attached here, so DstVT, SrcVT, memop_frag and scalar_mfrag are currently
// unused; users add explicit Pat<> records for the forms they need.
//
// Forms: rr/rrk/rrkz (register), rm/rmk/rmkz (full-vector memory) and
// rmb/rmbk/rmbkz (broadcast memory). Unlike avx512_binop_rm, the merge-masked
// forms here take no tied $src0 operand.
//
// All nine forms carry the itinerary from `itins` (itins.rr for register
// sources, itins.rm for memory sources). Previously rr and rm omitted it and
// silently fell back to the instruction class's default itinerary.
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, ValueType DstVT,
                            ValueType SrcVT, RegisterClass KRC, RegisterClass RC,
                            PatFrag memop_frag, X86MemOperand x86memop,
                            PatFrag scalar_mfrag, X86MemOperand x86scalar_mop,
                            string BrdcstStr, OpndItins itins, bit IsCommutable = 0> {
  let isCommutable = IsCommutable in
  {
    def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [], itins.rr>, EVEX_4V;
    def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
               (ins KRC:$mask, RC:$src1, RC:$src2),
               !strconcat(OpcodeStr,
                  " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
               [], itins.rr>, EVEX_4V, EVEX_K;
    def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                (ins KRC:$mask, RC:$src1, RC:$src2),
                !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
                    "|$dst {${mask}} {z}, $src1, $src2}"),
                [], itins.rr>, EVEX_4V, EVEX_KZ;
  }
  let mayLoad = 1 in {
    def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
              (ins RC:$src1, x86memop:$src2),
              !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [], itins.rm>, EVEX_4V;
    def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
               (ins KRC:$mask, RC:$src1, x86memop:$src2),
               !strconcat(OpcodeStr,
                   " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
               [], itins.rm>, EVEX_4V, EVEX_K;
    def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                (ins KRC:$mask, RC:$src1, x86memop:$src2),
                !strconcat(OpcodeStr,
                    " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
                [], itins.rm>, EVEX_4V, EVEX_KZ;
    // Broadcast forms: the scalar memory operand is printed with BrdcstStr.
    def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86scalar_mop:$src2),
               !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                          ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
               [], itins.rm>, EVEX_4V, EVEX_B;
    def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
                !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                           ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
                           BrdcstStr, "}"),
                [], itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
    def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
                 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                            ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
                            BrdcstStr, "}"),
                 [], itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
  }
}
2558
// 512-bit packed integer add / subtract / multiply.
// Doubleword forms use v16i32, VK16WM and a "{1to16}" broadcast; quadword
// forms use v8i64, VK8WM, "{1to8}" and VEX_W.
defm VPADDDZ : avx512_binop_rm<0xFE, "vpaddd", add, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPADDQZ : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTALU_ITINS_P, 1>, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W;

defm VPSUBQZ : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTALU_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Widening multiplies (v16i32 sources, v8i64 result). These come from the
// pattern-less avx512_binop_rm2; selection patterns are supplied separately.
// Both the signed and the unsigned form use the integer-multiply itinerary
// (VPMULDQZ previously used the plain ALU itinerary, unlike VPMULUDQZ).
defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTMUL_ITINS_P, 1>, T8PD, EVEX_V512,
                   EVEX_CD8<64, CD8VF>, VEX_W;

defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTMUL_ITINS_P, 1>, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002587
// Selection patterns for the pattern-less widening multiplies.
// X86pmuludq node -> VPMULUDQZrr.
def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
          (VPMULUDQZrr VR512:$src1, VR512:$src2)>;

// Masked intrinsics with an all-zeros pass-through and an all-ones mask
// (i8 -1) select the unmasked rr instructions.
def : Pat<(v8i64 (int_x86_avx512_mask_pmulu_dq_512
                    (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                    (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512
                    (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                    (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
          (VPMULDQZrr VR512:$src1, VR512:$src2)>;
2597
// 512-bit packed integer min/max, signed and unsigned, doubleword and
// quadword. Integer min and max are commutative, so every variant passes
// IsCommutable = 1 (the quadword variants previously passed 0, unlike their
// doubleword counterparts).
defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VK16WM, VR512,
                   memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VK8WM, VR512,
                   memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                   SSE_INTALU_ITINS_P, 1>,
                   T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovsky199c8232013-10-27 08:18:37 +00002633
// Masked min/max intrinsics called with an all-zeros pass-through and an
// all-ones mask ((i16 -1) for 16 lanes, (i8 -1) for 8 lanes) select the
// unmasked rr instruction.

// Signed / unsigned max.
def : Pat <(v16i32 (int_x86_avx512_mask_pmaxs_d_512
                      (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                      (v16i32 immAllZerosV), (i16 -1))),
           (VPMAXSDZrr VR512:$src1, VR512:$src2)>;
def : Pat <(v16i32 (int_x86_avx512_mask_pmaxu_d_512
                      (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                      (v16i32 immAllZerosV), (i16 -1))),
           (VPMAXUDZrr VR512:$src1, VR512:$src2)>;
def : Pat <(v8i64 (int_x86_avx512_mask_pmaxs_q_512
                     (v8i64 VR512:$src1), (v8i64 VR512:$src2),
                     (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
           (VPMAXSQZrr VR512:$src1, VR512:$src2)>;
def : Pat <(v8i64 (int_x86_avx512_mask_pmaxu_q_512
                     (v8i64 VR512:$src1), (v8i64 VR512:$src2),
                     (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
           (VPMAXUQZrr VR512:$src1, VR512:$src2)>;

// Signed / unsigned min.
def : Pat <(v16i32 (int_x86_avx512_mask_pmins_d_512
                      (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                      (v16i32 immAllZerosV), (i16 -1))),
           (VPMINSDZrr VR512:$src1, VR512:$src2)>;
def : Pat <(v16i32 (int_x86_avx512_mask_pminu_d_512
                      (v16i32 VR512:$src1), (v16i32 VR512:$src2),
                      (v16i32 immAllZerosV), (i16 -1))),
           (VPMINUDZrr VR512:$src1, VR512:$src2)>;
def : Pat <(v8i64 (int_x86_avx512_mask_pmins_q_512
                     (v8i64 VR512:$src1), (v8i64 VR512:$src2),
                     (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
           (VPMINSQZrr VR512:$src1, VR512:$src2)>;
def : Pat <(v8i64 (int_x86_avx512_mask_pminu_q_512
                     (v8i64 VR512:$src1), (v8i64 VR512:$src2),
                     (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
           (VPMINUQZrr VR512:$src1, VR512:$src2)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002658//===----------------------------------------------------------------------===//
2659// AVX-512 - Unpack Instructions
2660//===----------------------------------------------------------------------===//
2661
// Floating-point unpack: register-register (rr) and register-memory (rm)
// forms. The memory operand is loaded through mem_frag and bitconverted, so
// mem_frag's own vector type need not equal `vt`. `asm` is the complete
// assembly string, operands included.
multiclass avx512_unpack_fp<bits<8> opc, SDNode OpNode, ValueType vt,
                            PatFrag mem_frag, RegisterClass RC,
                            X86MemOperand x86memop, string asm,
                            Domain d> {
  def rr :
    AVX512PI<opc, MRMSrcReg,
             (outs RC:$dst), (ins RC:$src1, RC:$src2),
             asm,
             [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))],
             d>,
    EVEX_4V;

  def rm :
    AVX512PI<opc, MRMSrcMem,
             (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
             asm,
             [(set RC:$dst,
                   (vt (OpNode RC:$src1,
                               (bitconvert (mem_frag addr:$src2)))))],
             d>,
    EVEX_4V;
}
2678
// 512-bit FP unpack high (0x15) / low (0x14). All four pass memopv8f64 as the
// load fragment; avx512_unpack_fp bitconverts the loaded value.
defm VUNPCKHPSZ :
  avx512_unpack_fp<0x15, X86Unpckh, v16f32, memopv8f64, VR512, f512mem,
                   "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   SSEPackedSingle>,
  PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VUNPCKHPDZ :
  avx512_unpack_fp<0x15, X86Unpckh, v8f64, memopv8f64, VR512, f512mem,
                   "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   SSEPackedDouble>,
  PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VUNPCKLPSZ :
  avx512_unpack_fp<0x14, X86Unpckl, v16f32, memopv8f64, VR512, f512mem,
                   "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   SSEPackedSingle>,
  PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VUNPCKLPDZ :
  avx512_unpack_fp<0x14, X86Unpckl, v8f64, memopv8f64, VR512, f512mem,
                   "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   SSEPackedDouble>,
  PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002691
// Integer unpack: register-register (rr) and register-memory (rm) forms, both
// with the IIC_SSE_UNPCK itinerary. The asm string is built from OpcodeStr;
// the memory operand is loaded via memop_frag and bitconverted.
multiclass avx512_unpack_int<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             ValueType OpVT, RegisterClass RC,
                             PatFrag memop_frag, X86MemOperand x86memop> {
  def rr :
    AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
             (ins RC:$src1, RC:$src2),
             !strconcat(OpcodeStr,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst,
                   (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
             IIC_SSE_UNPCK>,
    EVEX_4V;

  def rm :
    AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
             (ins RC:$src1, x86memop:$src2),
             !strconcat(OpcodeStr,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst,
                   (OpVT (OpNode (OpVT RC:$src1),
                                 (bitconvert (memop_frag addr:$src2)))))],
             IIC_SSE_UNPCK>,
    EVEX_4V;
}
// 512-bit integer unpack low/high for doublewords (v16i32) and quadwords
// (v8i64, VEX_W).
defm VPUNPCKLDQZ :
  avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32,
                    VR512, memopv16i32, i512mem>,
  EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPUNPCKLQDQZ :
  avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64,
                    VR512, memopv8i64, i512mem>,
  EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPUNPCKHDQZ :
  avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32,
                    VR512, memopv16i32, i512mem>,
  EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPUNPCKHQDQZ :
  avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64,
                    VR512, memopv8i64, i512mem>,
  EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2719//===----------------------------------------------------------------------===//
2720// AVX-512 - PSHUFD
2721//
2722
// Shuffle controlled by an 8-bit immediate.
//   ri - vector source in a register
//   mi - vector source loaded from memory through mem_frag
// In both forms $src2 is the i8 immediate passed to OpNode.
multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                            SDNode OpNode, PatFrag mem_frag,
                            X86MemOperand x86memop, ValueType OpVT> {
  def ri :
    AVX512Ii8<opc, MRMSrcReg, (outs RC:$dst),
              (ins RC:$src1, i8imm:$src2),
              !strconcat(OpcodeStr,
                         " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [(set RC:$dst,
                    (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
    EVEX;

  def mi :
    AVX512Ii8<opc, MRMSrcMem, (outs RC:$dst),
              (ins x86memop:$src1, i8imm:$src2),
              !strconcat(OpcodeStr,
                         " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [(set RC:$dst,
                    (OpVT (OpNode (mem_frag addr:$src1),
                                  (i8 imm:$src2))))]>,
    EVEX;
}
2741
// Immediate-controlled 512-bit shuffles.
defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32,
                      i512mem, v16i32>, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;

let ExeDomain = SSEPackedSingle in
defm VPERMILPSZ : avx512_pshuf_imm<0x04, "vpermilps", VR512, X86VPermilp,
                      memopv16f32, i512mem, v16f32>, TAPD, EVEX_V512,
                      EVEX_CD8<32, CD8VF>;
// The packed-double form operates on 64-bit elements (VEX_W, v8f64), so its
// compressed-displacement element size is 64, matching the other VEX_W
// definitions in this file (it was previously declared as 32).
let ExeDomain = SSEPackedDouble in
defm VPERMILPDZ : avx512_pshuf_imm<0x05, "vpermilpd", VR512, X86VPermilp,
                      memopv8f64, i512mem, v8f64>, TAPD, EVEX_V512,
                      VEX_W, EVEX_CD8<64, CD8VF>;
2753
// Integer-typed X86VPermilp nodes reuse the FP permute instructions of the
// same element width: v16i32 -> vpermilps, v8i64 -> vpermilpd.
def : Pat<(v16i32 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
          (VPERMILPSZri VR512:$src1, imm:$imm)>;
def : Pat<(v8i64 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
          (VPERMILPDZri VR512:$src1, imm:$imm)>;
2758
2759//===----------------------------------------------------------------------===//
2760// AVX-512 Logical Instructions
2761//===----------------------------------------------------------------------===//
2762
// 512-bit bitwise logic, in doubleword (v16i32, "{1to16}") and quadword
// (v8i64, "{1to8}", VEX_W) flavours. AND / OR / XOR are commutable; ANDN
// (X86andnp) is not.
defm VPANDDZ :
  avx512_binop_rm<0xDB, "vpandd", and, v16i32, VK16WM, VR512,
                  memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                  SSE_BIT_ITINS_P, 1>,
  EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPANDQZ :
  avx512_binop_rm<0xDB, "vpandq", and, v8i64, VK8WM, VR512,
                  memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                  SSE_BIT_ITINS_P, 1>,
  EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPORDZ :
  avx512_binop_rm<0xEB, "vpord", or, v16i32, VK16WM, VR512,
                  memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                  SSE_BIT_ITINS_P, 1>,
  EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPORQZ :
  avx512_binop_rm<0xEB, "vporq", or, v8i64, VK8WM, VR512,
                  memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                  SSE_BIT_ITINS_P, 1>,
  EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPXORDZ :
  avx512_binop_rm<0xEF, "vpxord", xor, v16i32, VK16WM, VR512,
                  memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                  SSE_BIT_ITINS_P, 1>,
  EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPXORQZ :
  avx512_binop_rm<0xEF, "vpxorq", xor, v8i64, VK8WM, VR512,
                  memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                  SSE_BIT_ITINS_P, 1>,
  EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPANDNDZ :
  avx512_binop_rm<0xDF, "vpandnd", X86andnp, v16i32, VK16WM, VR512,
                  memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
                  SSE_BIT_ITINS_P, 0>,
  EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPANDNQZ :
  avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VK8WM, VR512,
                  memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
                  SSE_BIT_ITINS_P, 0>,
  EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002787
2788//===----------------------------------------------------------------------===//
2789// AVX-512 FP arithmetic
2790//===----------------------------------------------------------------------===//
2791
// Scalar FP binary operation: instantiates sse12_fp_scalar once for single
// precision (SSZ: "ss" suffix, FR32X, f32mem, XS) and once for double
// precision (SDZ: "sd" suffix, FR64X, f64mem, XD, VEX_W).
// NOTE(review): the trailing 0 passed to sse12_fp_scalar is presumably its
// two-address flag - confirm against the sse12_fp_scalar definition.
multiclass avx512_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          SizeItins itins> {
  defm SSZ :
    sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), OpNode, FR32X,
                    f32mem, itins.s, 0>,
    XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;

  defm SDZ :
    sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), OpNode, FR64X,
                    f64mem, itins.d, 0>,
    XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
2801
// Scalar FP arithmetic. Only add and mul are commutable.
let isCommutable = 1 in {
defm VADD : avx512_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>;
defm VMUL : avx512_binop_s<0x59, "mul", fmul, SSE_ALU_ITINS_S>;
}
// sub and div are not commutative. min and max (X86fmin / X86fmax) are not
// either: the x86 min/max instructions return the second source operand when
// an input is NaN or when both inputs are zeros, so swapping the operands can
// change the result. They were previously marked commutable.
let isCommutable = 0 in {
defm VMIN : avx512_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>;
defm VMAX : avx512_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>;
defm VSUB : avx512_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>;
defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>;
}
2812
// Packed FP binary operation.
//   rr / rm / rmb        - unmasked register, full-memory and broadcast forms,
//                          each with a selection pattern
//   rrk / rmk / rmbk     - masked forms (EVEX_K), no pattern
//   rrkz / rmkz / rmbkz  - zero-masked forms (EVEX_KZ), no pattern
// Parameters:
//   KRC, RC, vt          - mask register class, vector register class, type
//   x86memop, mem_frag   - operand / load fragment for the full-vector form
//   x86scalar_mop,
//   scalar_mfrag         - operand / load fragment for the broadcast form
//   BrdcstStr            - broadcast decoration in the asm string
//   d, itins             - execution domain and itineraries
//   commutable           - applied to the three register forms
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass KRC,
                           RegisterClass RC, ValueType vt,
                           X86MemOperand x86memop, PatFrag mem_frag,
                           X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
                           string BrdcstStr,
                           Domain d, OpndItins itins, bit commutable> {
  let isCommutable = commutable in {
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
       EVEX_4V;

    // The AT&T half of this string previously ended in a stray space before
    // '|', which printed trailing whitespace; it now matches the other forms.
    def rrk: PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
       !strconcat(OpcodeStr,
           " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
       [], itins.rr, d>, EVEX_4V, EVEX_K;

    def rrkz: PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
       !strconcat(OpcodeStr,
           " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
       [], itins.rr, d>, EVEX_4V, EVEX_KZ;
  }

  let mayLoad = 1 in {
    def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
       itins.rm, d>, EVEX_4V;

    // Scalar memory operand broadcast to all lanes (EVEX_B).
    def rmb : PI<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86scalar_mop:$src2),
       !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
           ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
       [(set RC:$dst, (OpNode RC:$src1,
                       (vt (X86VBroadcast (scalar_mfrag addr:$src2)))))],
       itins.rm, d>, EVEX_4V, EVEX_B;

    def rmk : PI<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
           "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
       [], itins.rm, d>, EVEX_4V, EVEX_K;

    def rmkz : PI<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
           "\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
       [], itins.rm, d>, EVEX_4V, EVEX_KZ;

    def rmbk : PI<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
           " \t{${src2}", BrdcstStr,
           ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}", BrdcstStr, "}"),
       [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_K;

    def rmbkz : PI<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
           " \t{${src2}", BrdcstStr,
           ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
           BrdcstStr, "}"),
       [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_KZ;
  }
}
2875
// 512-bit packed floating-point arithmetic. Each entry instantiates
// avx512_fp_packed; the last template argument is that multiclass's
// `commutable` bit (1 for add/mul/min/max, 0 for sub/div).

// Addition (opcode 0x58).
defm VADDPSZ : avx512_fp_packed<0x58, "addps", fadd, VK16WM, VR512, v16f32,
                                f512mem, memopv16f32, f32mem, loadf32,
                                "{1to16}", SSEPackedSingle,
                                SSE_ALU_ITINS_P.s, 1>,
               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

defm VADDPDZ : avx512_fp_packed<0x58, "addpd", fadd, VK8WM, VR512, v8f64,
                                f512mem, memopv8f64, f64mem, loadf64,
                                "{1to8}", SSEPackedDouble,
                                SSE_ALU_ITINS_P.d, 1>,
               EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Multiplication (opcode 0x59).
defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VK16WM, VR512, v16f32,
                                f512mem, memopv16f32, f32mem, loadf32,
                                "{1to16}", SSEPackedSingle,
                                SSE_ALU_ITINS_P.s, 1>,
               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm VMULPDZ : avx512_fp_packed<0x59, "mulpd", fmul, VK8WM, VR512, v8f64,
                                f512mem, memopv8f64, f64mem, loadf64,
                                "{1to8}", SSEPackedDouble,
                                SSE_ALU_ITINS_P.d, 1>,
               EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Single-precision minimum (0x5D) and maximum (0x5F).
defm VMINPSZ : avx512_fp_packed<0x5D, "minps", X86fmin, VK16WM, VR512, v16f32,
                                f512mem, memopv16f32, f32mem, loadf32,
                                "{1to16}", SSEPackedSingle,
                                SSE_ALU_ITINS_P.s, 1>,
               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm VMAXPSZ : avx512_fp_packed<0x5F, "maxps", X86fmax, VK16WM, VR512, v16f32,
                                f512mem, memopv16f32, f32mem, loadf32,
                                "{1to16}", SSEPackedSingle,
                                SSE_ALU_ITINS_P.s, 1>,
               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

// Double-precision minimum (0x5D) and maximum (0x5F).
defm VMINPDZ : avx512_fp_packed<0x5D, "minpd", X86fmin, VK8WM, VR512, v8f64,
                                f512mem, memopv8f64, f64mem, loadf64,
                                "{1to8}", SSEPackedDouble,
                                SSE_ALU_ITINS_P.d, 1>,
               EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VMAXPDZ : avx512_fp_packed<0x5F, "maxpd", X86fmax, VK8WM, VR512, v8f64,
                                f512mem, memopv8f64, f64mem, loadf64,
                                "{1to8}", SSEPackedDouble,
                                SSE_ALU_ITINS_P.d, 1>,
               EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Single-precision subtraction (0x5C) and division (0x5E); not commutable.
defm VSUBPSZ : avx512_fp_packed<0x5C, "subps", fsub, VK16WM, VR512, v16f32,
                                f512mem, memopv16f32, f32mem, loadf32,
                                "{1to16}", SSEPackedSingle,
                                SSE_ALU_ITINS_P.s, 0>,
               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm VDIVPSZ : avx512_fp_packed<0x5E, "divps", fdiv, VK16WM, VR512, v16f32,
                                f512mem, memopv16f32, f32mem, loadf32,
                                "{1to16}", SSEPackedSingle,
                                SSE_ALU_ITINS_P.s, 0>,
               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

// Double-precision subtraction (0x5C) and division (0x5E); not commutable.
defm VSUBPDZ : avx512_fp_packed<0x5C, "subpd", fsub, VK8WM, VR512, v8f64,
                                f512mem, memopv8f64, f64mem, loadf64,
                                "{1to8}", SSEPackedDouble,
                                SSE_ALU_ITINS_P.d, 0>,
               EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VK8WM, VR512, v8f64,
                                f512mem, memopv8f64, f64mem, loadf64,
                                "{1to8}", SSEPackedDouble,
                                SSE_ALU_ITINS_P.d, 0>,
               EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;

Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002926
// Map the 512-bit masked max/min intrinsics onto the plain register-register
// instructions. Each pattern only matches when the third operand is an
// all-zeros vector, the mask operand is -1 and the last operand is
// FROUND_CURRENT.
def : Pat<(v16f32 (int_x86_avx512_mask_max_ps_512
                      (v16f32 VR512:$src1), (v16f32 VR512:$src2),
                      (bc_v16f32 (v16i32 immAllZerosV)),
                      (i16 -1), FROUND_CURRENT)),
          (VMAXPSZrr VR512:$src1, VR512:$src2)>;

def : Pat<(v8f64 (int_x86_avx512_mask_max_pd_512
                     (v8f64 VR512:$src1), (v8f64 VR512:$src2),
                     (bc_v8f64 (v16i32 immAllZerosV)),
                     (i8 -1), FROUND_CURRENT)),
          (VMAXPDZrr VR512:$src1, VR512:$src2)>;

def : Pat<(v16f32 (int_x86_avx512_mask_min_ps_512
                      (v16f32 VR512:$src1), (v16f32 VR512:$src2),
                      (bc_v16f32 (v16i32 immAllZerosV)),
                      (i16 -1), FROUND_CURRENT)),
          (VMINPSZrr VR512:$src1, VR512:$src2)>;

def : Pat<(v8f64 (int_x86_avx512_mask_min_pd_512
                     (v8f64 VR512:$src1), (v8f64 VR512:$src2),
                     (bc_v8f64 (v16i32 immAllZerosV)),
                     (i8 -1), FROUND_CURRENT)),
          (VMINPDZrr VR512:$src1, VR512:$src2)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002946//===----------------------------------------------------------------------===//
2947// AVX-512 VPTESTM instructions
2948//===----------------------------------------------------------------------===//
2949
// Register-register (rr) and register-memory (rm) forms of a vector test
// instruction. Both forms write their result to a register of class KRC and
// select OpNode applied to two operands of type `vt`; the memory operand is
// loaded through memop_frag and bitconverted.
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                         RegisterClass RC, X86MemOperand x86memop,
                         PatFrag memop_frag, SDNode OpNode, ValueType vt> {
  def rr : AVX512PI<opc, MRMSrcReg,
                    (outs KRC:$dst),
                    (ins RC:$src1, RC:$src2),
                    !strconcat(OpcodeStr,
                               " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))],
                    SSEPackedInt>,
           EVEX_4V;

  def rm : AVX512PI<opc, MRMSrcMem,
                    (outs KRC:$dst),
                    (ins RC:$src1, x86memop:$src2),
                    !strconcat(OpcodeStr,
                               " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set KRC:$dst,
                          (OpNode (vt RC:$src1),
                                  (bitconvert (memop_frag addr:$src2))))],
                    SSEPackedInt>,
           EVEX_4V;
}
2964
// VPTESTM{D,Q}: 512-bit test instructions (opcode 0x27, T8PD) that write a
// VK16 / VK8 mask register and select X86testm.
defm VPTESTMDZ  : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem,
                                memopv16i32, X86testm, v16i32>,
                  T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPTESTMQZ  : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem,
                                memopv8i64, X86testm, v8i64>,
                  T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// VPTESTNM{D,Q}: same opcode with the T8XS prefix, selecting X86testnm.
// The Intel SDM lists the 512-bit doubleword/quadword forms under AVX512F,
// not AVX512CD, so they are gated on HasAVX512 rather than HasCDI.
let Predicates = [HasAVX512] in {
defm VPTESTNMDZ : avx512_vptest<0x27, "vptestnmd", VK16, VR512, f512mem,
                                memopv16i32, X86testnm, v16i32>,
                  T8XS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPTESTNMQZ : avx512_vptest<0x27, "vptestnmq", VK8, VR512, f512mem,
                                memopv8i64, X86testnm, v8i64>,
                  T8XS, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
2980
// Select VPTESTM for the masked ptestm intrinsics when the mask operand is -1.
// The instruction result is copied into a general-purpose register class
// (GR16 for the 16-element form, GR8 for the 8-element form) to produce the
// integer value the intrinsic returns.
def : Pat<(i16 (int_x86_avx512_mask_ptestm_d_512 (v16i32 VR512:$src1),
                                                 (v16i32 VR512:$src2),
                                                 (i16 -1))),
          (COPY_TO_REGCLASS (VPTESTMDZrr VR512:$src1, VR512:$src2), GR16)>;

def : Pat<(i8 (int_x86_avx512_mask_ptestm_q_512 (v8i64 VR512:$src1),
                                                (v8i64 VR512:$src2),
                                                (i8 -1))),
          (COPY_TO_REGCLASS (VPTESTMQZrr VR512:$src1, VR512:$src2), GR8)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00002988//===----------------------------------------------------------------------===//
2989// AVX-512 Shift instructions
2990//===----------------------------------------------------------------------===//
// Shift-by-immediate instructions.
//   ri  / rik : register source, unmasked / with a KRC:$mask operand (EVEX_K).
//   mi  / mik : memory source,   unmasked / with a KRC:$mask operand (EVEX_K).
// ImmFormR and ImmFormM are the encoding formats used for the register and
// memory forms respectively. The masked forms carry no selection pattern.
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                            string OpcodeStr, SDNode OpNode, RegisterClass RC,
                            ValueType vt, X86MemOperand x86memop,
                            PatFrag mem_frag, RegisterClass KRC> {
  def ri : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
                      (ins RC:$src1, i8imm:$src2),
                      !strconcat(OpcodeStr,
                                 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [(set RC:$dst, (vt (OpNode RC:$src1, (i8 imm:$src2))))],
                      SSE_INTSHIFT_ITINS_P.rr>,
           EVEX_4V;

  def rik : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
                       (ins KRC:$mask, RC:$src1, i8imm:$src2),
                       !strconcat(OpcodeStr,
                         " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
                       [], SSE_INTSHIFT_ITINS_P.rr>,
            EVEX_4V, EVEX_K;

  // `mik` has an empty pattern, so TableGen cannot infer mayLoad for it from
  // a pattern; state it explicitly for both memory forms.
  let mayLoad = 1 in {
  def mi : AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
                      (ins x86memop:$src1, i8imm:$src2),
                      !strconcat(OpcodeStr,
                                 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [(set RC:$dst, (OpNode (mem_frag addr:$src1),
                                             (i8 imm:$src2)))],
                      SSE_INTSHIFT_ITINS_P.rm>,
           EVEX_4V;

  def mik : AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
                       (ins KRC:$mask, x86memop:$src1, i8imm:$src2),
                       !strconcat(OpcodeStr,
                         " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
                       [], SSE_INTSHIFT_ITINS_P.rm>,
            EVEX_4V, EVEX_K;
  }
}
3016
// Shift instructions whose shift amount comes from a 128-bit operand.
//   rr / rrk : amount in a VR128X register, unmasked / with KRC:$mask.
//   rm / rmk : amount loaded from i128mem,  unmasked / with KRC:$mask.
// SrcVT is the type of the register shift amount; bc_frag converts the
// memopv2i64 load used by the memory form. The masked forms carry no
// selection pattern.
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            RegisterClass RC, ValueType vt, ValueType SrcVT,
                            PatFrag bc_frag, RegisterClass KRC> {
  // src2 is always 128-bit
  def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                    (ins RC:$src1, VR128X:$src2),
                    !strconcat(OpcodeStr,
                               " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst,
                          (vt (OpNode RC:$src1, (SrcVT VR128X:$src2))))],
                    SSE_INTSHIFT_ITINS_P.rr>,
           EVEX_4V;

  def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
                     (ins KRC:$mask, RC:$src1, VR128X:$src2),
                     !strconcat(OpcodeStr,
                       " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
                     [], SSE_INTSHIFT_ITINS_P.rr>,
            EVEX_4V, EVEX_K;

  // `rmk` has an empty pattern, so TableGen cannot infer mayLoad for it from
  // a pattern; state it explicitly for both memory forms.
  let mayLoad = 1 in {
  def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                    (ins RC:$src1, i128mem:$src2),
                    !strconcat(OpcodeStr,
                               " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst,
                          (vt (OpNode RC:$src1,
                                      (bc_frag (memopv2i64 addr:$src2)))))],
                    SSE_INTSHIFT_ITINS_P.rm>,
           EVEX_4V;

  def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
                     (ins KRC:$mask, RC:$src1, i128mem:$src2),
                     !strconcat(OpcodeStr,
                       " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
                     [], SSE_INTSHIFT_ITINS_P.rm>,
            EVEX_4V, EVEX_K;
  }
}
3043
// 512-bit shift instantiations. Each mnemonic is defined twice under the same
// defm prefix: once through avx512_shift_rmi (immediate shift amount) and once
// through avx512_shift_rrm (128-bit shift amount).

// Logical right shift, 32-bit elements.
defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
                                VR512, v16i32, i512mem, memopv16i32, VK16WM>,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl,
                                VR512, v16i32, v4i32, bc_v4i32, VK16WM>,
               EVEX_V512, EVEX_CD8<32, CD8VQ>;

// Logical right shift, 64-bit elements.
defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
                                VR512, v8i64, i512mem, memopv8i64, VK8WM>,
               EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl,
                                VR512, v8i64, v2i64, bc_v2i64, VK8WM>,
               EVEX_V512, EVEX_CD8<64, CD8VQ>, VEX_W;

// Left shift, 32-bit elements.
defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
                                VR512, v16i32, i512mem, memopv16i32, VK16WM>,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl,
                                VR512, v16i32, v4i32, bc_v4i32, VK16WM>,
               EVEX_V512, EVEX_CD8<32, CD8VQ>;

// Left shift, 64-bit elements.
defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
                                VR512, v8i64, i512mem, memopv8i64, VK8WM>,
               EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl,
                                VR512, v8i64, v2i64, bc_v2i64, VK8WM>,
               EVEX_V512, EVEX_CD8<64, CD8VQ>, VEX_W;

// Arithmetic right shift, 32-bit elements.
defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
                                VR512, v16i32, i512mem, memopv16i32, VK16WM>,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra,
                                VR512, v16i32, v4i32, bc_v4i32, VK16WM>,
               EVEX_V512, EVEX_CD8<32, CD8VQ>;

// Arithmetic right shift, 64-bit elements.
defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
                                VR512, v8i64, i512mem, memopv8i64, VK8WM>,
               EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra,
                                VR512, v8i64, v2i64, bc_v2i64, VK8WM>,
               EVEX_V512, EVEX_CD8<64, CD8VQ>, VEX_W;

3085
3086//===-------------------------------------------------------------------===//
3087// Variable Bit Shifts
3088//===-------------------------------------------------------------------===//
// Variable shift: the second source supplies the shift amounts as a vector
// of the same type `vt` as the first source. Defines a register-register
// (rr) and a register-memory (rm) form.
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            RegisterClass RC, ValueType vt,
                            X86MemOperand x86memop, PatFrag mem_frag> {
  def rr : AVX5128I<opc, MRMSrcReg,
                    (outs RC:$dst),
                    (ins RC:$src1, RC:$src2),
                    !strconcat(OpcodeStr,
                               " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst, (vt (OpNode RC:$src1, (vt RC:$src2))))]>,
           EVEX_4V;

  def rm : AVX5128I<opc, MRMSrcMem,
                    (outs RC:$dst),
                    (ins RC:$src1, x86memop:$src2),
                    !strconcat(OpcodeStr,
                               " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst,
                          (vt (OpNode RC:$src1, (mem_frag addr:$src2))))]>,
           EVEX_4V;
}
3105
// 512-bit variable shifts, selected from the generic shl / srl / sra nodes.
// The D forms operate on v16i32, the Q forms on v8i64 (with VEX_W).

// Left shift (opcode 0x47).
defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32,
                                 i512mem, memopv16i32>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64,
                                 i512mem, memopv8i64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Logical right shift (opcode 0x45).
defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32,
                                 i512mem, memopv16i32>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64,
                                 i512mem, memopv8i64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Arithmetic right shift (opcode 0x46).
defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32,
                                 i512mem, memopv16i32>,
                EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64,
                                 i512mem, memopv8i64>,
                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

3124
3125//===----------------------------------------------------------------------===//
3126// AVX-512 - MOVDDUP
3127//===----------------------------------------------------------------------===//
3128
// MOVDDUP (opcode 0x12): register (rr) and memory (rm) source forms, both
// selecting X86Movddup with result type VT.
multiclass avx512_movddup<string OpcodeStr, RegisterClass RC, ValueType VT,
                          X86MemOperand x86memop, PatFrag memop_frag> {
  def rr : AVX512PDI<0x12, MRMSrcReg,
                     (outs RC:$dst), (ins RC:$src),
                     !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                     [(set RC:$dst, (VT (X86Movddup RC:$src)))]>,
           EVEX;

  def rm : AVX512PDI<0x12, MRMSrcMem,
                     (outs RC:$dst), (ins x86memop:$src),
                     !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                     [(set RC:$dst,
                           (VT (X86Movddup (memop_frag addr:$src))))]>,
           EVEX;
}
3139
// 512-bit MOVDDUP on v8f64.
defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem,
                                memopv8f64>,
                 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// Fold a scalar f64 load that is turned into a vector and then duplicated
// into the memory form.
// NOTE(review): VMOVDDUPZrm takes an f512mem operand while this pattern
// matches a single loadf64 -- confirm the memory form does not read past the
// 8 bytes the matched load guarantees.
def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))),
          (VMOVDDUPZrm addr:$src)>;

3144
Elena Demikhovsky0a74b7d2013-11-14 11:29:27 +00003145//===---------------------------------------------------------------------===//
3146// Replicate Single FP - MOVSHDUP and MOVSLDUP
3147//===---------------------------------------------------------------------===//
// Single-source replicate instructions: a register form (rr) and a memory
// form (rm, explicitly marked mayLoad), both selecting OpNode.
multiclass avx512_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
                                ValueType vt, RegisterClass RC,
                                PatFrag mem_frag, X86MemOperand x86memop> {
  def rr : AVX512XSI<op, MRMSrcReg,
                     (outs RC:$dst), (ins RC:$src),
                     !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                     [(set RC:$dst, (vt (OpNode RC:$src)))]>,
           EVEX;

  let mayLoad = 1 in
  def rm : AVX512XSI<op, MRMSrcMem,
                     (outs RC:$dst), (ins x86memop:$src),
                     !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                     [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>,
           EVEX;
}
3159
// 512-bit MOVSHDUP (0x16) and MOVSLDUP (0x12), defined on v16f32.
defm VMOVSHDUPZ : avx512_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
                                       v16f32, VR512, memopv16f32, f512mem>,
                  EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VMOVSLDUPZ : avx512_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
                                       v16f32, VR512, memopv16f32, f512mem>,
                  EVEX_V512, EVEX_CD8<32, CD8VF>;

// Reuse the same instructions when the nodes are typed as v16i32.
def : Pat<(v16i32 (X86Movshdup VR512:$src)),
          (VMOVSHDUPZrr VR512:$src)>;
def : Pat<(v16i32 (X86Movshdup (memopv16i32 addr:$src))),
          (VMOVSHDUPZrm addr:$src)>;
def : Pat<(v16i32 (X86Movsldup VR512:$src)),
          (VMOVSLDUPZrr VR512:$src)>;
def : Pat<(v16i32 (X86Movsldup (memopv16i32 addr:$src))),
          (VMOVSLDUPZrm addr:$src)>;

3173
3174//===----------------------------------------------------------------------===//
3175// Move Low to High and High to Low packed FP Instructions
3176//===----------------------------------------------------------------------===//
// VMOVLHPS (0x16) and VMOVHLPS (0x12) on VR128X registers; both are
// register-only, three-operand forms selected for v4f32.
def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg,
                            (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst,
                                  (v4f32 (X86Movlhps VR128X:$src1,
                                                     VR128X:$src2)))],
                            IIC_SSE_MOV_LH>,
                  EVEX_4V;

def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg,
                            (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst,
                                  (v4f32 (X86Movhlps VR128X:$src1,
                                                     VR128X:$src2)))],
                            IIC_SSE_MOV_LH>,
                  EVEX_4V;

3187
// Integer-typed X86Movlhps / X86Movhlps nodes are selected to the same
// register instructions when AVX-512 is available.
let Predicates = [HasAVX512] in {
  // MOVLHPS: v4i32 and v2i64 results.
  def : Pat<(v4i32 (X86Movlhps VR128X:$src1, VR128X:$src2)),
            (VMOVLHPSZrr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v2i64 (X86Movlhps VR128X:$src1, VR128X:$src2)),
            (VMOVLHPSZrr (v2i64 VR128X:$src1), VR128X:$src2)>;

  // MOVHLPS: v4i32 result.
  def : Pat<(v4i32 (X86Movhlps VR128X:$src1, VR128X:$src2)),
            (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>;
}
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003199
3200//===----------------------------------------------------------------------===//
3201// FMA - Fused Multiply Operations
3202//
// Packed FMA with $src1 tied to $dst.
//   r  : register form, generated through AVX512_masking_3src.
//   m  : full-vector memory operand as $src3 (explicitly marked mayLoad).
//   mb : scalar memory operand as $src3, broadcast via X86VBroadcast (EVEX_B);
//        BrdcstStr is appended to the memory operand in the asm string.
let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr,
                           RegisterClass RC, X86MemOperand x86memop,
                           PatFrag mem_frag, X86MemOperand x86scalar_mop,
                           PatFrag scalar_mfrag, string BrdcstStr,
                           SDNode OpNode, ValueType OpVT,
                           RegisterClass KRC> {
  defm r : AVX512_masking_3src<opc, MRMSrcReg, (outs RC:$dst),
                               (ins RC:$src2, RC:$src3),
                               OpcodeStr, "$src3, $src2", "$src2, $src3",
                               (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)),
                               OpVT, RC, KRC>,
           AVX512FMA3Base;

  let mayLoad = 1 in
  def m : AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
                     (ins RC:$src1, RC:$src2, x86memop:$src3),
                     !strconcat(OpcodeStr,
                                " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                     [(set RC:$dst,
                           (OpVT (OpNode RC:$src1, RC:$src2,
                                         (mem_frag addr:$src3))))]>;

  def mb : AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
                      (ins RC:$src1, RC:$src2, x86scalar_mop:$src3),
                      !strconcat(OpcodeStr, " \t{${src3}", BrdcstStr,
                                 ", $src2, $dst|$dst, $src2, ${src3}",
                                 BrdcstStr, "}"),
                      [(set RC:$dst,
                            (OpNode RC:$src1, RC:$src2,
                                    (OpVT (X86VBroadcast
                                              (scalar_mfrag addr:$src3)))))]>,
           EVEX_B;
}
} // Constraints = "$src1 = $dst"
3229
// 512-bit packed FMA, "213" forms, single precision (v16f32, VK16WM).
let ExeDomain = SSEPackedSingle in {
  defm VFMADD213PSZ    : avx512_fma3p_rm<0xA8, "vfmadd213ps", VR512, f512mem,
                                         memopv16f32, f32mem, loadf32,
                                         "{1to16}", X86Fmadd, v16f32, VK16WM>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFMSUB213PSZ    : avx512_fma3p_rm<0xAA, "vfmsub213ps", VR512, f512mem,
                                         memopv16f32, f32mem, loadf32,
                                         "{1to16}", X86Fmsub, v16f32, VK16WM>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", VR512,
                                         f512mem, memopv16f32, f32mem, loadf32,
                                         "{1to16}", X86Fmaddsub, v16f32,
                                         VK16WM>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", VR512,
                                         f512mem, memopv16f32, f32mem, loadf32,
                                         "{1to16}", X86Fmsubadd, v16f32,
                                         VK16WM>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFNMADD213PSZ   : avx512_fma3p_rm<0xAC, "vfnmadd213ps", VR512, f512mem,
                                         memopv16f32, f32mem, loadf32,
                                         "{1to16}", X86Fnmadd, v16f32, VK16WM>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFNMSUB213PSZ   : avx512_fma3p_rm<0xAE, "vfnmsub213ps", VR512, f512mem,
                                         memopv16f32, f32mem, loadf32,
                                         "{1to16}", X86Fnmsub, v16f32, VK16WM>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
}

// 512-bit packed FMA, "213" forms, double precision (v8f64, VK8WM, VEX_W).
let ExeDomain = SSEPackedDouble in {
  defm VFMADD213PDZ    : avx512_fma3p_rm<0xA8, "vfmadd213pd", VR512, f512mem,
                                         memopv8f64, f64mem, loadf64,
                                         "{1to8}", X86Fmadd, v8f64, VK8WM>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFMSUB213PDZ    : avx512_fma3p_rm<0xAA, "vfmsub213pd", VR512, f512mem,
                                         memopv8f64, f64mem, loadf64,
                                         "{1to8}", X86Fmsub, v8f64, VK8WM>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", VR512,
                                         f512mem, memopv8f64, f64mem, loadf64,
                                         "{1to8}", X86Fmaddsub, v8f64, VK8WM>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", VR512,
                                         f512mem, memopv8f64, f64mem, loadf64,
                                         "{1to8}", X86Fmsubadd, v8f64, VK8WM>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFNMADD213PDZ   : avx512_fma3p_rm<0xAC, "vfnmadd213pd", VR512, f512mem,
                                         memopv8f64, f64mem, loadf64,
                                         "{1to8}", X86Fnmadd, v8f64, VK8WM>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFNMSUB213PDZ   : avx512_fma3p_rm<0xAE, "vfnmsub213pd", VR512, f512mem,
                                         memopv8f64, f64mem, loadf64,
                                         "{1to8}", X86Fnmsub, v8f64, VK8WM>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
3282
// Packed FMA "132" memory forms, with $src1 tied to $dst. The memory operand
// is $src2 and appears as the second operand of OpNode.
//   m  : full-vector memory operand (explicitly marked mayLoad).
//   mb : scalar memory operand broadcast via X86VBroadcast (EVEX_B);
//        BrdcstStr is appended to the memory operand in the asm string.
let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr,
                             RegisterClass RC, X86MemOperand x86memop,
                             PatFrag mem_frag, X86MemOperand x86scalar_mop,
                             PatFrag scalar_mfrag, string BrdcstStr,
                             SDNode OpNode, ValueType OpVT> {
  let mayLoad = 1 in
  def m : AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
                     (ins RC:$src1, RC:$src3, x86memop:$src2),
                     !strconcat(OpcodeStr,
                                " \t{$src2, $src3, $dst|$dst, $src3, $src2}"),
                     [(set RC:$dst,
                           (OpVT (OpNode RC:$src1, (mem_frag addr:$src2),
                                         RC:$src3)))]>;

  def mb : AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
                      (ins RC:$src1, RC:$src3, x86scalar_mop:$src2),
                      !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                                 ", $src3, $dst|$dst, $src3, ${src2}",
                                 BrdcstStr, "}"),
                      [(set RC:$dst,
                            (OpNode RC:$src1,
                                    (OpVT (X86VBroadcast
                                              (scalar_mfrag addr:$src2))),
                                    RC:$src3))]>,
           EVEX_B;
}
} // Constraints = "$src1 = $dst"
3301
3302
// 512-bit packed FMA, "132" memory forms, single precision (v16f32).
let ExeDomain = SSEPackedSingle in {
  defm VFMADD132PSZ    : avx512_fma3p_m132<0x98, "vfmadd132ps", VR512, f512mem,
                                           memopv16f32, f32mem, loadf32,
                                           "{1to16}", X86Fmadd, v16f32>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFMSUB132PSZ    : avx512_fma3p_m132<0x9A, "vfmsub132ps", VR512, f512mem,
                                           memopv16f32, f32mem, loadf32,
                                           "{1to16}", X86Fmsub, v16f32>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", VR512,
                                           f512mem, memopv16f32, f32mem,
                                           loadf32, "{1to16}", X86Fmaddsub,
                                           v16f32>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", VR512,
                                           f512mem, memopv16f32, f32mem,
                                           loadf32, "{1to16}", X86Fmsubadd,
                                           v16f32>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFNMADD132PSZ   : avx512_fma3p_m132<0x9C, "vfnmadd132ps", VR512,
                                           f512mem, memopv16f32, f32mem,
                                           loadf32, "{1to16}", X86Fnmadd,
                                           v16f32>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm VFNMSUB132PSZ   : avx512_fma3p_m132<0x9E, "vfnmsub132ps", VR512,
                                           f512mem, memopv16f32, f32mem,
                                           loadf32, "{1to16}", X86Fnmsub,
                                           v16f32>,
                         EVEX_V512, EVEX_CD8<32, CD8VF>;
}

// 512-bit packed FMA, "132" memory forms, double precision (v8f64, VEX_W).
let ExeDomain = SSEPackedDouble in {
  defm VFMADD132PDZ    : avx512_fma3p_m132<0x98, "vfmadd132pd", VR512, f512mem,
                                           memopv8f64, f64mem, loadf64,
                                           "{1to8}", X86Fmadd, v8f64>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFMSUB132PDZ    : avx512_fma3p_m132<0x9A, "vfmsub132pd", VR512, f512mem,
                                           memopv8f64, f64mem, loadf64,
                                           "{1to8}", X86Fmsub, v8f64>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", VR512,
                                           f512mem, memopv8f64, f64mem,
                                           loadf64, "{1to8}", X86Fmaddsub,
                                           v8f64>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", VR512,
                                           f512mem, memopv8f64, f64mem,
                                           loadf64, "{1to8}", X86Fmsubadd,
                                           v8f64>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFNMADD132PDZ   : avx512_fma3p_m132<0x9C, "vfnmadd132pd", VR512,
                                           f512mem, memopv8f64, f64mem,
                                           loadf64, "{1to8}", X86Fnmadd,
                                           v8f64>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm VFNMSUB132PDZ   : avx512_fma3p_m132<0x9E, "vfnmsub132pd", VR512,
                                           f512mem, memopv8f64, f64mem,
                                           loadf64, "{1to8}", X86Fnmsub,
                                           v8f64>,
                         EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
3355
3356// Scalar FMA
// Scalar FMA with $src1 tied to $dst.
//   r : register form, marked commutable; selects
//       OpNode($src2, $src1, $src3).
//   m : memory form (mayLoad); $src3 is loaded through mem_frag.
// The memory form takes its operand class from the x86memop template
// argument, so the operand width follows what each instantiation passes
// (f32mem / f64mem) instead of a hard-coded f128mem.
// The `memop` template argument is not referenced by either definition.
let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass RC, ValueType OpVT,
                           X86MemOperand x86memop, Operand memop,
                           PatFrag mem_frag> {
  let isCommutable = 1 in
  def r : AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst),
                     (ins RC:$src1, RC:$src2, RC:$src3),
                     !strconcat(OpcodeStr,
                                " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                     [(set RC:$dst,
                           (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;

  let mayLoad = 1 in
  def m : AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
                     (ins RC:$src1, RC:$src2, x86memop:$src3),
                     !strconcat(OpcodeStr,
                                " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                     [(set RC:$dst,
                           (OpVT (OpNode RC:$src2, RC:$src1,
                                         (mem_frag addr:$src3))))]>;
}

} // Constraints = "$src1 = $dst"
3380
// Scalar "213" FMA instantiations. Single-precision records use FR32X/f32 with
// EVEX_CD8<32, CD8VT1>; double-precision records use FR64X/f64 and add VEX_W
// with EVEX_CD8<64, CD8VT1>.
defm VFMADDSSZ
    : avx512_fma3s_rm<0xA9, "vfmadd213ss", X86Fmadd, FR32X, f32, f32mem, ssmem,
                      loadf32>,
      EVEX_CD8<32, CD8VT1>;
defm VFMADDSDZ
    : avx512_fma3s_rm<0xA9, "vfmadd213sd", X86Fmadd, FR64X, f64, f64mem, sdmem,
                      loadf64>,
      VEX_W, EVEX_CD8<64, CD8VT1>;
defm VFMSUBSSZ
    : avx512_fma3s_rm<0xAB, "vfmsub213ss", X86Fmsub, FR32X, f32, f32mem, ssmem,
                      loadf32>,
      EVEX_CD8<32, CD8VT1>;
defm VFMSUBSDZ
    : avx512_fma3s_rm<0xAB, "vfmsub213sd", X86Fmsub, FR64X, f64, f64mem, sdmem,
                      loadf64>,
      VEX_W, EVEX_CD8<64, CD8VT1>;
defm VFNMADDSSZ
    : avx512_fma3s_rm<0xAD, "vfnmadd213ss", X86Fnmadd, FR32X, f32, f32mem,
                      ssmem, loadf32>,
      EVEX_CD8<32, CD8VT1>;
defm VFNMADDSDZ
    : avx512_fma3s_rm<0xAD, "vfnmadd213sd", X86Fnmadd, FR64X, f64, f64mem,
                      sdmem, loadf64>,
      VEX_W, EVEX_CD8<64, CD8VT1>;
defm VFNMSUBSSZ
    : avx512_fma3s_rm<0xAF, "vfnmsub213ss", X86Fnmsub, FR32X, f32, f32mem,
                      ssmem, loadf32>,
      EVEX_CD8<32, CD8VT1>;
defm VFNMSUBSDZ
    : avx512_fma3s_rm<0xAF, "vfnmsub213sd", X86Fnmsub, FR64X, f64, f64mem,
                      sdmem, loadf64>,
      VEX_W, EVEX_CD8<64, CD8VT1>;
3397
3398//===----------------------------------------------------------------------===//
3399// AVX-512 Scalar convert from sign integer to float/double
3400//===----------------------------------------------------------------------===//
3401
// avx512_vcvtsi - pattern-less scalar integer-to-FP conversion, in a
// register-source form (rr) and a memory-source form (rm). Both take an extra
// DstRC:$src1 operand ahead of the integer source and are tagged EVEX_4V.
//   opc      - opcode byte.
//   SrcRC    - register class of the integer source.
//   DstRC    - register class of $dst and $src1.
//   x86memop - memory operand class of the rm source.
//   asm      - mnemonic; the operand list is appended here.
multiclass avx512_vcvtsi<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                         X86MemOperand x86memop, string asm> {
  let hasSideEffects = 0 in
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst),
              (ins DstRC:$src1, SrcRC:$src),
              asm # " \t{$src, $src1, $dst|$dst, $src1, $src}", []>,
           EVEX_4V;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins DstRC:$src1, x86memop:$src),
              asm # " \t{$src, $src1, $dst|$dst, $src1, $src}", []>,
           EVEX_4V;
}
// Scalar integer -> float/double conversions (signed, then unsigned) together
// with the sint_to_fp / uint_to_fp selection patterns. Every pattern feeds
// IMPLICIT_DEF as the instruction's first ($src1) operand.
let Predicates = [HasAVX512] in {
  // Signed sources, opcode 0x2A.
  defm VCVTSI2SSZ   : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}">,
                      XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm VCVTSI642SSZ : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}">,
                      XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
  defm VCVTSI2SDZ   : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}">,
                      XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm VCVTSI642SDZ : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}">,
                      XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;

  // sint_to_fp of a loaded integer selects the rm form.
  def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
            (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
            (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

  // sint_to_fp of a GPR selects the rr form.
  def : Pat<(f32 (sint_to_fp GR32:$src)),
            (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f32 (sint_to_fp GR64:$src)),
            (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
  def : Pat<(f64 (sint_to_fp GR32:$src)),
            (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f64 (sint_to_fp GR64:$src)),
            (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

  // Unsigned sources, opcode 0x7B.
  defm VCVTUSI2SSZ   : avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}">,
                       XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm VCVTUSI642SSZ : avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}">,
                       XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
  defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}">,
                       XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm VCVTUSI642SDZ : avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}">,
                       XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;

  // uint_to_fp of a loaded integer selects the rm form.
  def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
            (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
            (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
            (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
            (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

  // uint_to_fp of a GPR selects the rr form.
  def : Pat<(f32 (uint_to_fp GR32:$src)),
            (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f32 (uint_to_fp GR64:$src)),
            (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
  def : Pat<(f64 (uint_to_fp GR32:$src)),
            (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f64 (uint_to_fp GR64:$src)),
            (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
} // Predicates = [HasAVX512]
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003470
3471//===----------------------------------------------------------------------===//
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003472// AVX-512 Scalar convert from float/double to integer
3473//===----------------------------------------------------------------------===//
// avx512_cvt_s_int - intrinsic-based scalar FP-to-integer conversion.
//   rr - register source; selected from (Int SrcRC:$src).
//   rm - memory source (memop); carries no selection pattern.
// mem_cpat is accepted but not referenced by either record.
multiclass avx512_cvt_s_int<bits<8> opc, RegisterClass SrcRC,
                            RegisterClass DstRC, Intrinsic Int, Operand memop,
                            ComplexPattern mem_cpat, string asm> {
  let hasSideEffects = 0 in
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
              asm # " \t{$src, $dst|$dst, $src}",
              [(set DstRC:$dst, (Int SrcRC:$src))]>,
           EVEX, VEX_LIG, Requires<[HasAVX512]>;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
              asm # " \t{$src, $dst|$dst, $src}", []>,
           EVEX, VEX_LIG, Requires<[HasAVX512]>;
}
3488let Predicates = [HasAVX512] in {
// Convert float/double to signed/unsigned int 32/64.
// Signed forms use opcode 0x2D and unsigned forms 0x79; the 64-bit result
// variants add VEX_W.
defm VCVTSS2SIZ
    : avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse_cvtss2si, ssmem,
                       sse_load_f32, "cvtss2si">,
      XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z
    : avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse_cvtss2si64, ssmem,
                       sse_load_f32, "cvtss2si">,
      XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ
    : avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtss2usi, ssmem,
                       sse_load_f32, "cvtss2usi">,
      XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z
    : avx512_cvt_s_int<0x79, VR128X, GR64, int_x86_avx512_cvtss2usi64, ssmem,
                       sse_load_f32, "cvtss2usi">,
      XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ
    : avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si, sdmem,
                       sse_load_f64, "cvtsd2si">,
      XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z
    : avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse2_cvtsd2si64, sdmem,
                       sse_load_f64, "cvtsd2si">,
      XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ
    : avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtsd2usi, sdmem,
                       sse_load_f64, "cvtsd2usi">,
      XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z
    : avx512_cvt_s_int<0x79, VR128X, GR64, int_x86_avx512_cvtsd2usi64, sdmem,
                       sse_load_f64, "cvtsd2usi">,
      XD, VEX_W, EVEX_CD8<64, CD8VT1>;

3516
// Intrinsic (codegen-only) forms of the scalar integer -> FP conversions,
// built from sse12_cvt_sint_3addr on VR128X.
//
// The signed conversions use opcode 0x2A. The unsigned conversions use opcode
// 0x7B, the same opcode the VCVTUSI2SSZ / VCVTUSI2SDZ family in this file is
// defined with; giving them 0x2A would encode the unsigned intrinsics as the
// signed cvtsi2ss / cvtsi2sd instructions.
let isCodeGenOnly = 1 in {
  // Signed sources.
  defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
            int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
            SSE_CVT_Scalar, 0>, XS, EVEX_4V;
  defm Int_VCVTSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
            int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}",
            SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
  defm Int_VCVTSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
            int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}",
            SSE_CVT_Scalar, 0>, XD, EVEX_4V;
  defm Int_VCVTSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
            int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
            SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;

  // Unsigned sources.
  defm Int_VCVTUSI2SSZ : sse12_cvt_sint_3addr<0x7B, GR32, VR128X,
            int_x86_avx512_cvtusi2ss, i32mem, loadi32, "cvtusi2ss{l}",
            SSE_CVT_Scalar, 0>, XS, EVEX_4V;
  defm Int_VCVTUSI2SS64Z : sse12_cvt_sint_3addr<0x7B, GR64, VR128X,
            int_x86_avx512_cvtusi642ss, i64mem, loadi64, "cvtusi2ss{q}",
            SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
  defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x7B, GR32, VR128X,
            int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}",
            SSE_CVT_Scalar, 0>, XD, EVEX_4V;
  defm Int_VCVTUSI2SD64Z : sse12_cvt_sint_3addr<0x7B, GR64, VR128X,
            int_x86_avx512_cvtusi642sd, i64mem, loadi64, "cvtusi2sd{q}",
            SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
} // isCodeGenOnly = 1
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003544
// Convert float/double to signed/unsigned int 32/64 with truncation.
// Codegen-only intrinsic forms: signed variants use opcode 0x2C, unsigned
// variants 0x78; 64-bit result variants add VEX_W.
let isCodeGenOnly = 1 in {
  defm Int_VCVTTSS2SIZ
      : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse_cvttss2si, ssmem,
                         sse_load_f32, "cvttss2si">,
        XS, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSS2SI64Z
      : avx512_cvt_s_int<0x2C, VR128X, GR64, int_x86_sse_cvttss2si64, ssmem,
                         sse_load_f32, "cvttss2si">,
        XS, VEX_W, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSD2SIZ
      : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse2_cvttsd2si, sdmem,
                         sse_load_f64, "cvttsd2si">,
        XD, EVEX_CD8<64, CD8VT1>;
  defm Int_VCVTTSD2SI64Z
      : avx512_cvt_s_int<0x2C, VR128X, GR64, int_x86_sse2_cvttsd2si64, sdmem,
                         sse_load_f64, "cvttsd2si">,
        XD, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm Int_VCVTTSS2USIZ
      : avx512_cvt_s_int<0x78, VR128X, GR32, int_x86_avx512_cvttss2usi, ssmem,
                         sse_load_f32, "cvttss2usi">,
        XS, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSS2USI64Z
      : avx512_cvt_s_int<0x78, VR128X, GR64, int_x86_avx512_cvttss2usi64,
                         ssmem, sse_load_f32, "cvttss2usi">,
        XS, VEX_W, EVEX_CD8<32, CD8VT1>;
  defm Int_VCVTTSD2USIZ
      : avx512_cvt_s_int<0x78, VR128X, GR32, int_x86_avx512_cvttsd2usi, sdmem,
                         sse_load_f64, "cvttsd2usi">,
        XD, EVEX_CD8<64, CD8VT1>;
  defm Int_VCVTTSD2USI64Z
      : avx512_cvt_s_int<0x78, VR128X, GR64, int_x86_avx512_cvttsd2usi64,
                         sdmem, sse_load_f64, "cvttsd2usi">,
        XD, VEX_W, EVEX_CD8<64, CD8VT1>;
} // isCodeGenOnly = 1
Elena Demikhovsky2e408ae2013-10-06 13:11:09 +00003577
// avx512_cvt_s - SDNode-based scalar conversion with selection patterns.
//   rr - matches (OpNode SrcRC:$src).
//   rm - matches (OpNode (ld_frag addr:$src)) on an x86memop source.
multiclass avx512_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                        SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
                        string asm> {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
              asm # " \t{$src, $dst|$dst, $src}",
              [(set DstRC:$dst, (OpNode SrcRC:$src))]>,
           EVEX;

  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
              asm # " \t{$src, $dst|$dst, $src}",
              [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>,
           EVEX;
}
3588
// Truncating scalar FP -> integer conversions selected from fp_to_sint
// (opcode 0x2C) and fp_to_uint (opcode 0x78). 64-bit GPR results add VEX_W.
defm VCVTTSS2SIZ
    : avx512_cvt_s<0x2C, FR32X, GR32, fp_to_sint, f32mem, loadf32,
                   "cvttss2si">,
      XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USIZ
    : avx512_cvt_s<0x78, FR32X, GR32, fp_to_uint, f32mem, loadf32,
                   "cvttss2usi">,
      XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z
    : avx512_cvt_s<0x2C, FR32X, GR64, fp_to_sint, f32mem, loadf32,
                   "cvttss2si">,
      XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z
    : avx512_cvt_s<0x78, FR32X, GR64, fp_to_uint, f32mem, loadf32,
                   "cvttss2usi">,
      XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ
    : avx512_cvt_s<0x2C, FR64X, GR32, fp_to_sint, f64mem, loadf64,
                   "cvttsd2si">,
      XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USIZ
    : avx512_cvt_s<0x78, FR64X, GR32, fp_to_uint, f64mem, loadf64,
                   "cvttsd2usi">,
      XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z
    : avx512_cvt_s<0x2C, FR64X, GR64, fp_to_sint, f64mem, loadf64,
                   "cvttsd2si">,
      XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z
    : avx512_cvt_s<0x78, FR64X, GR64, fp_to_uint, f64mem, loadf64,
                   "cvttsd2usi">,
      XD, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskycf088092013-12-11 14:31:04 +00003613} // HasAVX512
//===----------------------------------------------------------------------===//
// AVX-512 Convert from float to double and back
//===----------------------------------------------------------------------===//
// Scalar single <-> double conversions. The records carry no selection
// patterns of their own; each takes two sources ($src1, $src2), with the
// register or memory operand $src2 listed first in the AT&T asm string.
let hasSideEffects = 0 in {
  // Convert scalar single to scalar double
  def VCVTSS2SDZrr
      : AVX512XSI<0x5A, MRMSrcReg, (outs FR64X:$dst),
                  (ins FR32X:$src1, FR32X:$src2),
                  "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
        EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
  let mayLoad = 1 in {
    def VCVTSS2SDZrm
        : AVX512XSI<0x5A, MRMSrcMem, (outs FR64X:$dst),
                    (ins FR32X:$src1, f32mem:$src2),
                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
          EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
          EVEX_CD8<32, CD8VT1>;
  }

  // Convert scalar double to scalar single
  def VCVTSD2SSZrr
      : AVX512XDI<0x5A, MRMSrcReg, (outs FR32X:$dst),
                  (ins FR64X:$src1, FR64X:$src2),
                  "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
        EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>;
  let mayLoad = 1 in {
    def VCVTSD2SSZrm
        : AVX512XDI<0x5A, MRMSrcMem, (outs FR32X:$dst),
                    (ins FR64X:$src1, f64mem:$src2),
                    "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
          EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
          EVEX_CD8<64, CD8VT1>;
  }
} // hasSideEffects = 0
3641
// Selection patterns for the scalar f32 <-> f64 conversions above.

// Register fextend: the source is passed as both $src1 and $src2.
def : Pat<(f64 (fextend FR32X:$src)),
          (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>,
      Requires<[HasAVX512]>;

// fextend of a loaded f32 uses the memory form with IMPLICIT_DEF as $src1.
def : Pat<(fextend (loadf32 addr:$src)),
          (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
      Requires<[HasAVX512]>;

// Extending load: one memory-form instruction under OptForSize ...
def : Pat<(extloadf32 addr:$src),
          (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
      Requires<[HasAVX512, OptForSize]>;

// ... and a VMOVSSZrm load followed by the register form under OptForSpeed.
def : Pat<(extloadf32 addr:$src),
          (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
      Requires<[HasAVX512, OptForSpeed]>;

// Register fround: the source is passed as both $src1 and $src2.
def : Pat<(f32 (fround FR64X:$src)),
          (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>,
      Requires<[HasAVX512]>;

3657
// avx512_vcvt_fp_with_rc - packed conversion with three records:
//   rr  - register source, matched from (OpVT (OpNode (InVT SrcRC:$src))).
//   rrb - register source plus an AVX512RC:$rc operand, tagged EVEX_B and
//         EVEX_RC; no selection pattern.
//   rm  - memory source, matched through a bitconvert of mem_frag.
// 'd' is the execution domain handed to AVX512PI.
multiclass avx512_vcvt_fp_with_rc<bits<8> opc, string asm,
                                  RegisterClass SrcRC, RegisterClass DstRC,
                                  SDNode OpNode, PatFrag mem_frag,
                                  X86MemOperand x86memop, ValueType OpVT,
                                  ValueType InVT, Domain d> {
  let hasSideEffects = 0 in {
    def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
                      asm # " \t{$src, $dst|$dst, $src}",
                      [(set DstRC:$dst,
                        (OpVT (OpNode (InVT SrcRC:$src))))], d>,
             EVEX;

    def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst),
                       (ins SrcRC:$src, AVX512RC:$rc),
                       asm # " \t{$rc, $src, $dst|$dst, $src, $rc}",
                       [], d>,
              EVEX, EVEX_B, EVEX_RC;

    let mayLoad = 1 in {
      def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst),
                        (ins x86memop:$src),
                        asm # " \t{$src, $dst|$dst, $src}",
                        [(set DstRC:$dst,
                          (OpVT (OpNode (InVT (bitconvert
                                               (mem_frag addr:$src))))))], d>,
               EVEX;
    }
  } // hasSideEffects = 0
}
3677
// avx512_vcvt_fp - packed conversion with a register-source (rr) and a
// memory-source (rm) record; unlike avx512_vcvt_fp_with_rc it defines no
// 'rrb' record. Both records are matched from OpNode applied to an InVT
// source and yield OpVT.
multiclass avx512_vcvt_fp<bits<8> opc, string asm, RegisterClass SrcRC,
                          RegisterClass DstRC, SDNode OpNode,
                          PatFrag mem_frag, X86MemOperand x86memop,
                          ValueType OpVT, ValueType InVT, Domain d> {
  let hasSideEffects = 0 in {
    def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
                      asm # " \t{$src, $dst|$dst, $src}",
                      [(set DstRC:$dst,
                        (OpVT (OpNode (InVT SrcRC:$src))))], d>,
             EVEX;

    let mayLoad = 1 in {
      def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst),
                        (ins x86memop:$src),
                        asm # " \t{$src, $dst|$dst, $src}",
                        [(set DstRC:$dst,
                          (OpVT (OpNode (InVT (bitconvert
                                               (mem_frag addr:$src))))))], d>,
               EVEX;
    }
  } // hasSideEffects = 0
}
3694
// Packed double -> single (fround, VR512 -> VR256X).
defm VCVTPD2PSZ
    : avx512_vcvt_fp_with_rc<0x5A, "vcvtpd2ps", VR512, VR256X, fround,
                             memopv8f64, f512mem, v8f32, v8f64,
                             SSEPackedSingle>,
      EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;

// Packed single -> double (fextend, VR256X -> VR512).
defm VCVTPS2PDZ
    : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend, memopv4f64,
                     f256mem, v8f64, v8f32, SSEPackedDouble>,
      EVEX_V512, PS, EVEX_CD8<32, CD8VH>;

// An extending v8f32 load selects the memory form directly.
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
          (VCVTPS2PDZrm addr:$src)>;

// Masked cvtpd2ps intrinsic with a zero pass-through and an all-ones mask:
// FROUND_CURRENT selects the plain register form ...
def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512
                     (v8f64 VR512:$src), (bc_v8f32 (v8i32 immAllZerosV)),
                     (i8 -1), (i32 FROUND_CURRENT))),
          (VCVTPD2PSZrr VR512:$src)>;

// ... and an immediate $rc selects the 'rrb' form, forwarding $rc.
def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512
                     (v8f64 VR512:$src), (bc_v8f32 (v8i32 immAllZerosV)),
                     (i8 -1), imm:$rc)),
          (VCVTPD2PSZrrb VR512:$src, imm:$rc)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003714
3715//===----------------------------------------------------------------------===//
3716// AVX-512 Vector convert from sign integer to float/double
3717//===----------------------------------------------------------------------===//
3718
// Packed integer <-> FP conversions on 512-bit vectors, with the patterns
// that map the masked truncating-unsigned intrinsics onto the plain register
// forms.

// Signed int -> float / double.
defm VCVTDQ2PSZ
    : avx512_vcvt_fp_with_rc<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp,
                             memopv8i64, i512mem, v16f32, v16i32,
                             SSEPackedSingle>,
      EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

defm VCVTDQ2PDZ
    : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp, memopv4i64,
                     i256mem, v8f64, v8i32, SSEPackedDouble>,
      EVEX_V512, XS, EVEX_CD8<32, CD8VH>;

// Float / double -> signed int, selected from fp_to_sint.
defm VCVTTPS2DQZ
    : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint,
                     memopv16f32, f512mem, v16i32, v16f32, SSEPackedSingle>,
      EVEX_V512, XS, EVEX_CD8<32, CD8VF>;

defm VCVTTPD2DQZ
    : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint,
                     memopv8f64, f512mem, v8i32, v8f64, SSEPackedDouble>,
      EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Float -> unsigned int, selected from fp_to_uint.
defm VCVTTPS2UDQZ
    : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint,
                     memopv16f32, f512mem, v16i32, v16f32, SSEPackedSingle>,
      EVEX_V512, PS, EVEX_CD8<32, CD8VF>;

// cvttps2udq (src, 0, mask-all-ones, sae-current)
def : Pat<(v16i32 (int_x86_avx512_mask_cvttps2udq_512
                      (v16f32 VR512:$src), (v16i32 immAllZerosV), (i16 -1),
                      FROUND_CURRENT)),
          (VCVTTPS2UDQZrr VR512:$src)>;

// Double -> unsigned int, selected from fp_to_uint.
defm VCVTTPD2UDQZ
    : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint,
                     memopv8f64, f512mem, v8i32, v8f64, SSEPackedDouble>,
      EVEX_V512, PS, VEX_W, EVEX_CD8<64, CD8VF>;

// cvttpd2udq (src, 0, mask-all-ones, sae-current)
def : Pat<(v8i32 (int_x86_avx512_mask_cvttpd2udq_512
                     (v8f64 VR512:$src), (v8i32 immAllZerosV), (i8 -1),
                     FROUND_CURRENT)),
          (VCVTTPD2UDQZrr VR512:$src)>;

// Unsigned int -> double / float.
defm VCVTUDQ2PDZ
    : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp,
                     memopv4i64, f256mem, v8f64, v8i32, SSEPackedDouble>,
      EVEX_V512, XS, EVEX_CD8<32, CD8VH>;

defm VCVTUDQ2PSZ
    : avx512_vcvt_fp_with_rc<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp,
                             memopv16i32, f512mem, v16f32, v16i32,
                             SSEPackedSingle>,
      EVEX_V512, XD, EVEX_CD8<32, CD8VF>;
3768
// Unsigned conversions on 128/256-bit vectors: the source is placed in a wider
// register with SUBREG_TO_REG, converted by the 512-bit instruction, and the
// result is taken back out with EXTRACT_SUBREG.
def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
          (EXTRACT_SUBREG
              (v16i32 (VCVTTPS2UDQZrr
                  (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))),
              sub_ymm)>;

def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
          (EXTRACT_SUBREG
              (v16i32 (VCVTTPS2UDQZrr
                  (v16f32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))),
              sub_xmm)>;

def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
          (EXTRACT_SUBREG
              (v16f32 (VCVTUDQ2PSZrr
                  (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))),
              sub_ymm)>;

def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG
              (v16f32 (VCVTUDQ2PSZrr
                  (v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))),
              sub_xmm)>;

def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG
              (v8f64 (VCVTUDQ2PDZrr
                  (v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))),
              sub_ymm)>;

// Masked int -> FP intrinsics called with a zero pass-through and an all-ones
// mask. The *2ps intrinsics forward imm:$rc to the 'rrb' form; the *2pd
// intrinsics select the plain register form.
def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512
                      (v16i32 VR512:$src), (bc_v16f32 (v16i32 immAllZerosV)),
                      (i16 -1), imm:$rc)),
          (VCVTDQ2PSZrrb VR512:$src, imm:$rc)>;
def : Pat<(v8f64 (int_x86_avx512_mask_cvtdq2pd_512
                     (v8i32 VR256X:$src), (bc_v8f64 (v16i32 immAllZerosV)),
                     (i8 -1))),
          (VCVTDQ2PDZrr VR256X:$src)>;
def : Pat<(v16f32 (int_x86_avx512_mask_cvtudq2ps_512
                      (v16i32 VR512:$src), (bc_v16f32 (v16i32 immAllZerosV)),
                      (i16 -1), imm:$rc)),
          (VCVTUDQ2PSZrrb VR512:$src, imm:$rc)>;
def : Pat<(v8f64 (int_x86_avx512_mask_cvtudq2pd_512
                     (v8i32 VR256X:$src), (bc_v8f64 (v16i32 immAllZerosV)),
                     (i8 -1))),
          (VCVTUDQ2PDZrr VR256X:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003801
// avx512_vcvt_fp2int - packed FP-to-integer conversion defined without
// selection patterns (all three pattern lists are empty):
//   rr  - register source.
//   rrb - register source plus AVX512RC:$rc, tagged EVEX_B and EVEX_RC.
//   rm  - memory source.
// mem_frag is accepted but not referenced by any record.
multiclass avx512_vcvt_fp2int<bits<8> opc, string asm, RegisterClass SrcRC,
                              RegisterClass DstRC, PatFrag mem_frag,
                              X86MemOperand x86memop, Domain d> {
  let hasSideEffects = 0 in {
    def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
                      asm # " \t{$src, $dst|$dst, $src}",
                      [], d>,
             EVEX;

    def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst),
                       (ins SrcRC:$src, AVX512RC:$rc),
                       asm # " \t{$rc, $src, $dst|$dst, $src, $rc}",
                       [], d>,
              EVEX, EVEX_B, EVEX_RC;

    let mayLoad = 1 in {
      def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst),
                        (ins x86memop:$src),
                        asm # " \t{$src, $dst|$dst, $src}",
                        [], d>,
               EVEX;
    }
  } // hasSideEffects = 0
}
3818
// Packed FP -> signed int (non-truncating) and the patterns mapping the
// masked intrinsics (zero pass-through, all-ones mask) onto the 'rrb' forms.
defm VCVTPS2DQZ
    : avx512_vcvt_fp2int<0x5B, "vcvtps2dq", VR512, VR512, memopv16f32,
                         f512mem, SSEPackedSingle>,
      PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VCVTPD2DQZ
    : avx512_vcvt_fp2int<0xE6, "vcvtpd2dq", VR512, VR256X, memopv8f64,
                         f512mem, SSEPackedDouble>,
      XD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

def : Pat<(v16i32 (int_x86_avx512_mask_cvtps2dq_512
                      (v16f32 VR512:$src), (v16i32 immAllZerosV), (i16 -1),
                      imm:$rc)),
          (VCVTPS2DQZrrb VR512:$src, imm:$rc)>;

def : Pat<(v8i32 (int_x86_avx512_mask_cvtpd2dq_512
                     (v8f64 VR512:$src), (v8i32 immAllZerosV), (i8 -1),
                     imm:$rc)),
          (VCVTPD2DQZrrb VR512:$src, imm:$rc)>;

// Packed FP -> unsigned int (non-truncating), same scheme.
defm VCVTPS2UDQZ
    : avx512_vcvt_fp2int<0x79, "vcvtps2udq", VR512, VR512, memopv16f32,
                         f512mem, SSEPackedSingle>,
      PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VCVTPD2UDQZ
    : avx512_vcvt_fp2int<0x79, "vcvtpd2udq", VR512, VR256X, memopv8f64,
                         f512mem, SSEPackedDouble>,
      VEX_W, PS, EVEX_V512, EVEX_CD8<64, CD8VF>;

def : Pat<(v16i32 (int_x86_avx512_mask_cvtps2udq_512
                      (v16f32 VR512:$src), (v16i32 immAllZerosV), (i16 -1),
                      imm:$rc)),
          (VCVTPS2UDQZrrb VR512:$src, imm:$rc)>;

def : Pat<(v8i32 (int_x86_avx512_mask_cvtpd2udq_512
                     (v8f64 VR512:$src), (v8i32 immAllZerosV), (i8 -1),
                     imm:$rc)),
          (VCVTPD2UDQZrrb VR512:$src, imm:$rc)>;

Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00003848
// Fold a vector load into the 512-bit double<->single conversions.
let Predicates = [HasAVX512] in {
  // Narrowing: fround of a loaded v8f64 selects the memory form directly.
  def : Pat<(v8f32 (fround (loadv8f64 addr:$src))),
            (VCVTPD2PSZrm addr:$src)>;

  // Widening: an extending load of v8f32 to v8f64.
  def : Pat<(v8f64 (extloadv8f32 addr:$src)),
            (VCVTPS2PDZrm addr:$src)>;
}
3855
//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//

// vcvtph2ps (opcode 0x13): register-source and memory-source forms.
// Neither form carries a selection pattern of its own.
//   destRC   - register class of the result.
//   srcRC    - register class of the register source.
//   x86memop - memory operand type of the memory source.
multiclass avx512_cvtph2ps<RegisterClass destRC, RegisterClass srcRC,
                           X86MemOperand x86memop> {
  def rr : AVX5128I<0x13, MRMSrcReg,
                    (outs destRC:$dst), (ins srcRC:$src),
                    "vcvtph2ps\t{$src, $dst|$dst, $src}", []>,
           EVEX;

  // No pattern to infer flags from, so state them explicitly.
  let hasSideEffects = 0, mayLoad = 1 in {
    def rm : AVX5128I<0x13, MRMSrcMem,
                      (outs destRC:$dst), (ins x86memop:$src),
                      "vcvtph2ps\t{$src, $dst|$dst, $src}", []>,
             EVEX;
  }
}
3868
// vcvtps2ph (opcode 0x1D): register-destination and memory-destination forms,
// each taking an 8-bit immediate as the last operand. Neither form carries a
// selection pattern of its own.
//   destRC   - register class of the result (rr form).
//   srcRC    - register class of the source vector.
//   x86memop - memory operand type of the store destination (mr form).
multiclass avx512_cvtps2ph<RegisterClass destRC, RegisterClass srcRC,
                           X86MemOperand x86memop> {
  def rr : AVX512AIi8<0x1D, MRMDestReg,
                      (outs destRC:$dst),
                      (ins srcRC:$src1, i32i8imm:$src2),
                      "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      []>,
           EVEX;

  // Store form: there is no register result, the destination is a memory
  // operand in the input list.
  let hasSideEffects = 0, mayStore = 1 in {
    def mr : AVX512AIi8<0x1D, MRMDestMem,
                        (outs),
                        (ins x86memop:$dst, srcRC:$src1, i32i8imm:$src2),
                        "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        []>,
             EVEX;
  }
}
3880
// 512-bit half<->single conversions: the half-precision side lives in a
// 256-bit register / 256-bit memory operand.
defm VCVTPH2PSZ : avx512_cvtph2ps<VR512, VR256X, f256mem>,
                  EVEX_V512, EVEX_CD8<32, CD8VH>;
defm VCVTPS2PHZ : avx512_cvtps2ph<VR256X, VR512, f256mem>,
                  EVEX_V512, EVEX_CD8<32, CD8VH>;

// Unmasked vcvtps2ph intrinsic (zero pass-through, all-ones mask).
def : Pat<(v16i16 (int_x86_avx512_mask_vcvtps2ph_512
                      (v16f32 VR512:$src), imm:$rc,
                      (bc_v16i16(v8i32 immAllZerosV)), (i16 -1))),
          (VCVTPS2PHZrr VR512:$src, imm:$rc)>;

// Unmasked vcvtph2ps intrinsic; only matched with FROUND_CURRENT.
def : Pat<(v16f32 (int_x86_avx512_mask_vcvtph2ps_512
                      (v16i16 VR256X:$src),
                      (bc_v16f32(v16i32 immAllZerosV)), (i16 -1),
                      (i32 FROUND_CURRENT))),
          (VCVTPH2PSZrr VR256X:$src)>;
3893
// EVEX-encoded scalar ordered/unordered compares. All of them define EFLAGS.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  // Scalar-register forms selected from X86cmp.
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
                                 "ucomiss">,
                   PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
                                 "ucomisd">,
                   PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

  // comiss/comisd: defined with an empty pattern list (no selection).
  let Pattern = []<dag> in {
    defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load,
                                  "comiss">,
                    PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load,
                                  "comisd">,
                    PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }

  // Vector-register forms selected from X86ucomi / X86comi; codegen only.
  let isCodeGenOnly = 1 in {
    defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32,
                                       f128mem, load, "ucomiss">,
                         PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64,
                                       f128mem, load, "ucomisd">,
                         PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

    defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32,
                                      f128mem, load, "comiss">,
                        PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
    defm Int_VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64,
                                      f128mem, load, "comisd">,
                        PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}
3925
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
///
/// Two-source scalar forms (register/register and register/memory). Both are
/// defined without a selection pattern, so their flags are given explicitly.
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                         X86MemOperand x86memop> {
  let hasSideEffects = 0 in
  def rr : AVX5128I<opc, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, RC:$src2),
                    !strconcat(OpcodeStr,
                      " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>,
           EVEX_4V;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : AVX5128I<opc, MRMSrcMem,
                    (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                    !strconcat(OpcodeStr,
                      " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>,
           EVEX_4V;
}
3942
// Scalar 14-bit reciprocal / reciprocal square root approximations.
defm VRCP14SS   : avx512_fp14_s<0x4D, "vrcp14ss", FR32X, f32mem>,
                  EVEX_CD8<32, CD8VT1>;
defm VRCP14SD   : avx512_fp14_s<0x4D, "vrcp14sd", FR64X, f64mem>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;
defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", FR32X, f32mem>,
                  EVEX_CD8<32, CD8VT1>;
defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", FR64X, f64mem>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;

// Intrinsic selection. The intrinsics operate on 128-bit vectors while the
// instructions above are defined on scalar FP register classes, so operands
// and result are moved between classes with COPY_TO_REGCLASS. Only the
// unmasked shape (zero pass-through, mask of -1) is matched.
def : Pat<(v4f32 (int_x86_avx512_rcp14_ss
                     (v4f32 VR128X:$src1), (v4f32 VR128X:$src2),
                     (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
          (COPY_TO_REGCLASS
             (VRCP14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
                         (COPY_TO_REGCLASS VR128X:$src2, FR32X)),
             VR128X)>;

def : Pat<(v2f64 (int_x86_avx512_rcp14_sd
                     (v2f64 VR128X:$src1), (v2f64 VR128X:$src2),
                     (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
          (COPY_TO_REGCLASS
             (VRCP14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
                         (COPY_TO_REGCLASS VR128X:$src2, FR64X)),
             VR128X)>;

def : Pat<(v4f32 (int_x86_avx512_rsqrt14_ss
                     (v4f32 VR128X:$src1), (v4f32 VR128X:$src2),
                     (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
          (COPY_TO_REGCLASS
             (VRSQRT14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
                           (COPY_TO_REGCLASS VR128X:$src2, FR32X)),
             VR128X)>;

def : Pat<(v2f64 (int_x86_avx512_rsqrt14_sd
                     (v2f64 VR128X:$src1), (v2f64 VR128X:$src2),
                     (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
          (COPY_TO_REGCLASS
             (VRSQRT14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
                           (COPY_TO_REGCLASS VR128X:$src2, FR64X)),
             VR128X)>;
Elena Demikhovskya3a71402013-10-09 08:16:14 +00003971
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
///
/// Packed single-source forms. Both carry a selection pattern:
///   r - OpNode applied to a register of type OpVt.
///   m - OpNode applied to a value loaded through mem_frag.
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         RegisterClass RC, X86MemOperand x86memop,
                         PatFrag mem_frag, ValueType OpVt> {
  def r : AVX5128I<opc, MRMSrcReg,
                   (outs RC:$dst), (ins RC:$src),
                   !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                   [(set RC:$dst, (OpVt (OpNode RC:$src)))]>,
          EVEX;

  def m : AVX5128I<opc, MRMSrcMem,
                   (outs RC:$dst), (ins x86memop:$src),
                   !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                   [(set RC:$dst, (OpVt (OpNode (mem_frag addr:$src))))]>,
          EVEX;
}
// 512-bit packed 14-bit approximations, selected from X86frsqrt / X86frcp.
defm VRSQRT14PSZ : avx512_fp14_p<0x4E, "vrsqrt14ps", X86frsqrt, VR512,
                                 f512mem, memopv16f32, v16f32>,
                   EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRSQRT14PDZ : avx512_fp14_p<0x4E, "vrsqrt14pd", X86frsqrt, VR512,
                                 f512mem, memopv8f64, v8f64>,
                   VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
defm VRCP14PSZ   : avx512_fp14_p<0x4C, "vrcp14ps", X86frcp, VR512,
                                 f512mem, memopv16f32, v16f32>,
                   EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRCP14PDZ   : avx512_fp14_p<0x4C, "vrcp14pd", X86frcp, VR512,
                                 f512mem, memopv8f64, v8f64>,
                   VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// Unmasked intrinsic forms (zero pass-through, all-ones mask) map onto the
// plain register instructions.
def : Pat<(v16f32 (int_x86_avx512_rsqrt14_ps_512
                      (v16f32 VR512:$src),
                      (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
          (VRSQRT14PSZr VR512:$src)>;
def : Pat<(v8f64 (int_x86_avx512_rsqrt14_pd_512
                      (v8f64 VR512:$src),
                      (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
          (VRSQRT14PDZr VR512:$src)>;

def : Pat<(v16f32 (int_x86_avx512_rcp14_ps_512
                      (v16f32 VR512:$src),
                      (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
          (VRCP14PSZr VR512:$src)>;
def : Pat<(v8f64 (int_x86_avx512_rcp14_pd_512
                      (v8f64 VR512:$src),
                      (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
          (VRCP14PDZr VR512:$src)>;
4008
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
///
/// Two-source scalar forms, all gated on HasERI and defined without a
/// selection pattern:
///   rr  - register/register.
///   rrb - register/register with EVEX_B set; printed with "{sae}".
///   rm  - register/memory.
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                         X86MemOperand x86memop> {
  let hasSideEffects = 0, Predicates = [HasERI] in {
    def rr  : AVX5128I<opc, MRMSrcReg,
                       (outs RC:$dst), (ins RC:$src1, RC:$src2),
                       !strconcat(OpcodeStr,
                         " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       []>,
              EVEX_4V;

    def rrb : AVX5128I<opc, MRMSrcReg,
                       (outs RC:$dst), (ins RC:$src1, RC:$src2),
                       !strconcat(OpcodeStr,
                         " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
                       []>,
              EVEX_4V, EVEX_B;

    let mayLoad = 1 in
    def rm  : AVX5128I<opc, MRMSrcMem,
                       (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                       !strconcat(OpcodeStr,
                         " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       []>,
              EVEX_4V;
  }
}
4030
// Scalar 28-bit reciprocal / reciprocal square root approximations.
defm VRCP28SS   : avx512_fp28_s<0xCB, "vrcp28ss", FR32X, f32mem>,
                  EVEX_CD8<32, CD8VT1>;
defm VRCP28SD   : avx512_fp28_s<0xCB, "vrcp28sd", FR64X, f64mem>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;
defm VRSQRT28SS : avx512_fp28_s<0xCD, "vrsqrt28ss", FR32X, f32mem>,
                  EVEX_CD8<32, CD8VT1>;
defm VRSQRT28SD : avx512_fp28_s<0xCD, "vrsqrt28sd", FR64X, f64mem>,
                  VEX_W, EVEX_CD8<64, CD8VT1>;

// Intrinsic selection: only the unmasked shape with FROUND_NO_EXC is matched,
// and it maps to the "rrb" ({sae}) form. Operands and result are moved
// between the 128-bit vector class and the scalar FP classes.
def : Pat<(v4f32 (int_x86_avx512_rcp28_ss
                     (v4f32 VR128X:$src1), (v4f32 VR128X:$src2),
                     (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
                     FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
             (VRCP28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
                          (COPY_TO_REGCLASS VR128X:$src2, FR32X)),
             VR128X)>;

def : Pat<(v2f64 (int_x86_avx512_rcp28_sd
                     (v2f64 VR128X:$src1), (v2f64 VR128X:$src2),
                     (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
                     FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
             (VRCP28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
                          (COPY_TO_REGCLASS VR128X:$src2, FR64X)),
             VR128X)>;

def : Pat<(v4f32 (int_x86_avx512_rsqrt28_ss
                     (v4f32 VR128X:$src1), (v4f32 VR128X:$src2),
                     (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
                     FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
             (VRSQRT28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
                            (COPY_TO_REGCLASS VR128X:$src2, FR32X)),
             VR128X)>;

def : Pat<(v2f64 (int_x86_avx512_rsqrt28_sd
                     (v2f64 VR128X:$src1), (v2f64 VR128X:$src2),
                     (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
                     FROUND_NO_EXC)),
          (COPY_TO_REGCLASS
             (VRSQRT28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
                            (COPY_TO_REGCLASS VR128X:$src2, FR64X)),
             VR128X)>;
4063
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
///
/// Packed single-source forms, all gated on HasERI and defined without a
/// selection pattern:
///   r  - register source.
///   rb - register source with EVEX_B set; printed with "{sae}".
///   m  - memory source.
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr,
                         RegisterClass RC, X86MemOperand x86memop> {
  let hasSideEffects = 0, Predicates = [HasERI] in {
    def r  : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                      !strconcat(OpcodeStr,
                        " \t{$src, $dst|$dst, $src}"),
                      []>, EVEX;
    def rb : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                      !strconcat(OpcodeStr,
                        " \t{{sae}, $src, $dst|$dst, $src, {sae}}"),
                      []>, EVEX, EVEX_B;
    // The pattern list is empty, so nothing can infer that this form reads
    // memory; with hasSideEffects = 0 it must be marked mayLoad explicitly
    // (as avx512_fp28_s does for its rm form).
    let mayLoad = 1 in
    def m  : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                      !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
                      []>, EVEX;
  }
}
// 512-bit packed 28-bit approximations.
defm VRSQRT28PSZ : avx512_fp28_p<0xCC, "vrsqrt28ps", VR512, f512mem>,
                   EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRSQRT28PDZ : avx512_fp28_p<0xCC, "vrsqrt28pd", VR512, f512mem>,
                   VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
defm VRCP28PSZ   : avx512_fp28_p<0xCA, "vrcp28ps", VR512, f512mem>,
                   EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VRCP28PDZ   : avx512_fp28_p<0xCA, "vrcp28pd", VR512, f512mem>,
                   VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// Intrinsic selection: only the unmasked shape with FROUND_NO_EXC is matched,
// and it maps to the "rb" ({sae}) form.
def : Pat<(v16f32 (int_x86_avx512_rsqrt28_ps
                      (v16f32 VR512:$src),
                      (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1),
                      FROUND_NO_EXC)),
          (VRSQRT28PSZrb VR512:$src)>;
def : Pat<(v8f64 (int_x86_avx512_rsqrt28_pd
                      (v8f64 VR512:$src),
                      (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1),
                      FROUND_NO_EXC)),
          (VRSQRT28PDZrb VR512:$src)>;

def : Pat<(v16f32 (int_x86_avx512_rcp28_ps
                      (v16f32 VR512:$src),
                      (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1),
                      FROUND_NO_EXC)),
          (VRCP28PSZrb VR512:$src)>;
def : Pat<(v8f64 (int_x86_avx512_rcp28_pd
                      (v8f64 VR512:$src),
                      (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1),
                      FROUND_NO_EXC)),
          (VRCP28PDZrb VR512:$src)>;
4103
// 512-bit packed square root, single ("ps") and double ("pd") precision, each
// with a register-source and a memory-source form selected from OpNode.
//   opc       - opcode byte shared by all four forms.
//   OpcodeStr - mnemonic without the "ps"/"pd" suffix.
//   OpNode    - DAG node the instructions are selected from.
//   itins_s / itins_d - itineraries for the ps / pd forms.
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              OpndItins itins_s, OpndItins itins_d> {
  def PSZrr : AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
                        !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                        [(set VR512:$dst, (v16f32 (OpNode VR512:$src)))],
                        itins_s.rr>,
              EVEX, EVEX_V512;

  let mayLoad = 1 in
  def PSZrm : AVX512PSI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
                        !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                        [(set VR512:$dst,
                          (OpNode (v16f32 (bitconvert (memopv16f32 addr:$src)))))],
                        itins_s.rm>,
              EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;

  // The double-precision forms are encoded with EVEX.W = 1, like the other
  // 64-bit-element definitions in this file (VEX_W).
  def PDZrr : AVX512PDI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
                        !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                        [(set VR512:$dst, (v8f64 (OpNode VR512:$src)))],
                        itins_d.rr>,
              EVEX, EVEX_V512, VEX_W;

  // Load the operand as v8f64 directly. Loading it as v16f32 and bitcasting
  // only matches a DAG that still contains that explicit bitcast.
  let mayLoad = 1 in
  def PDZrm : AVX512PDI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
                        !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                        [(set VR512:$dst,
                          (OpNode (v8f64 (bitconvert (memopv8f64 addr:$src)))))],
                        itins_d.rm>,
              EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
4131
// Scalar square root, single ("ss") and double ("sd") precision.
// For each precision there are four forms:
//   *Zr      - FP-register operands, no pattern.
//   *Zr_Int  - 128-bit vector operands, selected from the intrinsic
//              (codegen only).
//   *Zm      - FP-register + scalar memory operand, no pattern.
//   *Zm_Int  - 128-bit vector + scalar-load operand, selected from the
//              intrinsic (codegen only).
//   F32Int / F64Int   - intrinsics for the ss / sd _Int forms.
//   itins_s / itins_d - itineraries for the ss / sd forms. Every sd form uses
//                       itins_d (SDZr_Int previously used itins_s and the
//                       other sd forms had no itinerary at all).
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,
                              Intrinsic F32Int, Intrinsic F64Int,
                              OpndItins itins_s, OpndItins itins_d> {
  def SSZr : SI<opc, MRMSrcReg, (outs FR32X:$dst),
                (ins FR32X:$src1, FR32X:$src2),
                !strconcat(OpcodeStr,
                           "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [], itins_s.rr>, XS, EVEX_4V;
  let isCodeGenOnly = 1 in
  def SSZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
                      (ins VR128X:$src1, VR128X:$src2),
                      !strconcat(OpcodeStr,
                                 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [(set VR128X:$dst,
                        (F32Int VR128X:$src1, VR128X:$src2))],
                      itins_s.rr>, XS, EVEX_4V;
  let mayLoad = 1 in {
  def SSZm : SI<opc, MRMSrcMem, (outs FR32X:$dst),
                (ins FR32X:$src1, f32mem:$src2),
                !strconcat(OpcodeStr,
                           "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [], itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
  let isCodeGenOnly = 1 in
  def SSZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
                      (ins VR128X:$src1, ssmem:$src2),
                      !strconcat(OpcodeStr,
                                 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [(set VR128X:$dst,
                        (F32Int VR128X:$src1, sse_load_f32:$src2))],
                      itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
  }
  def SDZr : SI<opc, MRMSrcReg, (outs FR64X:$dst),
                (ins FR64X:$src1, FR64X:$src2),
                !strconcat(OpcodeStr,
                           "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [], itins_d.rr>, XD, EVEX_4V, VEX_W;
  let isCodeGenOnly = 1 in
  def SDZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
                      (ins VR128X:$src1, VR128X:$src2),
                      !strconcat(OpcodeStr,
                                 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [(set VR128X:$dst,
                        (F64Int VR128X:$src1, VR128X:$src2))],
                      itins_d.rr>, XD, EVEX_4V, VEX_W;
  let mayLoad = 1 in {
  def SDZm : SI<opc, MRMSrcMem, (outs FR64X:$dst),
                (ins FR64X:$src1, f64mem:$src2),
                !strconcat(OpcodeStr,
                           "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [], itins_d.rm>, XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
  let isCodeGenOnly = 1 in
  def SDZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
                      (ins VR128X:$src1, sdmem:$src2),
                      !strconcat(OpcodeStr,
                                 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [(set VR128X:$dst,
                        (F64Int VR128X:$src1, sse_load_f64:$src2))],
                      itins_d.rm>, XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}
4192
4193
// Instantiates both the scalar and the packed square-root multiclasses under
// the common VSQRT prefix. The scalar multiclass receives "sqrt" while the
// packed one receives "vsqrt".
defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
                                int_x86_avx512_sqrt_ss,
                                int_x86_avx512_sqrt_sd,
                                SSE_SQRTSS, SSE_SQRTSD>,
             avx512_sqrt_packed<0x51, "vsqrt", fsqrt,
                                SSE_SQRTPS, SSE_SQRTPD>;
4199
// Selection patterns for sqrt / rsqrt14 / rcp14 that apply whenever AVX-512
// is available.
let Predicates = [HasAVX512] in {
  // Unmasked 512-bit sqrt intrinsics (zero pass-through, all-ones mask,
  // FROUND_CURRENT) map onto the plain register forms.
  def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1),
                     (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1),
                     FROUND_CURRENT)),
            (VSQRTPSZrr VR512:$src1)>;
  def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1),
                     (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1),
                     FROUND_CURRENT)),
            (VSQRTPDZrr VR512:$src1)>;

  // Scalar nodes on register operands; the first source is left undefined.
  def : Pat<(f32 (fsqrt FR32X:$src)),
            (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
  def : Pat<(f64 (fsqrt FR64X:$src)),
            (VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>;
  def : Pat<(f32 (X86frsqrt FR32X:$src)),
            (VRSQRT14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
  def : Pat<(f32 (X86frcp FR32X:$src)),
            (VRCP14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;

  // SSE sqrt intrinsics. The scalar instructions are defined on
  // FR32X/FR64X, so copy into those classes rather than into FR32/FR64.
  def : Pat<(int_x86_sse_sqrt_ss VR128X:$src),
            (COPY_TO_REGCLASS (VSQRTSSZr (f32 (IMPLICIT_DEF)),
                                         (COPY_TO_REGCLASS VR128X:$src, FR32X)),
                              VR128X)>;
  def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
            (VSQRTSSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;

  def : Pat<(int_x86_sse2_sqrt_sd VR128X:$src),
            (COPY_TO_REGCLASS (VSQRTSDZr (f64 (IMPLICIT_DEF)),
                                         (COPY_TO_REGCLASS VR128X:$src, FR64X)),
                              VR128X)>;
  def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
            (VSQRTSDZm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
}

// Load-folding scalar forms, intended only when optimizing for size.
// These used to be written as `..., Requires<[OptForSize]>` inside the
// `let Predicates = [HasAVX512]` scope above. An enclosing `let` is applied
// after the parent classes, so it replaced the predicate list set by
// Requires<> and OptForSize was silently dropped. Listing both predicates in
// the `let` keeps both.
let Predicates = [HasAVX512, OptForSize] in {
  def : Pat<(f32 (fsqrt (load addr:$src))),
            (VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (fsqrt (load addr:$src))),
            (VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f32 (X86frsqrt (load addr:$src))),
            (VRSQRT14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f32 (X86frcp (load addr:$src))),
            (VRCP14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
}
4245
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004246
// Packed unary operation with a trailing 8-bit immediate, selected from a
// pair of intrinsics. Defines four forms:
//   PSr / PSm - single precision, register / memory source (V4F32Int).
//   PDr / PDm - double precision, register / memory source (V2F64Int).
// opcps / opcpd are the opcodes of the ps / pd forms; VForm is the CD8 form
// applied to the memory variants.
multiclass avx512_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
                             X86MemOperand x86memop, RegisterClass RC,
                             PatFrag mem_frag32, PatFrag mem_frag64,
                             Intrinsic V4F32Int, Intrinsic V2F64Int,
                             CD8VForm VForm> {
  let ExeDomain = SSEPackedSingle in {
    // Single precision, register source.
    def PSr : AVX512AIi8<opcps, MRMSrcReg,
                         (outs RC:$dst),
                         (ins RC:$src1, i32i8imm:$src2),
                         !strconcat(OpcodeStr,
                           "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                         [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>;

    // Single precision, memory source.
    def PSm : AVX512AIi8<opcps, MRMSrcMem,
                         (outs RC:$dst),
                         (ins x86memop:$src1, i32i8imm:$src2),
                         !strconcat(OpcodeStr,
                           "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                         [(set RC:$dst,
                           (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
              EVEX_CD8<32, VForm>;
  } // ExeDomain = SSEPackedSingle

  let ExeDomain = SSEPackedDouble in {
    // Double precision, register source.
    def PDr : AVX512AIi8<opcpd, MRMSrcReg,
                         (outs RC:$dst),
                         (ins RC:$src1, i32i8imm:$src2),
                         !strconcat(OpcodeStr,
                           "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                         [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>;

    // Double precision, memory source.
    def PDm : AVX512AIi8<opcpd, MRMSrcMem,
                         (outs RC:$dst),
                         (ins x86memop:$src1, i32i8imm:$src2),
                         !strconcat(OpcodeStr,
                           "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                         [(set RC:$dst,
                           (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
              EVEX_CD8<64, VForm>;
  } // ExeDomain = SSEPackedDouble
}
4289
// Scalar binary operation with a trailing 8-bit immediate. For each of the
// single ("ss", opcss) and double ("sd", opcsd) precisions it defines:
//   *r     - FP-register operands, no pattern.
//   *r_Int - 128-bit vector operands, selected from the intrinsic
//            (codegen only).
//   *m     - 128-bit vector + scalar-load operand, selected from the
//            intrinsic.
multiclass avx512_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
                              string OpcodeStr,
                              Intrinsic F32Int,
                              Intrinsic F64Int> {
  let ExeDomain = GenericDomain in {
    // ---- single precision ----

    // Plain operation on FP registers.
    let hasSideEffects = 0 in
    def SSr : AVX512AIi8<opcss, MRMSrcReg,
          (outs FR32X:$dst),
          (ins FR32X:$src1, FR32X:$src2, i32i8imm:$src3),
          !strconcat(OpcodeStr,
            "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
          []>;

    // Intrinsic on vector registers.
    let isCodeGenOnly = 1 in
    def SSr_Int : AVX512AIi8<opcss, MRMSrcReg,
          (outs VR128X:$dst),
          (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
          !strconcat(OpcodeStr,
            "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
          [(set VR128X:$dst,
            (F32Int VR128X:$src1, VR128X:$src2, imm:$src3))]>;

    // Intrinsic with the second source loaded from memory.
    def SSm : AVX512AIi8<opcss, MRMSrcMem,
          (outs VR128X:$dst),
          (ins VR128X:$src1, ssmem:$src2, i32i8imm:$src3),
          !strconcat(OpcodeStr,
            "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
          [(set VR128X:$dst,
            (F32Int VR128X:$src1, sse_load_f32:$src2, imm:$src3))]>,
          EVEX_CD8<32, CD8VT1>;

    // ---- double precision ----

    // Plain operation on FP registers.
    let hasSideEffects = 0 in
    def SDr : AVX512AIi8<opcsd, MRMSrcReg,
          (outs FR64X:$dst),
          (ins FR64X:$src1, FR64X:$src2, i32i8imm:$src3),
          !strconcat(OpcodeStr,
            "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
          []>, VEX_W;

    // Intrinsic on vector registers.
    let isCodeGenOnly = 1 in
    def SDr_Int : AVX512AIi8<opcsd, MRMSrcReg,
          (outs VR128X:$dst),
          (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
          !strconcat(OpcodeStr,
            "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
          [(set VR128X:$dst,
            (F64Int VR128X:$src1, VR128X:$src2, imm:$src3))]>,
          VEX_W;

    // Intrinsic with the second source loaded from memory.
    def SDm : AVX512AIi8<opcsd, MRMSrcMem,
          (outs VR128X:$dst),
          (ins VR128X:$src1, sdmem:$src2, i32i8imm:$src3),
          !strconcat(OpcodeStr,
            "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
          [(set VR128X:$dst,
            (F64Int VR128X:$src1, sse_load_f64:$src2, imm:$src3))]>,
          VEX_W, EVEX_CD8<64, CD8VT1>;
  } // ExeDomain = GenericDomain
}
4347
// Packed vrndscale: one vector source plus an 8-bit immediate, with a
// register-source (r) and a memory-source (m) form. Neither form carries a
// selection pattern of its own.
//   x86memop - memory operand type of the m form.
//   RC       - register class of source and destination.
//   mem_frag - not referenced by the definitions below.
//   d        - execution domain applied to both forms.
multiclass avx512_rndscale<bits<8> opc, string OpcodeStr,
                           X86MemOperand x86memop, RegisterClass RC,
                           PatFrag mem_frag, Domain d> {
let ExeDomain = d in {
  // Register source.
  def r : AVX512AIi8<opc, MRMSrcReg,
                     (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                     !strconcat(OpcodeStr,
                       " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     []>, EVEX;

  // Memory source. The pattern list is empty, so the load has to be declared
  // explicitly; nothing else marks this form as reading memory.
  let mayLoad = 1 in
  def m : AVX512AIi8<opc, MRMSrcMem,
                     (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                     !strconcat(OpcodeStr,
                       " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     []>, EVEX;
} // ExeDomain
}
4368
Elena Demikhovskyde3f7512014-01-01 15:12:34 +00004369
// 512-bit packed vrndscale, single precision.
defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512,
                                    memopv16f32, SSEPackedSingle>,
                    EVEX_V512, EVEX_CD8<32, CD8VF>;

// Matched only when the pass-through operand is the source itself, the mask
// is all ones, and the rounding argument is FROUND_CURRENT.
def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512
                      (v16f32 VR512:$src1), imm:$src2,
                      (v16f32 VR512:$src1), (i16 -1),
                      FROUND_CURRENT)),
          (VRNDSCALEPSZr VR512:$src1, imm:$src2)>;


// 512-bit packed vrndscale, double precision.
defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512,
                                    memopv8f64, SSEPackedDouble>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Same matching conditions as the single-precision pattern.
def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512
                      (v8f64 VR512:$src1), imm:$src2,
                      (v8f64 VR512:$src1), (i8 -1),
                      FROUND_CURRENT)),
          (VRNDSCALEPDZr VR512:$src1, imm:$src2)>;
4388
// Scalar vrndscale: two sources plus an 8-bit immediate ($src3), with a
// register (r) and a memory (m) form for the second source. Neither form
// carries a selection pattern of its own.
//   x86memop - operand type of the memory source in the m form.
//   RC       - register class of the register operands.
//   d        - execution domain applied to both forms.
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                  Operand x86memop, RegisterClass RC, Domain d> {
let ExeDomain = d in {
  // The assembly string must mention all three inputs; it previously left
  // out the $src3 immediate, so the immediate was never printed or parsed.
  def r : AVX512AIi8<opc, MRMSrcReg,
                     (outs RC:$dst), (ins RC:$src1, RC:$src2, i32i8imm:$src3),
                     !strconcat(OpcodeStr,
                       " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                     []>, EVEX_4V;

  // Memory form: the pattern list is empty, so declare the load explicitly.
  let mayLoad = 1 in
  def m : AVX512AIi8<opc, MRMSrcMem,
                     (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i32i8imm:$src3),
                     !strconcat(OpcodeStr,
                       " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                     []>, EVEX_4V;
} // ExeDomain
}
4405
// Scalar vrndscale, single precision.
defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", ssmem, FR32X,
                                          SSEPackedSingle>,
                   EVEX_CD8<32, CD8VT1>;

// Scalar vrndscale, double precision. Like the packed double form
// (VRNDSCALEPDZ), the 64-bit-element encoding requires EVEX.W = 1, which was
// missing here.
defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", sdmem, FR64X,
                                          SSEPackedDouble>,
                   VEX_W, EVEX_CD8<64, CD8VT1>;
4411
// Select the scalar rounding nodes to the register form of
// VRNDSCALESS/VRNDSCALESD. The first source operand is left undefined
// (IMPLICIT_DEF); the immediate used for each node is:
//   ffloor = 0x1, fceil = 0x2, ftrunc = 0x3, frint = 0x4, fnearbyint = 0xC.

// f32 -> VRNDSCALESSr
def : Pat<(f32 (ffloor FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>;
def : Pat<(f32 (fceil FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x2))>;
def : Pat<(f32 (ftrunc FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x3))>;
def : Pat<(f32 (frint FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x4))>;
def : Pat<(f32 (fnearbyint FR32X:$src)),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0xC))>;

// f64 -> VRNDSCALESDr
def : Pat<(f64 (ffloor FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x1))>;
def : Pat<(f64 (fceil FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x2))>;
def : Pat<(f64 (ftrunc FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>;
def : Pat<(f64 (frint FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x4))>;
def : Pat<(f64 (fnearbyint FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0xC))>;
4432
// Packed 512-bit rounding nodes select to VRNDSCALEPSZr / VRNDSCALEPDZr with
// the same immediates as the scalar patterns:
//   ffloor = 0x1, fceil = 0x2, ftrunc = 0x3, frint = 0x4, fnearbyint = 0xC.

// v16f32
def : Pat<(v16f32 (ffloor VR512:$src)),
          (VRNDSCALEPSZr VR512:$src, (i32 0x1))>;
def : Pat<(v16f32 (fceil VR512:$src)),
          (VRNDSCALEPSZr VR512:$src, (i32 0x2))>;
def : Pat<(v16f32 (ftrunc VR512:$src)),
          (VRNDSCALEPSZr VR512:$src, (i32 0x3))>;
def : Pat<(v16f32 (frint VR512:$src)),
          (VRNDSCALEPSZr VR512:$src, (i32 0x4))>;
def : Pat<(v16f32 (fnearbyint VR512:$src)),
          (VRNDSCALEPSZr VR512:$src, (i32 0xC))>;

// v8f64
def : Pat<(v8f64 (ffloor VR512:$src)),
          (VRNDSCALEPDZr VR512:$src, (i32 0x1))>;
def : Pat<(v8f64 (fceil VR512:$src)),
          (VRNDSCALEPDZr VR512:$src, (i32 0x2))>;
def : Pat<(v8f64 (ftrunc VR512:$src)),
          (VRNDSCALEPDZr VR512:$src, (i32 0x3))>;
def : Pat<(v8f64 (frint VR512:$src)),
          (VRNDSCALEPDZr VR512:$src, (i32 0x4))>;
def : Pat<(v8f64 (fnearbyint VR512:$src)),
          (VRNDSCALEPDZr VR512:$src, (i32 0xC))>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004454
4455//-------------------------------------------------
4456// Integer truncate and extend operations
4457//-------------------------------------------------
4458
// Truncating integer moves (VPMOV*, VPMOVS*, VPMOVUS*).
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   dstRC     - register class of the (narrower) destination.
//   srcRC     - register class of the source.
//   KRC       - mask register class for the masked forms.
//   x86memop  - memory operand type for the store forms.
// No selection patterns are attached here; all pattern lists are empty.
multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr,
                            RegisterClass dstRC, RegisterClass srcRC,
                            RegisterClass KRC, X86MemOperand x86memop> {
  // Register destination: plain, {mask}, and {mask} {z}.
  def rr   : AVX512XS8I<opc, MRMDestReg,
                        (outs dstRC:$dst), (ins srcRC:$src),
                        OpcodeStr # " \t{$src, $dst|$dst, $src}",
                        []>,
             EVEX;

  def rrk  : AVX512XS8I<opc, MRMDestReg,
                        (outs dstRC:$dst), (ins KRC:$mask, srcRC:$src),
                        OpcodeStr #
                        " \t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                        []>,
             EVEX, EVEX_K;

  def rrkz : AVX512XS8I<opc, MRMDestReg,
                        (outs dstRC:$dst), (ins KRC:$mask, srcRC:$src),
                        OpcodeStr #
                        " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                        []>,
             EVEX, EVEX_KZ;

  // Memory destination: plain and {mask}.
  def mr   : AVX512XS8I<opc, MRMDestMem,
                        (outs), (ins x86memop:$dst, srcRC:$src),
                        OpcodeStr # " \t{$src, $dst|$dst, $src}",
                        []>,
             EVEX;

  def mrk  : AVX512XS8I<opc, MRMDestMem,
                        (outs), (ins x86memop:$dst, KRC:$mask, srcRC:$src),
                        OpcodeStr #
                        " \t{$src, $dst {${mask}}|${dst} {${mask}}, $src}",
                        []>,
             EVEX, EVEX_K;
}
// Instantiations of avx512_trunc_sat, grouped by source/destination element
// width. Within each group the three entries are the plain, "s" and "us"
// mnemonics.

// i64 -> i8
defm VPMOVQB   : avx512_trunc_sat<0x32, "vpmovqb",   VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVSQB  : avx512_trunc_sat<0x22, "vpmovsqb",  VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VO>;

// i64 -> i16
defm VPMOVQW   : avx512_trunc_sat<0x34, "vpmovqw",   VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVSQW  : avx512_trunc_sat<0x24, "vpmovsqw",  VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VQ>;

// i64 -> i32
defm VPMOVQD   : avx512_trunc_sat<0x35, "vpmovqd",   VR256X, VR512, VK8WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<32, CD8VH>;
defm VPMOVSQD  : avx512_trunc_sat<0x25, "vpmovsqd",  VR256X, VR512, VK8WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<32, CD8VH>;
defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<32, CD8VH>;

// i32 -> i16
defm VPMOVDW   : avx512_trunc_sat<0x33, "vpmovdw",   VR256X, VR512, VK16WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVSDW  : avx512_trunc_sat<0x23, "vpmovsdw",  VR256X, VR512, VK16WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM,
                                  i256mem>,
                 EVEX_V512, EVEX_CD8<16, CD8VH>;

// i32 -> i8
defm VPMOVDB   : avx512_trunc_sat<0x31, "vpmovdb",   VR128X, VR512, VK16WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVSDB  : avx512_trunc_sat<0x21, "vpmovsdb",  VR128X, VR512, VK16WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM,
                                  i128mem>,
                 EVEX_V512, EVEX_CD8<8, CD8VQ>;
4519
// Unmasked X86vtrunc selects the plain register form of the matching VPMOV*.
def : Pat<(v16i8  (X86vtrunc (v8i64  VR512:$src))),
          (VPMOVQBrr VR512:$src)>;
def : Pat<(v8i16  (X86vtrunc (v8i64  VR512:$src))),
          (VPMOVQWrr VR512:$src)>;
def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))),
          (VPMOVDWrr VR512:$src)>;
def : Pat<(v16i8  (X86vtrunc (v16i32 VR512:$src))),
          (VPMOVDBrr VR512:$src)>;
def : Pat<(v8i32  (X86vtrunc (v8i64  VR512:$src))),
          (VPMOVQDrr VR512:$src)>;

// Masked X86vtruncm selects the {mask} {z} register form (rrkz).
def : Pat<(v16i8  (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
          (VPMOVDBrrkz VK16WM:$mask, VR512:$src)>;
def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
          (VPMOVDWrrkz VK16WM:$mask, VR512:$src)>;
def : Pat<(v8i16  (X86vtruncm VK8WM:$mask,  (v8i64  VR512:$src))),
          (VPMOVQWrrkz VK8WM:$mask, VR512:$src)>;
def : Pat<(v8i32  (X86vtruncm VK8WM:$mask,  (v8i64  VR512:$src))),
          (VPMOVQDrrkz VK8WM:$mask, VR512:$src)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004534
4535
// Integer extend instructions (instantiated below with X86vzext / X86vsext).
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   KRC       - mask register class for the masked forms.
//   DstRC     - register class of the widened destination.
//   SrcRC     - register class of the narrow source.
//   OpNode    - DAG node matched by the unmasked forms.
//   mem_frag  - load fragment used by the `rm` pattern.
//   x86memop  - memory operand type for the load forms.
//   OpVT/InVT - result and input value types of OpNode.
// Only `rr` and `rm` carry selection patterns; the masked forms have empty
// pattern lists. The masked AT&T strings must not leave a space before the
// '|' variant separator, otherwise the printed instruction ends in trailing
// whitespace (the {z} forms already had none).
multiclass avx512_extend<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                      RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode,
                      PatFrag mem_frag, X86MemOperand x86memop,
                      ValueType OpVT, ValueType InVT> {

  def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
              (ins SrcRC:$src),
              !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (OpVT (OpNode (InVT SrcRC:$src))))]>, EVEX;

  def rrk : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
              (ins KRC:$mask, SrcRC:$src),
              !strconcat(OpcodeStr, " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              []>, EVEX, EVEX_K;

  def rrkz : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
              (ins KRC:$mask, SrcRC:$src),
              !strconcat(OpcodeStr, " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
              []>, EVEX, EVEX_KZ;

  let mayLoad = 1 in {
    def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins x86memop:$src),
              !strconcat(OpcodeStr," \t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst,
                (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))]>,
              EVEX;

    def rmk : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins KRC:$mask, x86memop:$src),
              !strconcat(OpcodeStr," \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              []>,
              EVEX, EVEX_K;

    def rmkz : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins KRC:$mask, x86memop:$src),
              !strconcat(OpcodeStr," \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
              []>,
              EVEX, EVEX_KZ;
  }
}
4577
// Extend instantiations using X86vzext (opcodes 0x31-0x35).
defm VPMOVZXBDZ : avx512_extend<0x31, "vpmovzxbd", VK16WM, VR512, VR128X,
                                X86vzext, memopv2i64, i128mem, v16i32, v16i8>,
                  EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVZXBQZ : avx512_extend<0x32, "vpmovzxbq", VK8WM, VR512, VR128X,
                                X86vzext, memopv2i64, i128mem, v8i64, v16i8>,
                  EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVZXWDZ : avx512_extend<0x33, "vpmovzxwd", VK16WM, VR512, VR256X,
                                X86vzext, memopv4i64, i256mem, v16i32, v16i16>,
                  EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVZXWQZ : avx512_extend<0x34, "vpmovzxwq", VK8WM, VR512, VR128X,
                                X86vzext, memopv2i64, i128mem, v8i64, v8i16>,
                  EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVZXDQZ : avx512_extend<0x35, "vpmovzxdq", VK8WM, VR512, VR256X,
                                X86vzext, memopv4i64, i256mem, v8i64, v8i32>,
                  EVEX_V512, EVEX_CD8<32, CD8VH>;

// Extend instantiations using X86vsext (opcodes 0x21-0x25).
defm VPMOVSXBDZ : avx512_extend<0x21, "vpmovsxbd", VK16WM, VR512, VR128X,
                                X86vsext, memopv2i64, i128mem, v16i32, v16i8>,
                  EVEX_V512, EVEX_CD8<8, CD8VQ>;
defm VPMOVSXBQZ : avx512_extend<0x22, "vpmovsxbq", VK8WM, VR512, VR128X,
                                X86vsext, memopv2i64, i128mem, v8i64, v16i8>,
                  EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVSXWDZ : avx512_extend<0x23, "vpmovsxwd", VK16WM, VR512, VR256X,
                                X86vsext, memopv4i64, i256mem, v16i32, v16i16>,
                  EVEX_V512, EVEX_CD8<16, CD8VH>;
defm VPMOVSXWQZ : avx512_extend<0x24, "vpmovsxwq", VK8WM, VR512, VR128X,
                                X86vsext, memopv2i64, i128mem, v8i64, v8i16>,
                  EVEX_V512, EVEX_CD8<16, CD8VQ>;
defm VPMOVSXDQZ : avx512_extend<0x25, "vpmovsxdq", VK8WM, VR512, VR256X,
                                X86vsext, memopv4i64, i256mem, v8i64, v8i32>,
                  EVEX_V512, EVEX_CD8<32, CD8VH>;
4609
4610//===----------------------------------------------------------------------===//
4611// GATHER - SCATTER Operations
4612
// Gather: a single masked load form `rm`.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   KRC       - mask register class.
//   RC        - register class of the destination / pass-through source.
//   memop     - memory operand type for $src2.
// The instruction has two results: $dst (tied to $src1 and marked
// early-clobber) and $mask_wb (tied to the incoming $mask).
multiclass avx512_gather<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                         RegisterClass RC, X86MemOperand memop> {
  let mayLoad = 1,
      Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in
  def rm : AVX5128I<opc, MRMSrcMem,
                    (outs RC:$dst, KRC:$mask_wb),
                    (ins RC:$src1, KRC:$mask, memop:$src2),
                    OpcodeStr #
                    " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}",
                    []>,
           EVEX, EVEX_K;
}
Cameron McInally45325962014-03-26 13:50:50 +00004623
// Floating-point gathers, double-precision execution domain.
let ExeDomain = SSEPackedDouble in {
  defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", VK8WM, VR512, vy64xmem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", VK8WM, VR512, vz64mem>,
                     EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
}

// Floating-point gathers, single-precision execution domain.
let ExeDomain = SSEPackedSingle in {
  defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512, vz32mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;
  defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>,
                     EVEX_V512, EVEX_CD8<32, CD8VT1>;
}

// Integer gathers (no explicit execution domain).
defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512, vy64xmem>,
                   EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>,
                   EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", VK8WM, VR512, vz64mem>,
                   EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", VK8WM, VR256X, vz64mem>,
                   EVEX_V512, EVEX_CD8<32, CD8VT1>;
4647
// Scatter: a single masked store form `mr`.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   KRC       - mask register class.
//   RC        - register class of the stored vector ($src2).
//   memop     - memory operand type for $dst.
// The only result is $mask_wb, tied to the incoming $mask.
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                          RegisterClass RC, X86MemOperand memop> {
  let mayStore = 1, Constraints = "$mask = $mask_wb" in
  def mr : AVX5128I<opc, MRMDestMem,
                    (outs KRC:$mask_wb),
                    (ins memop:$dst, KRC:$mask, RC:$src2),
                    OpcodeStr #
                    " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}",
                    []>,
           EVEX, EVEX_K;
}
4657
// Floating-point scatters, double-precision execution domain.
let ExeDomain = SSEPackedDouble in {
  defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", VK8WM, VR512,
                                     vy64xmem>,
                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", VK8WM, VR512,
                                     vz64mem>,
                      EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
}

// Floating-point scatters, single-precision execution domain.
let ExeDomain = SSEPackedSingle in {
  defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", VK16WM, VR512,
                                     vz32mem>,
                      EVEX_V512, EVEX_CD8<32, CD8VT1>;
  defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", VK8WM, VR256X,
                                     vz64mem>,
                      EVEX_V512, EVEX_CD8<32, CD8VT1>;
}

// Integer scatters (no explicit execution domain).
defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", VK8WM, VR512,
                                   vy64xmem>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", VK16WM, VR512,
                                   vz32mem>,
                    EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512,
                                   vz64mem>,
                    EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", VK8WM, VR256X,
                                   vz64mem>,
                    EVEX_V512, EVEX_CD8<32, CD8VT1>;
4681
Elena Demikhovsky8e8fde82014-05-12 07:18:51 +00004682// prefetch
// Gather/scatter prefetch: one masked memory-only form `m` with no results.
//   opc       - opcode byte.
//   F         - instruction format (the instantiations pass MRM1m, MRM2m,
//               MRM5m or MRM6m).
//   OpcodeStr - mnemonic.
//   KRC       - mask register class.
//   memop     - memory operand type for $src.
// Requires HasPFI and is explicitly marked as having side effects.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F,
                                          string OpcodeStr,
                                          RegisterClass KRC,
                                          X86MemOperand memop> {
  let Predicates = [HasPFI], hasSideEffects = 1 in
  def m : AVX5128I<opc, F,
                   (outs), (ins KRC:$mask, memop:$src),
                   OpcodeStr # " \t{$src {${mask}}|{${mask}}, $src}",
                   []>,
          EVEX, EVEX_K;
}
4690
// Gather prefetch, "pf0" mnemonics (format MRM1m).
defm VGATHERPF0DPS :
  avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps", VK16WM, vz32mem>,
  EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS :
  avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps", VK8WM, vz64mem>,
  EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD :
  avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd", VK8WM, vy32mem>,
  EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD :
  avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd", VK8WM, vz64mem>,
  EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Gather prefetch, "pf1" mnemonics (format MRM2m).
defm VGATHERPF1DPS :
  avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps", VK16WM, vz32mem>,
  EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS :
  avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps", VK8WM, vz64mem>,
  EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD :
  avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd", VK8WM, vy32mem>,
  EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD :
  avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd", VK8WM, vz64mem>,
  EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Scatter prefetch, "pf0" mnemonics (format MRM5m).
defm VSCATTERPF0DPS :
  avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps", VK16WM, vz32mem>,
  EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS :
  avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps", VK8WM, vz64mem>,
  EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD :
  avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd", VK8WM, vy32mem>,
  EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD :
  avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd", VK8WM, vz64mem>,
  EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Scatter prefetch, "pf1" mnemonics (format MRM6m).
defm VSCATTERPF1DPS :
  avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps", VK16WM, vz32mem>,
  EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS :
  avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps", VK8WM, vz64mem>,
  EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD :
  avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd", VK8WM, vy32mem>,
  EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD :
  avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd", VK8WM, vz64mem>,
  EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004738//===----------------------------------------------------------------------===//
4739// VSHUFPS - VSHUFPD Operations
4740
// VSHUFPS/VSHUFPD: opcode 0xC6 with an 8-bit immediate, both forms matching
// the X86Shufp node.
//   RC        - register class of $dst, $src1 and (for `rri`) $src2.
//   x86memop  - memory operand type for $src2 in `rmi`.
//   vt        - value type of the X86Shufp result.
//   OpcodeStr - mnemonic.
//   mem_frag  - load fragment used in the `rmi` pattern.
//   d         - execution domain.
multiclass avx512_shufp<RegisterClass RC, X86MemOperand x86memop,
                        ValueType vt, string OpcodeStr, PatFrag mem_frag,
                        Domain d> {
  // Second source loaded from memory.
  def rmi : AVX512PIi8<0xC6, MRMSrcMem,
                       (outs RC:$dst),
                       (ins RC:$src1, x86memop:$src2, i8imm:$src3),
                       OpcodeStr #
                       " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                       [(set RC:$dst,
                             (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
                                           (i8 imm:$src3))))],
                       d, IIC_SSE_SHUFP>,
            EVEX_4V, Sched<[WriteShuffleLd, ReadAfterLd]>;

  // Both sources in registers.
  def rri : AVX512PIi8<0xC6, MRMSrcReg,
                       (outs RC:$dst),
                       (ins RC:$src1, RC:$src2, i8imm:$src3),
                       OpcodeStr #
                       " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                       [(set RC:$dst,
                             (vt (X86Shufp RC:$src1, RC:$src2,
                                           (i8 imm:$src3))))],
                       d, IIC_SSE_SHUFP>,
            EVEX_4V, Sched<[WriteShuffle]>;
}
4759
// 512-bit VSHUFPS / VSHUFPD.
defm VSHUFPSZ : avx512_shufp<VR512, f512mem, v16f32, "vshufps", memopv16f32,
                             SSEPackedSingle>,
                PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VSHUFPDZ : avx512_shufp<VR512, f512mem, v8f64, "vshufpd", memopv8f64,
                             SSEPackedDouble>,
                PD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;

// X86Shufp on integer vectors reuses the floating-point instructions:
// v16i32 -> VSHUFPSZ.
def : Pat<(v16i32 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
          (VSHUFPSZrri VR512:$src1, VR512:$src2, imm:$imm)>;
def : Pat<(v16i32 (X86Shufp VR512:$src1, (memopv16i32 addr:$src2),
                            (i8 imm:$imm))),
          (VSHUFPSZrmi VR512:$src1, addr:$src2, imm:$imm)>;

// v8i64 -> VSHUFPDZ.
def : Pat<(v8i64 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
          (VSHUFPDZrri VR512:$src1, VR512:$src2, imm:$imm)>;
def : Pat<(v8i64 (X86Shufp VR512:$src1, (memopv8i64 addr:$src2),
                           (i8 imm:$imm))),
          (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004776
// VALIGND/VALIGNQ (opcode 0x03). `_` is the X86VectorVTInfo describing the
// vector type; the mnemonic is "valign" followed by _.Suffix.
multiclass avx512_valign<X86VectorVTInfo _> {
  // Register form, expanded through AVX512_masking. Note that the DAG
  // pattern passes $src2 before $src1 to X86VAlign.
  defm rri : AVX512_masking<0x03, MRMSrcReg,
                            (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2, i8imm:$src3),
                            "valign"##_.Suffix,
                            "$src3, $src2, $src1", "$src1, $src2, $src3",
                            (_.VT (X86VAlign _.RC:$src2, _.RC:$src1,
                                             (i8 imm:$src3))),
                            _.VT, _.RC, _.KRCWM>,
             AVX512AIi8Base, EVEX_4V;

  // X86VAlign on the packed floating-point type (_.FloatVT) is selected to
  // the same register instruction, with the two sources swapped the same way.
  def : Pat<(_.FloatVT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
            (!cast<Instruction>(NAME##rri) _.RC:$src2, _.RC:$src1, imm:$imm)>;

  // Memory form: no selection pattern, so mayLoad is set explicitly.
  let mayLoad = 1 in
  def rmi : AVX512AIi8<0x03, MRMSrcMem,
                       (outs _.RC:$dst),
                       (ins _.RC:$src1, _.MemOp:$src2, i8imm:$src3),
                       !strconcat("valign"##_.Suffix,
                                  " \t{$src3, $src2, $src1, $dst|"
                                      "$dst, $src1, $src2, $src3}"),
                       []>,
            EVEX_4V;
}
// 512-bit instantiations: dword elements (v16i32_info) and qword elements
// (v8i64_info, which additionally sets VEX_W).
defm VALIGND : avx512_valign<v16i32_info>,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VALIGNQ : avx512_valign<v8i64_info>,
               VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
Elena Demikhovskyac3e8eb2013-09-17 07:34:34 +00004801
// Helper leaves used to match a sign-extended vXi1 as vXiY: an X86vsrai of
// $src by (element width - 1), i.e. 31 for v16i32 and 63 for v8i64.
def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
def v8i1sextv8i64   : PatLeaf<(v8i64  (X86vsrai VR512:$src, (i8 63)))>;
4805
// VPABSD/VPABSQ: register, memory and broadcast-memory forms, each in plain,
// {mask} and {mask} {z} variants. All pattern lists are empty; selection is
// done by separate Pat definitions.
//   opc           - opcode byte.
//   OpcodeStr     - mnemonic.
//   OpVT          - vector value type (not referenced inside this multiclass).
//   KRC           - mask register class.
//   RC            - register class of the source and destination vectors.
//   x86memop      - full-vector memory operand type.
//   x86scalar_mop - scalar memory operand type for the broadcast forms.
//   BrdcstStr     - broadcast decoration appended to ${src}, e.g. "{1to16}".
// The memory forms use RC for $dst just like the register forms, rather than
// a hard-coded VR512, so that the multiclass honours its RC parameter.
multiclass avx512_vpabs<bits<8> opc, string OpcodeStr, ValueType OpVT,
                        RegisterClass KRC, RegisterClass RC,
                        X86MemOperand x86memop, X86MemOperand x86scalar_mop,
                        string BrdcstStr> {
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
            !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
            []>, EVEX;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
             !strconcat(OpcodeStr, " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
             []>, EVEX, EVEX_K;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
              !strconcat(OpcodeStr,
                         " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
              []>, EVEX, EVEX_KZ;
  let mayLoad = 1 in {
    def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
              (ins x86memop:$src),
              !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
              []>, EVEX;
    def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
               (ins KRC:$mask, x86memop:$src),
               !strconcat(OpcodeStr,
                          " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
               []>, EVEX, EVEX_K;
    def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                (ins KRC:$mask, x86memop:$src),
                !strconcat(OpcodeStr,
                           " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
                []>, EVEX, EVEX_KZ;
    def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
               (ins x86scalar_mop:$src),
               !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                          ", $dst|$dst, ${src}", BrdcstStr, "}"),
               []>, EVEX, EVEX_B;
    def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                (ins KRC:$mask, x86scalar_mop:$src),
                !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                           ", $dst {${mask}}|$dst {${mask}}, ${src}", BrdcstStr, "}"),
                []>, EVEX, EVEX_B, EVEX_K;
    def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
                 (ins KRC:$mask, x86scalar_mop:$src),
                 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                            ", $dst {${mask}} {z}|$dst {${mask}} {z}, ${src}",
                            BrdcstStr, "}"),
                 []>, EVEX, EVEX_B, EVEX_KZ;
  }
}
4853
// 512-bit VPABSD / VPABSQ.
defm VPABSDZ : avx512_vpabs<0x1E, "vpabsd", v16i32, VK16WM, VR512,
                            i512mem, i32mem, "{1to16}">,
               EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPABSQZ : avx512_vpabs<0x1F, "vpabsq", v8i64, VK8WM, VR512,
                            i512mem, i64mem, "{1to8}">,
               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Match the expanded form xor(sext, add(x, sext)), where "sext" is the
// v16i1sextv16i32 / v8i1sextv8i64 helper leaf, and select VPABS directly.
def : Pat<(xor (bc_v16i32 (v16i1sextv16i32)),
               (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
          (VPABSDZrr VR512:$src)>;
def : Pat<(xor (bc_v8i64 (v8i1sextv8i64)),
               (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
          (VPABSQZrr VR512:$src)>;

// Masked intrinsics with an all-zeros pass-through and an all-ones mask
// select the unmasked register form.
def : Pat<(v16i32 (int_x86_avx512_mask_pabs_d_512 (v16i32 VR512:$src),
                                                  (v16i32 immAllZerosV),
                                                  (i16 -1))),
          (VPABSDZrr VR512:$src)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pabs_q_512
                     (v8i64 VR512:$src),
                     (bc_v8i64 (v16i32 immAllZerosV)),
                     (i8 -1))),
          (VPABSQZrr VR512:$src)>;
Elena Demikhovsky172a27c2014-01-08 10:54:22 +00004876
// Unary vector instructions in the VPCONFLICT/VPLZCNT style: register,
// memory and broadcast-memory sources, each in plain, {mask} {z} and {mask}
// variants. All pattern lists are empty.
//   opc           - opcode byte.
//   OpcodeStr     - mnemonic.
//   RC            - vector register class.
//   KRC           - mask register class.
//   x86memop      - full-vector memory operand type.
//   x86scalar_mop - scalar memory operand type for the broadcast forms.
//   BrdcstStr     - broadcast decoration appended to the source, e.g. "{1to16}".
// The {mask} forms (rrk/rmk/rmbk) take an extra $src1 operand that is tied to
// $dst. The `rr` AT&T string must not leave a space before the '|' variant
// separator, otherwise the printed instruction ends in trailing whitespace.
multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
                        RegisterClass RC, RegisterClass KRC,
                        X86MemOperand x86memop,
                        X86MemOperand x86scalar_mop, string BrdcstStr> {
  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src),
       !strconcat(OpcodeStr, " \t{$src, ${dst}|${dst}, $src}"),
       []>, EVEX;
  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins x86memop:$src),
       !strconcat(OpcodeStr, " \t{$src, ${dst}|${dst}, $src}"),
       []>, EVEX;
  def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins x86scalar_mop:$src),
       !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                  ", ${dst}|${dst}, ${src}", BrdcstStr, "}"),
       []>, EVEX, EVEX_B;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
       (ins KRC:$mask, RC:$src),
       !strconcat(OpcodeStr,
                  " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
       []>, EVEX, EVEX_KZ;
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, x86memop:$src),
       !strconcat(OpcodeStr,
                  " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
       []>, EVEX, EVEX_KZ;
  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins KRC:$mask, x86scalar_mop:$src),
       !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
                  ", ${dst} {${mask}} {z}|${dst} {${mask}} {z}, ${src}",
                  BrdcstStr, "}"),
       []>, EVEX, EVEX_KZ, EVEX_B;

  let Constraints = "$src1 = $dst" in {
  def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, KRC:$mask, RC:$src2),
       !strconcat(OpcodeStr,
                  " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
       []>, EVEX, EVEX_K;
  def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, KRC:$mask, x86memop:$src2),
       !strconcat(OpcodeStr,
                  " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
       []>, EVEX, EVEX_K;
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, KRC:$mask, x86scalar_mop:$src2),
       !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
                  ", ${dst} {${mask}}|${dst} {${mask}}, ${src2}", BrdcstStr, "}"),
       []>, EVEX, EVEX_K, EVEX_B;
  }
}
4929
// VPCONFLICTD / VPCONFLICTQ and the patterns that select them. Everything is
// guarded by HasCDI: the intrinsic patterns produce the CDI-only instructions
// defined here, so they must carry the same predicate as the instructions;
// otherwise they could be selected on a target without CDI.
let Predicates = [HasCDI] in {
defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM,
                                   i512mem, i32mem, "{1to16}">,
                                   EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM,
                                   i512mem, i64mem, "{1to8}">,
                                   EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Masked intrinsics -> rrk form. The intrinsic's second operand ($src1) is
// the instruction's first operand, which is tied to $dst; the GPR mask is
// copied into the write-mask register class.
def : Pat<(int_x86_avx512_mask_conflict_d_512 VR512:$src2, VR512:$src1,
                                              GR16:$mask),
          (VPCONFLICTDrrk VR512:$src1,
           (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;

def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1,
                                              GR8:$mask),
          (VPCONFLICTQrrk VR512:$src1,
           (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
} // Predicates = [HasCDI]
Elena Demikhovskycf0b9ba2014-04-09 12:37:50 +00004951
// VPLZCNTD / VPLZCNTQ and the patterns that select them. Everything is
// guarded by HasCDI: both the intrinsic patterns and the generic ctlz
// patterns produce the CDI-only instructions defined here, so they must carry
// the same predicate as the instructions.
let Predicates = [HasCDI] in {
defm VPLZCNTD : avx512_conflict<0x44, "vplzcntd", VR512, VK16WM,
                                   i512mem, i32mem, "{1to16}">,
                                   EVEX_V512, EVEX_CD8<32, CD8VF>;

defm VPLZCNTQ : avx512_conflict<0x44, "vplzcntq", VR512, VK8WM,
                                   i512mem, i64mem, "{1to8}">,
                                   EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// Masked intrinsics -> rrk form. The intrinsic's second operand ($src1) is
// the instruction's first operand, which is tied to $dst; the GPR mask is
// copied into the write-mask register class.
def : Pat<(int_x86_avx512_mask_lzcnt_d_512 VR512:$src2, VR512:$src1,
                                           GR16:$mask),
          (VPLZCNTDrrk VR512:$src1,
           (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;

def : Pat<(int_x86_avx512_mask_lzcnt_q_512 VR512:$src2, VR512:$src1,
                                           GR8:$mask),
          (VPLZCNTQrrk VR512:$src1,
           (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;

// Generic ctlz on 512-bit integer vectors, from memory or from a register.
def : Pat<(v16i32 (ctlz (memopv16i32 addr:$src))),
          (VPLZCNTDrm addr:$src)>;
def : Pat<(v16i32 (ctlz (v16i32 VR512:$src))),
          (VPLZCNTDrr VR512:$src)>;
def : Pat<(v8i64 (ctlz (memopv8i64 addr:$src))),
          (VPLZCNTQrm addr:$src)>;
def : Pat<(v8i64 (ctlz (v8i64 VR512:$src))),
          (VPLZCNTQrr VR512:$src)>;
} // Predicates = [HasCDI]
4982
// Storing an i1 constant is done as a byte store: 0 stores 0, and both 1 and
// -1 store 1.
def : Pat<(store (i1 0), addr:$dst),
          (MOV8mi addr:$dst, (i8 0))>;
def : Pat<(store (i1 1), addr:$dst),
          (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 -1), addr:$dst),
          (MOV8mi addr:$dst, (i8 1))>;

// Storing a VK1 mask register: copy it to VK16 and store with KMOVWmk.
def : Pat<(store VK1:$src, addr:$dst),
          (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK16))>;

// A truncating store whose in-memory type is i1.
def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
                           (truncstore node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i1;
}]>;

// Truncating an 8-bit register to i1 in memory is a plain byte store.
def : Pat<(truncstorei1 GR8:$src, addr:$dst),
          (MOV8mr addr:$dst, GR8:$src)>;